From 8fd71b5814a36e4bb7acbfd785d727d61795c282 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 1 Jan 2019 11:59:09 -0800 Subject: [PATCH 01/20] Benchmarks: New SqlBenchmark, add caching & vectorization to some others. - Introduce a new SqlBenchmark geared towards benchmarking a wide variety of SQL queries. Rename the old SqlBenchmark to SqlVsNativeBenchmark. - Add (optional) caching to SegmentGenerator to enable easier benchmarking of larger segments. - Add vectorization to FilteredAggregatorBenchmark and GroupByBenchmark. --- benchmarks/pom.xml | 12 ++ .../ExpressionAggregationBenchmark.java | 22 +- .../ExpressionSelectorBenchmark.java | 21 +- .../FilteredAggregatorBenchmark.java | 33 +-- .../datagen/BenchmarkColumnSchema.java | 22 ++ .../datagen/BenchmarkSchemaInfo.java | 11 + .../benchmark/datagen/BenchmarkSchemas.java | 2 +- .../benchmark/datagen/SegmentGenerator.java | 127 +++++++++--- .../benchmark/query/GroupByBenchmark.java | 49 +++-- .../druid/benchmark/query/SqlBenchmark.java | 196 ++++++++++-------- .../benchmark/query/SqlVsNativeBenchmark.java | 173 ++++++++++++++++ benchmarks/src/main/resources/log4j2.xml | 32 +++ 12 files changed, 533 insertions(+), 167 deletions(-) create mode 100644 benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java create mode 100644 benchmarks/src/main/resources/log4j2.xml diff --git a/benchmarks/pom.xml b/benchmarks/pom.xml index 9e2bd8d22e51..868073426be3 100644 --- a/benchmarks/pom.xml +++ b/benchmarks/pom.xml @@ -61,12 +61,24 @@ druid-sql ${project.parent.version} + + org.apache.druid + druid-core + ${project.parent.version} + test-jar + org.apache.druid druid-processing ${project.parent.version} test-jar + + org.apache.druid + druid-server + ${project.parent.version} + test-jar + org.apache.druid druid-sql diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionAggregationBenchmark.java 
b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionAggregationBenchmark.java index ed56d741f824..74790fbdda42 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionAggregationBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionAggregationBenchmark.java @@ -27,6 +27,7 @@ import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.js.JavaScriptConfig; import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; @@ -72,15 +73,17 @@ public class ExpressionAggregationBenchmark @Param({"1000000"}) private int rowsPerSegment; - private SegmentGenerator segmentGenerator; private QueryableIndex index; private JavaScriptAggregatorFactory javaScriptAggregatorFactory; private DoubleSumAggregatorFactory expressionAggregatorFactory; private ByteBuffer aggregationBuffer = ByteBuffer.allocate(Double.BYTES); + private Closer closer; @Setup(Level.Trial) public void setup() { + this.closer = Closer.create(); + final BenchmarkSchemaInfo schemaInfo = new BenchmarkSchemaInfo( ImmutableList.of( BenchmarkColumnSchema.makeNormal("x", ValueType.FLOAT, false, 1, 0d, 0d, 10000d, false), @@ -98,8 +101,10 @@ public void setup() .shardSpec(new LinearShardSpec(0)) .build(); - this.segmentGenerator = new SegmentGenerator(); - this.index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment); + final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); + this.index = closer.register( + segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment) + ); this.javaScriptAggregatorFactory = new JavaScriptAggregatorFactory( "name", ImmutableList.of("x", "y"), @@ -119,15 +124,7 @@ public void setup() 
@TearDown(Level.Trial) public void tearDown() throws Exception { - if (index != null) { - index.close(); - index = null; - } - - if (segmentGenerator != null) { - segmentGenerator.close(); - segmentGenerator = null; - } + closer.close(); } @Benchmark @@ -240,6 +237,7 @@ public double getDouble(ByteBuffer buf, int position) { throw new UnsupportedOperationException(); } + @Override public void close() { diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java index 9953c0e3a40c..2f92f0b85ba4 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java @@ -26,6 +26,7 @@ import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.ExtractionDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; @@ -72,12 +73,14 @@ public class ExpressionSelectorBenchmark @Param({"1000000"}) private int rowsPerSegment; - private SegmentGenerator segmentGenerator; private QueryableIndex index; + private Closer closer; @Setup(Level.Trial) public void setup() { + this.closer = Closer.create(); + final BenchmarkSchemaInfo schemaInfo = new BenchmarkSchemaInfo( ImmutableList.of( BenchmarkColumnSchema.makeZipf( @@ -113,22 +116,16 @@ public void setup() .shardSpec(new LinearShardSpec(0)) .build(); - this.segmentGenerator = new SegmentGenerator(); - this.index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.HOUR, rowsPerSegment); + final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); + this.index = 
closer.register( + segmentGenerator.generate(dataSegment, schemaInfo, Granularities.HOUR, rowsPerSegment) + ); } @TearDown(Level.Trial) public void tearDown() throws Exception { - if (index != null) { - index.close(); - index = null; - } - - if (segmentGenerator != null) { - segmentGenerator.close(); - segmentGenerator = null; - } + closer.close(); } @Benchmark diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java index 821986fd0971..bdba6090b1b8 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java @@ -20,6 +20,7 @@ package org.apache.druid.benchmark; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; import com.google.common.io.Files; import org.apache.commons.io.FileUtils; import org.apache.druid.benchmark.datagen.BenchmarkDataGenerator; @@ -32,7 +33,6 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.js.JavaScriptConfig; import org.apache.druid.query.Druids; import org.apache.druid.query.FinalizeResultsQueryRunner; import org.apache.druid.query.Query; @@ -48,7 +48,6 @@ import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.InDimFilter; -import org.apache.druid.query.filter.JavaScriptDimFilter; import org.apache.druid.query.filter.OrDimFilter; import org.apache.druid.query.filter.RegexDimFilter; import org.apache.druid.query.filter.SearchQueryDimFilter; @@ -106,6 +105,9 @@ public class FilteredAggregatorBenchmark @Param({"basic"}) private String schema; + @Param({"false", "true"}) + private String vectorize; + 
private static final Logger log = new Logger(FilteredAggregatorBenchmark.class); private static final int RNG_SEED = 9999; private static final IndexMergerV9 INDEX_MERGER_V9; @@ -162,12 +164,6 @@ public void setup() throws IOException filter = new OrDimFilter( Arrays.asList( new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC), - new JavaScriptDimFilter( - "dimSequential", - "function(x) { return false }", - null, - JavaScriptConfig.getEnabledInstance() - ), new RegexDimFilter("dimSequential", "X", null), new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), new InDimFilter("dimSequential", Collections.singletonList("X"), null) @@ -235,7 +231,7 @@ private IncrementalIndex makeIncIndex(AggregatorFactory[] metrics) .buildOnheap(); } - private static List runQuery(QueryRunnerFactory factory, QueryRunner runner, Query query) + private static List runQuery(QueryRunnerFactory factory, QueryRunner runner, Query query, String vectorize) { QueryToolChest toolChest = factory.getToolchest(); QueryRunner theRunner = new FinalizeResultsQueryRunner<>( @@ -243,7 +239,10 @@ private static List runQuery(QueryRunnerFactory factory, QueryRunner runn toolChest ); - Sequence queryResult = theRunner.run(QueryPlus.wrap(query), new HashMap<>()); + final QueryPlus queryToRun = QueryPlus.wrap( + query.withOverriddenContext(ImmutableMap.of("vectorize", vectorize)) + ); + Sequence queryResult = theRunner.run(queryToRun, new HashMap<>()); return queryResult.toList(); } @@ -270,7 +269,12 @@ public void querySingleIncrementalIndex(Blackhole blackhole) new IncrementalIndexSegment(incIndex, "incIndex") ); - List> results = FilteredAggregatorBenchmark.runQuery(factory, runner, query); + List> results = FilteredAggregatorBenchmark.runQuery( + factory, + runner, + query, + vectorize + ); for (Result result : results) { blackhole.consume(result); } @@ -287,7 +291,12 @@ public void querySingleQueryableIndex(Blackhole 
blackhole) new QueryableIndexSegment("qIndex", qIndex) ); - List> results = FilteredAggregatorBenchmark.runQuery(factory, runner, query); + List> results = FilteredAggregatorBenchmark.runQuery( + factory, + runner, + query, + vectorize + ); for (Result result : results) { blackhole.consume(result); } diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkColumnSchema.java b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkColumnSchema.java index 0cf6d7c473fa..549140e62bd4 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkColumnSchema.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkColumnSchema.java @@ -424,4 +424,26 @@ public static BenchmarkColumnSchema makeEnumerated( schema.enumeratedProbabilities = enumeratedProbabilities; return schema; } + + @Override + public String toString() + { + return "BenchmarkColumnSchema{" + + "distributionType=" + distributionType + + ", name='" + name + '\'' + + ", type=" + type + + ", isMetric=" + isMetric + + ", rowSize=" + rowSize + + ", nullProbability=" + nullProbability + + ", enumeratedValues=" + enumeratedValues + + ", enumeratedProbabilities=" + enumeratedProbabilities + + ", startInt=" + startInt + + ", endInt=" + endInt + + ", startDouble=" + startDouble + + ", endDouble=" + endDouble + + ", zipfExponent=" + zipfExponent + + ", mean=" + mean + + ", standardDeviation=" + standardDeviation + + '}'; + } } diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemaInfo.java b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemaInfo.java index 1f43ce22d333..7a2720931e8d 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemaInfo.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemaInfo.java @@ -68,4 +68,15 @@ public boolean isWithRollup() { return withRollup; } + + @Override + public String toString() 
+ { + return "BenchmarkSchemaInfo{" + + "columnSchemas=" + columnSchemas + + ", aggs=" + aggs + + ", dataInterval=" + dataInterval + + ", withRollup=" + withRollup + + '}'; + } } diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemas.java b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemas.java index cda9f47c4e2e..d6bbabeb6b53 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemas.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemas.java @@ -85,7 +85,7 @@ public class BenchmarkSchemas basicSchemaIngestAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "metFloatZipf")); basicSchemaIngestAggs.add(new HyperUniquesAggregatorFactory("hyper", "dimHyperUnique")); - Interval basicSchemaDataInterval = Intervals.utc(0, 1000000); + Interval basicSchemaDataInterval = Intervals.of("2000-01-01/P1D"); BenchmarkSchemaInfo basicSchema = new BenchmarkSchemaInfo( basicSchemaColumns, diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java index 46904205d257..911d43d18bdd 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java @@ -21,7 +21,7 @@ import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; +import com.google.common.hash.Hashing; import com.google.common.io.Files; import org.apache.commons.io.FileUtils; import org.apache.druid.data.input.InputRow; @@ -33,6 +33,7 @@ import org.apache.druid.data.input.impl.StringDimensionSchema; import org.apache.druid.hll.HyperLogLogHash; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; import 
org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.aggregation.AggregatorFactory; @@ -42,17 +43,19 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexIndexableAdapter; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.serde.ComplexMetrics; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.timeline.DataSegment; +import javax.annotation.Nullable; import java.io.Closeable; import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; public class SegmentGenerator implements Closeable @@ -60,15 +63,43 @@ public class SegmentGenerator implements Closeable private static final Logger log = new Logger(SegmentGenerator.class); private static final int MAX_ROWS_IN_MEMORY = 200000; - private static final int STARTING_SEED = 9999; // Consistent seed for reproducibility - private final File tempDir; - private final AtomicInteger seed; + // Setup can take a long time due to the need to generate large segments. + // Allow users to specify a cache directory via a JVM property or an environment variable. 
+ private static final String CACHE_DIR_PROPERTY = "druid.benchmark.cacheDir"; + private static final String CACHE_DIR_ENV_VAR = "DRUID_BENCHMARK_CACHE_DIR"; + + private final File cacheDir; + private final boolean cleanupCacheDir; public SegmentGenerator() { - this.tempDir = Files.createTempDir(); - this.seed = new AtomicInteger(STARTING_SEED); + this(null); + } + + public SegmentGenerator(@Nullable final File cacheDir) + { + if (cacheDir != null) { + this.cacheDir = cacheDir; + this.cleanupCacheDir = false; + } else { + final String userConfiguredCacheDir = System.getProperty(CACHE_DIR_PROPERTY, System.getenv(CACHE_DIR_ENV_VAR)); + if (userConfiguredCacheDir != null) { + this.cacheDir = new File(userConfiguredCacheDir); + this.cleanupCacheDir = false; + } else { + log.warn("No cache directory provided; benchmark data caching is disabled. " + + "Set the 'druid.benchmark.cacheDir' property or 'DRUID_BENCHMARK_CACHE_DIR' environment variable " + + "to use caching."); + this.cacheDir = Files.createTempDir(); + this.cleanupCacheDir = true; + } + } + } + + public File getCacheDir() + { + return cacheDir; } public QueryableIndex generate( @@ -83,9 +114,32 @@ public QueryableIndex generate( ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault())); } + final String dataHash = Hashing.sha256() + .newHasher() + .putString(dataSegment.getIdentifier(), StandardCharsets.UTF_8) + .putString(schemaInfo.toString(), StandardCharsets.UTF_8) + .putString(granularity.toString(), StandardCharsets.UTF_8) + .putInt(numRows) + .hash() + .toString(); + + final File outDir = new File(getSegmentDir(dataSegment.getIdentifier(), dataHash), "merged"); + + if (outDir.exists()) { + try { + log.info("Found segment with hash[%s] cached in directory[%s].", dataHash, outDir); + return TestHelper.getTestIndexIO().loadIndex(outDir); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + log.info("Writing segment with hash[%s] to 
directory[%s].", dataHash, outDir); + final BenchmarkDataGenerator dataGenerator = new BenchmarkDataGenerator( schemaInfo.getColumnSchemas(), - seed.getAndIncrement(), + dataSegment.getIdentifier().hashCode(), /* Use segment identifier hashCode as seed */ schemaInfo.getDataInterval(), numRows ); @@ -127,61 +181,69 @@ public QueryableIndex generate( rows.add(row); if ((i + 1) % 20000 == 0) { - log.info("%,d/%,d rows generated.", i + 1, numRows); + log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment); } if (rows.size() % MAX_ROWS_IN_MEMORY == 0) { - indexes.add(makeIndex(dataSegment.getIdentifier(), indexes.size(), rows, indexSchema)); + indexes.add(makeIndex(dataSegment.getIdentifier(), dataHash, indexes.size(), rows, indexSchema)); rows.clear(); } } - log.info("%,d/%,d rows generated.", numRows, numRows); + log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment); if (rows.size() > 0) { - indexes.add(makeIndex(dataSegment.getIdentifier(), indexes.size(), rows, indexSchema)); + indexes.add(makeIndex(dataSegment.getIdentifier(), dataHash, indexes.size(), rows, indexSchema)); rows.clear(); } + final QueryableIndex retVal; + if (indexes.isEmpty()) { throw new ISE("No rows to index?"); - } else if (indexes.size() == 1) { - return Iterables.getOnlyElement(indexes); } else { try { - final QueryableIndex merged = TestHelper.getTestIndexIO().loadIndex( - TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance()).merge( - indexes.stream().map(QueryableIndexIndexableAdapter::new).collect(Collectors.toList()), - false, - schemaInfo.getAggs() - .stream() - .map(AggregatorFactory::getCombiningFactory) - .toArray(AggregatorFactory[]::new), - new File(tempDir, "merged"), - new IndexSpec() - ) - ); + retVal = TestHelper + .getTestIndexIO() + .loadIndex( + TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .merge( + 
indexes.stream().map(QueryableIndexIndexableAdapter::new).collect(Collectors.toList()), + false, + schemaInfo.getAggs() + .stream() + .map(AggregatorFactory::getCombiningFactory) + .toArray(AggregatorFactory[]::new), + outDir, + new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null) + ) + ); for (QueryableIndex index : indexes) { index.close(); } - - return merged; } catch (IOException e) { throw Throwables.propagate(e); } } + + log.info("Finished writing segment[%s] to[%s]", dataSegment, outDir); + + return retVal; } @Override public void close() throws IOException { - FileUtils.deleteDirectory(tempDir); + if (cleanupCacheDir) { + FileUtils.deleteDirectory(cacheDir); + } } private QueryableIndex makeIndex( final String identifier, + final String dataHash, final int indexNumber, final List rows, final IncrementalIndexSchema indexSchema @@ -190,9 +252,14 @@ private QueryableIndex makeIndex( return IndexBuilder .create() .schema(indexSchema) - .tmpDir(new File(new File(tempDir, identifier), String.valueOf(indexNumber))) + .tmpDir(new File(getSegmentDir(identifier, dataHash), String.valueOf(indexNumber))) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .rows(rows) .buildMMappedIndex(); } + + private File getSegmentDir(final String identifier, final String dataHash) + { + return new File(cacheDir, StringUtils.format("%s_%s", identifier, dataHash)); + } } diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/query/GroupByBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/query/GroupByBenchmark.java index 65365f850f65..46f943c93cd6 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/query/GroupByBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/query/GroupByBenchmark.java @@ -53,6 +53,7 @@ import org.apache.druid.query.QueryRunnerFactory; import org.apache.druid.query.QueryToolChest; import org.apache.druid.query.aggregation.AggregatorFactory; +import 
org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.DoubleMinAggregatorFactory; import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; @@ -135,6 +136,9 @@ public class GroupByBenchmark @Param({"all", "day"}) private String queryGranularity; + @Param({"force", "false"}) + private String vectorize; + private static final Logger log = new Logger(GroupByBenchmark.class); private static final int RNG_SEED = 9999; private static final IndexMergerV9 INDEX_MERGER_V9; @@ -179,10 +183,8 @@ private void setupQueries() { // basic.A QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval())); List queryAggs = new ArrayList<>(); - queryAggs.add(new LongSumAggregatorFactory( - "sumLongSequential", - "sumLongSequential" - )); + queryAggs.add(new CountAggregatorFactory("cnt")); + queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential")); GroupByQuery queryA = GroupByQuery .builder() .setDataSource("blah") @@ -190,6 +192,7 @@ private void setupQueries() .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null)) .setAggregatorSpecs(queryAggs) .setGranularity(Granularity.fromString(queryGranularity)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); basicQueries.put("A", queryA); @@ -210,6 +213,7 @@ private void setupQueries() .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null)) .setAggregatorSpecs(queryAggs) .setGranularity(Granularities.DAY) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); GroupByQuery queryA = GroupByQuery @@ -219,6 +223,7 @@ private void setupQueries() .setDimensions(new DefaultDimensionSpec("dimSequential", null)) .setAggregatorSpecs(queryAggs) .setGranularity(Granularities.WEEK) + 
.setContext(ImmutableMap.of("vectorize", vectorize)) .build(); basicQueries.put("nested", queryA); @@ -243,6 +248,7 @@ private void setupQueries() .setAggregatorSpecs(queryAggs) .setGranularity(Granularity.fromString(queryGranularity)) .setDimFilter(new BoundDimFilter("dimUniform", "0", "100", true, true, null, null, null)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); basicQueries.put("filter", queryA); @@ -266,6 +272,7 @@ private void setupQueries() .setDimensions(new DefaultDimensionSpec("dimZipf", null)) .setAggregatorSpecs(queryAggs) .setGranularity(Granularity.fromString(queryGranularity)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); basicQueries.put("singleZipf", queryA); @@ -293,6 +300,7 @@ private void setupQueries() queryAggs ) .setGranularity(Granularity.fromString(queryGranularity)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); simpleQueries.put("A", queryA); @@ -318,6 +326,7 @@ private void setupQueries() queryAggs ) .setGranularity(Granularity.fromString(queryGranularity)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); simpleLongQueries.put("A", queryA); @@ -341,6 +350,7 @@ private void setupQueries() .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.FLOAT)) .setAggregatorSpecs(queryAggs) .setGranularity(Granularity.fromString(queryGranularity)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); simpleFloatQueries.put("A", queryA); @@ -504,9 +514,9 @@ private IncrementalIndex makeIncIndex(boolean withRollup) return new IncrementalIndex.Builder() .setIndexSchema( new IncrementalIndexSchema.Builder() - .withMetrics(schemaInfo.getAggsArray()) - .withRollup(withRollup) - .build() + .withMetrics(schemaInfo.getAggsArray()) + .withRollup(withRollup) + .build() ) .setReportParseExceptions(false) .setConcurrentEventAdd(true) @@ -538,7 +548,7 @@ public void tearDown() } } - private static List runQuery(QueryRunnerFactory factory, 
QueryRunner runner, Query query) + private static Sequence runQuery(QueryRunnerFactory factory, QueryRunner runner, Query query) { QueryToolChest toolChest = factory.getToolchest(); QueryRunner theRunner = new FinalizeResultsQueryRunner<>( @@ -546,8 +556,7 @@ private static List runQuery(QueryRunnerFactory factory, QueryRunner runn toolChest ); - Sequence queryResult = theRunner.run(QueryPlus.wrap(query), new HashMap<>()); - return queryResult.toList(); + return theRunner.run(QueryPlus.wrap(query), new HashMap<>()); } @Benchmark @@ -561,11 +570,13 @@ public void querySingleIncrementalIndex(Blackhole blackhole) new IncrementalIndexSegment(anIncrementalIndex, "incIndex") ); - List results = GroupByBenchmark.runQuery(factory, runner, query); + final Sequence results = GroupByBenchmark.runQuery(factory, runner, query); + final Row lastRow = results.accumulate( + null, + (accumulated, in) -> in + ); - for (Row result : results) { - blackhole.consume(result); - } + blackhole.consume(lastRow); } @Benchmark @@ -579,11 +590,13 @@ public void querySingleQueryableIndex(Blackhole blackhole) new QueryableIndexSegment("qIndex", queryableIndexes.get(0)) ); - List results = GroupByBenchmark.runQuery(factory, runner, query); + final Sequence results = GroupByBenchmark.runQuery(factory, runner, query); + final Row lastRow = results.accumulate( + null, + (accumulated, in) -> in + ); - for (Row result : results) { - blackhole.consume(result); - } + blackhole.consume(lastRow); } @Benchmark diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java index 50e0fe6e476c..23ab98c1913d 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java @@ -19,26 +19,21 @@ package org.apache.druid.benchmark.query; -import com.google.common.io.Files; -import 
org.apache.commons.io.FileUtils; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import org.apache.druid.benchmark.datagen.BenchmarkSchemaInfo; import org.apache.druid.benchmark.datagen.BenchmarkSchemas; import org.apache.druid.benchmark.datagen.SegmentGenerator; -import org.apache.druid.data.input.Row; -import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.query.QueryPlus; import org.apache.druid.query.QueryRunnerFactoryConglomerate; -import org.apache.druid.query.aggregation.CountAggregatorFactory; -import org.apache.druid.query.dimension.DefaultDimensionSpec; -import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.server.security.AuthTestUtils; import org.apache.druid.server.security.NoopEscalator; +import org.apache.druid.sql.calcite.planner.Calcites; import org.apache.druid.sql.calcite.planner.DruidPlanner; import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerFactory; @@ -64,39 +59,111 @@ import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.infra.Blackhole; -import java.io.File; -import java.util.HashMap; +import javax.annotation.Nullable; import java.util.List; import java.util.concurrent.TimeUnit; /** - * Benchmark that compares the same groupBy query through the native query layer and through the SQL layer. + * Benchmark that tests various SQL queries. 
*/ @State(Scope.Benchmark) -@Fork(value = 1) +@Fork(value = 3) @Warmup(iterations = 15) -@Measurement(iterations = 30) +@Measurement(iterations = 25) public class SqlBenchmark { - @Param({"200000", "1000000"}) - private int rowsPerSegment; + static { + Calcites.setSystemProperties(); + } private static final Logger log = new Logger(SqlBenchmark.class); - private File tmpDir; - private SegmentGenerator segmentGenerator; - private SpecificSegmentsQuerySegmentWalker walker; + private static final List QUERIES = ImmutableList.of( + // 0, 1, 2, 3: Timeseries, unfiltered + "SELECT COUNT(*) FROM foo", + "SELECT COUNT(DISTINCT hyper) FROM foo", + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo", + "SELECT FLOOR(__time TO MINUTE), SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo GROUP BY 1", + + // 4: Timeseries, low selectivity filter (90% of rows match) + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo WHERE dimSequential NOT LIKE '%3'", + + // 5: Timeseries, high selectivity filter (0.1% of rows match) + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo WHERE dimSequential = '311'", + + // 6: Timeseries, mixing low selectivity index-capable filter (90% of rows match) + cursor filter + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo\n" + + "WHERE dimSequential NOT LIKE '%3' AND maxLongUniform > 10", + + // 7: Timeseries, low selectivity toplevel filter (90%), high selectivity filtered aggregator (0.1%) + "SELECT\n" + + " SUM(sumLongSequential) FILTER(WHERE dimSequential = '311'),\n" + + " SUM(sumFloatNormal)\n" + + "FROM foo\n" + + "WHERE dimSequential NOT LIKE '%3'", + + // 8: Timeseries, no toplevel filter, various filtered aggregators with clauses repeated. 
+ "SELECT\n" + + " SUM(sumLongSequential) FILTER(WHERE dimSequential = '311'),\n" + + " SUM(sumLongSequential) FILTER(WHERE dimSequential <> '311'),\n" + + " SUM(sumLongSequential) FILTER(WHERE dimSequential LIKE '%3'),\n" + + " SUM(sumLongSequential) FILTER(WHERE dimSequential NOT LIKE '%3'),\n" + + " SUM(sumLongSequential),\n" + + " SUM(sumFloatNormal) FILTER(WHERE dimSequential = '311'),\n" + + " SUM(sumFloatNormal) FILTER(WHERE dimSequential <> '311'),\n" + + " SUM(sumFloatNormal) FILTER(WHERE dimSequential LIKE '%3'),\n" + + " SUM(sumFloatNormal) FILTER(WHERE dimSequential NOT LIKE '%3'),\n" + + " SUM(sumFloatNormal),\n" + + " COUNT(*) FILTER(WHERE dimSequential = '311'),\n" + + " COUNT(*) FILTER(WHERE dimSequential <> '311'),\n" + + " COUNT(*) FILTER(WHERE dimSequential LIKE '%3'),\n" + + " COUNT(*) FILTER(WHERE dimSequential NOT LIKE '%3'),\n" + + " COUNT(*)\n" + + "FROM foo", + + // 9: Timeseries, toplevel time filter, time-comparison filtered aggregators + "SELECT\n" + + " SUM(sumLongSequential)\n" + + " FILTER(WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-01 12:00:00'),\n" + + " SUM(sumLongSequential)\n" + + " FILTER(WHERE __time >= TIMESTAMP '2000-01-01 12:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00')\n" + + "FROM foo\n" + + "WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00'", + + // 10, 11: GroupBy two strings, unfiltered, unordered + "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo GROUP BY 1, 2", + "SELECT dimSequential, dimZipf, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1, 2", + + // 12, 13, 14: GroupBy one string, unfiltered, various aggregator configurations + "SELECT dimZipf FROM foo GROUP BY 1", + "SELECT dimZipf, COUNT(*) FROM foo GROUP BY 1 ORDER BY COUNT(*) DESC", + "SELECT dimZipf, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1 ORDER BY COUNT(*) DESC", + + // 15, 16: GroupBy long, unfiltered, unordered; with and without 
aggregators + "SELECT maxLongUniform FROM foo GROUP BY 1", + "SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1", + + // 17, 18: GroupBy long, filter by long, unordered; with and without aggregators + "SELECT maxLongUniform FROM foo WHERE maxLongUniform > 10 GROUP BY 1", + "SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM foo WHERE maxLongUniform > 10 GROUP BY 1" + ); + + @Param({"5000000"}) + private int rowsPerSegment; + + @Param({"false", "force"}) + private String vectorize; + + @Param({"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18"}) + private String query; + + @Nullable private PlannerFactory plannerFactory; - private GroupByQuery groupByQuery; - private String sqlQuery; - private Closer resourceCloser; + private Closer closer = Closer.create(); @Setup(Level.Trial) public void setup() { - tmpDir = Files.createTempDir(); - log.info("Starting benchmark setup using tmpDir[%s], rows[%,d].", tmpDir, rowsPerSegment); - final BenchmarkSchemaInfo schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get("basic"); final DataSegment dataSegment = DataSegment.builder() @@ -106,90 +173,55 @@ public void setup() .shardSpec(new LinearShardSpec(0)) .build(); - this.segmentGenerator = new SegmentGenerator(); + final PlannerConfig plannerConfig = new PlannerConfig(); + final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); + log.info("Starting benchmark setup using cacheDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment); final QueryableIndex index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment); - final Pair conglomerateCloserPair = CalciteTests - .createQueryRunnerFactoryConglomerate(); - final QueryRunnerFactoryConglomerate conglomerate = conglomerateCloserPair.lhs; - final PlannerConfig plannerConfig = new PlannerConfig(); - final DruidSchema druidSchema = CalciteTests.createMockSchema(conglomerate, walker, 
plannerConfig); + + final Pair conglomerate = CalciteTests.createQueryRunnerFactoryConglomerate(); + closer.register(conglomerate.rhs); + + final SpecificSegmentsQuerySegmentWalker walker = new SpecificSegmentsQuerySegmentWalker(conglomerate.lhs).add( + dataSegment, + index + ); + closer.register(walker); + + final DruidSchema druidSchema = CalciteTests.createMockSchema(conglomerate.lhs, walker, plannerConfig); final SystemSchema systemSchema = CalciteTests.createMockSystemSchema(druidSchema, walker); - this.walker = new SpecificSegmentsQuerySegmentWalker(conglomerate).add(dataSegment, index); + plannerFactory = new PlannerFactory( druidSchema, systemSchema, - CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate), + CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate.lhs), CalciteTests.createOperatorTable(), CalciteTests.createExprMacroTable(), plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER, CalciteTests.getJsonMapper() ); - groupByQuery = GroupByQuery - .builder() - .setDataSource("foo") - .setInterval(Intervals.ETERNITY) - .setDimensions(new DefaultDimensionSpec("dimZipf", "d0"), new DefaultDimensionSpec("dimSequential", "d1")) - .setAggregatorSpecs(new CountAggregatorFactory("c")) - .setGranularity(Granularities.ALL) - .build(); - - sqlQuery = "SELECT\n" - + " dimZipf AS d0," - + " dimSequential AS d1,\n" - + " COUNT(*) AS c\n" - + "FROM druid.foo\n" - + "GROUP BY dimZipf, dimSequential"; } @TearDown(Level.Trial) public void tearDown() throws Exception { - if (walker != null) { - walker.close(); - walker = null; - } - - if (segmentGenerator != null) { - segmentGenerator.close(); - segmentGenerator = null; - } - - if (resourceCloser != null) { - resourceCloser.close(); - } - - if (tmpDir != null) { - FileUtils.deleteDirectory(tmpDir); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void queryNative(Blackhole blackhole) - { - final Sequence resultSequence = 
QueryPlus.wrap(groupByQuery).run(walker, new HashMap<>()); - final List resultList = resultSequence.toList(); - - for (Row row : resultList) { - blackhole.consume(row); - } + closer.close(); } @Benchmark @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void queryPlanner(Blackhole blackhole) throws Exception + public void querySql(Blackhole blackhole) throws Exception { - try (final DruidPlanner planner = plannerFactory.createPlanner(null)) { + try (final DruidPlanner planner = plannerFactory.createPlanner(ImmutableMap.of("vectorize", vectorize))) { final PlannerResult plannerResult = planner.plan( - sqlQuery, + QUERIES.get(Integer.parseInt(query)), NoopEscalator.getInstance().createEscalatedAuthenticationResult() ); - final List results = plannerResult.run().toList(); - blackhole.consume(results); + final Sequence resultSequence = plannerResult.run(); + final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in); + blackhole.consume(lastRow); } } } diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java new file mode 100644 index 000000000000..61895c2069d8 --- /dev/null +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.benchmark.query; + +import org.apache.druid.benchmark.datagen.BenchmarkSchemaInfo; +import org.apache.druid.benchmark.datagen.BenchmarkSchemas; +import org.apache.druid.benchmark.datagen.SegmentGenerator; +import org.apache.druid.data.input.Row; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.query.QueryPlus; +import org.apache.druid.query.QueryRunnerFactoryConglomerate; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.server.security.AuthTestUtils; +import org.apache.druid.server.security.NoopEscalator; +import org.apache.druid.sql.calcite.planner.DruidPlanner; +import org.apache.druid.sql.calcite.planner.PlannerConfig; +import org.apache.druid.sql.calcite.planner.PlannerFactory; +import org.apache.druid.sql.calcite.planner.PlannerResult; +import org.apache.druid.sql.calcite.schema.DruidSchema; +import org.apache.druid.sql.calcite.schema.SystemSchema; +import org.apache.druid.sql.calcite.util.CalciteTests; +import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker; +import 
org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.HashMap; +import java.util.concurrent.TimeUnit; + +/** + * Benchmark that compares the same groupBy query through the native query layer and through the SQL layer. + */ +@State(Scope.Benchmark) +@Fork(value = 1) +@Warmup(iterations = 15) +@Measurement(iterations = 30) +public class SqlVsNativeBenchmark +{ + @Param({"200000", "1000000"}) + private int rowsPerSegment; + + private static final Logger log = new Logger(SqlVsNativeBenchmark.class); + + private SpecificSegmentsQuerySegmentWalker walker; + private PlannerFactory plannerFactory; + private GroupByQuery groupByQuery; + private String sqlQuery; + private Closer closer; + + @Setup(Level.Trial) + public void setup() + { + this.closer = Closer.create(); + + final BenchmarkSchemaInfo schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get("basic"); + + final DataSegment dataSegment = DataSegment.builder() + .dataSource("foo") + .interval(schemaInfo.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .build(); + + final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); + log.info("Starting benchmark setup using tmpDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment); + + final QueryableIndex index = 
segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment); + final Pair conglomerateCloserPair = CalciteTests + .createQueryRunnerFactoryConglomerate(); + final QueryRunnerFactoryConglomerate conglomerate = conglomerateCloserPair.lhs; + final PlannerConfig plannerConfig = new PlannerConfig(); + + this.walker = closer.register(new SpecificSegmentsQuerySegmentWalker(conglomerate).add(dataSegment, index)); + final DruidSchema druidSchema = CalciteTests.createMockSchema(conglomerate, walker, plannerConfig); + final SystemSchema systemSchema = CalciteTests.createMockSystemSchema(druidSchema, walker); + + plannerFactory = new PlannerFactory( + druidSchema, + systemSchema, + CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate), + CalciteTests.createOperatorTable(), + CalciteTests.createExprMacroTable(), + plannerConfig, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, + CalciteTests.getJsonMapper() + ); + groupByQuery = GroupByQuery + .builder() + .setDataSource("foo") + .setInterval(Intervals.ETERNITY) + .setDimensions(new DefaultDimensionSpec("dimZipf", "d0"), new DefaultDimensionSpec("dimSequential", "d1")) + .setAggregatorSpecs(new CountAggregatorFactory("c")) + .setGranularity(Granularities.ALL) + .build(); + + sqlQuery = "SELECT\n" + + " dimZipf AS d0," + + " dimSequential AS d1,\n" + + " COUNT(*) AS c\n" + + "FROM druid.foo\n" + + "GROUP BY dimZipf, dimSequential"; + } + + @TearDown(Level.Trial) + public void tearDown() throws Exception + { + closer.close(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void queryNative(Blackhole blackhole) + { + final Sequence resultSequence = QueryPlus.wrap(groupByQuery).run(walker, new HashMap<>()); + final Row lastRow = resultSequence.accumulate(null, (accumulated, in) -> in); + blackhole.consume(lastRow); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void queryPlanner(Blackhole 
blackhole) throws Exception + { + try (final DruidPlanner planner = plannerFactory.createPlanner(null)) { + final PlannerResult plannerResult = planner.plan( + sqlQuery, + NoopEscalator.getInstance().createEscalatedAuthenticationResult() + ); + final Sequence resultSequence = plannerResult.run(); + final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in); + blackhole.consume(lastRow); + } + } +} diff --git a/benchmarks/src/main/resources/log4j2.xml b/benchmarks/src/main/resources/log4j2.xml new file mode 100644 index 000000000000..dbce142e7f60 --- /dev/null +++ b/benchmarks/src/main/resources/log4j2.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + From d3731fdc5ca9e948823a97d2b2e4fe01bd887c52 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 1 Jan 2019 22:50:22 -0800 Subject: [PATCH 02/20] Query vectorization. This patch includes vectorized timeseries and groupBy engines, as well as some analogs of your favorite Druid classes: - VectorCursor is like Cursor. (It comes from StorageAdapter.makeVectorCursor.) - VectorColumnSelectorFactory is like ColumnSelectorFactory, and it has methods to create analogs of the column selectors you know and love. - VectorOffset and ReadableVectorOffset are like Offset and ReadableOffset. - VectorAggregator is like BufferAggregator. - VectorValueMatcher is like ValueMatcher. There are some noticeable differences between vectorized and regular execution: - Unlike regular cursors, vector cursors do not understand time granularity. They expect query engines to handle this on their own, which a new VectorCursorGranularizer class helps with. This is to avoid too much batch-splitting and to respect the fact that vector selectors are somewhat more heavyweight than regular selectors. - Unlike FilteredOffset, FilteredVectorOffset does not leverage indexes for filters that might partially support them (like an OR of one filter that supports indexing and another that doesn't). 
I'm not sure that this behavior is desirable anyway (it is potentially too eager) but, at any rate, it'd be better to harmonize it between the two classes. Ideally, both should adopt a shared approach that is smarter than what either of them does right now. - When vector cursors are created by QueryableIndexCursorSequenceBuilder, they use a morphing binary-then-linear search to find their start and end rows, rather than a purely linear search. Limitations in this patch are: - Only timeseries and groupBy have vectorized engines. - GroupBy doesn't handle multi-value dimensions yet. - Vector cursors cannot handle virtual columns or descending order. - Only some filters have vectorized matchers: "selector", "bound", "in", "like", "regex", "search", "and", "or", and "not". - Only some aggregators have vectorized implementations: "count", "doubleSum", "floatSum", "longSum", "hyperUnique", and "filtered". - Dimension specs other than "default" don't work yet (no extraction functions or filtered dimension specs). Currently, the testing strategy includes adding vectorization-enabled tests to TimeseriesQueryRunnerTest, GroupByQueryRunnerTest, GroupByTimeseriesQueryRunnerTest, CalciteQueryTest, and all of the filtering tests that extend BaseFilterTest. In all of those classes, there are some test cases that don't support vectorization. They are marked by special function calls like "cannotVectorize" or "skipVectorize" that tell the test harness to either expect an exception or to skip the test case. Testing should be expanded in the future -- a project in and of itself. Related to #3011.
--- .../druid/benchmark/query/SqlBenchmark.java | 4 +- .../util/common/guava/FilteredSequence.java | 4 +- .../common/guava/FilteringAccumulator.java | 4 +- .../guava/FilteringYieldingAccumulator.java | 4 +- .../java/util/common/guava/Sequence.java | 6 + .../java/util/common/guava/Sequences.java | 2 +- docs/content/querying/query-context.md | 28 + ...UsingSketchMergeAggregatorFactoryTest.java | 15 +- .../query/filter/BloomDimFilterTest.java | 15 +- .../variance/VarianceGroupByQueryTest.java | 18 +- .../variance/VarianceTimeseriesQueryTest.java | 15 +- .../bitmap/BatchIteratorAdapter.java | 51 + .../collections/bitmap/ImmutableBitmap.java | 9 + .../bitmap/WrappedImmutableRoaringBitmap.java | 7 + .../druid/query/DefaultQueryMetrics.java | 6 + .../org/apache/druid/query/QueryContexts.java | 60 + .../org/apache/druid/query/QueryMetrics.java | 6 + .../apache/druid/query/QueryRunnerHelper.java | 24 +- .../query/aggregation/AggregatorAdapter.java | 43 + .../query/aggregation/AggregatorAdapters.java | 313 +++++ .../query/aggregation/AggregatorFactory.java | 18 + .../query/aggregation/BufferAggregator.java | 4 + .../aggregation/CountAggregatorFactory.java | 13 + .../aggregation/CountVectorAggregator.java | 66 + .../DoubleSumAggregatorFactory.java | 23 + .../DoubleSumVectorAggregator.java | 83 ++ .../FilteredAggregatorFactory.java | 61 +- .../aggregation/FilteredVectorAggregator.java | 153 +++ .../FloatSumAggregatorFactory.java | 23 + .../aggregation/FloatSumVectorAggregator.java | 84 ++ .../aggregation/LongSumAggregatorFactory.java | 27 +- .../aggregation/LongSumVectorAggregator.java | 83 ++ .../aggregation/NoopVectorAggregator.java | 74 ++ .../NullableAggregatorFactory.java | 70 +- .../aggregation/NullableBufferAggregator.java | 26 +- .../aggregation/NullableVectorAggregator.java | 165 +++ .../SuppressedAggregatorFactory.java | 93 ++ .../query/aggregation/VectorAggregator.java | 86 ++ .../CardinalityBufferAggregator.java | 13 +- .../HyperUniquesAggregatorFactory.java | 22 + 
.../HyperUniquesBufferAggregator.java | 30 +- .../HyperUniquesVectorAggregator.java | 116 ++ .../query/dimension/DefaultDimensionSpec.java | 20 + .../druid/query/dimension/DimensionSpec.java | 22 + .../dimension/VectorColumnStrategizer.java | 41 + .../org/apache/druid/query/filter/Filter.java | 22 + .../druid/query/filter/IntervalDimFilter.java | 6 + ...ingValueMatcherColumnSelectorStrategy.java | 64 +- ...eMatcherColumnSelectorStrategyFactory.java | 2 +- .../filter/vector/BaseVectorValueMatcher.java | 44 + .../vector/BooleanVectorValueMatcher.java | 63 + .../vector/DoubleVectorValueMatcher.java | 105 ++ .../vector/FloatVectorValueMatcher.java | 105 ++ .../filter/vector/LongVectorValueMatcher.java | 105 ++ .../MultiValueStringVectorValueMatcher.java | 208 ++++ .../filter/vector/ReadableVectorMatch.java | 66 + .../SingleValueStringVectorValueMatcher.java | 192 +++ .../query/filter/vector/VectorMatch.java | 267 ++++ .../filter/vector/VectorValueMatcher.java | 39 + .../VectorValueMatcherColumnStrategizer.java | 74 ++ .../vector/VectorValueMatcherFactory.java | 31 + .../druid/query/groupby/GroupByQuery.java | 1 + .../query/groupby/GroupByQueryConfig.java | 16 +- .../AbstractBufferHashGrouper.java | 49 +- .../epinephelinae/AggregateResult.java | 48 +- .../epinephelinae/BufferArrayGrouper.java | 165 ++- .../epinephelinae/BufferHashGrouper.java | 166 ++- .../epinephelinae/ByteBufferHashTable.java | 12 +- .../epinephelinae/ByteBufferKeySerde.java | 91 ++ .../CloseableGrouperIterator.java | 5 +- .../epinephelinae/ConcurrentGrouper.java | 5 +- .../epinephelinae/GroupByQueryEngineV2.java | 175 ++- .../query/groupby/epinephelinae/Grouper.java | 8 +- .../query/groupby/epinephelinae/Groupers.java | 66 +- .../LimitedBufferHashGrouper.java | 40 +- .../epinephelinae/RowBasedGrouperHelper.java | 17 +- .../epinephelinae/SpillingGrouper.java | 21 +- .../groupby/epinephelinae/VectorGrouper.java | 77 ++ .../DoubleGroupByVectorColumnSelector.java | 71 ++ 
.../FloatGroupByVectorColumnSelector.java | 69 ++ .../vector/GroupByVectorColumnSelector.java | 37 + .../GroupByVectorColumnStrategizer.java | 70 ++ .../LongGroupByVectorColumnSelector.java | 70 ++ ...alueStringGroupByVectorColumnSelector.java | 69 ++ .../vector/VectorGroupByEngine.java | 428 +++++++ .../search/DefaultSearchQueryMetrics.java | 6 + .../select/DefaultSelectQueryMetrics.java | 6 + .../timeseries/TimeseriesQueryEngine.java | 265 +++- .../TimeseriesQueryQueryToolChest.java | 2 +- .../timeseries/TimeseriesResultBuilder.java | 7 +- .../vector/VectorCursorGranularizer.java | 171 +++ .../druid/segment/ColumnSelectorFactory.java | 2 + .../java/org/apache/druid/segment/Cursor.java | 4 +- .../apache/druid/segment/CursorFactory.java | 44 + .../segment/DimensionDictionarySelector.java | 102 ++ .../druid/segment/DimensionHandlerUtils.java | 78 ++ .../druid/segment/DimensionSelector.java | 78 +- .../QueryableIndexCursorSequenceBuilder.java | 618 ++++++++++ .../druid/segment/QueryableIndexSegment.java | 8 +- .../segment/QueryableIndexStorageAdapter.java | 604 +++------ .../apache/druid/segment/VirtualColumns.java | 5 + .../druid/segment/column/BaseColumn.java | 14 + .../column/ColumnCapabilitiesImpl.java | 7 + .../druid/segment/column/ComplexColumn.java | 52 + .../column/DictionaryEncodedColumn.java | 13 + .../druid/segment/column/DoublesColumn.java | 9 +- .../column/DoublesColumnWithNulls.java | 8 + .../druid/segment/column/FloatsColumn.java | 8 + .../segment/column/FloatsColumnWithNulls.java | 8 + .../druid/segment/column/LongsColumn.java | 8 + .../segment/column/LongsColumnWithNulls.java | 8 + .../column/StringDictionaryEncodedColumn.java | 163 ++- .../BlockLayoutColumnarDoublesSupplier.java | 61 +- .../BlockLayoutColumnarFloatsSupplier.java | 65 +- .../BlockLayoutColumnarLongsSupplier.java | 48 +- .../druid/segment/data/ColumnarDoubles.java | 76 +- .../druid/segment/data/ColumnarFloats.java | 75 +- .../druid/segment/data/ColumnarLongs.java | 75 +- 
.../druid/segment/data/ColumnarMultiInts.java | 10 + .../CompressedVSizeColumnarIntsSupplier.java | 64 +- ...pressedVSizeColumnarMultiIntsSupplier.java | 45 +- .../segment/data/CompressionFactory.java | 21 + .../EntireLayoutColumnarFloatsSupplier.java | 16 - .../EntireLayoutColumnarLongsSupplier.java | 16 - .../druid/segment/data/IndexedInts.java | 15 + .../segment/data/LongsLongEncodingReader.java | 13 + .../druid/segment/data/ReadableOffset.java | 2 + .../segment/data/VSizeColumnarMultiInts.java | 6 + .../druid/segment/filter/AndFilter.java | 50 +- .../druid/segment/filter/BoundFilter.java | 20 + .../filter/DimensionPredicateFilter.java | 20 + .../apache/druid/segment/filter/InFilter.java | 20 + .../druid/segment/filter/LikeFilter.java | 20 + .../druid/segment/filter/NotFilter.java | 33 + .../apache/druid/segment/filter/OrFilter.java | 125 +- .../druid/segment/filter/SelectorFilter.java | 20 + ...IncrementalIndexColumnSelectorFactory.java | 9 +- .../IncrementalIndexRowIterator.java | 7 +- .../IncrementalIndexStorageAdapter.java | 30 +- .../vector/BaseDoubleVectorValueSelector.java | 88 ++ .../vector/BaseFloatVectorValueSelector.java | 88 ++ .../vector/BaseLongVectorValueSelector.java | 88 ++ .../segment/vector/BitmapVectorOffset.java | 134 ++ .../vector/DimensionVectorSelector.java | 42 + .../segment/vector/FilteredVectorOffset.java | 175 +++ .../MultiValueDimensionVectorSelector.java | 37 + .../segment/vector/NilVectorSelector.java | 178 +++ .../segment/vector/NoFilterVectorOffset.java | 90 ++ ...yableIndexVectorColumnSelectorFactory.java | 194 +++ .../segment/vector/ReadableVectorOffset.java | 67 + .../SingleValueDimensionVectorSelector.java | 37 + .../vector/VectorColumnSelectorFactory.java | 69 ++ .../druid/segment/vector/VectorCursor.java | 75 ++ .../segment/vector/VectorObjectSelector.java | 33 + .../druid/segment/vector/VectorOffset.java | 43 + .../segment/vector/VectorSelectorUtils.java | 63 + .../segment/vector/VectorSizeInspector.java | 39 + 
.../segment/vector/VectorValueSelector.java | 55 + .../druid/query/QueryRunnerTestHelper.java | 9 + .../query/filter/vector/VectorMatchTest.java | 127 ++ .../query/groupby/GroupByQueryRunnerTest.java | 1081 +++++++++-------- .../GroupByTimeseriesQueryRunnerTest.java | 131 +- .../epinephelinae/BufferArrayGrouperTest.java | 15 +- .../epinephelinae/BufferHashGrouperTest.java | 26 +- .../LimitedBufferHashGrouperTest.java | 15 +- .../metadata/SegmentMetadataQueryTest.java | 24 +- .../SegmentMetadataUnionQueryTest.java | 2 +- .../spec/SpecificSegmentQueryRunnerTest.java | 2 +- .../timeseries/TimeseriesQueryRunnerTest.java | 173 ++- .../data/CompressedFloatsSerdeTest.java | 2 +- .../data/CompressedLongsSerdeTest.java | 2 +- .../druid/segment/filter/BaseFilterTest.java | 240 +++- .../druid/segment/filter/BoundFilterTest.java | 4 +- .../filter/ColumnComparisonFilterTest.java | 52 +- .../segment/filter/ExpressionFilterTest.java | 122 +- .../filter/FloatAndDoubleFilteringTest.java | 6 +- .../segment/filter/JavaScriptFilterTest.java | 118 +- .../segment/filter/LongFilteringTest.java | 4 +- .../segment/filter/SelectorFilterTest.java | 61 +- .../segment/filter/TimeFilteringTest.java | 6 +- .../sql/calcite/BaseCalciteQueryTest.java | 64 +- .../druid/sql/calcite/CalciteQueryTest.java | 197 ++- 182 files changed, 10774 insertions(+), 1783 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/collections/bitmap/BatchIteratorAdapter.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapter.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/CountVectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumVectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/FilteredVectorAggregator.java create 
mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/FloatSumVectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/LongSumVectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/NoopVectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/NullableVectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/VectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesVectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/dimension/VectorColumnStrategizer.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/BaseVectorValueMatcher.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/BooleanVectorValueMatcher.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/VectorMatch.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnStrategizer.java create mode 100644 
processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherFactory.java create mode 100644 processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferKeySerde.java create mode 100644 processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/VectorGrouper.java create mode 100644 processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DoubleGroupByVectorColumnSelector.java create mode 100644 processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/FloatGroupByVectorColumnSelector.java create mode 100644 processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnSelector.java create mode 100644 processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnStrategizer.java create mode 100644 processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/LongGroupByVectorColumnSelector.java create mode 100644 processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/SingleValueStringGroupByVectorColumnSelector.java create mode 100644 processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java create mode 100644 processing/src/main/java/org/apache/druid/query/vector/VectorCursorGranularizer.java create mode 100644 processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/BaseDoubleVectorValueSelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/BaseFloatVectorValueSelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/BaseLongVectorValueSelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/BitmapVectorOffset.java create 
mode 100644 processing/src/main/java/org/apache/druid/segment/vector/DimensionVectorSelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/FilteredVectorOffset.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/MultiValueDimensionVectorSelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/NoFilterVectorOffset.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/ReadableVectorOffset.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/SingleValueDimensionVectorSelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/VectorCursor.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/VectorObjectSelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/VectorOffset.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/VectorSelectorUtils.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/VectorSizeInspector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/VectorValueSelector.java create mode 100644 processing/src/test/java/org/apache/druid/query/filter/vector/VectorMatchTest.java diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java index 23ab98c1913d..47722abb3b5c 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java +++ 
b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java @@ -67,7 +67,7 @@ * Benchmark that tests various SQL queries. */ @State(Scope.Benchmark) -@Fork(value = 3) +@Fork(value = 1) @Warmup(iterations = 15) @Measurement(iterations = 25) public class SqlBenchmark @@ -154,7 +154,7 @@ public class SqlBenchmark @Param({"false", "force"}) private String vectorize; - @Param({"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18"}) + @Param({"10", "15"}) private String query; @Nullable diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteredSequence.java b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteredSequence.java index d3851ae1a698..81210d76c267 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteredSequence.java +++ b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteredSequence.java @@ -28,11 +28,11 @@ public class FilteredSequence implements Sequence { private final Sequence baseSequence; - private final Predicate pred; + private final Predicate pred; public FilteredSequence( Sequence baseSequence, - Predicate pred + Predicate pred ) { this.baseSequence = baseSequence; diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringAccumulator.java b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringAccumulator.java index 87b5b274510a..36d36bcef040 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringAccumulator.java +++ b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringAccumulator.java @@ -25,10 +25,10 @@ */ public class FilteringAccumulator implements Accumulator { - private final Predicate pred; + private final Predicate pred; private final Accumulator accumulator; - public FilteringAccumulator(Predicate pred, Accumulator accumulator) + public FilteringAccumulator(Predicate pred, Accumulator accumulator) { this.pred = pred; 
this.accumulator = accumulator; diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringYieldingAccumulator.java b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringYieldingAccumulator.java index 5faee68f7cd0..014d54efc8ff 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringYieldingAccumulator.java +++ b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringYieldingAccumulator.java @@ -25,12 +25,12 @@ */ public class FilteringYieldingAccumulator extends YieldingAccumulator { - private final Predicate pred; + private final Predicate pred; private final YieldingAccumulator accumulator; private volatile boolean didSomething = false; - public FilteringYieldingAccumulator(Predicate pred, YieldingAccumulator accumulator) + public FilteringYieldingAccumulator(Predicate pred, YieldingAccumulator accumulator) { this.pred = pred; this.accumulator = accumulator; diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java b/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java index 69fe1909970d..f86ebaad24e0 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java +++ b/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java @@ -19,6 +19,7 @@ package org.apache.druid.java.util.common.guava; +import com.google.common.base.Predicate; import com.google.common.collect.Ordering; import java.io.Closeable; @@ -71,6 +72,11 @@ default Sequence map(Function mapper) return new MappedSequence<>(this, mapper); } + default Sequence filter(Predicate predicate) + { + return Sequences.filter(this, predicate); + } + default List toList() { return accumulate(new ArrayList<>(), Accumulators.list()); diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/Sequences.java b/core/src/main/java/org/apache/druid/java/util/common/guava/Sequences.java index 2bab97141d5d..df6fbe5cbd90 100644 --- 
a/core/src/main/java/org/apache/druid/java/util/common/guava/Sequences.java +++ b/core/src/main/java/org/apache/druid/java/util/common/guava/Sequences.java @@ -84,7 +84,7 @@ public static Sequence map(Sequence sequence, Function(sequence, fn::apply); } - public static Sequence filter(Sequence sequence, Predicate pred) + public static Sequence filter(Sequence sequence, Predicate pred) { return new FilteredSequence<>(sequence, pred); } diff --git a/docs/content/querying/query-context.md b/docs/content/querying/query-context.md index 81b39d8f9510..7b35ca882934 100644 --- a/docs/content/querying/query-context.md +++ b/docs/content/querying/query-context.md @@ -60,3 +60,31 @@ In addition, some query types offer context parameters specific to that query ty ### GroupBy queries See [GroupBy query context](groupbyquery.html#query-context). + +### Vectorizable queries + +The GroupBy and Timeseries query types can run in _vectorized_ mode, which speeds up query execution by processing +batches of rows at a time. Not all queries can be vectorized. In particular, vectorization currently has the following +requirements: + +- All query-level filters must either be able to run on bitmap indexes or must offer vectorized row-matchers. These +include "selector", "bound", "in", "like", "regex", "search", "and", "or", and "not". +- All filters in filtered aggregators must offer vectorized row-matchers. +- All aggregators must offer vectorized implementations. These include "count", "doubleSum", "floatSum", "longSum", +"hyperUnique", and "filtered". +- No virtual columns. +- For GroupBy: All dimension specs must be "default" (no extraction functions or filtered dimension specs). +- For GroupBy: No multi-value dimensions. +- For Timeseries: No "descending" order. +- Only immutable segments (not real-time). + +Other query types (like TopN, Scan, Select, and Search) ignore the "vectorize" parameter, and will execute without +vectorization. 
These query types will ignore the "vectorize" parameter even if it is set to `"force"`. + +Vectorization is an alpha-quality feature as of Druid #{DRUIDVERSION}. We heartily welcome any feedback and testing +from the community as we work to battle-test it. + +|property|default| description| +|--------|-------|------------| +|vectorize|`false`|Enables or disables vectorized query execution. Possible values are `false` (disabled), `true` (enabled if possible, disabled otherwise, on a per-segment basis), and `force` (enabled, and groupBy or timeseries queries that cannot be vectorized will fail). The `"force"` setting is meant to aid in testing, and is not generally useful in production (since real-time segments can never be processed with vectorized execution, any queries on real-time data will fail).| +|vectorSize|`512`|Sets the row batching size for a particular query.| diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java index f10ae2cac1bc..db7c377ae48c 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java @@ -20,12 +20,13 @@ package org.apache.druid.query.aggregation.datasketches.theta; import com.google.common.base.Suppliers; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.yahoo.sketches.theta.Sketches; import com.yahoo.sketches.theta.UpdateSketch; import org.apache.druid.data.input.MapBasedRow; -import 
org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.groupby.epinephelinae.BufferHashGrouper; import org.apache.druid.query.groupby.epinephelinae.Grouper; @@ -47,11 +48,13 @@ private static BufferHashGrouper makeGrouper( final BufferHashGrouper grouper = new BufferHashGrouper<>( Suppliers.ofInstance(ByteBuffer.allocate(bufferSize)), GrouperTestUtil.intKeySerde(), - columnSelectorFactory, - new AggregatorFactory[]{ - new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2), - new CountAggregatorFactory("count") - }, + AggregatorAdapters.factorizeBuffered( + columnSelectorFactory, + ImmutableList.of( + new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2), + new CountAggregatorFactory("count") + ) + ), Integer.MAX_VALUE, 0.75f, initialBuckets, diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java index fb8d31aa4a78..d01b4c536e03 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java @@ -246,21 +246,24 @@ public void testMissingColumnNotSpecifiedInDimensionList() throws IOException @Test public void testExpressionVirtualColumn() throws IOException { - assertFilterMatches( + assertFilterMatchesSkipVectorize( new BloomDimFilter("expr", bloomKFilter(1000, 1.1F), null), ImmutableList.of("0", "1", "2", "3", "4", "5") ); - assertFilterMatches(new BloomDimFilter("expr", bloomKFilter(1000, 1.2F), null), ImmutableList.of()); - assertFilterMatches( + assertFilterMatchesSkipVectorize(new BloomDimFilter("expr", bloomKFilter(1000, 1.2F), null), 
ImmutableList.of()); + assertFilterMatchesSkipVectorize( new BloomDimFilter("exprDouble", bloomKFilter(1000, 2.1D), null), ImmutableList.of("0", "1", "2", "3", "4", "5") ); - assertFilterMatches(new BloomDimFilter("exprDouble", bloomKFilter(1000, 2.2D), null), ImmutableList.of()); - assertFilterMatches( + assertFilterMatchesSkipVectorize( + new BloomDimFilter("exprDouble", bloomKFilter(1000, 2.2D), null), + ImmutableList.of() + ); + assertFilterMatchesSkipVectorize( new BloomDimFilter("exprLong", bloomKFilter(1000, 3L), null), ImmutableList.of("0", "1", "2", "3", "4", "5") ); - assertFilterMatches(new BloomDimFilter("exprLong", bloomKFilter(1000, 4L), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(new BloomDimFilter("exprLong", bloomKFilter(1000, 4L), null), ImmutableList.of()); } @Test diff --git a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceGroupByQueryTest.java b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceGroupByQueryTest.java index bd19c74d0f19..33a79c67c597 100644 --- a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceGroupByQueryTest.java +++ b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceGroupByQueryTest.java @@ -45,6 +45,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; /** */ @@ -57,9 +58,22 @@ public class VarianceGroupByQueryTest private final String testName; @Parameterized.Parameters(name = "{0}") - public static Collection constructorFeeder() + public static Collection constructorFeeder() { - return GroupByQueryRunnerTest.constructorFeeder(); + // Use GroupByQueryRunnerTest's constructorFeeder, but remove vectorized tests, since this aggregator + // can't vectorize yet. 
+ return GroupByQueryRunnerTest.constructorFeeder().stream() + .filter(constructor -> !((boolean) constructor[4]) /* !vectorize */) + .map( + constructor -> + new Object[]{ + constructor[0], + constructor[1], + constructor[2], + constructor[3] + } + ) + .collect(Collectors.toList()); } public VarianceGroupByQueryTest( diff --git a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceTimeseriesQueryTest.java b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceTimeseriesQueryTest.java index c5dcecea9bfb..7e0e059178ce 100644 --- a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceTimeseriesQueryTest.java +++ b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceTimeseriesQueryTest.java @@ -37,6 +37,8 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; @RunWith(Parameterized.class) public class VarianceTimeseriesQueryTest @@ -44,13 +46,22 @@ public class VarianceTimeseriesQueryTest @Parameterized.Parameters(name = "{0}:descending={1}") public static Iterable constructorFeeder() { - return TimeseriesQueryRunnerTest.constructorFeeder(); + // Use TimeseriesQueryRunnerTest's constructorFeeder, but remove vectorized tests, since this aggregator + // can't vectorize yet. 
+ return StreamSupport.stream(TimeseriesQueryRunnerTest.constructorFeeder().spliterator(), false) + .filter(constructor -> !((boolean) constructor[2]) /* !vectorize */) + .map(constructor -> new Object[]{constructor[0], constructor[1], constructor[3]}) + .collect(Collectors.toList()); } private final QueryRunner runner; private final boolean descending; - public VarianceTimeseriesQueryTest(QueryRunner runner, boolean descending, List aggregatorFactories) + public VarianceTimeseriesQueryTest( + QueryRunner runner, + boolean descending, + List aggregatorFactories + ) { this.runner = runner; this.descending = descending; diff --git a/processing/src/main/java/org/apache/druid/collections/bitmap/BatchIteratorAdapter.java b/processing/src/main/java/org/apache/druid/collections/bitmap/BatchIteratorAdapter.java new file mode 100644 index 000000000000..dd43263bda8e --- /dev/null +++ b/processing/src/main/java/org/apache/druid/collections/bitmap/BatchIteratorAdapter.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.collections.bitmap; + +import com.google.common.base.Preconditions; +import org.roaringbitmap.BatchIterator; +import org.roaringbitmap.IntIterator; + +public class BatchIteratorAdapter implements BatchIterator +{ + private final IntIterator iterator; + + public BatchIteratorAdapter(IntIterator iterator) + { + this.iterator = Preconditions.checkNotNull(iterator, "iterator"); + } + + @Override + public int nextBatch(int[] buffer) + { + int i; + for (i = 0; i < buffer.length && iterator.hasNext(); i++) { + buffer[i] = iterator.next(); + } + + return i; + } + + @Override + public boolean hasNext() + { + return iterator.hasNext(); + } +} diff --git a/processing/src/main/java/org/apache/druid/collections/bitmap/ImmutableBitmap.java b/processing/src/main/java/org/apache/druid/collections/bitmap/ImmutableBitmap.java index 6e8247cfb226..d02e2371d500 100644 --- a/processing/src/main/java/org/apache/druid/collections/bitmap/ImmutableBitmap.java +++ b/processing/src/main/java/org/apache/druid/collections/bitmap/ImmutableBitmap.java @@ -19,6 +19,7 @@ package org.apache.druid.collections.bitmap; +import org.roaringbitmap.BatchIterator; import org.roaringbitmap.IntIterator; /** @@ -32,6 +33,14 @@ public interface ImmutableBitmap */ IntIterator iterator(); + /** + * @return a batched iterator over the set bits of this bitmap + */ + default BatchIterator batchIterator() + { + return new BatchIteratorAdapter(iterator()); + } + /** * @return The number of bits set to true in this bitmap */ diff --git a/processing/src/main/java/org/apache/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java b/processing/src/main/java/org/apache/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java index 50946ca87ddc..f2c099c8d9f0 100644 --- a/processing/src/main/java/org/apache/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java +++ b/processing/src/main/java/org/apache/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java @@ -20,6 +20,7 @@ 
package org.apache.druid.collections.bitmap; import com.google.common.base.Throwables; +import org.roaringbitmap.BatchIterator; import org.roaringbitmap.IntIterator; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; @@ -79,6 +80,12 @@ public IntIterator iterator() return bitmap.getIntIterator(); } + @Override + public BatchIterator batchIterator() + { + return bitmap.getBatchIterator(); + } + @Override public int size() { diff --git a/processing/src/main/java/org/apache/druid/query/DefaultQueryMetrics.java b/processing/src/main/java/org/apache/druid/query/DefaultQueryMetrics.java index cc5ad5e0332d..b65883e94e15 100644 --- a/processing/src/main/java/org/apache/druid/query/DefaultQueryMetrics.java +++ b/processing/src/main/java/org/apache/druid/query/DefaultQueryMetrics.java @@ -189,6 +189,12 @@ public void identity(String identity) // Emit nothing by default. } + @Override + public void vectorized(final boolean vectorized) + { + // Emit nothing by default. + } + @Override public BitmapResultFactory makeBitmapResultFactory(BitmapFactory factory) { diff --git a/processing/src/main/java/org/apache/druid/query/QueryContexts.java b/processing/src/main/java/org/apache/druid/query/QueryContexts.java index a19221807092..c4cd74b7241b 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryContexts.java +++ b/processing/src/main/java/org/apache/druid/query/QueryContexts.java @@ -23,7 +23,10 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.guice.annotations.PublicApi; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Numbers; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.QueryableIndexStorageAdapter; import java.util.concurrent.TimeUnit; @@ -43,11 +46,43 @@ public class QueryContexts public static final boolean DEFAULT_USE_CACHE = true; public static final boolean DEFAULT_POPULATE_RESULTLEVEL_CACHE = true; 
public static final boolean DEFAULT_USE_RESULTLEVEL_CACHE = true; + public static final Vectorize DEFAULT_VECTORIZE = Vectorize.FALSE; public static final int DEFAULT_PRIORITY = 0; public static final int DEFAULT_UNCOVERED_INTERVALS_LIMIT = 0; public static final long DEFAULT_TIMEOUT_MILLIS = TimeUnit.MINUTES.toMillis(5); public static final long NO_TIMEOUT = 0; + public enum Vectorize + { + FALSE { + @Override + public boolean shouldVectorize(final boolean canVectorize) + { + return false; + } + }, + TRUE { + @Override + public boolean shouldVectorize(final boolean canVectorize) + { + return canVectorize; + } + }, + FORCE { + @Override + public boolean shouldVectorize(final boolean canVectorize) + { + if (!canVectorize) { + throw new ISE("Cannot vectorize!"); + } + + return true; + } + }; + + public abstract boolean shouldVectorize(boolean canVectorize); + } + public static boolean isBySegment(Query query) { return isBySegment(query, DEFAULT_BY_SEGMENT); @@ -113,6 +148,16 @@ public static boolean isSerializeDateTimeAsLongInner(Query query, boolean return parseBoolean(query, "serializeDateTimeAsLongInner", defaultValue); } + public static Vectorize getVectorize(Query query) + { + return parseEnum(query, "vectorize", Vectorize.class, DEFAULT_VECTORIZE); + } + + public static int getVectorSize(Query query) + { + return parseInt(query, "vectorSize", QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE); + } + public static int getUncoveredIntervalsLimit(Query query) { return getUncoveredIntervalsLimit(query, DEFAULT_UNCOVERED_INTERVALS_LIMIT); @@ -239,4 +284,19 @@ static boolean parseBoolean(Query query, String key, boolean defaultValue private QueryContexts() { } + + static > E parseEnum(Query query, String key, Class clazz, E defaultValue) + { + Object val = query.getContextValue(key); + if (val == null) { + return defaultValue; + } + if (val instanceof String) { + return Enum.valueOf(clazz, StringUtils.toUpperCase((String) val)); + } else if (val instanceof Boolean) { 
+ return Enum.valueOf(clazz, StringUtils.toUpperCase(String.valueOf(val))); + } else { + throw new ISE("Unknown type [%s]. Cannot parse!", val.getClass()); + } + } } diff --git a/processing/src/main/java/org/apache/druid/query/QueryMetrics.java b/processing/src/main/java/org/apache/druid/query/QueryMetrics.java index 701c910c4df7..aa650d37f68e 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryMetrics.java +++ b/processing/src/main/java/org/apache/druid/query/QueryMetrics.java @@ -230,6 +230,12 @@ public interface QueryMetrics> */ void identity(String identity); + /** + * Sets whether or not a segment scan has been vectorized. Generally expected to only be attached to segment-level + * metrics, since at whole-query level we might have a mix of vectorized and non-vectorized segment scans. + */ + void vectorized(boolean vectorized); + /** * Creates a {@link BitmapResultFactory} which may record some information along bitmap construction from {@link * #preFilters(List)}. The returned BitmapResultFactory may add some dimensions to this QueryMetrics from it's {@link diff --git a/processing/src/main/java/org/apache/druid/query/QueryRunnerHelper.java b/processing/src/main/java/org/apache/druid/query/QueryRunnerHelper.java index 53f281dbd868..c99b6aec5740 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryRunnerHelper.java +++ b/processing/src/main/java/org/apache/druid/query/QueryRunnerHelper.java @@ -21,7 +21,6 @@ import com.google.common.base.Function; import com.google.common.base.Preconditions; -import com.google.common.base.Predicates; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; @@ -34,19 +33,19 @@ import java.io.Closeable; import java.util.List; import java.util.Map; +import java.util.Objects; /** */ public class QueryRunnerHelper { - public static Sequence> makeCursorBasedQuery( final StorageAdapter
adapter, - List queryIntervals, - Filter filter, - VirtualColumns virtualColumns, - boolean descending, - Granularity granularity, + final List queryIntervals, + final Filter filter, + final VirtualColumns virtualColumns, + final boolean descending, + final Granularity granularity, final Function> mapFn ) { @@ -57,16 +56,9 @@ public static Sequence> makeCursorBasedQuery( return Sequences.filter( Sequences.map( adapter.makeCursors(filter, queryIntervals.get(0), virtualColumns, granularity, descending, null), - new Function>() - { - @Override - public Result apply(Cursor input) - { - return mapFn.apply(input); - } - } + mapFn ), - Predicates.notNull() + Objects::nonNull ); } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapter.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapter.java new file mode 100644 index 000000000000..8e4c492b2095 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapter.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.nio.ByteBuffer; + +public interface AggregatorAdapter extends Closeable +{ + void init(ByteBuffer buf, int position); + + @Nullable + Object get(ByteBuffer buf, int position); + + void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer); + + @Override + void close(); + + AggregatorFactory getFactory(); + + BufferAggregator asBufferAggregator(); + + VectorAggregator asVectorAggregator(); +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java new file mode 100644 index 000000000000..b914bb562966 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java @@ -0,0 +1,313 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation; + +import com.google.common.primitives.Ints; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/** + * A class that helps query engines use Buffer- or VectorAggregators in a consistent way. + * + * The two main benefits this class provides are: + * + * (1) Query engines can treat BufferAggregators and VectorAggregators the same for operations that are equivalent + * across them, like "init", "get", "relocate", and "close". + * (2) Query engines are freed from the need to manage how much space each individual aggregator needs. They only + * need to allocate a block of size "spaceNeeded". 
+ */ +public class AggregatorAdapters implements Closeable +{ + private static final Logger log = new Logger(AggregatorAdapters.class); + + private final List adapters; + private final List factories; + private final int[] aggregatorPositions; + private final int spaceNeeded; + + private AggregatorAdapters(final List adapters) + { + this.adapters = adapters; + this.factories = adapters.stream().map(AggregatorAdapter::getFactory).collect(Collectors.toList()); + this.aggregatorPositions = new int[adapters.size()]; + + long nextPosition = 0; + for (int i = 0; i < adapters.size(); i++) { + final AggregatorFactory aggregatorFactory = adapters.get(i).getFactory(); + aggregatorPositions[i] = Ints.checkedCast(nextPosition); + nextPosition += aggregatorFactory.getMaxIntermediateSizeWithNulls(); + } + + this.spaceNeeded = Ints.checkedCast(nextPosition); + } + + public static AggregatorAdapters factorizeVector( + final VectorColumnSelectorFactory columnSelectorFactory, + final List aggregatorFactories + ) + { + final AggregatorAdapter[] adapters = new AggregatorAdapter[aggregatorFactories.size()]; + for (int i = 0; i < aggregatorFactories.size(); i++) { + final AggregatorFactory aggregatorFactory = aggregatorFactories.get(i); + adapters[i] = new VectorAggregatorAdapter( + aggregatorFactory, + aggregatorFactory.factorizeVector(columnSelectorFactory) + ); + } + + return new AggregatorAdapters(Arrays.asList(adapters)); + } + + public static AggregatorAdapters factorizeBuffered( + final ColumnSelectorFactory columnSelectorFactory, + final List aggregatorFactories + ) + { + final AggregatorAdapter[] adapters = new AggregatorAdapter[aggregatorFactories.size()]; + for (int i = 0; i < aggregatorFactories.size(); i++) { + final AggregatorFactory aggregatorFactory = aggregatorFactories.get(i); + adapters[i] = new BufferAggregatorAdapter( + aggregatorFactory, + aggregatorFactory.factorizeBuffered(columnSelectorFactory) + ); + } + + return new AggregatorAdapters(Arrays.asList(adapters)); 
+ } + + public int spaceNeeded() + { + return spaceNeeded; + } + + public List factories() + { + return factories; + } + + public int[] aggregatorPositions() + { + return aggregatorPositions; + } + + public int size() + { + return adapters.size(); + } + + public void init(final ByteBuffer buf, final int position) + { + for (int i = 0; i < adapters.size(); i++) { + adapters.get(i).init(buf, position + aggregatorPositions[i]); + } + } + + public void aggregateBuffered(final ByteBuffer buf, final int position) + { + for (int i = 0; i < adapters.size(); i++) { + final AggregatorAdapter adapter = adapters.get(i); + adapter.asBufferAggregator().aggregate(buf, position + aggregatorPositions[i]); + } + } + + public void aggregateVector( + final ByteBuffer buf, + final int position, + final int start, + final int end + ) + { + for (int i = 0; i < adapters.size(); i++) { + final AggregatorAdapter adapter = adapters.get(i); + adapter.asVectorAggregator().aggregate(buf, position + aggregatorPositions[i], start, end); + } + } + + public void aggregateVector( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows + ) + { + for (int i = 0; i < adapters.size(); i++) { + final AggregatorAdapter adapter = adapters.get(i); + adapter.asVectorAggregator().aggregate(buf, numRows, positions, rows, aggregatorPositions[i]); + } + } + + @Nullable + public Object get(final ByteBuffer buf, final int position, final int aggregatorNumber) + { + return adapters.get(aggregatorNumber).get(buf, position + aggregatorPositions[aggregatorNumber]); + } + + public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + for (int i = 0; i < adapters.size(); i++) { + adapters.get(i).relocate( + oldPosition + aggregatorPositions[i], + newPosition + aggregatorPositions[i], + oldBuffer, + newBuffer + ); + } + } + + @Override + public void close() + { + for (AggregatorAdapter adapter : adapters) { + try { + adapter.close(); + 
} + catch (Exception e) { + log.warn(e, "Could not close aggregator [%s], skipping.", adapter.getFactory().getName()); + } + } + } + + private static class VectorAggregatorAdapter implements AggregatorAdapter + { + private final AggregatorFactory factory; + private final VectorAggregator aggregator; + + public VectorAggregatorAdapter(final AggregatorFactory factory, final VectorAggregator aggregator) + { + this.factory = factory; + this.aggregator = aggregator; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + aggregator.init(buf, position); + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return aggregator.get(buf, position); + } + + @Override + public void close() + { + aggregator.close(); + } + + @Override + public void relocate( + final int oldPosition, + final int newPosition, + final ByteBuffer oldBuffer, + final ByteBuffer newBuffer + ) + { + aggregator.relocate(oldPosition, newPosition, oldBuffer, newBuffer); + } + + @Override + public AggregatorFactory getFactory() + { + return factory; + } + + @Override + public BufferAggregator asBufferAggregator() + { + throw new ISE("Not a BufferAggregator!"); + } + + @Override + public VectorAggregator asVectorAggregator() + { + return aggregator; + } + } + + private static class BufferAggregatorAdapter implements AggregatorAdapter + { + private final AggregatorFactory factory; + private final BufferAggregator aggregator; + + public BufferAggregatorAdapter(final AggregatorFactory factory, final BufferAggregator aggregator) + { + this.factory = factory; + this.aggregator = aggregator; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + aggregator.init(buf, position); + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return aggregator.get(buf, position); + } + + @Override + public void close() + { + aggregator.close(); + } + + @Override + public void relocate( + final int 
oldPosition, + final int newPosition, + final ByteBuffer oldBuffer, + final ByteBuffer newBuffer + ) + { + aggregator.relocate(oldPosition, newPosition, oldBuffer, newBuffer); + } + + @Override + public AggregatorFactory getFactory() + { + return factory; + } + + @Override + public BufferAggregator asBufferAggregator() + { + return aggregator; + } + + @Override + public VectorAggregator asVectorAggregator() + { + throw new ISE("Not a VectorAggregator!"); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java index 6ec9fde8fcbf..9f622a17eaf6 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java @@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.PerSegmentQueryOptimizationContext; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.util.Arrays; @@ -49,6 +50,23 @@ public abstract class AggregatorFactory implements Cacheable public abstract BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory); + /** + * Create a VectorAggregator based on the provided column selector factory. Will throw an exception if + * this aggregation class does not support vectorization: check "canVectorize" first. + */ + public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) + { + throw new UOE("Aggregator[%s] cannot vectorize", getClass().getName()); + } + + /** + * Returns whether or not this aggregation class supports vectorization. The default implementation returns false. 
+ */ + public boolean canVectorize() + { + return false; + } + public abstract Comparator getComparator(); /** diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java index ecd0c11b526a..2abef7e13257 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java @@ -24,6 +24,7 @@ import org.apache.druid.query.monomorphicprocessing.HotLoopCallee; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import javax.annotation.Nullable; import java.nio.ByteBuffer; /** @@ -33,6 +34,8 @@ * * Thus, an Aggregator can be thought of as a closure over some other thing that is stateful and changes between calls * to aggregate(...). + * + * @see VectorAggregator the vectorized version */ @ExtensionPoint public interface BufferAggregator extends HotLoopCallee @@ -78,6 +81,7 @@ public interface BufferAggregator extends HotLoopCallee * @param position offset within the byte buffer at which the aggregate value is stored * @return the Object representation of the aggregate */ + @Nullable Object get(ByteBuffer buf, int position); /** diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/CountAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/CountAggregatorFactory.java index 23fba55eed53..445b04f780ea 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/CountAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/CountAggregatorFactory.java @@ -24,6 +24,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.util.Collections; import
java.util.Comparator; @@ -57,12 +58,24 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) return new CountBufferAggregator(); } + @Override + public VectorAggregator factorizeVector(final VectorColumnSelectorFactory selectorFactory) + { + return new CountVectorAggregator(); + } + @Override public Comparator getComparator() { return CountAggregator.COMPARATOR; } + @Override + public boolean canVectorize() + { + return true; + } + @Override public Object combine(Object lhs, Object rhs) { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/CountVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/CountVectorAggregator.java new file mode 100644 index 000000000000..fec4793274e3 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/CountVectorAggregator.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class CountVectorAggregator implements VectorAggregator +{ + @Override + public void init(final ByteBuffer buf, final int position) + { + buf.putLong(position, 0); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final int delta = endRow - startRow; + buf.putLong(position, buf.getLong(position) + delta); + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + buf.putLong(position, buf.getLong(position) + 1); + } + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return buf.getLong(position); + } + + @Override + public void close() + { + // Nothing to close. 
+ } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumAggregatorFactory.java index e5c8bf6c7f83..0292823056ac 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumAggregatorFactory.java @@ -26,6 +26,8 @@ import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.segment.BaseDoubleColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -61,12 +63,24 @@ protected BaseDoubleColumnValueSelector selector(ColumnSelectorFactory metricFac ); } + @Override + protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnSelectorFactory) + { + return columnSelectorFactory.makeValueSelector(fieldName); + } + @Override protected Aggregator factorize(ColumnSelectorFactory metricFactory, BaseDoubleColumnValueSelector selector) { return new DoubleSumAggregator(selector); } + @Override + public boolean canVectorize() + { + return expression == null; + } + @Override protected BufferAggregator factorizeBuffered( ColumnSelectorFactory metricFactory, @@ -76,6 +90,15 @@ protected BufferAggregator factorizeBuffered( return new DoubleSumBufferAggregator(selector); } + @Override + protected VectorAggregator factorizeVector( + VectorColumnSelectorFactory columnSelectorFactory, + VectorValueSelector selector + ) + { + return new DoubleSumVectorAggregator(selector); + } + @Override @Nullable public Object combine(@Nullable Object lhs, @Nullable Object rhs) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumVectorAggregator.java 
b/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumVectorAggregator.java new file mode 100644 index 000000000000..f66a4f406be7 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumVectorAggregator.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class DoubleSumVectorAggregator implements VectorAggregator +{ + private final VectorValueSelector selector; + + public DoubleSumVectorAggregator(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + buf.putDouble(position, 0); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final double[] vector = selector.getDoubleVector(); + + double sum = 0; + for (int i = startRow; i < endRow; i++) { + sum += vector[i]; + } + + buf.putDouble(position, buf.getDouble(position) + sum); + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final double[] vector = selector.getDoubleVector(); + + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + buf.putDouble(position, buf.getDouble(position) + vector[rows != null ? rows[i] : i]); + } + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return buf.getDouble(position); + } + + @Override + public void close() + { + // Nothing to close. 
+ } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java index 62ee3ea91c76..c9a6dcd8e782 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java @@ -25,13 +25,16 @@ import com.google.common.base.Strings; import org.apache.druid.query.PerSegmentQueryOptimizationContext; import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.IntervalDimFilter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.column.ColumnHolder; -import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.joda.time.Interval; +import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Comparator; @@ -41,7 +44,10 @@ public class FilteredAggregatorFactory extends AggregatorFactory { private final AggregatorFactory delegate; - private final DimFilter filter; + private final DimFilter dimFilter; + private final Filter filter; + + @Nullable private final String name; // Constructor for backwards compat only @@ -56,22 +62,23 @@ public FilteredAggregatorFactory( @JsonCreator public FilteredAggregatorFactory( @JsonProperty("aggregator") AggregatorFactory delegate, - @JsonProperty("filter") DimFilter filter, - @JsonProperty("name") String name + @JsonProperty("filter") DimFilter dimFilter, + @Nullable @JsonProperty("name") String name ) { - Preconditions.checkNotNull(delegate); - Preconditions.checkNotNull(filter); + Preconditions.checkNotNull(delegate, "aggregator"); + 
Preconditions.checkNotNull(dimFilter, "filter"); this.delegate = delegate; - this.filter = filter; + this.dimFilter = dimFilter; + this.filter = dimFilter.toFilter(); this.name = name; } @Override public Aggregator factorize(ColumnSelectorFactory columnSelectorFactory) { - final ValueMatcher valueMatcher = Filters.toFilter(filter).makeMatcher(columnSelectorFactory); + final ValueMatcher valueMatcher = filter.makeMatcher(columnSelectorFactory); return new FilteredAggregator( valueMatcher, delegate.factorize(columnSelectorFactory) @@ -81,13 +88,30 @@ public Aggregator factorize(ColumnSelectorFactory columnSelectorFactory) @Override public BufferAggregator factorizeBuffered(ColumnSelectorFactory columnSelectorFactory) { - final ValueMatcher valueMatcher = Filters.toFilter(filter).makeMatcher(columnSelectorFactory); + final ValueMatcher valueMatcher = filter.makeMatcher(columnSelectorFactory); return new FilteredBufferAggregator( valueMatcher, delegate.factorizeBuffered(columnSelectorFactory) ); } + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) + { + Preconditions.checkState(canVectorize(), "Cannot vectorize"); + final VectorValueMatcher valueMatcher = filter.makeVectorMatcher(columnSelectorFactory); + return new FilteredVectorAggregator( + valueMatcher, + delegate.factorizeVector(columnSelectorFactory) + ); + } + + @Override + public boolean canVectorize() + { + return delegate.canVectorize() && filter.canVectorizeMatcher(); + } + @Override public Comparator getComparator() { @@ -145,7 +169,7 @@ public List requiredFields() @Override public byte[] getCacheKey() { - byte[] filterCacheKey = filter.getCacheKey(); + byte[] filterCacheKey = dimFilter.getCacheKey(); byte[] aggregatorCacheKey = delegate.getCacheKey(); return ByteBuffer.allocate(1 + filterCacheKey.length + aggregatorCacheKey.length) .put(AggregatorUtil.FILTERED_AGG_CACHE_TYPE_ID) @@ -169,8 +193,8 @@ public int getMaxIntermediateSize() @Override public 
AggregatorFactory optimizeForSegment(PerSegmentQueryOptimizationContext optimizationContext) { - if (filter instanceof IntervalDimFilter) { - IntervalDimFilter intervalDimFilter = ((IntervalDimFilter) filter); + if (dimFilter instanceof IntervalDimFilter) { + IntervalDimFilter intervalDimFilter = ((IntervalDimFilter) dimFilter); if (intervalDimFilter.getExtractionFn() != null) { // no support for extraction functions right now return this; @@ -236,7 +260,7 @@ public AggregatorFactory getAggregator() @JsonProperty public DimFilter getFilter() { - return filter; + return dimFilter; } @Override @@ -246,7 +270,7 @@ public List getRequiredColumns() } @Override - public boolean equals(Object o) + public boolean equals(final Object o) { if (this == o) { return true; @@ -254,16 +278,17 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } - FilteredAggregatorFactory that = (FilteredAggregatorFactory) o; + final FilteredAggregatorFactory that = (FilteredAggregatorFactory) o; return Objects.equals(delegate, that.delegate) && - Objects.equals(filter, that.filter) && + Objects.equals(dimFilter, that.dimFilter) && Objects.equals(name, that.name); } @Override public int hashCode() { - return Objects.hash(delegate, filter, name); + + return Objects.hash(delegate, dimFilter, name); } @Override @@ -271,7 +296,7 @@ public String toString() { return "FilteredAggregatorFactory{" + "delegate=" + delegate + - ", filter=" + filter + + ", dimFilter=" + dimFilter + ", name='" + name + '\'' + '}'; } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/FilteredVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/FilteredVectorAggregator.java new file mode 100644 index 000000000000..40c0490dbb75 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/FilteredVectorAggregator.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation; + +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.query.filter.vector.ReadableVectorMatch; +import org.apache.druid.query.filter.vector.VectorMatch; +import org.apache.druid.query.filter.vector.VectorValueMatcher; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.Arrays; + +public class FilteredVectorAggregator implements VectorAggregator +{ + private final VectorValueMatcher matcher; + private final VectorAggregator delegate; + private final int[] delegatePositions; + + @Nullable + private VectorMatch maskScratch = null; + + public FilteredVectorAggregator( + final VectorValueMatcher matcher, + final VectorAggregator delegate + ) + { + this.matcher = matcher; + this.delegate = delegate; + this.delegatePositions = new int[matcher.getMaxVectorSize()]; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + delegate.init(buf, position); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final ReadableVectorMatch mask; + + if (startRow == 0) { + mask = VectorMatch.allTrue(endRow); + } else { + if (maskScratch == null) { + maskScratch 
= VectorMatch.wrap(new int[matcher.getMaxVectorSize()]); + } + + final int maskSize = endRow - startRow; + final int[] maskArray = maskScratch.getSelection(); + for (int i = 0; i < maskSize; i++) { + maskArray[i] = startRow + i; + } + + maskScratch.setSelectionSize(maskSize); + mask = maskScratch; + } + + final ReadableVectorMatch match = matcher.match(mask); + + if (match.isAllTrue(matcher.getCurrentVectorSize())) { + delegate.aggregate(buf, position, startRow, endRow); + } else if (!match.isAllFalse()) { + Arrays.fill(delegatePositions, 0, match.getSelectionSize(), position); + delegate.aggregate(buf, match.getSelectionSize(), delegatePositions, match.getSelection(), 0); + } + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final ReadableVectorMatch match0; + + if (rows == null) { + match0 = VectorMatch.allTrue(numRows); + } else { + match0 = VectorMatch.wrap(rows).setSelectionSize(numRows); + } + + final ReadableVectorMatch match = matcher.match(match0); + final int[] selection = match.getSelection(); + + if (rows == null) { + for (int i = 0; i < match.getSelectionSize(); i++) { + delegatePositions[i] = positions[selection[i]]; + } + } else { + // i iterates over the match; j iterates over the "rows" array + for (int i = 0, j = 0; i < match.getSelectionSize(); i++) { + for (; rows[j] < selection[i]; j++) { + // Do nothing; the for loop is doing the work of incrementing j. 
+ } + + if (rows[j] != selection[i]) { + throw new ISE("Selection contained phantom row[%d]", selection[i]); + } + + delegatePositions[i] = positions[j]; + } + } + + delegate.aggregate(buf, match.getSelectionSize(), delegatePositions, selection, positionOffset); + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return delegate.get(buf, position); + } + + @Override + public void close() + { + delegate.close(); + maskScratch = null; + } + + @Override + public void relocate( + final int oldPosition, + final int newPosition, + final ByteBuffer oldBuffer, + final ByteBuffer newBuffer + ) + { + delegate.relocate(oldPosition, newPosition, oldBuffer, newBuffer); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumAggregatorFactory.java index 17a9f9df74c8..debe44d518f2 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumAggregatorFactory.java @@ -26,6 +26,8 @@ import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.segment.BaseFloatColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -61,12 +63,24 @@ protected BaseFloatColumnValueSelector selector(ColumnSelectorFactory metricFact ); } + @Override + protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnSelectorFactory) + { + return columnSelectorFactory.makeValueSelector(fieldName); + } + @Override protected Aggregator factorize(ColumnSelectorFactory metricFactory, BaseFloatColumnValueSelector selector) { return new FloatSumAggregator(selector); } + @Override + public boolean 
canVectorize() + { + return expression == null; + } + @Override protected BufferAggregator factorizeBuffered( ColumnSelectorFactory metricFactory, @@ -76,6 +90,15 @@ protected BufferAggregator factorizeBuffered( return new FloatSumBufferAggregator(selector); } + @Override + protected VectorAggregator factorizeVector( + VectorColumnSelectorFactory columnSelectorFactory, + VectorValueSelector selector + ) + { + return new FloatSumVectorAggregator(selector); + } + @Override @Nullable public Object combine(@Nullable Object lhs, @Nullable Object rhs) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumVectorAggregator.java new file mode 100644 index 000000000000..d5bd54e8a86b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumVectorAggregator.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class FloatSumVectorAggregator implements VectorAggregator +{ + private final VectorValueSelector selector; + + public FloatSumVectorAggregator(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + buf.putFloat(position, 0); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final float[] vector = selector.getFloatVector(); + + float sum = 0; + for (int i = startRow; i < endRow; i++) { + sum += vector[i]; + } + + buf.putFloat(position, buf.getFloat(position) + sum); + } + + + @Override + public void aggregate( + final ByteBuffer buf, + final int numPositions, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final float[] vector = selector.getFloatVector(); + + for (int i = 0; i < numPositions; i++) { + final int position = positions[i] + positionOffset; + buf.putFloat(position, buf.getFloat(position) + vector[rows != null ? rows[i] : i]); + } + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return buf.getFloat(position); + } + + @Override + public void close() + { + // Nothing to close. 
+ } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/LongSumAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/LongSumAggregatorFactory.java index 0fc2c6941375..accfc787f7a4 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/LongSumAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/LongSumAggregatorFactory.java @@ -26,6 +26,8 @@ import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.segment.BaseLongColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -62,9 +64,9 @@ protected BaseLongColumnValueSelector selector(ColumnSelectorFactory metricFacto } @Override - protected Aggregator factorize(ColumnSelectorFactory metricFactory, BaseLongColumnValueSelector selector) + protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnSelectorFactory) { - return new LongSumAggregator(selector); + return columnSelectorFactory.makeValueSelector(fieldName); } @Override @@ -76,6 +78,27 @@ protected BufferAggregator factorizeBuffered( return new LongSumBufferAggregator(selector); } + @Override + protected VectorAggregator factorizeVector( + VectorColumnSelectorFactory columnSelectorFactory, + VectorValueSelector selector + ) + { + return new LongSumVectorAggregator(selector); + } + + @Override + protected Aggregator factorize(ColumnSelectorFactory metricFactory, BaseLongColumnValueSelector selector) + { + return new LongSumAggregator(selector); + } + + @Override + public boolean canVectorize() + { + return expression == null; + } + @Override @Nullable public Object combine(@Nullable Object lhs, @Nullable Object rhs) diff --git 
a/processing/src/main/java/org/apache/druid/query/aggregation/LongSumVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/LongSumVectorAggregator.java new file mode 100644 index 000000000000..24496c7c4800 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/LongSumVectorAggregator.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class LongSumVectorAggregator implements VectorAggregator +{ + private final VectorValueSelector selector; + + public LongSumVectorAggregator(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + buf.putLong(position, 0); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final long[] vector = selector.getLongVector(); + + long sum = 0; + for (int i = startRow; i < endRow; i++) { + sum += vector[i]; + } + + buf.putLong(position, buf.getLong(position) + sum); + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final long[] vector = selector.getLongVector(); + + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + buf.putLong(position, buf.getLong(position) + vector[rows != null ? rows[i] : i]); + } + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return buf.getLong(position); + } + + @Override + public void close() + { + // Nothing to close. + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/NoopVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/NoopVectorAggregator.java new file mode 100644 index 000000000000..a57f7a99124d --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/NoopVectorAggregator.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class NoopVectorAggregator implements VectorAggregator +{ + private static final NoopVectorAggregator INSTANCE = new NoopVectorAggregator(); + + public static NoopVectorAggregator instance() + { + return INSTANCE; + } + + private NoopVectorAggregator() + { + // Singleton. + } + + @Override + public void init(ByteBuffer buf, int position) + { + // Do nothing. + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + // Do nothing. + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + // Do nothing. + } + + @Override + public Object get(ByteBuffer buf, int position) + { + return null; + } + + @Override + public void close() + { + // Do nothing. 
+ } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java index 1c7d4b231feb..ad8c0a5c043f 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java @@ -20,11 +20,14 @@ package org.apache.druid.query.aggregation; +import com.google.common.base.Preconditions; import org.apache.druid.common.config.NullHandling; import org.apache.druid.guice.annotations.ExtensionPoint; import org.apache.druid.segment.BaseNullableColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; /** * Abstract class with functionality to wrap {@link Aggregator}, {@link BufferAggregator} and {@link AggregateCombiner} @@ -35,21 +38,30 @@ public abstract class NullableAggregatorFactory extends AggregatorFactory { @Override - public final Aggregator factorize(ColumnSelectorFactory metricFactory) + public final Aggregator factorize(ColumnSelectorFactory columnSelectorFactory) { - T selector = selector(metricFactory); - Aggregator aggregator = factorize(metricFactory, selector); + T selector = selector(columnSelectorFactory); + Aggregator aggregator = factorize(columnSelectorFactory, selector); return NullHandling.replaceWithDefault() ? 
aggregator : new NullableAggregator(aggregator, selector); } @Override - public final BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) + public final BufferAggregator factorizeBuffered(ColumnSelectorFactory columnSelectorFactory) { - T selector = selector(metricFactory); - BufferAggregator aggregator = factorizeBuffered(metricFactory, selector); + T selector = selector(columnSelectorFactory); + BufferAggregator aggregator = factorizeBuffered(columnSelectorFactory, selector); return NullHandling.replaceWithDefault() ? aggregator : new NullableBufferAggregator(aggregator, selector); } + @Override + public final VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) + { + Preconditions.checkState(canVectorize(), "Cannot vectorize"); + VectorValueSelector selector = vectorSelector(columnSelectorFactory); + VectorAggregator aggregator = factorizeVector(columnSelectorFactory, selector); + return NullHandling.replaceWithDefault() ? aggregator : new NullableVectorAggregator(aggregator, selector); + } + @Override public final AggregateCombiner makeNullableAggregateCombiner() { @@ -70,26 +82,58 @@ public final int getMaxIntermediateSizeWithNulls() * * @see ColumnValueSelector */ - protected abstract T selector(ColumnSelectorFactory metricFactory); + protected abstract T selector(ColumnSelectorFactory columnSelectorFactory); + + /** + * Creates a {@link VectorValueSelector} for the aggregated column. + * + * @see VectorValueSelector + */ + protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnSelectorFactory) + { + throw new UnsupportedOperationException("Cannot vectorize"); + } /** * Creates an {@link Aggregator} to aggregate values from several rows, by using the provided selector. - * @param metricFactory metricFactory - * @param selector {@link ColumnValueSelector} for the column to aggregate. 
+ * + * @param columnSelectorFactory columnSelectorFactory + * @param selector {@link ColumnValueSelector} for the column to aggregate. * * @see Aggregator */ - protected abstract Aggregator factorize(ColumnSelectorFactory metricFactory, T selector); + protected abstract Aggregator factorize(ColumnSelectorFactory columnSelectorFactory, T selector); /** * Creates an {@link BufferAggregator} to aggregate values from several rows into a ByteBuffer. - * @param metricFactory metricFactory - * @param selector {@link ColumnValueSelector} for the column to aggregate. + * + * @param columnSelectorFactory columnSelectorFactory + * @param selector {@link ColumnValueSelector} for the column to aggregate. * * @see BufferAggregator */ protected abstract BufferAggregator factorizeBuffered( - ColumnSelectorFactory metricFactory, + ColumnSelectorFactory columnSelectorFactory, T selector ); + + /** + * Creates a {@link VectorAggregator} to aggregate values from several rows into a ByteBuffer. + * + * @param columnSelectorFactory columnSelectorFactory + * @param selector {@link VectorValueSelector} for the column to aggregate. 
+ * + * @see VectorAggregator + */ + protected VectorAggregator factorizeVector( + VectorColumnSelectorFactory columnSelectorFactory, + VectorValueSelector selector + ) + { + if (!canVectorize()) { + throw new UnsupportedOperationException("Cannot vectorize"); + } else { + throw new UnsupportedOperationException("canVectorize returned true but 'factorizeVector' is not implemented"); + } + } } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/NullableBufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/NullableBufferAggregator.java index 48de4dfebf70..5569b8a00df4 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/NullableBufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/NullableBufferAggregator.java @@ -27,14 +27,18 @@ import java.nio.ByteBuffer; /** - * The result of a NullableBufferAggregator will be null if all the values to be aggregated are null values or no values - * are aggregated at all. If any of the value is non-null, the result would be the aggregated value of the delegate - * aggregator. Note that the delegate aggregator is not required to perform check for - * {@link BaseNullableColumnValueSelector#isNull()} on the selector as only non-null values will be passed to the - * delegate aggregator. This class is only used when SQL compatible null handling is enabled. - * When writing aggregated result to buffer, it will write an additional byte to store the nullability of the - * aggregated result. - * Buffer Layout - 1 byte for storing nullability + delegate storage bytes. + * A wrapper around a non-null-aware BufferAggregator that makes it null-aware. This removes the need for each + * aggregator class to handle nulls on its own. + * + * The result of this aggregator will be null if all the values to be aggregated are null values or no values are + * aggregated at all. 
If any of the values are non-null, the result will be the aggregated value of the delegate + * aggregator. + * + * When wrapped by this class, the underlying aggregator's required storage space is increased by one byte. The extra + * byte is a boolean that stores whether or not any non-null values have been seen. The extra byte is placed before + * the underlying aggregator's normal state. (Buffer layout = [nullability byte] [delegate storage bytes]) + * + * @see NullableVectorAggregator, the vectorized version. */ @PublicApi public final class NullableBufferAggregator implements BufferAggregator @@ -111,6 +115,12 @@ public boolean isNull(ByteBuffer buf, int position) return buf.get(position) == NullHandling.IS_NULL_BYTE || delegate.isNull(buf, position + Byte.BYTES); } + @Override + public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + delegate.relocate(oldPosition + Byte.BYTES, newPosition + Byte.BYTES, oldBuffer, newBuffer); + } + @Override public void close() { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/NullableVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/NullableVectorAggregator.java new file mode 100644 index 000000000000..bb8faed98d58 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/NullableVectorAggregator.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.Arrays; + +/** + * A wrapper around a non-null-aware VectorAggregator that makes it null-aware. This removes the need for each + * aggregator class to handle nulls on its own. This class only makes sense as a wrapper for "primitive" aggregators, + * i.e., ones that take {@link VectorValueSelector} as input. + * + * The result of this aggregator will be null if all the values to be aggregated are null values or no values are + * aggregated at all. If any of the values are non-null, the result will be the aggregated value of the delegate + * aggregator. + * + * When wrapped by this class, the underlying aggregator's required storage space is increased by one byte. The extra + * byte is a boolean that stores whether or not any non-null values have been seen. The extra byte is placed before + * the underlying aggregator's normal state. (Buffer layout = [nullability byte] [delegate storage bytes]) + * + * @see NullableBufferAggregator, the non-vectorized version. 
+ */ +public class NullableVectorAggregator implements VectorAggregator +{ + private final VectorAggregator delegate; + private final VectorValueSelector selector; + + @Nullable + private int[] vAggregationPositions = null; + + @Nullable + private int[] vAggregationRows = null; + + NullableVectorAggregator(VectorAggregator delegate, VectorValueSelector selector) + { + this.delegate = delegate; + this.selector = selector; + } + + @Override + public void init(ByteBuffer buf, int position) + { + buf.put(position, NullHandling.IS_NULL_BYTE); + delegate.init(buf, position + Byte.BYTES); + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + final boolean[] nullVector = selector.getNullVector(); + if (nullVector != null) { + // Deferred initialization, since vAggregationPositions and vAggregationRows are only needed if nulls + // actually occur. + if (vAggregationPositions == null) { + vAggregationPositions = new int[selector.getMaxVectorSize()]; + vAggregationRows = new int[selector.getMaxVectorSize()]; + } + + int j = 0; + for (int i = startRow; i < endRow; i++) { + if (!nullVector[i]) { + vAggregationRows[j++] = i; + } + } + + Arrays.fill(vAggregationPositions, 0, j, position); + + doAggregate(buf, j, vAggregationPositions, vAggregationRows, 0); + } else { + doAggregate(buf, position, startRow, endRow); + } + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + final boolean[] nullVector = selector.getNullVector(); + if (nullVector != null) { + // Deferred initialization, since vAggregationPositions and vAggregationRows are only needed if nulls + // actually occur. + if (vAggregationPositions == null) { + vAggregationPositions = new int[selector.getMaxVectorSize()]; + vAggregationRows = new int[selector.getMaxVectorSize()]; + } + + int j = 0; + for (int i = 0; i < numRows; i++) { + final int rowNum = rows == null ? 
i : rows[i]; + if (!nullVector[rowNum]) { + vAggregationPositions[j] = positions[i]; + vAggregationRows[j] = rowNum; + j++; + } + } + + doAggregate(buf, j, vAggregationPositions, vAggregationRows, positionOffset); + } else { + doAggregate(buf, numRows, positions, rows, positionOffset); + } + } + + @Override + @Nullable + public Object get(ByteBuffer buf, int position) + { + switch (buf.get(position)) { + case NullHandling.IS_NULL_BYTE: + return null; + case NullHandling.IS_NOT_NULL_BYTE: + return delegate.get(buf, position + Byte.BYTES); + default: + // Corrupted byte? + throw new ISE("Bad null-marker byte, delegate class[%s]", delegate.getClass().getName()); + } + } + + @Override + public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + delegate.relocate(oldPosition + Byte.BYTES, newPosition + Byte.BYTES, oldBuffer, newBuffer); + } + + @Override + public void close() + { + delegate.close(); + } + + private void doAggregate(ByteBuffer buf, int position, int start, int end) + { + buf.put(position, NullHandling.IS_NOT_NULL_BYTE); + delegate.aggregate(buf, position + Byte.BYTES, start, end); + } + + private void doAggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + for (int i = 0; i < numRows; i++) { + buf.put(positions[i] + positionOffset, NullHandling.IS_NOT_NULL_BYTE); + } + + delegate.aggregate(buf, numRows, positions, rows, positionOffset + Byte.BYTES); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SuppressedAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SuppressedAggregatorFactory.java index 64dcaf08ebb9..bf2592333085 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/SuppressedAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/SuppressedAggregatorFactory.java @@ -23,6 +23,7 @@ import org.apache.druid.query.cache.CacheKeyBuilder; 
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -62,6 +63,18 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) return new SuppressedBufferAggregator(delegate.factorizeBuffered(metricFactory)); } + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) + { + return new SuppressedVectorAggregator(delegate.factorizeVector(columnSelectorFactory)); + } + + @Override + public boolean canVectorize() + { + return delegate.canVectorize(); + } + @Override public Comparator getComparator() { @@ -134,6 +147,12 @@ public int getMaxIntermediateSize() return delegate.getMaxIntermediateSize(); } + @Override + public int getMaxIntermediateSizeWithNulls() + { + return delegate.getMaxIntermediateSizeWithNulls(); + } + @Override public AggregatorFactory optimizeForSegment(PerSegmentQueryOptimizationContext optimizationContext) { @@ -371,4 +390,78 @@ public BufferAggregator getDelegate() return delegate; } } + + public static class SuppressedVectorAggregator implements VectorAggregator + { + private final VectorAggregator delegate; + + public SuppressedVectorAggregator(VectorAggregator delegate) + { + this.delegate = delegate; + } + + @Override + public void init(ByteBuffer buf, int position) + { + delegate.init(buf, position); + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + // no-op + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + // no-op + } + + @Nullable + @Override + public Object get(ByteBuffer buf, int position) + { + return delegate.get(buf, position); + } + + @Override + public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, 
ByteBuffer newBuffer) + { + delegate.relocate(oldPosition, newPosition, oldBuffer, newBuffer); + } + + @Override + public void close() + { + delegate.close(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SuppressedVectorAggregator that = (SuppressedVectorAggregator) o; + return Objects.equals(delegate, that.delegate); + } + + @Override + public int hashCode() + { + return Objects.hash(delegate); + } + + @Override + public String toString() + { + return "SuppressedVectorAggregator{" + + "delegate=" + delegate + + '}'; + } + } } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/VectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/VectorAggregator.java new file mode 100644 index 000000000000..9da781d24707 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/VectorAggregator.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +/** + * An object that can aggregate metrics into a ByteBuffer, from vectorized column selectors. Its aggregation-related + * methods (namely, "aggregate" and "get") do not take the actual input values to aggregate, because it is assumed that + * the VectorAggregator was given something that it can use to get at the current batch of data. + * + * None of the methods in this class are annotated with + * {@link org.apache.druid.query.monomorphicprocessing.CalledFromHotLoop} because vectorized query engines do not use + * monomorphic-processing-style specialization. + * + * @see BufferAggregator, the non-vectorized version. + */ +public interface VectorAggregator +{ + /** + * Same as {@link BufferAggregator#init}. + */ + void init(ByteBuffer buf, int position); + + /** + * Aggregate a range of rows into a single aggregation slot. + * + * Implementations must not change the position, limit or mark of the given buffer. + * + * @param buf byte buffer storing the byte array representation of the aggregate + * @param position offset within the byte buffer at which the current aggregate value is stored + * @param startRow first row of the range within the current batch to aggregate (inclusive) + * @param endRow end row of the range (exclusive) + */ + void aggregate(ByteBuffer buf, int position, int startRow, int endRow); + + /** + * Aggregate a list of rows ("rows") into a list of aggregation slots ("positions"). + * + * Implementations must not change the position, limit or mark of the given buffer. 
+ * + * @param buf byte buffer storing the byte array representation of the aggregate + * @param numRows number of rows to aggregate + * @param positions array of aggregate value positions within the buffer; must be at least as long as "numRows" + * @param rows array of row numbers within the current row batch; must be at least as long as "numRows". 
If + * null, the aggregator will aggregate rows from 0 (inclusive) to numRows (exclusive). + * @param positionOffset an offset to apply to each value from "positions" + */ + void aggregate(ByteBuffer buf, int numRows, int positions[], @Nullable int[] rows, int positionOffset); + + /** + * Same as {@link BufferAggregator#get}. + */ + @Nullable + Object get(ByteBuffer buf, int position); + + /** + * Same as {@link BufferAggregator#relocate}. + */ + default void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + } + + /** + * Release any resources used by the aggregator. + */ + void close(); +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java index abb157738d4d..2002ee18167d 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java @@ -23,6 +23,7 @@ import org.apache.druid.query.ColumnSelectorPlus; import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.aggregation.cardinality.types.CardinalityAggregatorColumnSelectorStrategy; +import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesBufferAggregator; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import java.nio.ByteBuffer; @@ -32,8 +33,6 @@ public class CardinalityBufferAggregator implements BufferAggregator private final ColumnSelectorPlus[] selectorPluses; private final boolean byRow; - private static final byte[] EMPTY_BYTES = HyperLogLogCollector.makeEmptyVersionedByteArray(); - CardinalityBufferAggregator( ColumnSelectorPlus[] selectorPluses, boolean byRow @@ -46,9 +45,7 @@ public class CardinalityBufferAggregator implements BufferAggregator @Override public 
void init(ByteBuffer buf, int position) { - final ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - mutationBuffer.put(EMPTY_BYTES); + HyperUniquesBufferAggregator.doInit(buf, position); } @Override @@ -77,11 +74,7 @@ public void aggregate(ByteBuffer buf, int position) @Override public Object get(ByteBuffer buf, int position) { - ByteBuffer dataCopyBuffer = ByteBuffer.allocate(HyperLogLogCollector.getLatestNumBytesForDenseStorage()); - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - mutationBuffer.get(dataCopyBuffer.array()); - return HyperLogLogCollector.makeCollector(dataCopyBuffer); + return HyperUniquesBufferAggregator.doGet(buf, position); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java index 05a08180952e..26cc6cad2379 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java @@ -34,11 +34,16 @@ import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.aggregation.NoopAggregator; import org.apache.druid.query.aggregation.NoopBufferAggregator; +import org.apache.druid.query.aggregation.NoopVectorAggregator; +import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.query.aggregation.cardinality.HyperLogLogCollectorAggregateCombiner; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.segment.BaseObjectColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.NilColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ValueType; +import 
org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.nio.ByteBuffer; import java.util.Collections; @@ -123,6 +128,23 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) throw new IAE("Incompatible type for metric[%s], expected a HyperUnique, got a %s", fieldName, classOfObject); } + @Override + public VectorAggregator factorizeVector(final VectorColumnSelectorFactory selectorFactory) + { + final ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(fieldName); + if (capabilities == null || capabilities.getType() != ValueType.COMPLEX) { + return NoopVectorAggregator.instance(); + } else { + return new HyperUniquesVectorAggregator(selectorFactory.makeObjectSelector(fieldName)); + } + } + + @Override + public boolean canVectorize() + { + return true; + } + @Override public Comparator getComparator() { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesBufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesBufferAggregator.java index c7f31f4fc9d4..3eba440aee62 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesBufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesBufferAggregator.java @@ -38,14 +38,31 @@ public HyperUniquesBufferAggregator(BaseObjectColumnValueSelector selector) this.selector = selector; } - @Override - public void init(ByteBuffer buf, int position) + public static void doInit(ByteBuffer buf, int position) { final ByteBuffer mutationBuffer = buf.duplicate(); mutationBuffer.position(position); mutationBuffer.put(EMPTY_BYTES); } + public static HyperLogLogCollector doGet(ByteBuffer buf, int position) + { + final int size = HyperLogLogCollector.getLatestNumBytesForDenseStorage(); + ByteBuffer dataCopyBuffer = ByteBuffer.allocate(size); + ByteBuffer mutationBuffer = buf.duplicate(); + 
mutationBuffer.position(position); + mutationBuffer.limit(position + size); + dataCopyBuffer.put(mutationBuffer); + dataCopyBuffer.rewind(); + return HyperLogLogCollector.makeCollector(dataCopyBuffer); + } + + @Override + public void init(ByteBuffer buf, int position) + { + doInit(buf, position); + } + @Override public void aggregate(ByteBuffer buf, int position) { @@ -73,14 +90,7 @@ public void aggregate(ByteBuffer buf, int position) @Override public Object get(ByteBuffer buf, int position) { - final int size = HyperLogLogCollector.getLatestNumBytesForDenseStorage(); - ByteBuffer dataCopyBuffer = ByteBuffer.allocate(size); - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - mutationBuffer.limit(position + size); - dataCopyBuffer.put(mutationBuffer); - dataCopyBuffer.rewind(); - return HyperLogLogCollector.makeCollector(dataCopyBuffer); + return doGet(buf, position); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesVectorAggregator.java new file mode 100644 index 000000000000..616b9de6d6aa --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesVectorAggregator.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.hyperloglog; + +import com.google.common.base.Preconditions; +import org.apache.druid.hll.HyperLogLogCollector; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class HyperUniquesVectorAggregator implements VectorAggregator +{ + private final VectorObjectSelector selector; + + public HyperUniquesVectorAggregator(final VectorObjectSelector selector) + { + this.selector = Preconditions.checkNotNull(selector, "selector"); + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + HyperUniquesBufferAggregator.doInit(buf, position); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + // Save position, limit and restore later instead of allocating a new ByteBuffer object + final int oldPosition = buf.position(); + final int oldLimit = buf.limit(); + buf.limit(position + HyperLogLogCollector.getLatestNumBytesForDenseStorage()); + buf.position(position); + + try { + final HyperLogLogCollector collector = HyperLogLogCollector.makeCollector(buf); + final Object[] vector = selector.getObjectVector(); + for (int i = startRow; i < endRow; i++) { + final HyperLogLogCollector otherCollector = (HyperLogLogCollector) vector[i]; + if (otherCollector != null) { + collector.fold(otherCollector); + } + } + } + finally { + buf.limit(oldLimit); + buf.position(oldPosition); 
+ } + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final Object[] vector = selector.getObjectVector(); + + for (int i = 0; i < numRows; i++) { + final HyperLogLogCollector otherCollector = (HyperLogLogCollector) vector[rows != null ? rows[i] : i]; + if (otherCollector == null) { + continue; + } + + final int position = positions[i] + positionOffset; + + // Save position, limit and restore later instead of allocating a new ByteBuffer object + final int oldPosition = buf.position(); + final int oldLimit = buf.limit(); + buf.limit(position + HyperLogLogCollector.getLatestNumBytesForDenseStorage()); + buf.position(position); + + try { + HyperLogLogCollector.makeCollector(buf).fold(otherCollector); + } + finally { + buf.limit(oldLimit); + buf.position(oldPosition); + } + } + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return HyperUniquesBufferAggregator.doGet(buf, position); + } + + @Override + public void close() + { + // Nothing to close. 
+ } +} diff --git a/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java b/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java index 921402c8da98..ddc499c95e40 100644 --- a/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java +++ b/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java @@ -25,6 +25,8 @@ import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import javax.annotation.Nullable; import java.util.Arrays; @@ -110,6 +112,24 @@ public DimensionSelector decorate(DimensionSelector selector) return selector; } + @Override + public SingleValueDimensionVectorSelector decorate(final SingleValueDimensionVectorSelector selector) + { + return selector; + } + + @Override + public MultiValueDimensionVectorSelector decorate(final MultiValueDimensionVectorSelector selector) + { + return selector; + } + + @Override + public boolean canVectorize() + { + return true; + } + @Override public boolean mustDecorate() { diff --git a/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java b/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java index bbf970ed2bc1..9f3eb2ae0317 100644 --- a/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java +++ b/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java @@ -22,9 +22,12 @@ import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; import org.apache.druid.java.util.common.Cacheable; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.extraction.ExtractionFn; import 
org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import javax.annotation.Nullable; @@ -55,10 +58,29 @@ public interface DimensionSpec extends Cacheable DimensionSelector decorate(DimensionSelector selector); + default SingleValueDimensionVectorSelector decorate(SingleValueDimensionVectorSelector selector) + { + throw new UOE("DimensionSpec[%s] cannot vectorize", getClass().getName()); + } + + default MultiValueDimensionVectorSelector decorate(MultiValueDimensionVectorSelector selector) + { + throw new UOE("DimensionSpec[%s] cannot vectorize", getClass().getName()); + } + /** * Does this DimensionSpec require that decorate() be called to produce correct results? */ boolean mustDecorate(); + /** + * Does this DimensionSpec have working {@link #decorate(SingleValueDimensionVectorSelector)} and + * {@link #decorate(MultiValueDimensionVectorSelector)} methods? + */ + default boolean canVectorize() + { + return false; + } + boolean preservesOrdering(); } diff --git a/processing/src/main/java/org/apache/druid/query/dimension/VectorColumnStrategizer.java b/processing/src/main/java/org/apache/druid/query/dimension/VectorColumnStrategizer.java new file mode 100644 index 000000000000..06338d4b93dd --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/dimension/VectorColumnStrategizer.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.dimension;
+
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorValueSelector;
+
+/**
+ * Class that encapsulates knowledge about how to create vector column processors of type {@code T}. Used by
+ * {@link org.apache.druid.segment.DimensionHandlerUtils#makeVectorProcessor}.
+ */
+public interface VectorColumnStrategizer<T>
+{
+  T makeSingleValueDimensionStrategy(SingleValueDimensionVectorSelector selector);
+
+  T makeMultiValueDimensionStrategy(MultiValueDimensionVectorSelector selector);
+
+  T makeFloatStrategy(VectorValueSelector selector);
+
+  T makeDoubleStrategy(VectorValueSelector selector);
+
+  T makeLongStrategy(VectorValueSelector selector);
+}
diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java b/processing/src/main/java/org/apache/druid/query/filter/Filter.java index a8b7c7f627dd..120358f30447 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java @@ -20,10 +20,13 @@ package org.apache.druid.query.filter; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.DefaultBitmapResultFactory; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.segment.ColumnSelector; import
org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; public interface Filter { @@ -84,6 +87,17 @@ default ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) */ ValueMatcher makeMatcher(ColumnSelectorFactory factory); + /** + * Get a VectorValueMatcher that applies this filter to row vectors. + * + * @param factory Object used to create ValueMatchers + * + * @return VectorValueMatcher that applies this filter to row vectors. + */ + default VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) + { + throw new UOE("Filter[%s] cannot vectorize", getClass().getName()); + } /** * Indicates whether this filter can return a bitmap index for filtering, based on @@ -107,4 +121,12 @@ default ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) * @return true if this Filter supports selectivity estimation, false otherwise. */ boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector); + + /** + * Returns true if this filter can produce a vectorized matcher from its "makeVectorMatcher" method. 
+ */ + default boolean canVectorizeMatcher() + { + return false; + } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java index c2d8511ba1f2..eec5ccaa2a41 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java @@ -163,6 +163,12 @@ public int hashCode() return result; } + @Override + public String toString() + { + return convertedFilter.toString(); + } + private List> makeIntervalLongs() { List> intervalLongs = new ArrayList<>(); diff --git a/processing/src/main/java/org/apache/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java index e50927534784..73100c611e7d 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java +++ b/processing/src/main/java/org/apache/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java @@ -20,20 +20,72 @@ package org.apache.druid.query.filter; import com.google.common.base.Predicate; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.filter.BooleanValueMatcher; +import javax.annotation.Nullable; +import java.util.Objects; + public class StringValueMatcherColumnSelectorStrategy implements ValueMatcherColumnSelectorStrategy { private static final String[] NULL_VALUE = new String[]{null}; private static final ValueGetter NULL_VALUE_GETTER = () -> NULL_VALUE; - @Override - public ValueMatcher makeValueMatcher(final DimensionSelector selector, String value) + private final boolean hasMultipleValues; + + public 
StringValueMatcherColumnSelectorStrategy(final boolean hasMultipleValues) + { + this.hasMultipleValues = hasMultipleValues; + } + + @Nullable + public static Boolean toBooleanIfPossible( + final DimensionDictionarySelector selector, + final boolean hasMultipleValues, + final Predicate predicate + ) { if (selector.getValueCardinality() == 0) { - return BooleanValueMatcher.of(value == null); + // Column has no values (it doesn't exist, or it's all empty arrays). + // Match if and only if "predicate" matches null. + return predicate.apply(null); + } else if (!hasMultipleValues && selector.getValueCardinality() == 1 && selector.nameLookupPossibleInAdvance()) { + // Every row has the same value. Match if and only if "predicate" matches the possible value. + return predicate.apply(selector.lookupName(0)); + } else { + return null; + } + } + + @Nullable + private static ValueMatcher toBooleanMatcherIfPossible( + final DimensionSelector selector, + final boolean hasMultipleValues, + final Predicate predicate + ) + { + final Boolean booleanValue = StringValueMatcherColumnSelectorStrategy.toBooleanIfPossible( + selector, + hasMultipleValues, + predicate + ); + return booleanValue == null ? 
null : BooleanValueMatcher.of(booleanValue); + } + + @Override + public ValueMatcher makeValueMatcher(final DimensionSelector selector, final String value) + { + final ValueMatcher booleanMatcher = toBooleanMatcherIfPossible( + selector, + hasMultipleValues, + s -> Objects.equals(s, NullHandling.emptyToNullIfNeeded(value)) + ); + + if (booleanMatcher != null) { + return booleanMatcher; } else { return selector.makeValueMatcher(value); } @@ -46,8 +98,10 @@ public ValueMatcher makeValueMatcher( ) { final Predicate predicate = predicateFactory.makeStringPredicate(); - if (selector.getValueCardinality() == 0) { - return BooleanValueMatcher.of(predicate.apply(null)); + final ValueMatcher booleanMatcher = toBooleanMatcherIfPossible(selector, hasMultipleValues, predicate); + + if (booleanMatcher != null) { + return booleanMatcher; } else { return selector.makeValueMatcher(predicate); } diff --git a/processing/src/main/java/org/apache/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java b/processing/src/main/java/org/apache/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java index edecbbe0a21a..2797f082f74a 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java +++ b/processing/src/main/java/org/apache/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java @@ -49,7 +49,7 @@ public ValueMatcherColumnSelectorStrategy makeColumnSelectorStrategy( ValueType type = capabilities.getType(); switch (type) { case STRING: - return new StringValueMatcherColumnSelectorStrategy(); + return new StringValueMatcherColumnSelectorStrategy(capabilities.hasMultipleValues()); case LONG: return new LongValueMatcherColumnSelectorStrategy(); case FLOAT: diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/BaseVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/BaseVectorValueMatcher.java new file mode 100644 index 000000000000..aea2f1433327 --- 
/dev/null
+++ b/processing/src/main/java/org/apache/druid/query/filter/vector/BaseVectorValueMatcher.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.filter.vector;
+
+import org.apache.druid.segment.vector.VectorSizeInspector;
+
+/**
+ * Skeleton {@link VectorValueMatcher} that answers the vector-size questions by forwarding them to the
+ * {@link VectorSizeInspector} handed to the constructor. The matching logic itself is left to subclasses.
+ */
+public abstract class BaseVectorValueMatcher implements VectorValueMatcher
+{
+  private final VectorSizeInspector sizeInspector;
+
+  /**
+   * @param selector inspector whose current and maximum vector sizes this matcher reports
+   */
+  public BaseVectorValueMatcher(final VectorSizeInspector selector)
+  {
+    sizeInspector = selector;
+  }
+
+  @Override
+  public int getCurrentVectorSize()
+  {
+    return sizeInspector.getCurrentVectorSize();
+  }
+
+  @Override
+  public int getMaxVectorSize()
+  {
+    return sizeInspector.getMaxVectorSize();
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/BooleanVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/BooleanVectorValueMatcher.java
new file mode 100644
index 000000000000..65af27b83fc2
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/query/filter/vector/BooleanVectorValueMatcher.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.filter.vector;
+
+import org.apache.druid.segment.vector.VectorSizeInspector;
+
+/**
+ * A {@link VectorValueMatcher} with a fixed verdict: depending on the flag given to {@link #of}, it either accepts
+ * every row of the mask it receives or rejects all of them. Vector sizes are reported by the inherited
+ * {@link BaseVectorValueMatcher} accessors, which read them from the inspector passed to {@link #of}.
+ */
+public class BooleanVectorValueMatcher extends BaseVectorValueMatcher
+{
+  private final boolean matches;
+
+  private BooleanVectorValueMatcher(final VectorSizeInspector selector, final boolean matches)
+  {
+    super(selector);
+    this.matches = matches;
+  }
+
+  /**
+   * Creates a matcher whose answer is always {@code matches}.
+   *
+   * @param selector source of the current and maximum vector sizes reported by the matcher
+   * @param matches  true to accept every row of each mask, false to reject every row
+   */
+  public static BooleanVectorValueMatcher of(final VectorSizeInspector selector, final boolean matches)
+  {
+    return new BooleanVectorValueMatcher(selector, matches);
+  }
+
+  /**
+   * Returns {@code mask} itself when this matcher accepts rows, and {@link VectorMatch#allFalse()} otherwise.
+   */
+  @Override
+  public ReadableVectorMatch match(final ReadableVectorMatch mask)
+  {
+    if (matches) {
+      assert mask.isValid(mask);
+      return mask;
+    } else {
+      return VectorMatch.allFalse();
+    }
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java
new file mode 100644
index 000000000000..c304a117551a
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import org.apache.druid.query.filter.DruidDoublePredicate; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; + +public class DoubleVectorValueMatcher implements VectorValueMatcherFactory +{ + private final VectorValueSelector selector; + + public DoubleVectorValueMatcher(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public VectorValueMatcher makeMatcher(@Nullable final String value) + { + final Double matchVal = DimensionHandlerUtils.convertObjectToDouble(value); + + if (matchVal == null) { + return BooleanVectorValueMatcher.of(selector, false); + } + + final double matchValDouble = matchVal; + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final 
ReadableVectorMatch mask) + { + final double[] vector = selector.getDoubleVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (vector[rowNum] == matchValDouble) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } + + @Override + public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) + { + final DruidDoublePredicate predicate = predicateFactory.makeDoublePredicate(); + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final double[] vector = selector.getDoubleVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (predicate.applyDouble(vector[rowNum])) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java new file mode 100644 index 000000000000..4ea33aecb5f0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import org.apache.druid.query.filter.DruidFloatPredicate; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; + +public class FloatVectorValueMatcher implements VectorValueMatcherFactory +{ + private final VectorValueSelector selector; + + public FloatVectorValueMatcher(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public VectorValueMatcher makeMatcher(@Nullable final String value) + { + final Float matchVal = DimensionHandlerUtils.convertObjectToFloat(value); + + if (matchVal == null) { + return BooleanVectorValueMatcher.of(selector, false); + } + + final float matchValFloat = matchVal; + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final float[] vector = selector.getFloatVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (vector[rowNum] == matchValFloat) { + selection[numRows++] = rowNum; + } + } + + 
match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } + + @Override + public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) + { + final DruidFloatPredicate predicate = predicateFactory.makeFloatPredicate(); + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final float[] vector = selector.getFloatVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (predicate.applyFloat(vector[rowNum])) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java new file mode 100644 index 000000000000..a07f9ba32f0e --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import org.apache.druid.query.filter.DruidLongPredicate; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; + +public class LongVectorValueMatcher implements VectorValueMatcherFactory +{ + private final VectorValueSelector selector; + + public LongVectorValueMatcher(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public VectorValueMatcher makeMatcher(@Nullable final String value) + { + final Long matchVal = DimensionHandlerUtils.convertObjectToLong(value); + + if (matchVal == null) { + return BooleanVectorValueMatcher.of(selector, false); + } + + final long matchValLong = matchVal; + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final long[] vector = selector.getLongVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (vector[rowNum] == matchValLong) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } + + @Override + public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) + { + final DruidLongPredicate predicate = predicateFactory.makeLongPredicate(); + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final long[] vector 
= selector.getLongVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (predicate.applyLong(vector[rowNum])) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java new file mode 100644 index 000000000000..d192673784dc --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.druid.query.filter.vector;
+
+import com.google.common.base.Predicate;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.segment.IdLookup;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+
+import javax.annotation.Nullable;
+import java.util.BitSet;
+import java.util.Objects;
+
+public class MultiValueStringVectorValueMatcher implements VectorValueMatcherFactory
+{
+  private final MultiValueDimensionVectorSelector selector;
+
+  public MultiValueStringVectorValueMatcher(final MultiValueDimensionVectorSelector selector)
+  {
+    this.selector = selector;
+  }
+
+  @Override
+  public VectorValueMatcher makeMatcher(@Nullable final String value)
+  {
+    final String etnValue = NullHandling.emptyToNullIfNeeded(value);
+    final IdLookup idLookup = selector.idLookup();
+    final int id;
+
+    if (idLookup != null) {
+      // Optimization when names can be looked up to IDs ahead of time: resolve the id once, compare ints per row.
+      id = idLookup.lookupId(etnValue);
+
+      if (id < 0) {
+        // Value doesn't exist in this column.
+        return BooleanVectorValueMatcher.of(selector, false);
+      }
+
+      // Check for "id".
+ return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final IndexedInts[] vector = selector.getRowVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + final IndexedInts ints = vector[rowNum]; + final int n = ints.size(); + + if (n == 0) { + // null should match empty rows in multi-value columns + if (etnValue == null) { + selection[numRows++] = rowNum; + } + } else { + for (int j = 0; j < n; j++) { + if (ints.get(j) == id) { + selection[numRows++] = rowNum; + break; + } + } + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } else { + return makeMatcher(s -> Objects.equals(s, etnValue)); + } + } + + @Override + public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) + { + return makeMatcher(predicateFactory.makeStringPredicate()); + } + + private VectorValueMatcher makeMatcher(final Predicate predicate) + { + final boolean matchNull = predicate.apply(null); + + if (selector.getValueCardinality() > 0) { + final BitSet checkedIds = new BitSet(selector.getValueCardinality()); + final BitSet matchingIds = new BitSet(selector.getValueCardinality()); + + // Lazy matcher; only check an id if matches() is called. 
+ return new BaseVectorValueMatcher(selector) + { + private final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final IndexedInts[] vector = selector.getRowVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + final IndexedInts ints = vector[rowNum]; + final int n = ints.size(); + + if (n == 0) { + // null should match empty rows in multi-value columns + if (matchNull) { + selection[numRows++] = rowNum; + } + } else { + for (int j = 0; j < n; j++) { + final int id = ints.get(j); + final boolean matches; + + if (checkedIds.get(id)) { + matches = matchingIds.get(id); + } else { + matches = predicate.apply(selector.lookupName(id)); + checkedIds.set(id); + if (matches) { + matchingIds.set(id); + } + } + + if (matches) { + selection[numRows++] = rowNum; + break; + } + } + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } else { + // Evaluate "lookupName" and "predicate" on every row. 
+ return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final IndexedInts[] vector = selector.getRowVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + final IndexedInts ints = vector[rowNum]; + final int n = ints.size(); + + if (n == 0) { + // null should match empty rows in multi-value columns + if (matchNull) { + selection[numRows++] = rowNum; + } + } else { + for (int j = 0; j < n; j++) { + final int id = ints.get(j); + if (predicate.apply(selector.lookupName(id))) { + selection[numRows++] = rowNum; + break; + } + } + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java new file mode 100644 index 000000000000..42e185146f40 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import javax.annotation.Nullable; + +/** + * The result of calling {@link VectorValueMatcher#match}. + * + * @see VectorMatch, the implementation, which also adds some extra mutation methods. + */ +public interface ReadableVectorMatch +{ + /** + * Returns an array of indexes into the current batch. Only the first "getSelectionSize" are valid. + * + * Even though this array is technically mutable, it is very poor form to mutate it if you are not the owner of the + * VectorMatch object. + */ + int[] getSelection(); + + /** + * Returns the number of valid values in the array from "getSelection". + */ + int getSelectionSize(); + + /** + * Checks if this match has accepted every row in the vector. + * + * @param vectorSize the current vector size; must be passed in since VectorMatch objects do not "know" the size + * of the vector they came from. + */ + boolean isAllTrue(int vectorSize); + + /** + * Checks if this match has accepted *nothing*. + */ + boolean isAllFalse(); + + /** + * Checks if this match is valid (increasing row numbers, no out-of-range row numbers). Can additionally verify + * that the match is a subset of a provided "mask". + * + * Used by assertions and tests. + * + * @param mask if provided, checks if this match is a subset of the mask. 
+ */ + boolean isValid(@Nullable ReadableVectorMatch mask); +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java new file mode 100644 index 000000000000..6ed7c16c3688 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.filter.vector; + +import com.google.common.base.Predicate; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.query.filter.StringValueMatcherColumnSelectorStrategy; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; + +import javax.annotation.Nullable; +import java.util.BitSet; +import java.util.Objects; + +public class SingleValueStringVectorValueMatcher implements VectorValueMatcherFactory +{ + private final SingleValueDimensionVectorSelector selector; + + public SingleValueStringVectorValueMatcher(final SingleValueDimensionVectorSelector selector) + { + this.selector = selector; + } + + @Nullable + private static BooleanVectorValueMatcher toBooleanMatcherIfPossible( + final SingleValueDimensionVectorSelector selector, + final Predicate predicate + ) + { + final Boolean booleanValue = StringValueMatcherColumnSelectorStrategy.toBooleanIfPossible( + selector, + false, + predicate + ); + + return booleanValue == null ? null : BooleanVectorValueMatcher.of(selector, booleanValue); + } + + @Override + public VectorValueMatcher makeMatcher(@Nullable final String value) + { + final String etnValue = NullHandling.emptyToNullIfNeeded(value); + + final VectorValueMatcher booleanMatcher = toBooleanMatcherIfPossible(selector, s -> Objects.equals(s, etnValue)); + if (booleanMatcher != null) { + return booleanMatcher; + } + + final IdLookup idLookup = selector.idLookup(); + final int id; + + if (idLookup != null) { + // Optimization when names can be looked up to IDs ahead of time. + id = idLookup.lookupId(etnValue); + + if (id < 0) { + // Value doesn't exist in this column. + return BooleanVectorValueMatcher.of(selector, false); + } + + // Check for "id". 
+ return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final int[] vector = selector.getRowVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (vector[rowNum] == id) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } else { + return makeMatcher(s -> Objects.equals(s, etnValue)); + } + } + + @Override + public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) + { + return makeMatcher(predicateFactory.makeStringPredicate()); + } + + private VectorValueMatcher makeMatcher(final Predicate predicate) + { + final VectorValueMatcher booleanMatcher = toBooleanMatcherIfPossible(selector, predicate); + if (booleanMatcher != null) { + return booleanMatcher; + } + + if (selector.getValueCardinality() > 0) { + final BitSet checkedIds = new BitSet(selector.getValueCardinality()); + final BitSet matchingIds = new BitSet(selector.getValueCardinality()); + + // Lazy matcher; only check an id the first time match() encounters it.
+ return new BaseVectorValueMatcher(selector) + { + private final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final int[] vector = selector.getRowVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + final int id = vector[rowNum]; + final boolean matches; + + if (checkedIds.get(id)) { + matches = matchingIds.get(id); + } else { + matches = predicate.apply(selector.lookupName(id)); + checkedIds.set(id); + if (matches) { + matchingIds.set(id); + } + } + + if (matches) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } else { + // Evaluate "lookupName" and "predicate" on every row. + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final int[] vector = selector.getRowVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (predicate.apply(selector.lookupName(vector[rowNum]))) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorMatch.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorMatch.java new file mode 100644 index 000000000000..54a694766542 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorMatch.java @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import com.google.common.base.Preconditions; +import org.apache.druid.segment.QueryableIndexStorageAdapter; + +import javax.annotation.Nullable; + +/** + * Implementation class for ReadableVectorMatch. + * + * Also adds some useful methods, like "addAll", "removeAll", and "copyFrom". + */ +public class VectorMatch implements ReadableVectorMatch +{ + private static final int[] DEFAULT_ALL_TRUE_VECTOR = new int[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + + private static final VectorMatch ALL_FALSE = new VectorMatch(new int[0], 0); + + static { + for (int i = 0; i < DEFAULT_ALL_TRUE_VECTOR.length; i++) { + DEFAULT_ALL_TRUE_VECTOR[i] = i; + } + } + + private final int[] selection; + private int selectionSize; + + private VectorMatch(final int[] selection, final int selectionSize) + { + this.selection = selection; + this.selectionSize = selectionSize; + } + + /** + * Creates a match that matches everything up to "numRows". This will often be the current vector size, but + * does not necessarily have to be. 
+ */ + public static ReadableVectorMatch allTrue(final int numRows) + { + if (numRows <= DEFAULT_ALL_TRUE_VECTOR.length) { + return new VectorMatch(DEFAULT_ALL_TRUE_VECTOR, numRows); + } else { + final int[] selection = new int[numRows]; + + for (int i = 0; i < numRows; i++) { + selection[i] = i; + } + + return new VectorMatch(selection, numRows); + } + } + + /** + * Creates a match that matches nothing. + */ + public static ReadableVectorMatch allFalse() + { + return ALL_FALSE; + } + + /** + * Creates a new match object with selectionSize = 0, and the provided array as a backing array. + */ + public static VectorMatch wrap(final int[] selection) + { + return new VectorMatch(selection, 0); + } + + @Override + public boolean isAllTrue(final int vectorSize) + { + return selectionSize == vectorSize; + } + + @Override + public boolean isAllFalse() + { + return selectionSize == 0; + } + + @Override + public boolean isValid(@Nullable final ReadableVectorMatch mask) + { + if (mask != null && !mask.isValid(null)) { + // Invalid mask. + return false; + } + + // row numbers must be increasing. + int rowNum = -1; + for (int i = 0; i < selectionSize; i++) { + if (selection[i] > rowNum) { + rowNum = selection[i]; + } else { + return false; + } + } + + // row number cannot be larger than the max length of the selection vector. + if (rowNum > selection.length) { + return false; + } + + // row numbers must all be present in the mask, if it exists. + if (mask != null) { + final int[] maskArray = mask.getSelection(); + for (int i = 0, j = 0; i < selectionSize; i++) { + while (j < mask.getSelectionSize() && selection[i] > maskArray[j]) { + j++; + } + + if (j >= mask.getSelectionSize() || selection[i] != maskArray[j]) { + return false; + } + } + } + + return true; + } + + /** + * Removes all rows from this object that occur in "other", in place, and returns a reference to this object. Does + * not modify "other". 
+ */ + public VectorMatch removeAll(final ReadableVectorMatch other) + { + //noinspection ObjectEquality + Preconditions.checkState(this != other, "'other' must be a different instance from 'this'"); + + int i = 0; // reading position in this.selection + int j = 0; // writing position in this.selection + int p = 0; // position in otherSelection + final int[] otherSelection = other.getSelection(); + for (; i < selectionSize; i++) { + while (p < other.getSelectionSize() && otherSelection[p] < selection[i]) { + // Other value < selection[i], keep reading in other so we can see if selection[i] should be preserved or not. + p++; + } + + if (!(p < other.getSelectionSize() && otherSelection[p] == selection[i])) { + // Preserve selection[i]. + selection[j++] = selection[i]; + } + } + selectionSize = j; + assert isValid(null); + return this; + } + + /** + * Adds all rows from "other" to this object, using "scratch" as scratch space if needed. Does not modify "other". + * Returns a reference to this object. 
+ */ + public VectorMatch addAll(final ReadableVectorMatch other, final VectorMatch scratch) + { + //noinspection ObjectEquality + Preconditions.checkState(this != scratch, "'scratch' must be a different instance from 'this'"); + //noinspection ObjectEquality + Preconditions.checkState(other != scratch, "'scratch' must be a different instance from 'other'"); + + final int[] scratchSelection = scratch.getSelection(); + final int[] otherSelection = other.getSelection(); + + int i = 0; // this.selection pointer + int j = 0; // otherSelection pointer + int k = 0; // scratchSelection pointer + + for (; i < selectionSize; i++) { + while (j < other.getSelectionSize() && otherSelection[j] < selection[i]) { + scratchSelection[k++] = otherSelection[j++]; + } + + scratchSelection[k++] = selection[i]; + + if (j < other.getSelectionSize() && otherSelection[j] == selection[i]) { + j++; + } + } + + while (j < other.getSelectionSize()) { + scratchSelection[k++] = otherSelection[j++]; + } + + scratch.setSelectionSize(k); + copyFrom(scratch); + assert isValid(null); + return this; + } + + /** + * Copies "other" into this object, and returns a reference to this object. Does not modify "other". + */ + public VectorMatch copyFrom(final ReadableVectorMatch other) + { + Preconditions.checkState( + selection.length >= other.getSelectionSize(), + "Capacity[%s] cannot fit other match's selectionSize[%s]", + selection.length, + other.getSelectionSize() + ); + System.arraycopy(other.getSelection(), 0, selection, 0, other.getSelectionSize()); + selectionSize = other.getSelectionSize(); + assert isValid(null); + return this; + } + + @Override + public int[] getSelection() + { + return selection; + } + + @Override + public int getSelectionSize() + { + return selectionSize; + } + + /** + * Sets the valid selectionSize, and returns a reference to this object. 
+ */ + public VectorMatch setSelectionSize(final int newSelectionSize) + { + Preconditions.checkArgument( + newSelectionSize <= selection.length, + "Oops! Cannot setSelectionSize[%s] > selection.length[%s].", + newSelectionSize, + selection.length + ); + this.selectionSize = newSelectionSize; + assert isValid(null); + return this; + } + + @Override + public String toString() + { + final StringBuilder retVal = new StringBuilder("["); + for (int i = 0; i < selectionSize; i++) { + if (i > 0) { + retVal.append(", "); + } + retVal.append(selection[i]); + } + retVal.append("]"); + return retVal.toString(); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java new file mode 100644 index 000000000000..f274ca3dfae4 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import org.apache.druid.segment.vector.VectorSizeInspector; + +/** + * A vectorized value matcher. 
+ * + * @see org.apache.druid.query.filter.ValueMatcher, the non-vectorized version + */ +public interface VectorValueMatcher extends VectorSizeInspector +{ + /** + * Examine the current vector and return a match indicating what is accepted. + * + * @param mask must not be null; use {@link VectorMatch#allTrue} if you don't need a mask. + * + * @return the subset of "mask" that this value matcher accepts + */ + ReadableVectorMatch match(ReadableVectorMatch mask); +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnStrategizer.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnStrategizer.java new file mode 100644 index 000000000000..7b970e20ce41 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnStrategizer.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.filter.vector; + +import org.apache.druid.query.dimension.VectorColumnStrategizer; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorValueSelector; + +public class VectorValueMatcherColumnStrategizer implements VectorColumnStrategizer +{ + private static final VectorValueMatcherColumnStrategizer INSTANCE = new VectorValueMatcherColumnStrategizer(); + + private VectorValueMatcherColumnStrategizer() + { + // Singleton. + } + + public static VectorValueMatcherColumnStrategizer instance() + { + return INSTANCE; + } + + @Override + public VectorValueMatcherFactory makeSingleValueDimensionStrategy( + final SingleValueDimensionVectorSelector selector + ) + { + return new SingleValueStringVectorValueMatcher(selector); + } + + @Override + public VectorValueMatcherFactory makeMultiValueDimensionStrategy( + final MultiValueDimensionVectorSelector selector + ) + { + return new MultiValueStringVectorValueMatcher(selector); + } + + @Override + public VectorValueMatcherFactory makeFloatStrategy(final VectorValueSelector selector) + { + return new FloatVectorValueMatcher(selector); + } + + @Override + public VectorValueMatcherFactory makeDoubleStrategy(final VectorValueSelector selector) + { + return new DoubleVectorValueMatcher(selector); + } + + @Override + public VectorValueMatcherFactory makeLongStrategy(final VectorValueSelector selector) + { + return new LongVectorValueMatcher(selector); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherFactory.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherFactory.java new file mode 100644 index 000000000000..a7971eb74303 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherFactory.java @@ -0,0 +1,31 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import org.apache.druid.query.filter.DruidPredicateFactory; + +import javax.annotation.Nullable; + +public interface VectorValueMatcherFactory +{ + VectorValueMatcher makeMatcher(@Nullable String value); + + VectorValueMatcher makeMatcher(DruidPredicateFactory predicateFactory); +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java index 8ab4601b09a7..29ec5ec332cc 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java @@ -1077,6 +1077,7 @@ public String toString() ", aggregatorSpecs=" + aggregatorSpecs + ", postAggregatorSpecs=" + postAggregatorSpecs + ", havingSpec=" + havingSpec + + ", context=" + getContext() + '}'; } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java index 2ec00256095a..13107fba68e3 100644 --- 
a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java @@ -42,6 +42,7 @@ public class GroupByQueryConfig private static final String CTX_KEY_FORCE_HASH_AGGREGATION = "forceHashAggregation"; private static final String CTX_KEY_INTERMEDIATE_COMBINE_DEGREE = "intermediateCombineDegree"; private static final String CTX_KEY_NUM_PARALLEL_COMBINE_THREADS = "numParallelCombineThreads"; + private static final String CTX_KEY_VECTORIZE = "vectorize"; @JsonProperty private String defaultStrategy = GroupByStrategySelector.STRATEGY_V2; @@ -88,6 +89,9 @@ public class GroupByQueryConfig @JsonProperty private int numParallelCombineThreads = 1; + @JsonProperty + private boolean vectorize = false; + public String getDefaultStrategy() { return defaultStrategy; @@ -168,6 +172,11 @@ public int getNumParallelCombineThreads() return numParallelCombineThreads; } + public boolean isVectorize() + { + return vectorize; + } + public boolean isForcePushDownNestedQuery() { return forcePushDownNestedQuery; @@ -203,7 +212,10 @@ public GroupByQueryConfig withOverrides(final GroupByQuery query) getMaxOnDiskStorage() ); newConfig.maxMergingDictionarySize = Math.min( - ((Number) query.getContextValue(CTX_KEY_MAX_MERGING_DICTIONARY_SIZE, getMaxMergingDictionarySize())).longValue(), + ((Number) query.getContextValue( + CTX_KEY_MAX_MERGING_DICTIONARY_SIZE, + getMaxMergingDictionarySize() + )).longValue(), getMaxMergingDictionarySize() ); newConfig.forcePushDownLimit = query.getContextBoolean(CTX_KEY_FORCE_LIMIT_PUSH_DOWN, isForcePushDownLimit()); @@ -217,6 +229,7 @@ public GroupByQueryConfig withOverrides(final GroupByQuery query) CTX_KEY_NUM_PARALLEL_COMBINE_THREADS, getNumParallelCombineThreads() ); + newConfig.vectorize = query.getContextBoolean(CTX_KEY_VECTORIZE, isVectorize()); return newConfig; } @@ -237,6 +250,7 @@ public String toString() ", forceHashAggregation=" + 
forceHashAggregation + ", intermediateCombineDegree=" + intermediateCombineDegree + ", numParallelCombineThreads=" + numParallelCombineThreads + + ", vectorize=" + vectorize + ", forcePushDownNestedQuery=" + forcePushDownNestedQuery + '}'; } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java index 8ca48ebb949a..aae2e4b3a3c3 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java @@ -22,8 +22,7 @@ import com.google.common.base.Supplier; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.AggregatorAdapters; import java.nio.ByteBuffer; @@ -35,8 +34,8 @@ public abstract class AbstractBufferHashGrouper implements Grouper bufferSupplier; protected final KeySerde keySerde; protected final int keySize; - protected final BufferAggregator[] aggregators; - protected final int[] aggregatorOffsets; + protected final AggregatorAdapters aggregators; + protected final int baseAggregatorOffset; protected final int bufferGrouperMaxSize; // Integer.MAX_VALUE in production, only used for unit tests // The load factor and bucket configurations are not final, to allow subclasses to set their own values @@ -53,15 +52,16 @@ public AbstractBufferHashGrouper( // the buffer returned from the below supplier can have dirty bits and should be cleared during initialization final Supplier bufferSupplier, final KeySerde keySerde, - final AggregatorFactory[] aggregatorFactories, + final AggregatorAdapters aggregators, + final int baseAggregatorOffset, final int 
bufferGrouperMaxSize ) { this.bufferSupplier = bufferSupplier; this.keySerde = keySerde; this.keySize = keySerde.keySize(); - this.aggregators = new BufferAggregator[aggregatorFactories.length]; - this.aggregatorOffsets = new int[aggregatorFactories.length]; + this.aggregators = aggregators; + this.baseAggregatorOffset = baseAggregatorOffset; this.bufferGrouperMaxSize = bufferGrouperMaxSize; } @@ -77,8 +77,9 @@ public AbstractBufferHashGrouper( * Called to check if it's possible to skip aggregation for a row. * * @param bucketWasUsed Was the row a new entry in the hash table? - * @param bucketOffset Offset of the bucket containing this row's entry in the hash table, - * within the buffer returned by hashTable.getTableBuffer() + * @param bucketOffset Offset of the bucket containing this row's entry in the hash table, + * within the buffer returned by hashTable.getTableBuffer() + * * @return true if aggregation can be skipped, false otherwise. */ public abstract boolean canSkipAggregate(boolean bucketWasUsed, int bucketOffset); @@ -123,7 +124,7 @@ public AggregateResult aggregate(KeyType key, int keyHash) if (keyBuffer == null) { // This may just trigger a spill and get ignored, which is ok. If it bubbles up to the user, the message will // be correct. - return Groupers.DICTIONARY_FULL; + return Groupers.dictionaryFull(0); } if (keyBuffer.remaining() != keySize) { @@ -135,11 +136,11 @@ public AggregateResult aggregate(KeyType key, int keyHash) } // find and try to expand if table is full and find again - int bucket = hashTable.findBucketWithAutoGrowth(keyBuffer, keyHash); + int bucket = hashTable.findBucketWithAutoGrowth(keyBuffer, keyHash, () -> {}); if (bucket < 0) { // This may just trigger a spill and get ignored, which is ok. If it bubbles up to the user, the message will // be correct. 
- return Groupers.HASH_TABLE_FULL; + return Groupers.hashTableFull(0); } final int bucketStartOffset = hashTable.getOffsetForBucket(bucket); @@ -149,10 +150,7 @@ public AggregateResult aggregate(KeyType key, int keyHash) // Set up key and initialize the aggs if this is a new bucket. if (!bucketWasUsed) { hashTable.initializeNewBucketKey(bucket, keyBuffer, keyHash); - for (int i = 0; i < aggregators.length; i++) { - aggregators[i].init(tableBuffer, bucketStartOffset + aggregatorOffsets[i]); - } - + aggregators.init(tableBuffer, bucketStartOffset + baseAggregatorOffset); newBucketHook(bucketStartOffset); } @@ -161,9 +159,7 @@ public AggregateResult aggregate(KeyType key, int keyHash) } // Aggregate the current row. - for (int i = 0; i < aggregators.length; i++) { - aggregators[i].aggregate(tableBuffer, bucketStartOffset + aggregatorOffsets[i]); - } + aggregators.aggregateBuffered(tableBuffer, bucketStartOffset + baseAggregatorOffset); afterAggregateHook(bucketStartOffset); @@ -173,23 +169,16 @@ public AggregateResult aggregate(KeyType key, int keyHash) @Override public void close() { - for (BufferAggregator aggregator : aggregators) { - try { - aggregator.close(); - } - catch (Exception e) { - log.warn(e, "Could not close aggregator [%s], skipping.", aggregator); - } - } + aggregators.close(); } protected Entry bucketEntryForOffset(final int bucketOffset) { final ByteBuffer tableBuffer = hashTable.getTableBuffer(); final KeyType key = keySerde.fromByteBuffer(tableBuffer, bucketOffset + HASH_SIZE); - final Object[] values = new Object[aggregators.length]; - for (int i = 0; i < aggregators.length; i++) { - values[i] = aggregators[i].get(tableBuffer, bucketOffset + aggregatorOffsets[i]); + final Object[] values = new Object[aggregators.size()]; + for (int i = 0; i < aggregators.size(); i++) { + values[i] = aggregators.get(tableBuffer, bucketOffset + baseAggregatorOffset, i); } return new Entry<>(key, values); diff --git 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AggregateResult.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AggregateResult.java index f1bd4931bda1..7ddd5528af9a 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AggregateResult.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AggregateResult.java @@ -19,13 +19,19 @@ package org.apache.druid.query.groupby.epinephelinae; +import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.ISE; + +import javax.annotation.Nullable; import java.util.Objects; public class AggregateResult { - private static final AggregateResult OK = new AggregateResult(true, null); + private static final AggregateResult OK = new AggregateResult(0, null); + + private final int count; - private final boolean ok; + @Nullable private final String reason; public static AggregateResult ok() @@ -33,29 +39,47 @@ public static AggregateResult ok() return OK; } - public static AggregateResult failure(final String reason) + public static AggregateResult partial(final int count, final String reason) { - return new AggregateResult(false, reason); + return new AggregateResult(count, Preconditions.checkNotNull(reason, "reason")); } - private AggregateResult(final boolean ok, final String reason) + private AggregateResult(final int count, @Nullable final String reason) { - this.ok = ok; + Preconditions.checkArgument(count >= 0, "count >= 0"); + this.count = count; this.reason = reason; } + /** + * True if all rows have been processed. 
+ */ public boolean isOk() { - return ok; + return reason == null; } + public int getCount() + { + if (isOk()) { + throw new ISE("Cannot call getCount when isOk = true"); + } + + return count; + } + + @Nullable public String getReason() { + if (isOk()) { + throw new ISE("Cannot call getReason when isOk = true"); + } + return reason; } @Override - public boolean equals(final Object o) + public boolean equals(Object o) { if (this == o) { return true; @@ -63,22 +87,22 @@ public boolean equals(final Object o) if (o == null || getClass() != o.getClass()) { return false; } - final AggregateResult that = (AggregateResult) o; - return ok == that.ok && + AggregateResult that = (AggregateResult) o; + return count == that.count && Objects.equals(reason, that.reason); } @Override public int hashCode() { - return Objects.hash(ok, reason); + return Objects.hash(count, reason); } @Override public String toString() { return "AggregateResult{" + - "ok=" + ok + + "count=" + count + ", reason='" + reason + '\'' + '}'; } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java index 4bc541f4a3c1..2bd14cff6beb 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java @@ -21,14 +21,14 @@ import com.google.common.base.Preconditions; import com.google.common.base.Supplier; +import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.query.aggregation.BufferAggregator; import 
org.apache.druid.query.groupby.epinephelinae.column.GroupByColumnSelectorStrategy; -import org.apache.druid.segment.ColumnSelectorFactory; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.NoSuchElementException; @@ -47,13 +47,10 @@ * different segments cannot be currently retrieved, this grouper can be used only when performing per-segment query * execution. */ -public class BufferArrayGrouper implements IntGrouper +public class BufferArrayGrouper implements VectorGrouper, IntGrouper { - private static final Logger LOG = new Logger(BufferArrayGrouper.class); - private final Supplier bufferSupplier; - private final BufferAggregator[] aggregators; - private final int[] aggregatorOffsets; + private final AggregatorAdapters aggregators; private final int cardinalityWithMissingValue; private final int recordSize; // size of all aggregated values @@ -61,6 +58,10 @@ public class BufferArrayGrouper implements IntGrouper private ByteBuffer usedFlagBuffer; private ByteBuffer valBuffer; + // Scratch objects used by aggregateVector(). Only set if initVectorized() is called. 
+ private int[] vAggregationPositions = null; + private int[] vAggregationRows = null; + static long requiredBufferCapacity( int cardinality, AggregatorFactory[] aggregatorFactories @@ -72,7 +73,7 @@ static long requiredBufferCapacity( .sum(); return getUsedFlagBufferCapacity(cardinalityWithMissingValue) + // total used flags size - (long) cardinalityWithMissingValue * recordSize; // total values size + (long) cardinalityWithMissingValue * recordSize; // total values size } /** @@ -86,26 +87,17 @@ private static int getUsedFlagBufferCapacity(int cardinalityWithMissingValue) public BufferArrayGrouper( // the buffer returned from the below supplier can have dirty bits and should be cleared during initialization final Supplier bufferSupplier, - final ColumnSelectorFactory columnSelectorFactory, - final AggregatorFactory[] aggregatorFactories, + final AggregatorAdapters aggregators, final int cardinality ) { - Preconditions.checkNotNull(aggregatorFactories, "aggregatorFactories"); + Preconditions.checkNotNull(aggregators, "aggregators"); Preconditions.checkArgument(cardinality > 0, "Cardinality must a non-zero positive number"); this.bufferSupplier = Preconditions.checkNotNull(bufferSupplier, "bufferSupplier"); - this.aggregators = new BufferAggregator[aggregatorFactories.length]; - this.aggregatorOffsets = new int[aggregatorFactories.length]; + this.aggregators = aggregators; this.cardinalityWithMissingValue = cardinality + 1; - - int offset = 0; - for (int i = 0; i < aggregatorFactories.length; i++) { - aggregators[i] = aggregatorFactories[i].factorizeBuffered(columnSelectorFactory); - aggregatorOffsets[i] = offset; - offset += aggregatorFactories[i].getMaxIntermediateSizeWithNulls(); - } - recordSize = offset; + this.recordSize = aggregators.spaceNeeded(); } @Override @@ -115,6 +107,20 @@ public void init() final ByteBuffer buffer = bufferSupplier.get(); final int usedFlagBufferEnd = getUsedFlagBufferCapacity(cardinalityWithMissingValue); + + // Sanity check on 
buffer capacity. + if (usedFlagBufferEnd + (long) cardinalityWithMissingValue * recordSize > buffer.capacity()) { + // Should not happen in production, since we should only select array-based aggregation if we have + // enough scratch space. + throw new ISE( + "Records of size[%,d] and possible cardinality[%,d] exceeds the buffer capacity[%,d].", + recordSize, + cardinalityWithMissingValue, + buffer.capacity() + ); + } + + // Slice up the buffer. buffer.position(0); buffer.limit(usedFlagBufferEnd); usedFlagBuffer = buffer.slice(); @@ -129,6 +135,15 @@ public void init() } } + @Override + public void initVectorized(final int maxVectorSize) + { + init(); + + this.vAggregationPositions = new int[maxVectorSize]; + this.vAggregationRows = new int[maxVectorSize]; + } + @Override public boolean isInitialized() { @@ -136,7 +151,7 @@ public boolean isInitialized() } @Override - public AggregateResult aggregateKeyHash(int dimIndex) + public AggregateResult aggregateKeyHash(final int dimIndex) { Preconditions.checkArgument( dimIndex >= 0 && dimIndex < cardinalityWithMissingValue, @@ -144,39 +159,62 @@ public AggregateResult aggregateKeyHash(int dimIndex) dimIndex ); - final int recordOffset = dimIndex * recordSize; + initializeSlotIfNeeded(dimIndex); + aggregators.aggregateBuffered(valBuffer, dimIndex * recordSize); + return AggregateResult.ok(); + } + + @Override + public AggregateResult aggregateVector(int[] keySpace, int startRow, int endRow) + { + if (keySpace.length == 0) { + // Empty key space, assume keys are all zeroes. + final int dimIndex = 1; + + initializeSlotIfNeeded(dimIndex); - if (recordOffset + recordSize > valBuffer.capacity()) { - // This error cannot be recoverd, and the query must fail - throw new ISE( - "A record of size [%d] cannot be written to the array buffer at offset[%d] " - + "because it exceeds the buffer capacity[%d]. 
Try increasing druid.processing.buffer.sizeBytes", - recordSize, - recordOffset, - valBuffer.capacity() + aggregators.aggregateVector( + valBuffer, + dimIndex * recordSize, + startRow, + endRow ); - } + } else { + final int numRows = endRow - startRow; - if (!isUsedSlot(dimIndex)) { - initializeSlot(dimIndex); - } + for (int i = 0; i < numRows; i++) { + // +1 matches what hashFunction() would do. + final int dimIndex = keySpace[i] + 1; + + if (dimIndex < 0 || dimIndex >= cardinalityWithMissingValue) { + throw new IAE("Invalid dimIndex[%s]", dimIndex); + } + + vAggregationPositions[i] = dimIndex * recordSize; - for (int i = 0; i < aggregators.length; i++) { - aggregators[i].aggregate(valBuffer, recordOffset + aggregatorOffsets[i]); + initializeSlotIfNeeded(dimIndex); + } + + aggregators.aggregateVector( + valBuffer, + numRows, + vAggregationPositions, + Groupers.writeAggregationRows(vAggregationRows, startRow, endRow) + ); } return AggregateResult.ok(); } - private void initializeSlot(int dimIndex) + private void initializeSlotIfNeeded(int dimIndex) { final int index = dimIndex / Byte.SIZE; final int extraIndex = dimIndex % Byte.SIZE; - usedFlagBuffer.put(index, (byte) (usedFlagBuffer.get(index) | (1 << extraIndex))); + final int usedFlagByte = 1 << extraIndex; - final int recordOffset = dimIndex * recordSize; - for (int i = 0; i < aggregators.length; i++) { - aggregators[i].init(valBuffer, recordOffset + aggregatorOffsets[i]); + if ((usedFlagBuffer.get(index) & usedFlagByte) == 0) { + usedFlagBuffer.put(index, (byte) (usedFlagBuffer.get(index) | (1 << extraIndex))); + aggregators.init(valBuffer, dimIndex * recordSize); } } @@ -185,6 +223,7 @@ private boolean isUsedSlot(int dimIndex) final int index = dimIndex / Byte.SIZE; final int extraIndex = dimIndex % Byte.SIZE; final int usedFlagByte = 1 << extraIndex; + return (usedFlagBuffer.get(index) & usedFlagByte) != 0; } @@ -214,14 +253,36 @@ public IntGrouperHashFunction hashFunction() @Override public void close() { - 
for (BufferAggregator aggregator : aggregators) { - try { - aggregator.close(); + aggregators.close(); + } + + @Override + public CloseableIterator> iterator() + { + final CloseableIterator> iterator = iterator(false); + final ByteBuffer keyBuffer = ByteBuffer.allocate(Integer.BYTES); + return new CloseableIterator>() + { + @Override + public boolean hasNext() + { + return iterator.hasNext(); } - catch (Exception e) { - LOG.warn(e, "Could not close aggregator [%s], skipping.", aggregator); + + @Override + public Entry next() + { + final Entry integerEntry = iterator.next(); + keyBuffer.putInt(0, integerEntry.getKey()); + return new Entry<>(keyBuffer, integerEntry.getValues()); } - } + + @Override + public void close() throws IOException + { + iterator.close(); + } + }; } @Override @@ -269,10 +330,10 @@ public Entry next() findNext = true; - final Object[] values = new Object[aggregators.length]; + final Object[] values = new Object[aggregators.size()]; final int recordOffset = cur * recordSize; - for (int i = 0; i < aggregators.length; i++) { - values[i] = aggregators[i].get(valBuffer, recordOffset + aggregatorOffsets[i]); + for (int i = 0; i < aggregators.size(); i++) { + values[i] = aggregators.get(valBuffer, recordOffset, i); } return new Entry<>(cur - 1, values); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java index 5f54fad91331..17995798f1af 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java @@ -20,11 +20,13 @@ package org.apache.druid.query.groupby.epinephelinae; import com.google.common.base.Supplier; +import org.apache.commons.lang.mutable.MutableInt; import org.apache.druid.java.util.common.CloseableIterators; import 
org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.segment.ColumnSelectorFactory; import java.nio.ByteBuffer; import java.util.AbstractList; @@ -32,14 +34,14 @@ import java.util.Comparator; import java.util.List; import java.util.NoSuchElementException; +import java.util.function.ToIntFunction; -public class BufferHashGrouper extends AbstractBufferHashGrouper +public class BufferHashGrouper extends AbstractBufferHashGrouper implements VectorGrouper { private static final int MIN_INITIAL_BUCKETS = 4; private static final int DEFAULT_INITIAL_BUCKETS = 1024; private static final float DEFAULT_MAX_LOAD_FACTOR = 0.7f; - private final AggregatorFactory[] aggregatorFactories; private ByteBuffer buffer; private boolean initialized = false; @@ -58,19 +60,23 @@ public class BufferHashGrouper extends AbstractBufferHashGrouper bufferSupplier, final KeySerde keySerde, - final ColumnSelectorFactory columnSelectorFactory, - final AggregatorFactory[] aggregatorFactories, + final AggregatorAdapters aggregators, final int bufferGrouperMaxSize, final float maxLoadFactor, final int initialBuckets, final boolean useDefaultSorting ) { - super(bufferSupplier, keySerde, aggregatorFactories, bufferGrouperMaxSize); - this.aggregatorFactories = aggregatorFactories; + super(bufferSupplier, keySerde, aggregators, HASH_SIZE + keySerde.keySize(), bufferGrouperMaxSize); this.maxLoadFactor = maxLoadFactor > 0 ? maxLoadFactor : DEFAULT_MAX_LOAD_FACTOR; this.initialBuckets = initialBuckets > 0 ? 
Math.max(MIN_INITIAL_BUCKETS, initialBuckets) : DEFAULT_INITIAL_BUCKETS; @@ -79,14 +85,7 @@ public BufferHashGrouper( throw new IAE("Invalid maxLoadFactor[%f], must be < 1.0", maxLoadFactor); } - int offset = HASH_SIZE + keySize; - for (int i = 0; i < aggregatorFactories.length; i++) { - aggregators[i] = aggregatorFactories[i].factorizeBuffered(columnSelectorFactory); - aggregatorOffsets[i] = offset; - offset += aggregatorFactories[i].getMaxIntermediateSizeWithNulls(); - } - - this.bucketSize = offset; + this.bucketSize = HASH_SIZE + keySerde.keySize() + aggregators.spaceNeeded(); this.useDefaultSorting = useDefaultSorting; } @@ -132,15 +131,112 @@ public void init() } } + @Override + public void initVectorized(final int maxVectorSize) + { + if (!ByteBuffer.class.equals(keySerde.keyClazz())) { + throw new ISE("keyClazz[%s] must be ByteBuffer", keySerde.keyClazz()); + } + + if (keySize % Integer.BYTES != 0) { + throw new ISE("keySize[%s] must be a multiple of[%s]", keySize, Integer.BYTES); + } + + init(); + + this.vKeyBuffer = ByteBuffer.allocate(keySize); + this.vKeyHashCodes = new int[maxVectorSize]; + this.vAggregationPositions = new int[maxVectorSize]; + this.vAggregationRows = new int[maxVectorSize]; + } + + @Override + public AggregateResult aggregateVector(final int[] keySpace, final int startRow, final int endRow) + { + final int keyIntSize = keySize / Integer.BYTES; + final int numRows = endRow - startRow; + + // Initialize vKeyHashCodes: one int per key. + // Does *not* use hashFunction(). This is okay because the API of VectorGrouper does not expose any way of messing + // about with hash codes. 
+ for (int i = 0, rowStart = 0; i < numRows; i++, rowStart += keyIntSize) { + vKeyHashCodes[i] = Groupers.hashIntArray(keySpace, rowStart, keyIntSize); + } + + final MutableInt aggregationStartRow = new MutableInt(startRow); + final MutableInt aggregationNumRows = new MutableInt(0); + + for (int rowNum = 0, keySpacePosition = 0; rowNum < numRows; rowNum++, keySpacePosition += keyIntSize) { + // Copy current key into keyBuffer. + vKeyBuffer.rewind(); + for (int i = 0; i < keyIntSize; i++) { + vKeyBuffer.putInt(keySpace[keySpacePosition + i]); + } + vKeyBuffer.rewind(); + + // Find, and if the table is full, expand and find again. + int bucket = hashTable.findBucketWithAutoGrowth( + vKeyBuffer, + vKeyHashCodes[rowNum], + () -> { + if (aggregationNumRows.intValue() > 0) { + doAggregateVector(aggregationStartRow.intValue(), aggregationNumRows.intValue()); + aggregationStartRow.setValue(aggregationStartRow.intValue() + aggregationNumRows.intValue()); + aggregationNumRows.setValue(0); + } + } + ); + + if (bucket < 0) { + // This may just trigger a spill and get ignored, which is ok. If it bubbles up to the user, the message will + // be correct. + + // Aggregate any remaining rows. + if (aggregationNumRows.intValue() > 0) { + doAggregateVector(aggregationStartRow.intValue(), aggregationNumRows.intValue()); + } + + return Groupers.hashTableFull(rowNum); + } + + final int bucketStartOffset = hashTable.getOffsetForBucket(bucket); + final boolean bucketWasUsed = hashTable.isBucketUsed(bucket); + + // Set up key and initialize the aggs if this is a new bucket. + if (!bucketWasUsed) { + hashTable.initializeNewBucketKey(bucket, vKeyBuffer, vKeyHashCodes[rowNum]); + aggregators.init(hashTable.getTableBuffer(), bucketStartOffset + baseAggregatorOffset); + } + + // Schedule the current row for aggregation. + vAggregationPositions[aggregationNumRows.intValue()] = bucketStartOffset + Integer.BYTES + keySize; + aggregationNumRows.increment(); + } + + // Aggregate any remaining rows. 
+ if (aggregationNumRows.intValue() > 0) { + doAggregateVector(aggregationStartRow.intValue(), aggregationNumRows.intValue()); + } + + return AggregateResult.ok(); + } + @Override public boolean isInitialized() { return initialized; } + @Override + public ToIntFunction hashFunction() + { + return Groupers::hashObject; + } + @Override public void newBucketHook(int bucketOffset) { + // Nothing needed. } @Override @@ -152,7 +248,7 @@ public boolean canSkipAggregate(boolean bucketWasUsed, int bucketOffset) @Override public void afterAggregateHook(int bucketOffset) { - + // Nothing needed. } @Override @@ -163,6 +259,15 @@ public void reset() keySerde.reset(); } + @Override + @SuppressWarnings("unchecked") + public CloseableIterator> iterator() + { + // Unchecked cast, since this method is only called through the VectorGrouper interface, which uses + // ByteBuffer keys (and this is verified in initVectorized). + return (CloseableIterator) iterator(false); + } + @Override public CloseableIterator> iterator(boolean sorted) { @@ -201,7 +306,10 @@ public int size() if (useDefaultSorting) { comparator = keySerde.bufferComparator(); } else { - comparator = keySerde.bufferComparatorWithAggregators(aggregatorFactories, aggregatorOffsets); + comparator = keySerde.bufferComparatorWithAggregators( + aggregators.factories().toArray(new AggregatorFactory[0]), + aggregators.aggregatorPositions() + ); } // Sort offsets in-place. 
@@ -296,6 +404,16 @@ public void close() } } + private void doAggregateVector(final int startRow, final int numRows) + { + aggregators.aggregateVector( + hashTable.getTableBuffer(), + numRows, + vAggregationPositions, + Groupers.writeAggregationRows(vAggregationRows, startRow, startRow + numRows) + ); + } + private class BufferGrouperBucketUpdateHandler implements ByteBufferHashTable.BucketUpdateHandler { @Override @@ -314,14 +432,12 @@ public void handlePreTableSwap() public void handleBucketMove(int oldBucketOffset, int newBucketOffset, ByteBuffer oldBuffer, ByteBuffer newBuffer) { // relocate aggregators (see https://github.com/apache/incubator-druid/pull/4071) - for (int i = 0; i < aggregators.length; i++) { - aggregators[i].relocate( - oldBucketOffset + aggregatorOffsets[i], - newBucketOffset + aggregatorOffsets[i], - oldBuffer, - newBuffer - ); - } + aggregators.relocate( + oldBucketOffset + baseAggregatorOffset, + newBucketOffset + baseAggregatorOffset, + oldBuffer, + newBuffer + ); offsetList.add(newBucketOffset); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java index 01482797f4b9..cdead589f392 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java @@ -75,7 +75,6 @@ public static int calculateTableArenaSizeWithFixedAdditionalSize( protected int growthCount; - protected BucketUpdateHandler bucketUpdateHandler; public ByteBufferHashTable( @@ -254,18 +253,21 @@ protected void initializeNewBucketKey( * Find a bucket for a key, attempting to resize the table with adjustTableWhenFull() if possible. 
* * @param keyBuffer buffer containing the key - * @param keyHash hash of the key + * @param keyHash hash of the key + * * @return bucket number of the found bucket or -1 if a bucket could not be allocated after resizing. */ protected int findBucketWithAutoGrowth( final ByteBuffer keyBuffer, - final int keyHash + final int keyHash, + final Runnable preTableGrowthRunnable ) { int bucket = findBucket(canAllowNewBucket(), maxBuckets, tableBuffer, keyBuffer, keyHash); if (bucket < 0) { if (size < maxSizeForTesting) { + preTableGrowthRunnable.run(); adjustTableWhenFull(); bucket = findBucket(size < regrowthThreshold, maxBuckets, tableBuffer, keyBuffer, keyHash); } @@ -277,7 +279,7 @@ protected int findBucketWithAutoGrowth( /** * Finds the bucket into which we should insert a key. * - * @param keyBuffer key, must have exactly keySize bytes remaining. Will not be modified. + * @param keyBuffer key, must have exactly keySize bytes remaining. Will not be modified. * @param targetTableBuffer Need selectable buffer, since when resizing hash table, * findBucket() is used on the newly allocated table buffer * @@ -379,7 +381,9 @@ public int getGrowthCount() public interface BucketUpdateHandler { void handleNewBucket(int bucketOffset); + void handlePreTableSwap(); + void handleBucketMove(int oldBucketOffset, int newBucketOffset, ByteBuffer oldBuffer, ByteBuffer newBuffer); } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferKeySerde.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferKeySerde.java new file mode 100644 index 000000000000..f840c019a0f9 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferKeySerde.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.query.aggregation.AggregatorFactory; + +import java.nio.ByteBuffer; +import java.util.List; + +public class ByteBufferKeySerde implements Grouper.KeySerde +{ + private final int keySize; + + public ByteBufferKeySerde(final int keySize) + { + this.keySize = keySize; + } + + @Override + public int keySize() + { + return keySize; + } + + @Override + public Class keyClazz() + { + return ByteBuffer.class; + } + + @Override + public List getDictionary() + { + return ImmutableList.of(); + } + + @Override + public ByteBuffer toByteBuffer(ByteBuffer key) + { + return key; + } + + @Override + public ByteBuffer fromByteBuffer(ByteBuffer buffer, int position) + { + final ByteBuffer dup = buffer.duplicate(); + dup.position(position).limit(position + keySize); + return dup.slice(); + } + + @Override + public Grouper.BufferComparator bufferComparator() + { + // This class is used by segment processing engines, where bufferComparator will not be called. 
+ throw new UnsupportedOperationException(); + } + + @Override + public Grouper.BufferComparator bufferComparatorWithAggregators( + AggregatorFactory[] aggregatorFactories, + int[] aggregatorOffsets + ) + { + // This class is used by segment processing engines, where bufferComparatorWithAggregators will not be called. + throw new UnsupportedOperationException(); + } + + @Override + public void reset() + { + // No state, nothing to reset + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/CloseableGrouperIterator.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/CloseableGrouperIterator.java index 43f3bcb037a1..aabb1676778d 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/CloseableGrouperIterator.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/CloseableGrouperIterator.java @@ -34,14 +34,13 @@ public class CloseableGrouperIterator implements CloseableIterator grouper, - final boolean sorted, + final CloseableIterator> iterator, final Function, T> transformer, final Closeable closeable ) { this.transformer = transformer; - this.iterator = grouper.iterator(sorted); + this.iterator = iterator; this.closer = Closer.create(); closer.register(iterator); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java index 4bf116e26bf5..f5ff7ba627cb 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java @@ -264,9 +264,12 @@ public AggregateResult aggregate(KeyType key, int keyHash) synchronized (hashBasedGrouper) { if (!spilling) { - if (hashBasedGrouper.aggregate(key, keyHash).isOk()) { + final AggregateResult aggregateResult = hashBasedGrouper.aggregate(key, 
keyHash); + if (aggregateResult.isOk()) { return AggregateResult.ok(); } else { + // Expecting all-or-nothing behavior. + assert aggregateResult.getCount() == 0; spilling = true; } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 1b58dfe31b12..17098fcdce9e 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import com.google.common.collect.Maps; import org.apache.druid.collections.NonBlockingPool; import org.apache.druid.collections.ResourceHolder; @@ -34,9 +35,12 @@ import org.apache.druid.java.util.common.guava.BaseSequence; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.ColumnSelectorPlus; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.dimension.ColumnSelectorStrategyFactory; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingStringGroupByColumnSelectorStrategy; @@ -47,6 +51,7 @@ import org.apache.druid.query.groupby.epinephelinae.column.LongGroupByColumnSelectorStrategy; import org.apache.druid.query.groupby.epinephelinae.column.NullableValueGroupByColumnSelectorStrategy; import 
org.apache.druid.query.groupby.epinephelinae.column.StringGroupByColumnSelectorStrategy; +import org.apache.druid.query.groupby.epinephelinae.vector.VectorGroupByEngine; import org.apache.druid.query.groupby.strategy.GroupByStrategyV2; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.Cursor; @@ -106,23 +111,6 @@ public static Sequence process( throw new IAE("Should only have one interval, got[%s]", intervals); } - final Sequence cursors = storageAdapter.makeCursors( - Filters.toFilter(query.getDimFilter()), - intervals.get(0), - query.getVirtualColumns(), - query.getGranularity(), - false, - null - ); - - final boolean allSingleValueDims = query - .getDimensions() - .stream() - .allMatch(dimension -> { - final ColumnCapabilities columnCapabilities = storageAdapter.getColumnCapabilities(dimension.getDimension()); - return columnCapabilities != null && !columnCapabilities.hasMultipleValues(); - }); - final ResourceHolder bufferHolder = intermediateResultsBufferPool.take(); final String fudgeTimestampString = NullHandling.emptyToNullIfNeeded( @@ -133,6 +121,59 @@ public static Sequence process( ? 
null : DateTimes.utc(Long.parseLong(fudgeTimestampString)); + final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter())); + final Interval interval = Iterables.getOnlyElement(query.getIntervals()); + + final boolean doVectorize = QueryContexts.getVectorize(query).shouldVectorize( + VectorGroupByEngine.canVectorize(query, storageAdapter, filter, interval) + ); + + final Sequence result; + + if (doVectorize) { + result = VectorGroupByEngine.process( + query, + storageAdapter, + bufferHolder.get(), + fudgeTimestamp, + filter, + interval, + querySpecificConfig + ); + } else { + result = processNonVectorized( + query, + storageAdapter, + bufferHolder.get(), + fudgeTimestamp, + querySpecificConfig, + filter, + interval + ); + } + + return result.withBaggage(bufferHolder); + } + + private static Sequence processNonVectorized( + final GroupByQuery query, + final StorageAdapter storageAdapter, + final ByteBuffer processingBuffer, + @Nullable final DateTime fudgeTimestamp, + final GroupByQueryConfig querySpecificConfig, + @Nullable final Filter filter, + final Interval interval + ) + { + final Sequence cursors = storageAdapter.makeCursors( + filter, + interval, + query.getVirtualColumns(), + query.getGranularity(), + false, + null + ); + return cursors.flatMap( cursor -> new BaseSequence<>( new BaseSequence.IteratorMaker>() @@ -148,30 +189,33 @@ public GroupByEngineIterator make() ); GroupByColumnSelectorPlus[] dims = createGroupBySelectorPlus(selectorPlus); - final ByteBuffer buffer = bufferHolder.get(); + final int cardinalityForArrayAggregation = getCardinalityForArrayAggregation( + querySpecificConfig, + query, + storageAdapter, + processingBuffer + ); - // Check array-based aggregation is applicable - if (isArrayAggregateApplicable(querySpecificConfig, query, dims, storageAdapter, buffer)) { + if (cardinalityForArrayAggregation >= 0) { return new ArrayAggregateIterator( query, querySpecificConfig, cursor, - buffer, + 
processingBuffer, fudgeTimestamp, dims, - allSingleValueDims, - // There must be 0 or 1 dimension if isArrayAggregateApplicable() is true - dims.length == 0 ? 1 : storageAdapter.getDimensionCardinality(dims[0].getName()) + isAllSingleValueDims(storageAdapter, query.getDimensions()), + cardinalityForArrayAggregation ); } else { return new HashAggregateIterator( query, querySpecificConfig, cursor, - buffer, + processingBuffer, fudgeTimestamp, dims, - allSingleValueDims + isAllSingleValueDims(storageAdapter, query.getDimensions()) ); } } @@ -183,56 +227,75 @@ public void cleanup(GroupByEngineIterator iterFromMake) } } ) - ).withBaggage(bufferHolder); + ); } - private static boolean isArrayAggregateApplicable( + /** + * Returns the cardinality of array needed to do array-based aggregation, or -1 if array-based aggregation + * is impossible. + */ + public static int getCardinalityForArrayAggregation( GroupByQueryConfig querySpecificConfig, GroupByQuery query, - GroupByColumnSelectorPlus[] dims, StorageAdapter storageAdapter, ByteBuffer buffer ) { if (querySpecificConfig.isForceHashAggregation()) { - return false; + return -1; } + final List dimensions = query.getDimensions(); final ColumnCapabilities columnCapabilities; final int cardinality; // Find cardinality - if (dims.length == 0) { + if (dimensions.isEmpty()) { columnCapabilities = null; cardinality = 1; - } else if (dims.length == 1) { - columnCapabilities = storageAdapter.getColumnCapabilities(dims[0].getName()); - cardinality = storageAdapter.getDimensionCardinality(dims[0].getName()); + } else if (dimensions.size() == 1) { + final String columnName = Iterables.getOnlyElement(dimensions).getDimension(); + columnCapabilities = storageAdapter.getColumnCapabilities(columnName); + cardinality = storageAdapter.getDimensionCardinality(columnName); } else { columnCapabilities = null; cardinality = -1; // ArrayAggregateIterator is not available } - // Choose array-based aggregation if the grouping key is a single 
string dimension of a - // known cardinality - if ((columnCapabilities == null || columnCapabilities.getType().equals(ValueType.STRING)) - && cardinality > 0) { - final AggregatorFactory[] aggregatorFactories = query - .getAggregatorSpecs() - .toArray(new AggregatorFactory[0]); + // Choose array-based aggregation if the grouping key is a single string dimension of a known cardinality + if (columnCapabilities != null && columnCapabilities.getType().equals(ValueType.STRING) && cardinality > 0) { + final AggregatorFactory[] aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]); final long requiredBufferCapacity = BufferArrayGrouper.requiredBufferCapacity( cardinality, aggregatorFactories ); // Check that all keys and aggregated values can be contained the buffer - return requiredBufferCapacity <= buffer.capacity(); + return requiredBufferCapacity <= buffer.capacity() ? cardinality : -1; } - return false; + return -1; + } + + /** + * Checks whether all "dimensions" are either single-valued or nonexistent. 
+ */ + public static boolean isAllSingleValueDims( + final StorageAdapter adapter, + final List dimensions + ) + { + return dimensions + .stream() + .allMatch( + dimension -> { + final ColumnCapabilities columnCapabilities = adapter.getColumnCapabilities(dimension.getDimension()); + return columnCapabilities == null || !columnCapabilities.hasMultipleValues(); + }); } - private static class GroupByStrategyFactory implements ColumnSelectorStrategyFactory + private static class GroupByStrategyFactory + implements ColumnSelectorStrategyFactory { @Override public GroupByColumnSelectorStrategy makeColumnSelectorStrategy( @@ -288,7 +351,7 @@ public GroupByEngineIterator( final GroupByQueryConfig querySpecificConfig, final Cursor cursor, final ByteBuffer buffer, - final DateTime fudgeTimestamp, + @Nullable final DateTime fudgeTimestamp, final GroupByColumnSelectorPlus[] dims, final boolean allSingleValueDims ) @@ -317,8 +380,7 @@ private CloseableGrouperIterator initNewDelegate() } return new CloseableGrouperIterator<>( - grouper, - false, + grouper.iterator(false), entry -> { Map theMap = Maps.newLinkedHashMap(); @@ -425,7 +487,7 @@ public HashAggregateIterator( GroupByQueryConfig querySpecificConfig, Cursor cursor, ByteBuffer buffer, - DateTime fudgeTimestamp, + @Nullable DateTime fudgeTimestamp, GroupByColumnSelectorPlus[] dims, boolean allSingleValueDims ) @@ -444,9 +506,10 @@ protected Grouper newGrouper() return new BufferHashGrouper<>( Suppliers.ofInstance(buffer), keySerde, - cursor.getColumnSelectorFactory(), - query.getAggregatorSpecs() - .toArray(new AggregatorFactory[0]), + AggregatorAdapters.factorizeBuffered( + cursor.getColumnSelectorFactory(), + query.getAggregatorSpecs() + ), querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(), querySpecificConfig.getBufferGrouperInitialBuckets(), @@ -577,7 +640,7 @@ public ArrayAggregateIterator( GroupByQueryConfig querySpecificConfig, Cursor cursor, ByteBuffer buffer, - 
DateTime fudgeTimestamp, + @Nullable DateTime fudgeTimestamp, GroupByColumnSelectorPlus[] dims, boolean allSingleValueDims, int cardinality @@ -599,9 +662,7 @@ protected IntGrouper newGrouper() { return new BufferArrayGrouper( Suppliers.ofInstance(buffer), - cursor.getColumnSelectorFactory(), - query.getAggregatorSpecs() - .toArray(new AggregatorFactory[0]), + AggregatorAdapters.factorizeBuffered(cursor.getColumnSelectorFactory(), query.getAggregatorSpecs()), cardinality ); } @@ -675,7 +736,7 @@ private void aggregateMultiValueDims(IntGrouper grouper) protected void putToMap(Integer key, Map map) { if (dim != null) { - if (key != -1) { + if (key != GroupByColumnSelectorStrategy.GROUP_BY_MISSING_VALUE) { map.put( dim.getOutputName(), ((DimensionSelector) dim.getSelector()).lookupName(key) @@ -687,7 +748,7 @@ protected void putToMap(Integer key, Map map) } } - private static void convertRowTypesToOutputTypes(List dimensionSpecs, Map rowMap) + public static void convertRowTypesToOutputTypes(List dimensionSpecs, Map rowMap) { for (DimensionSpec dimSpec : dimensionSpecs) { final ValueType outputType = dimSpec.getOutputType(); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java index 05664f516d1d..596254ecd23b 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java @@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.query.aggregation.AggregatorFactory; +import javax.annotation.Nullable; import java.io.Closeable; import java.nio.ByteBuffer; import java.util.Arrays; @@ -36,10 +37,12 @@ * Groupers aggregate metrics from rows that they typically get from a ColumnSelectorFactory, under * grouping keys that some outside driver is passing in. 
They can also iterate over the grouped * rows after the aggregation is done. - *
<p>
+ * * They work sort of like a map of KeyType to aggregated values, except they don't support * random lookups. * + * See {@link VectorGrouper} for a vectorized version. + * * @param type of the key that will be passed in */ public interface Grouper extends Closeable @@ -89,7 +92,7 @@ default AggregateResult aggregate(KeyType key) default ToIntFunction hashFunction() { - return Groupers::hash; + return Groupers::hashObject; } /** @@ -247,6 +250,7 @@ interface KeySerde * * @return serialized key, or null if we are unable to serialize more keys due to resource limits */ + @Nullable ByteBuffer toByteBuffer(T key); /** diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Groupers.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Groupers.java index d173b04f5181..a1d8dbf816eb 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Groupers.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Groupers.java @@ -19,6 +19,7 @@ package org.apache.druid.query.groupby.epinephelinae; +import javax.annotation.Nullable; import java.nio.ByteBuffer; public class Groupers @@ -28,17 +29,22 @@ private Groupers() // No instantiation } - static final AggregateResult DICTIONARY_FULL = AggregateResult.failure( + private static final AggregateResult DICTIONARY_FULL_ZERO_COUNT = AggregateResult.partial( + 0, "Not enough dictionary space to execute this query. Try increasing " + "druid.query.groupBy.maxMergingDictionarySize or enable disk spilling by setting " + "druid.query.groupBy.maxOnDiskStorage to a positive number." ); - static final AggregateResult HASH_TABLE_FULL = AggregateResult.failure( + + private static final AggregateResult HASH_TABLE_FULL_ZERO_COUNT = AggregateResult.partial( + 0, "Not enough aggregation buffer space to execute this query. 
Try increasing " + "druid.processing.buffer.sizeBytes or enable disk spilling by setting " + "druid.query.groupBy.maxOnDiskStorage to a positive number." ); + private static final int USED_FLAG_MASK = 0x7fffffff; + private static final int C1 = 0xcc9e2d51; private static final int C2 = 0x1b873593; @@ -50,18 +56,46 @@ private Groupers() * MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author * hereby disclaims copyright to this source code. */ - static int smear(int hashCode) + private static int smear(int hashCode) { return C2 * Integer.rotateLeft(hashCode * C1, 15); } - public static int hash(final Object obj) + public static AggregateResult dictionaryFull(final int count) + { + if (count == 0) { + return DICTIONARY_FULL_ZERO_COUNT; + } else { + return AggregateResult.partial(count, DICTIONARY_FULL_ZERO_COUNT.getReason()); + } + } + + public static AggregateResult hashTableFull(final int count) + { + if (count == 0) { + return HASH_TABLE_FULL_ZERO_COUNT; + } else { + return AggregateResult.partial(count, HASH_TABLE_FULL_ZERO_COUNT.getReason()); + } + } + + public static int hashObject(final Object obj) { // Mask off the high bit so we can use that to determine if a bucket is used or not. - // Also apply the smear function, to improve distribution. - final int code = obj.hashCode(); - return smear(code) & 0x7fffffff; + // Also apply the "smear" function, to improve distribution. + return smear(obj.hashCode()) & USED_FLAG_MASK; + } + public static int hashIntArray(final int[] ints, final int start, final int length) + { + // Similar to what Arrays.hashCode would do. + // Also apply the "smear" function, to improve distribution. 
+ int hashCode = 1; + for (int i = 0; i < length; i++) { + hashCode = 31 * hashCode + ints[start + i]; + } + + return smear(hashCode) & USED_FLAG_MASK; } static int getUsedFlag(int keyHash) @@ -76,4 +110,22 @@ public static ByteBuffer getSlice(ByteBuffer buffer, int sliceSize, int i) slice.limit(slice.position() + sliceSize); return slice.slice(); } + + /** + * Write ints from "start" to "end" into "scratch", if start != 0. Otherwise, return null. + */ + @Nullable + public static int[] writeAggregationRows(final int[] scratch, final int start, final int end) + { + if (start == 0) { + return null; + } else { + final int numRows = end - start; + for (int i = 0; i < numRows; i++) { + scratch[i] = start + i; + } + + return scratch; + } + } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java index c8d97eafc1b5..4c71c31167ad 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java @@ -24,8 +24,8 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.segment.ColumnSelectorFactory; import java.nio.ByteBuffer; import java.util.AbstractList; @@ -40,8 +40,6 @@ public class LimitedBufferHashGrouper extends AbstractBufferHashGrouper private static final int DEFAULT_INITIAL_BUCKETS = 1024; private static final float DEFAULT_MAX_LOAD_FACTOR = 0.7f; - private final AggregatorFactory[] aggregatorFactories; - // Limit to apply to results. 
private int limit; @@ -66,8 +64,7 @@ public class LimitedBufferHashGrouper extends AbstractBufferHashGrouper public LimitedBufferHashGrouper( final Supplier bufferSupplier, final Grouper.KeySerde keySerde, - final ColumnSelectorFactory columnSelectorFactory, - final AggregatorFactory[] aggregatorFactories, + final AggregatorAdapters aggregators, final int bufferGrouperMaxSize, final float maxLoadFactor, final int initialBuckets, @@ -75,7 +72,7 @@ public LimitedBufferHashGrouper( final boolean sortHasNonGroupingFields ) { - super(bufferSupplier, keySerde, aggregatorFactories, bufferGrouperMaxSize); + super(bufferSupplier, keySerde, aggregators, HASH_SIZE + keySerde.keySize(), bufferGrouperMaxSize); this.maxLoadFactor = maxLoadFactor > 0 ? maxLoadFactor : DEFAULT_MAX_LOAD_FACTOR; this.initialBuckets = initialBuckets > 0 ? Math.max(MIN_INITIAL_BUCKETS, initialBuckets) : DEFAULT_INITIAL_BUCKETS; this.limit = limit; @@ -85,18 +82,9 @@ public LimitedBufferHashGrouper( throw new IAE("Invalid maxLoadFactor[%f], must be < 1.0", maxLoadFactor); } - int offset = HASH_SIZE + keySize; - this.aggregatorFactories = aggregatorFactories; - for (int i = 0; i < aggregatorFactories.length; i++) { - aggregators[i] = aggregatorFactories[i].factorizeBuffered(columnSelectorFactory); - aggregatorOffsets[i] = offset; - offset += aggregatorFactories[i].getMaxIntermediateSizeWithNulls(); - } - // For each bucket, store an extra field indicating the bucket's current index within the heap when - // pushing down limits - offset += Integer.BYTES; - this.bucketSize = offset; + // pushing down limits (size Integer.BYTES). 
+ this.bucketSize = HASH_SIZE + keySerde.keySize() + Integer.BYTES + aggregators.spaceNeeded(); } @Override @@ -374,8 +362,8 @@ private Comparator makeHeapComparator() return new Comparator() { final BufferComparator bufferComparator = keySerde.bufferComparatorWithAggregators( - aggregatorFactories, - aggregatorOffsets + aggregators.factories().toArray(new AggregatorFactory[0]), + aggregators.aggregatorPositions() ); @Override @@ -511,14 +499,12 @@ public void adjustTableWhenFull() offsetHeap.setAt(i, newBucketOffset); // relocate aggregators (see https://github.com/apache/incubator-druid/pull/4071) - for (int j = 0; j < aggregators.length; j++) { - aggregators[j].relocate( - oldBucketOffset + aggregatorOffsets[j], - newBucketOffset + aggregatorOffsets[j], - tableBuffer, - newTableBuffer - ); - } + aggregators.relocate( + oldBucketOffset + baseAggregatorOffset, + newBucketOffset + baseAggregatorOffset, + tableBuffer, + newTableBuffer + ); } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java index 0b82472f3513..8ab33b11ecc1 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java @@ -423,8 +423,7 @@ public static CloseableGrouperIterator makeGrouperIterator( final boolean includeTimestamp = GroupByStrategyV2.getUniversalTimestamp(query) == null; return new CloseableGrouperIterator<>( - grouper, - true, + grouper.iterator(true), new Function, Row>() { @Override @@ -820,7 +819,10 @@ public int compare(Grouper.Entry entry1, Grouper.Entry @Override public int compare(Grouper.Entry entry1, Grouper.Entry entry2) { - final int timeCompare = Longs.compare((long) entry1.getKey().getKey()[0], (long) entry2.getKey().getKey()[0]); + final int timeCompare = 
Longs.compare( + (long) entry1.getKey().getKey()[0], + (long) entry2.getKey().getKey()[0] + ); if (timeCompare != 0) { return timeCompare; @@ -917,8 +919,10 @@ private static int compareDimsInRowsWithAggs( // use natural comparison cmp = Comparators.naturalNullsFirst().compare(lhs, rhs); } else { - cmp = comparator.compare(DimensionHandlerUtils.convertObjectToString(lhs), - DimensionHandlerUtils.convertObjectToString(rhs)); + cmp = comparator.compare( + DimensionHandlerUtils.convertObjectToString(lhs), + DimensionHandlerUtils.convertObjectToString(rhs) + ); } if (cmp != 0) { @@ -1624,7 +1628,8 @@ private class FloatRowBasedKeySerdeHelper implements RowBasedKeySerdeHelper FloatRowBasedKeySerdeHelper( int keyBufferPosition, boolean pushLimitDown, - @Nullable StringComparator stringComparator) + @Nullable StringComparator stringComparator + ) { this.keyBufferPosition = keyBufferPosition; if (isPrimitiveComparable(pushLimitDown, stringComparator)) { diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java index 5dce77ffcb86..0c17867470ed 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java @@ -33,6 +33,7 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.query.BaseQuery; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; import org.apache.druid.segment.ColumnSelectorFactory; @@ -42,6 +43,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; import java.util.HashSet; 
import java.util.Iterator; @@ -56,11 +58,12 @@ public class SpillingGrouper implements Grouper { private static final Logger log = new Logger(SpillingGrouper.class); - - private final Grouper grouper; - private static final AggregateResult DISK_FULL = AggregateResult.failure( + private static final AggregateResult DISK_FULL = AggregateResult.partial( + 0, "Not enough disk space to execute this query. Try raising druid.query.groupBy.maxOnDiskStorage." ); + + private final Grouper grouper; private final KeySerde keySerde; private final LimitedTemporaryStorage temporaryStorage; private final ObjectMapper spillMapper; @@ -97,8 +100,7 @@ public SpillingGrouper( LimitedBufferHashGrouper limitGrouper = new LimitedBufferHashGrouper<>( bufferSupplier, keySerde, - columnSelectorFactory, - aggregatorFactories, + AggregatorAdapters.factorizeBuffered(columnSelectorFactory, Arrays.asList(aggregatorFactories)), bufferGrouperMaxSize, bufferGrouperMaxLoadFactor, bufferGrouperInitialBuckets, @@ -120,8 +122,7 @@ public SpillingGrouper( this.grouper = new BufferHashGrouper<>( bufferSupplier, keySerde, - columnSelectorFactory, - aggregatorFactories, + AggregatorAdapters.factorizeBuffered(columnSelectorFactory, Arrays.asList(aggregatorFactories)), bufferGrouperMaxSize, bufferGrouperMaxLoadFactor, bufferGrouperInitialBuckets, @@ -134,8 +135,7 @@ public SpillingGrouper( this.grouper = new BufferHashGrouper<>( bufferSupplier, keySerde, - columnSelectorFactory, - aggregatorFactories, + AggregatorAdapters.factorizeBuffered(columnSelectorFactory, Arrays.asList(aggregatorFactories)), bufferGrouperMaxSize, bufferGrouperMaxLoadFactor, bufferGrouperInitialBuckets, @@ -169,6 +169,9 @@ public AggregateResult aggregate(KeyType key, int keyHash) if (result.isOk() || !spillingAllowed || temporaryStorage.maxSize() <= 0) { return result; } else { + // Expecting all-or-nothing behavior. + assert result.getCount() == 0; + // Warning: this can potentially block up a processing thread for a while. 
try { spill(); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/VectorGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/VectorGrouper.java new file mode 100644 index 000000000000..e802a637006d --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/VectorGrouper.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae; + +import org.apache.druid.java.util.common.parsers.CloseableIterator; + +import java.io.Closeable; +import java.nio.ByteBuffer; + +/** + * Like a {@link Grouper}, but vectorized. Keys are always int arrays, so there is no generic type parameter KeyType. + *
<p>
+ * This interface is designed such that an implementation can implement both Grouper and VectorGrouper. Of course, + * it would generally only make sense for a particular instance to be called with one set of functionality or the + * other. + */ +public interface VectorGrouper extends Closeable +{ + /** + * Initialize the grouper. This method needs to be called before calling {@link #aggregateVector(int[], int, int)}. + */ + void initVectorized(int maxVectorSize); + + /** + * Aggregate the current vector of rows from "startRow" to "endRow" using the provided keys. + * + * @param keySpace array holding keys, chunked into ints. First (endRow - startRow) keys + * must be valid. + * @param startRow row to start at (inclusive). + * @param endRow row to end at (exclusive). + * + * @return result that indicates how many keys were aggregated (may be partial due to resource limits) + */ + AggregateResult aggregateVector(int[] keySpace, int startRow, int endRow); + + /** + * Reset the grouper to its initial state. + */ + void reset(); + + /** + * Close the grouper and release associated resources. + */ + @Override + void close(); + + /** + * Iterate through entries. + *
<p>
+ * Some implementations allow writes even after this method is called. After you are done with the iterator + * returned by this method, you should either call {@link #close()} (if you are done with the VectorGrouper) or + * {@link #reset()} (if you want to reuse it). + *
<p>
+ * Callers must process and discard the returned {@link Grouper.Entry}s immediately, because the keys may + * be reused. + * + * @return entry iterator + */ + CloseableIterator> iterator(); +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DoubleGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DoubleGroupByVectorColumnSelector.java new file mode 100644 index 000000000000..2802e3a8aec8 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DoubleGroupByVectorColumnSelector.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import java.nio.ByteBuffer; +import java.util.Map; + +public class DoubleGroupByVectorColumnSelector implements GroupByVectorColumnSelector +{ + private final VectorValueSelector selector; + + DoubleGroupByVectorColumnSelector(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public int getGroupingKeySize() + { + return 2; + } + + @Override + public void writeKeys( + final int[] keySpace, + final int keySize, + final int keyOffset, + final int startRow, + final int endRow + ) + { + final double[] vector = selector.getDoubleVector(); + + for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) { + final long longValue = Double.doubleToLongBits(vector[i]); + keySpace[j] = (int) (longValue >>> 32); + keySpace[j + 1] = (int) (longValue & 0xffffffffL); + } + } + + @Override + public void writeKeyToResultRow( + final String outputName, + final ByteBuffer keyBuffer, + final int keyOffset, + final Map resultMap + ) + { + final double value = keyBuffer.getDouble(keyOffset * Integer.BYTES); + resultMap.put(outputName, value); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/FloatGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/FloatGroupByVectorColumnSelector.java new file mode 100644 index 000000000000..5adbdb1f14ff --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/FloatGroupByVectorColumnSelector.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import java.nio.ByteBuffer; +import java.util.Map; + +public class FloatGroupByVectorColumnSelector implements GroupByVectorColumnSelector +{ + private final VectorValueSelector selector; + + FloatGroupByVectorColumnSelector(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public int getGroupingKeySize() + { + return 1; + } + + @Override + public void writeKeys( + final int[] keySpace, + final int keySize, + final int keyOffset, + final int startRow, + final int endRow + ) + { + final float[] vector = selector.getFloatVector(); + + for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) { + keySpace[j] = Float.floatToIntBits(vector[i]); + } + } + + @Override + public void writeKeyToResultRow( + final String outputName, + final ByteBuffer keyBuffer, + final int keyOffset, + final Map resultMap + ) + { + final float value = Float.intBitsToFloat(keyBuffer.getInt(keyOffset * Integer.BYTES)); + resultMap.put(outputName, value); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnSelector.java new file mode 100644 index 
000000000000..3cc415368eb9 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnSelector.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import java.nio.ByteBuffer; +import java.util.Map; + +public interface GroupByVectorColumnSelector +{ + int getGroupingKeySize(); + + void writeKeys(int[] keySpace, int keySize, int keyOffset, int startRow, int endRow); + + void writeKeyToResultRow( + String outputName, + ByteBuffer keyBuffer, + int keyOffset, + Map resultMap + ); +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnStrategizer.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnStrategizer.java new file mode 100644 index 000000000000..c14041cdeae4 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnStrategizer.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import org.apache.druid.query.dimension.VectorColumnStrategizer; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorValueSelector; + +public class GroupByVectorColumnStrategizer implements VectorColumnStrategizer +{ + private static final GroupByVectorColumnStrategizer INSTANCE = new GroupByVectorColumnStrategizer(); + + private GroupByVectorColumnStrategizer() + { + // Singleton. 
+ } + + public static GroupByVectorColumnStrategizer instance() + { + return INSTANCE; + } + + @Override + public GroupByVectorColumnSelector makeSingleValueDimensionStrategy(final SingleValueDimensionVectorSelector selector) + { + return new SingleValueStringGroupByVectorColumnSelector(selector); + } + + @Override + public GroupByVectorColumnSelector makeMultiValueDimensionStrategy(final MultiValueDimensionVectorSelector selector) + { + throw new UnsupportedOperationException("Multi-value dimensions not yet implemented for vectorized groupBys"); + } + + @Override + public GroupByVectorColumnSelector makeFloatStrategy(final VectorValueSelector selector) + { + return new FloatGroupByVectorColumnSelector(selector); + } + + @Override + public GroupByVectorColumnSelector makeDoubleStrategy(final VectorValueSelector selector) + { + return new DoubleGroupByVectorColumnSelector(selector); + } + + @Override + public GroupByVectorColumnSelector makeLongStrategy(final VectorValueSelector selector) + { + return new LongGroupByVectorColumnSelector(selector); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/LongGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/LongGroupByVectorColumnSelector.java new file mode 100644 index 000000000000..6ddbd99b4e84 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/LongGroupByVectorColumnSelector.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import java.nio.ByteBuffer; +import java.util.Map; + +public class LongGroupByVectorColumnSelector implements GroupByVectorColumnSelector +{ + private final VectorValueSelector selector; + + LongGroupByVectorColumnSelector(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public int getGroupingKeySize() + { + return 2; + } + + @Override + public void writeKeys( + final int[] keySpace, + final int keySize, + final int keyOffset, + final int startRow, + final int endRow + ) + { + final long[] vector = selector.getLongVector(); + + for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) { + keySpace[j] = (int) (vector[i] >>> 32); + keySpace[j + 1] = (int) (vector[i] & 0xffffffffL); + } + } + + @Override + public void writeKeyToResultRow( + final String outputName, + final ByteBuffer keyBuffer, + final int keyOffset, + final Map resultMap + ) + { + final long value = keyBuffer.getLong(keyOffset * Integer.BYTES); + resultMap.put(outputName, value); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/SingleValueStringGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/SingleValueStringGroupByVectorColumnSelector.java new file mode 100644 index 000000000000..6a9b4289821f --- /dev/null +++ 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/SingleValueStringGroupByVectorColumnSelector.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; + +import java.nio.ByteBuffer; +import java.util.Map; + +public class SingleValueStringGroupByVectorColumnSelector implements GroupByVectorColumnSelector +{ + private final SingleValueDimensionVectorSelector selector; + + SingleValueStringGroupByVectorColumnSelector(final SingleValueDimensionVectorSelector selector) + { + this.selector = selector; + } + + @Override + public int getGroupingKeySize() + { + return 1; + } + + @Override + public void writeKeys( + final int[] keySpace, + final int keySize, + final int keyOffset, + final int startRow, + final int endRow + ) + { + final int[] rowVector = selector.getRowVector(); + + for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) { + keySpace[j] = rowVector[i]; + } + } + + @Override + public void writeKeyToResultRow( + final String outputName, + final ByteBuffer keyBuffer, + final int keyOffset, + final Map 
resultMap + ) + { + final int id = keyBuffer.getInt(keyOffset * Integer.BYTES); + resultMap.put(outputName, selector.lookupName(id)); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java new file mode 100644 index 000000000000..98d9a465358b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -0,0 +1,428 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import com.google.common.base.Suppliers; +import org.apache.druid.data.input.MapBasedRow; +import org.apache.druid.data.input.Row; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.guava.BaseSequence; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.aggregation.AggregatorAdapters; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.epinephelinae.AggregateResult; +import org.apache.druid.query.groupby.epinephelinae.BufferArrayGrouper; +import org.apache.druid.query.groupby.epinephelinae.BufferHashGrouper; +import org.apache.druid.query.groupby.epinephelinae.ByteBufferKeySerde; +import org.apache.druid.query.groupby.epinephelinae.CloseableGrouperIterator; +import org.apache.druid.query.groupby.epinephelinae.GroupByQueryEngineV2; +import org.apache.druid.query.groupby.epinephelinae.Grouper; +import org.apache.druid.query.groupby.epinephelinae.VectorGrouper; +import org.apache.druid.query.vector.VectorCursorGranularizer; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorCursor; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import 
java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.stream.Collectors; + +public class VectorGroupByEngine +{ + private VectorGroupByEngine() + { + // No instantiation. + } + + public static boolean canVectorize( + final GroupByQuery query, + final StorageAdapter adapter, + @Nullable final Filter filter, + final Interval interval + ) + { + // Not yet supported (each is checked below): + // 1) Multi-value dimensions. + // 2) Dimension specs, aggregators, or adapters that cannot vectorize this query. + return GroupByQueryEngineV2.isAllSingleValueDims(adapter, query.getDimensions()) + && query.getDimensions().stream().allMatch(DimensionSpec::canVectorize) + && query.getAggregatorSpecs().stream().allMatch(AggregatorFactory::canVectorize) + && adapter.canVectorize(filter, interval, query.getVirtualColumns(), false); + } + + public static Sequence process( + final GroupByQuery query, + final StorageAdapter storageAdapter, + final ByteBuffer processingBuffer, + @Nullable final DateTime fudgeTimestamp, + @Nullable final Filter filter, + final Interval interval, + final GroupByQueryConfig config + ) + { + if (!canVectorize(query, storageAdapter, filter, interval)) { + throw new ISE("Cannot vectorize"); + } + + return new BaseSequence<>( + new BaseSequence.IteratorMaker>() + { + @Override + public CloseableIterator make() + { + final VectorCursor cursor = storageAdapter.makeVectorCursor( + filter, // use the filter that canVectorize() was checked against + interval, + query.getVirtualColumns(), + false, + QueryContexts.getVectorSize(query), + null + ); + + if (cursor == null) { + // Return empty iterator. + return new CloseableIterator() + { + @Override + public boolean hasNext() + { + return false; + } + + @Override + public Row next() + { + throw new NoSuchElementException(); + } + + @Override + public void close() + { + // Nothing to do.
+ } + }; + } + + try { + final VectorColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); + final List dimensions = query.getDimensions().stream().map( + dimensionSpec -> + DimensionHandlerUtils.makeVectorProcessor( + dimensionSpec, + GroupByVectorColumnStrategizer.instance(), + columnSelectorFactory + ) + ).collect(Collectors.toList()); + + return new VectorGroupByEngineIterator( + query, + config, + storageAdapter, + cursor, + interval, + dimensions, + processingBuffer, + fudgeTimestamp + ); + } + catch (Throwable e) { + try { + cursor.close(); + } + catch (Throwable e2) { + e.addSuppressed(e2); + } + throw e; + } + } + + @Override + public void cleanup(CloseableIterator iterFromMake) + { + try { + iterFromMake.close(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + } + ); + } + + private static class VectorGroupByEngineIterator implements CloseableIterator + { + private final GroupByQuery query; + private final GroupByQueryConfig querySpecificConfig; + private final StorageAdapter storageAdapter; + private final VectorCursor cursor; + private final List selectors; + private final ByteBuffer processingBuffer; + private final DateTime fudgeTimestamp; + private final int keySize; + private final int[] keySpace; + private final Grouper.KeySerde keySerde; + private final VectorGrouper vectorGrouper; + + @Nullable + private final VectorCursorGranularizer granulizer; + + // Granularity-bucket iterator and current bucket.
+ private final Iterator bucketIterator; + + @Nullable + private Interval bucketInterval; + + private int partiallyAggregatedRows = -1; + + @Nullable + private CloseableGrouperIterator delegate = null; + + VectorGroupByEngineIterator( + final GroupByQuery query, + final GroupByQueryConfig config, + final StorageAdapter storageAdapter, + final VectorCursor cursor, + final Interval cursorInterval, + final List selectors, + final ByteBuffer processingBuffer, + @Nullable final DateTime fudgeTimestamp + ) + { + this.query = query; + this.querySpecificConfig = config; + this.storageAdapter = storageAdapter; + this.cursor = cursor; + this.selectors = selectors; + this.processingBuffer = processingBuffer; + this.fudgeTimestamp = fudgeTimestamp; + this.keySize = selectors.stream().mapToInt(GroupByVectorColumnSelector::getGroupingKeySize).sum(); + this.keySpace = new int[keySize * cursor.getMaxVectorSize()]; + this.keySerde = new ByteBufferKeySerde(keySize * Integer.BYTES); + this.vectorGrouper = makeGrouper(); + this.granulizer = VectorCursorGranularizer.create(storageAdapter, cursor, query.getGranularity(), cursorInterval); + + if (granulizer != null) { + this.bucketIterator = granulizer.getBucketIterable().iterator(); + } else { + this.bucketIterator = Collections.emptyIterator(); + } + + this.bucketInterval = this.bucketIterator.hasNext() ?
this.bucketIterator.next() : null; + } + + @Override + public Row next() + { + if (delegate == null || !delegate.hasNext()) { + throw new NoSuchElementException(); + } + + return delegate.next(); + } + + @Override + public boolean hasNext() + { + if (delegate != null && delegate.hasNext()) { + return true; + } else { + final boolean moreToRead = !cursor.isDone() || partiallyAggregatedRows >= 0; + + if (bucketInterval != null && moreToRead) { + while (delegate == null || !delegate.hasNext()) { + if (delegate != null) { + delegate.close(); + vectorGrouper.reset(); + } + + delegate = initNewDelegate(); + } + return true; + } else { + return false; + } + } + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + + @Override + public void close() + { + cursor.close(); + + if (delegate != null) { + delegate.close(); + } + } + + private VectorGrouper makeGrouper() + { + final VectorGrouper grouper; + + final int cardinalityForArrayAggregation = GroupByQueryEngineV2.getCardinalityForArrayAggregation( + querySpecificConfig, + query, + storageAdapter, + processingBuffer + ); + + if (cardinalityForArrayAggregation >= 0) { + grouper = new BufferArrayGrouper( + Suppliers.ofInstance(processingBuffer), + AggregatorAdapters.factorizeVector( + cursor.getColumnSelectorFactory(), + query.getAggregatorSpecs() + ), + cardinalityForArrayAggregation + ); + } else { + grouper = new BufferHashGrouper<>( + Suppliers.ofInstance(processingBuffer), + keySerde, + AggregatorAdapters.factorizeVector( + cursor.getColumnSelectorFactory(), + query.getAggregatorSpecs() + ), + querySpecificConfig.getBufferGrouperMaxSize(), + querySpecificConfig.getBufferGrouperMaxLoadFactor(), + querySpecificConfig.getBufferGrouperInitialBuckets(), + true + ); + } + + grouper.initVectorized(cursor.getMaxVectorSize()); + + return grouper; + } + + private CloseableGrouperIterator initNewDelegate() + { + // Method must not be called unless there's a current bucketInterval.
+ assert bucketInterval != null; + + final DateTime timestamp = fudgeTimestamp != null + ? fudgeTimestamp + : query.getGranularity().toDateTime(bucketInterval.getStartMillis()); + + while (!cursor.isDone()) { + final int startOffset; + + if (partiallyAggregatedRows < 0) { + granulizer.setCurrentOffsets(bucketInterval); + startOffset = granulizer.getStartOffset(); + } else { + startOffset = granulizer.getStartOffset() + partiallyAggregatedRows; + } + + if (granulizer.getEndOffset() > startOffset) { + // Write keys to the keySpace. + int keyOffset = 0; + for (final GroupByVectorColumnSelector selector : selectors) { + selector.writeKeys(keySpace, keySize, keyOffset, startOffset, granulizer.getEndOffset()); + keyOffset += selector.getGroupingKeySize(); + } + + // Aggregate this vector. + final AggregateResult result = vectorGrouper.aggregateVector( + keySpace, + startOffset, + granulizer.getEndOffset() + ); + + if (result.isOk()) { + partiallyAggregatedRows = -1; + } else { + if (partiallyAggregatedRows < 0) { + partiallyAggregatedRows = result.getCount(); + } else { + partiallyAggregatedRows += result.getCount(); + } + } + } else { + partiallyAggregatedRows = -1; + } + + if (partiallyAggregatedRows >= 0) { + break; + } else if (!granulizer.advanceCursorWithinBucket()) { + // Advance bucketInterval. + bucketInterval = bucketIterator.hasNext() ? bucketIterator.next() : null; + break; + } + } + + return new CloseableGrouperIterator<>( + vectorGrouper.iterator(), + entry -> { + Map theMap = new LinkedHashMap<>(); + + // Add dimensions. + int keyOffset = 0; + for (int i = 0; i < selectors.size(); i++) { + final GroupByVectorColumnSelector selector = selectors.get(i); + + selector.writeKeyToResultRow( + query.getDimensions().get(i).getOutputName(), + entry.getKey(), + keyOffset, + theMap + ); + + keyOffset += selector.getGroupingKeySize(); + } + + // Convert dimension values to desired output types, possibly.
+ GroupByQueryEngineV2.convertRowTypesToOutputTypes(query.getDimensions(), theMap); + + // Add aggregations. + for (int i = 0; i < entry.getValues().length; i++) { + theMap.put(query.getAggregatorSpecs().get(i).getName(), entry.getValues()[i]); + } + + return new MapBasedRow(timestamp, theMap); + }, + vectorGrouper + ); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/search/DefaultSearchQueryMetrics.java b/processing/src/main/java/org/apache/druid/query/search/DefaultSearchQueryMetrics.java index 3f7811335e1c..cf93988f80c3 100644 --- a/processing/src/main/java/org/apache/druid/query/search/DefaultSearchQueryMetrics.java +++ b/processing/src/main/java/org/apache/druid/query/search/DefaultSearchQueryMetrics.java @@ -154,6 +154,12 @@ public void identity(String identity) delegateQueryMetrics.identity(identity); } + @Override + public void vectorized(final boolean vectorized) + { + delegateQueryMetrics.vectorized(vectorized); + } + @Override public BitmapResultFactory makeBitmapResultFactory(BitmapFactory factory) { diff --git a/processing/src/main/java/org/apache/druid/query/select/DefaultSelectQueryMetrics.java b/processing/src/main/java/org/apache/druid/query/select/DefaultSelectQueryMetrics.java index 83cad77fecf7..dbd57aa6dec3 100644 --- a/processing/src/main/java/org/apache/druid/query/select/DefaultSelectQueryMetrics.java +++ b/processing/src/main/java/org/apache/druid/query/select/DefaultSelectQueryMetrics.java @@ -153,6 +153,12 @@ public void identity(String identity) delegateQueryMetrics.identity(identity); } + @Override + public void vectorized(final boolean vectorized) + { + delegateQueryMetrics.vectorized(vectorized); + } + @Override public BitmapResultFactory makeBitmapResultFactory(BitmapFactory factory) { diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java index 0de74bbcbbf9..d24822acc631 100644
--- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java @@ -19,24 +19,58 @@ package org.apache.druid.query.timeseries; -import com.google.common.base.Function; +import com.google.common.collect.Iterables; +import com.google.inject.Inject; +import org.apache.druid.collections.NonBlockingPool; +import org.apache.druid.collections.ResourceHolder; +import org.apache.druid.collections.StupidPool; +import org.apache.druid.guice.annotations.Global; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.guava.Sequences; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryRunnerHelper; import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.filter.Filter; -import org.apache.druid.segment.Cursor; +import org.apache.druid.query.vector.VectorCursorGranularizer; import org.apache.druid.segment.SegmentMissingException; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorCursor; +import org.joda.time.Interval; +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.Collections; import java.util.List; +import java.util.Objects; /** */ public class TimeseriesQueryEngine { + private final NonBlockingPool bufferPool; + + /** + * Constructor for tests. In production, the @Inject constructor is used instead.
+ */ + public TimeseriesQueryEngine() + { + this.bufferPool = new StupidPool<>("dummy", () -> ByteBuffer.allocate(1000000)); + } + + @Inject + public TimeseriesQueryEngine(final @Global NonBlockingPool bufferPool) + { + this.bufferPool = bufferPool; + } + public Sequence> process(final TimeseriesQuery query, final StorageAdapter adapter) { if (adapter == null) { @@ -45,65 +79,211 @@ public Sequence> process(final TimeseriesQuery que ); } - final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter())); + final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter())); + final Interval interval = Iterables.getOnlyElement(query.getIntervals()); + final Granularity gran = query.getGranularity(); + final boolean descending = query.isDescending(); + + final boolean doVectorize = QueryContexts.getVectorize(query).shouldVectorize( + adapter.canVectorize(filter, interval, query.getVirtualColumns(), descending) + && query.getAggregatorSpecs().stream().allMatch(AggregatorFactory::canVectorize) + ); + + final Sequence> result; + + if (doVectorize) { + result = processVectorized(query, adapter, filter, interval, gran, descending); + } else { + result = processNonVectorized(query, adapter, filter, interval, gran, descending); + } + final int limit = query.getLimit(); - Sequence> result = generateTimeseriesResult(adapter, query, filter); if (limit < Integer.MAX_VALUE) { return result.limit(limit); + } else { + return result; } - return result; } - private Sequence> generateTimeseriesResult(StorageAdapter adapter, TimeseriesQuery query, Filter filter) + private Sequence> processVectorized( + final TimeseriesQuery query, + final StorageAdapter adapter, + @Nullable final Filter filter, + final Interval interval, + final Granularity gran, + final boolean descending + ) { + final boolean skipEmptyBuckets = query.isSkipEmptyBuckets(); + final List aggregatorSpecs = query.getAggregatorSpecs(); + +
final VectorCursor cursor = adapter.makeVectorCursor( + filter, + interval, + query.getVirtualColumns(), + descending, + QueryContexts.getVectorSize(query), + null + ); + + if (cursor == null) { + return Sequences.empty(); + } + + final Closer closer = Closer.create(); + closer.register(cursor); + + try { + final VectorCursorGranularizer granularizer = VectorCursorGranularizer.create( + adapter, + cursor, + gran, + interval + ); + + if (granularizer == null) { + // Nothing to bucket, but the cursor is already open; attach the closer so it is released with the sequence. + return Sequences.withBaggage(Sequences.empty(), closer); + } + + final VectorColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); + final AggregatorAdapters aggregators = closer.register( + AggregatorAdapters.factorizeVector(columnSelectorFactory, query.getAggregatorSpecs()) + ); + + final ResourceHolder bufferHolder = closer.register(bufferPool.take()); + + final ByteBuffer buffer = bufferHolder.get(); + + if (aggregators.spaceNeeded() > buffer.remaining()) { + throw new ISE( + "Not enough space for aggregators, needed [%,d] bytes but have only [%,d].", + aggregators.spaceNeeded(), + buffer.remaining() + ); + } + + return Sequences.withBaggage( + Sequences + .simple(granularizer.getBucketIterable()) + .map( + bucketInterval -> { + // Whether or not the current bucket is empty + boolean emptyBucket = true; + + while (!cursor.isDone()) { + granularizer.setCurrentOffsets(bucketInterval); + + if (granularizer.getEndOffset() > granularizer.getStartOffset()) { + if (emptyBucket) { + aggregators.init(buffer, 0); + } + + aggregators.aggregateVector( + buffer, + 0, + granularizer.getStartOffset(), + granularizer.getEndOffset() + ); + + emptyBucket = false; + } + + if (!granularizer.advanceCursorWithinBucket()) { + break; + } + } + + if (emptyBucket && skipEmptyBuckets) { + // Return null, will get filtered out later by the Objects::nonNull filter.
+ return null; + } + + final TimeseriesResultBuilder bob = new TimeseriesResultBuilder( + gran.toDateTime(bucketInterval.getStartMillis()) + ); + + if (emptyBucket) { + aggregators.init(buffer, 0); + } + + for (int i = 0; i < aggregatorSpecs.size(); i++) { + bob.addMetric( + aggregatorSpecs.get(i).getName(), + aggregators.get(buffer, 0, i) + ); + } + + return bob.build(); + } + ) + .filter(Objects::nonNull), + closer + ); + } + catch (Throwable t1) { + try { + closer.close(); + } + catch (Throwable t2) { + t1.addSuppressed(t2); + } + throw t1; + } + } + + private Sequence> processNonVectorized( + final TimeseriesQuery query, + final StorageAdapter adapter, + @Nullable final Filter filter, + final Interval interval, + final Granularity gran, + final boolean descending + ) + { + final boolean skipEmptyBuckets = query.isSkipEmptyBuckets(); + final List aggregatorSpecs = query.getAggregatorSpecs(); + return QueryRunnerHelper.makeCursorBasedQuery( adapter, - query.getQuerySegmentSpec().getIntervals(), + Collections.singletonList(interval), filter, query.getVirtualColumns(), - query.isDescending(), - query.getGranularity(), - new Function>() - { - private final boolean skipEmptyBuckets = query.isSkipEmptyBuckets(); - private final List aggregatorSpecs = query.getAggregatorSpecs(); - - @Override - public Result apply(Cursor cursor) - { - if (skipEmptyBuckets && cursor.isDone()) { - return null; - } + descending, + gran, + cursor -> { + if (skipEmptyBuckets && cursor.isDone()) { + return null; + } - Aggregator[] aggregators = new Aggregator[aggregatorSpecs.size()]; - String[] aggregatorNames = new String[aggregatorSpecs.size()]; + Aggregator[] aggregators = new Aggregator[aggregatorSpecs.size()]; + String[] aggregatorNames = new String[aggregatorSpecs.size()]; - for (int i = 0; i < aggregatorSpecs.size(); i++) { - aggregators[i] = aggregatorSpecs.get(i).factorize(cursor.getColumnSelectorFactory()); - aggregatorNames[i] = aggregatorSpecs.get(i).getName(); - } + for (int i =
0; i < aggregatorSpecs.size(); i++) { + aggregators[i] = aggregatorSpecs.get(i).factorize(cursor.getColumnSelectorFactory()); + aggregatorNames[i] = aggregatorSpecs.get(i).getName(); + } - try { - while (!cursor.isDone()) { - for (Aggregator aggregator : aggregators) { - aggregator.aggregate(); - } - cursor.advance(); + try { + while (!cursor.isDone()) { + for (Aggregator aggregator : aggregators) { + aggregator.aggregate(); } - TimeseriesResultBuilder bob = new TimeseriesResultBuilder(cursor.getTime()); + cursor.advance(); + } - for (int i = 0; i < aggregatorSpecs.size(); i++) { - bob.addMetric(aggregatorNames[i], aggregators[i]); - } + TimeseriesResultBuilder bob = new TimeseriesResultBuilder(cursor.getTime()); - Result retVal = bob.build(); - return retVal; + for (int i = 0; i < aggregatorSpecs.size(); i++) { + bob.addMetric(aggregatorNames[i], aggregators[i].get()); } - finally { - // cleanup - for (Aggregator agg : aggregators) { - agg.close(); - } + + return bob.build(); + } + finally { + // cleanup + for (Aggregator agg : aggregators) { + agg.close(); } } } diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java index 479fbf0c2d1b..ed2fe7d8ef4a 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java @@ -227,7 +227,7 @@ private Result getNullTimeseriesResultValue(TimeseriesQue final DateTime start = query.getIntervals().isEmpty() ?
DateTimes.EPOCH : query.getIntervals().get(0).getStart(); TimeseriesResultBuilder bob = new TimeseriesResultBuilder(start); for (int i = 0; i < aggregatorSpecs.size(); i++) { - bob.addMetric(aggregatorNames[i], aggregators[i]); + bob.addMetric(aggregatorNames[i], aggregators[i].get()); aggregators[i].close(); } return bob.build(); diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesResultBuilder.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesResultBuilder.java index bd389cddaf81..2332f53c7b45 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesResultBuilder.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesResultBuilder.java @@ -20,7 +20,6 @@ package org.apache.druid.query.timeseries; import org.apache.druid.query.Result; -import org.apache.druid.query.aggregation.Aggregator; import org.joda.time.DateTime; import java.util.HashMap; @@ -41,15 +40,15 @@ public TimeseriesResultBuilder( this.timestamp = timestamp; } - public TimeseriesResultBuilder addMetric(String name, Aggregator aggregator) + public TimeseriesResultBuilder addMetric(String name, Object value) { - metricValues.put(name, aggregator.get()); + metricValues.put(name, value); return this; } public Result build() { - return new Result( + return new Result<>( timestamp, new TimeseriesResultValue(metricValues) ); diff --git a/processing/src/main/java/org/apache/druid/query/vector/VectorCursorGranularizer.java b/processing/src/main/java/org/apache/druid/query/vector/VectorCursorGranularizer.java new file mode 100644 index 000000000000..987a993be578 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/vector/VectorCursorGranularizer.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.vector; + +import com.google.common.collect.Iterables; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.vector.VectorCursor; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import javax.annotation.Nullable; + +/** + * Class that helps vectorized query engines handle "granularity" parameters. Nonvectorized engines have it handled + * for them by the StorageAdapter. Vectorized engines don't, because they can get efficiency gains by pushing + * granularity handling into the engine layer. + */ +public class VectorCursorGranularizer +{ + // Vector cursor whose rows are split into time buckets; advanced by advanceCursorWithinBucket(). + private final VectorCursor cursor; + + // Iterable that iterates over time buckets. + private final Iterable bucketIterable; + + // Vector selector for the "__time" column. Null when there is only one bucket, so timestamps are never read. + @Nullable + private final VectorValueSelector timeSelector; + + // Current time vector. + @Nullable + private long[] timestamps = null; + + // Offset into the vector that we should start reading from. + private int startOffset = 0; + + // Offset into the vector that is one past the last one we should read.
+ private int endOffset = 0; + + private VectorCursorGranularizer( + VectorCursor cursor, + Iterable bucketIterable, + @Nullable VectorValueSelector timeSelector + ) + { + this.cursor = cursor; + this.bucketIterable = bucketIterable; + this.timeSelector = timeSelector; + } + + @Nullable + public static VectorCursorGranularizer create( + final StorageAdapter storageAdapter, + final VectorCursor cursor, + final Granularity granularity, + final Interval cursorInterval + ) + { + final DateTime minTime = storageAdapter.getMinTime(); + final DateTime maxTime = storageAdapter.getMaxTime(); + final Interval actualInterval = cursorInterval.overlap(new Interval(minTime, granularity.bucketEnd(maxTime))); + + if (actualInterval == null) { + return null; + } + + final Iterable bucketIterable = granularity.getIterable(actualInterval); + final Interval firstBucket = granularity.bucket(actualInterval.getStart()); + + final VectorValueSelector timeSelector; + if (firstBucket.contains(actualInterval)) { + // Only one bucket, no need to read the time column. + assert Iterables.size(bucketIterable) == 1; + timeSelector = null; + } else { + // Multiple buckets, need to read the time column to know when we move from one to the next. + timeSelector = cursor.getColumnSelectorFactory().makeValueSelector(ColumnHolder.TIME_COLUMN_NAME); + } + + return new VectorCursorGranularizer(cursor, bucketIterable, timeSelector); + } + + public void setCurrentOffsets(final Interval bucketInterval) + { + final long timeStart = bucketInterval.getStartMillis(); + final long timeEnd = bucketInterval.getEndMillis(); + + int vectorSize = cursor.getCurrentVectorSize(); + endOffset = 0; + + if (timeSelector != null) { + if (timestamps == null) { + timestamps = timeSelector.getLongVector(); + } + + // Advance "startOffset" to the first row at or after the start of bucketInterval. + while (startOffset < vectorSize && timestamps[startOffset] < timeStart) { + startOffset++; + } + + // Find end of bucketInterval.
+ for (endOffset = vectorSize - 1; + endOffset >= startOffset && timestamps[endOffset] >= timeEnd; + endOffset--) { + // nothing needed, "for" is doing the work. + } + + // Adjust: endOffset is now pointing at the last row to aggregate, but we want it + // to be one _past_ the last row. + endOffset++; + } else { + endOffset = vectorSize; + } + } + + /** + * Returns true, and advances the cursor, if it can be advanced within the current time bucket. Otherwise, returns + * false and does nothing else. + */ + public boolean advanceCursorWithinBucket() + { + if (endOffset == cursor.getCurrentVectorSize()) { + cursor.advance(); + + if (timeSelector != null && !cursor.isDone()) { + timestamps = timeSelector.getLongVector(); + } + + startOffset = 0; + + return true; + } else { + return false; + } + } + + public Iterable getBucketIterable() + { + return bucketIterable; + } + + public int getStartOffset() + { + return startOffset; + } + + public int getEndOffset() + { + return endOffset; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorFactory.java index 3d51c27ee7fa..07e66a672259 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorFactory.java @@ -27,6 +27,8 @@ /** * Factory class for MetricSelectors + * + * @see org.apache.druid.segment.vector.VectorColumnSelectorFactory, the vectorized version */ @PublicApi public interface ColumnSelectorFactory diff --git a/processing/src/main/java/org/apache/druid/segment/Cursor.java b/processing/src/main/java/org/apache/druid/segment/Cursor.java index 7964485f5296..645caee895f7 100644 --- a/processing/src/main/java/org/apache/druid/segment/Cursor.java +++ b/processing/src/main/java/org/apache/druid/segment/Cursor.java @@ -23,13 +23,15 @@ /** * Cursor is an interface for iteration over a range of data points, used
during query execution. {@link - * QueryableIndexStorageAdapter.QueryableIndexCursor} is an implementation for historical segments, and {@link + * QueryableIndexCursorSequenceBuilder.QueryableIndexCursor} is an implementation for historical segments, and {@link * org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter.IncrementalIndexCursor} is an implementation for {@link * org.apache.druid.segment.incremental.IncrementalIndex}. * * Cursor is conceptually similar to {@link TimeAndDimsPointer}, but the latter is used for historical segment creation * rather than query execution (as Cursor). If those abstractions could be collapsed (and if it is worthwhile) is yet to * be determined. + * + * @see org.apache.druid.segment.vector.VectorCursor, the vectorized version */ public interface Cursor { diff --git a/processing/src/main/java/org/apache/druid/segment/CursorFactory.java b/processing/src/main/java/org/apache/druid/segment/CursorFactory.java index c3e55031702b..6a5d7cf7400d 100644 --- a/processing/src/main/java/org/apache/druid/segment/CursorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/CursorFactory.java @@ -23,14 +23,37 @@ import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.filter.Filter; +import org.apache.druid.segment.vector.VectorCursor; import org.joda.time.Interval; import javax.annotation.Nullable; /** + * Interface extended by {@link StorageAdapter}, which gives storage adapters the power to create cursors. + * + * @see StorageAdapter */ public interface CursorFactory { + /** + * Returns true if the provided combination of parameters can be handled by "makeVectorCursor". + * + * Query engines should use this before running in vectorized mode, and be prepared to fall back to non-vectorized + * mode if this method returns false.
+ */ + default boolean canVectorize( + @Nullable Filter filter, + Interval interval, + VirtualColumns virtualColumns, + boolean descending + ) + { + return false; + } + + /** + * Creates a sequence of Cursors, one for each time-granular bucket (based on the provided Granularity). + */ Sequence makeCursors( @Nullable Filter filter, Interval interval, @@ -39,4 +62,25 @@ Sequence makeCursors( boolean descending, @Nullable QueryMetrics queryMetrics ); + + /** + * Creates a VectorCursor. Unlike the Cursors returned by "makeCursors", there is just one of these. Hence, this method + * does not take a "granularity" parameter. Before calling this method, check "canVectorize" to see if the call you + * are about to make will throw an error or not. + * + * Returns null if there is no data to walk over (for example, if the "interval" does not overlap the data interval + * of this segment). + */ + @Nullable + default VectorCursor makeVectorCursor( + @Nullable Filter filter, + Interval interval, + VirtualColumns virtualColumns, + boolean descending, + int vectorSize, + @Nullable QueryMetrics queryMetrics + ) + { + throw new UnsupportedOperationException("Cannot vectorize. Check 'canVectorize' before calling 'makeVectorCursor'."); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java b/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java new file mode 100644 index 000000000000..03783aade0cf --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import org.apache.druid.query.monomorphicprocessing.CalledFromHotLoop; + +import javax.annotation.Nullable; + +/** + * Interface containing + */ +public interface DimensionDictionarySelector +{ + int CARDINALITY_UNKNOWN = -1; + + /** + * Value cardinality is the cardinality of the different occurring values. If there were 4 rows: + * + * A,B + * A + * B + * A + * + * Value cardinality would be 2. + * + * Cardinality may be unknown (e.g. the selector used by IncrementalIndex while reading input rows), + * in which case this method will return -1. If cardinality is unknown, you should assume this + * dimension selector has no dictionary, and avoid storing ids, calling "lookupId", or calling "lookupName" + * outside of the context of operating on a single row. + * + * @return the value cardinality, or -1 if unknown. + */ + int getValueCardinality(); + + /** + * The Name is the String name of the actual field. It is assumed that storage layers convert names + * into id values which can then be used to get the string value. 
For example + * + * A,B + * A + * A,B + * B + * + * getRow() would return + * + * getRow(0) => [0 1] + * getRow(1) => [0] + * getRow(2) => [0 1] + * getRow(3) => [1] + * + * and then lookupName would return: + * + * lookupName(0) => A + * lookupName(1) => B + * + * @param id id to lookup the field name for + * + * @return the field name for the given id + */ + @CalledFromHotLoop + @Nullable + String lookupName(int id); + + /** + * Returns true if it is possible to {@link #lookupName(int)} by ids from 0 to {@link #getValueCardinality()} + * before the rows with those ids are returned. + * + *

Returns false if {@link #lookupName(int)} can be called with ids returned from the most recent call of {@link + * DimensionSelector#getRow()}, but not before those ids have been returned. If {@link #getValueCardinality()} + * additionally returns {@link #CARDINALITY_UNKNOWN}, {@code lookupName()} cannot be called with ids returned by + * any call of {@code getRow()} other than the most recent one, i.e. names for ids cannot be looked up "later". + * If {@link #getValueCardinality()} returns a non-negative number, {@code lookupName()} can be called with any id + * returned from {@code getRow()} since the creation of this selector. + * + *

If {@link #lookupName(int)} is called with an ineligible id, result is undefined: exception could be thrown, or + * null returned, or some other random value. + */ + boolean nameLookupPossibleInAdvance(); + + /** + * Returns {@link IdLookup} if available for this DimensionSelector, or null. + */ + @Nullable + IdLookup idLookup(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 197b976afb3a..0da6018be42b 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -26,15 +26,19 @@ import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.java.util.common.parsers.ParseException; import org.apache.druid.query.ColumnSelectorPlus; import org.apache.druid.query.dimension.ColumnSelectorStrategy; import org.apache.druid.query.dimension.ColumnSelectorStrategyFactory; +import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.dimension.VectorColumnStrategizer; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.math.BigDecimal; @@ -240,6 +244,80 @@ private static Colu return strategyFactory.makeColumnSelectorStrategy(capabilities, selector); } + /** + * Equivalent to calling makeVectorProcessor(DefaultDimensionSpec.of(column), strategyFactory, selectorFactory). 
+ * + * @see #makeVectorProcessor(DimensionSpec, VectorColumnStrategizer, VectorColumnSelectorFactory) + */ + public static T makeVectorProcessor( + final String column, + final VectorColumnStrategizer strategyFactory, + final VectorColumnSelectorFactory selectorFactory + ) + { + return makeVectorProcessor(DefaultDimensionSpec.of(column), strategyFactory, selectorFactory); + } + + /** + * Creates "vector processors", which are objects that wrap a single vectorized input column and provide some + * functionality on top of it. Used by things like query engines and filter matchers. + * + * Supports the basic types STRING, LONG, DOUBLE, and FLOAT. + * + * @param dimensionSpec dimensionSpec for the input to the processor + * @param strategyFactory object that encapsulates the knowledge about how to create processors + * @param selectorFactory column selector factory used for creating the vector processor + */ + public static T makeVectorProcessor( + final DimensionSpec dimensionSpec, + final VectorColumnStrategizer strategyFactory, + final VectorColumnSelectorFactory selectorFactory + ) + { + final ColumnCapabilities capabilities = getEffectiveCapabilities( + dimensionSpec, + selectorFactory.getColumnCapabilities(dimensionSpec.getDimension()) + ); + + final ValueType type = capabilities.getType(); + + if (type == ValueType.STRING) { + if (capabilities.hasMultipleValues()) { + return strategyFactory.makeMultiValueDimensionStrategy( + selectorFactory.makeMultiValueDimensionSelector(dimensionSpec) + ); + } else { + return strategyFactory.makeSingleValueDimensionStrategy( + selectorFactory.makeSingleValueDimensionSelector(dimensionSpec) + ); + } + } else { + Preconditions.checkState( + dimensionSpec.getExtractionFn() == null && !dimensionSpec.mustDecorate(), + "Uh oh, was about to try to make a value selector for type[%s] with a dimensionSpec of class[%s] that " + + "requires decoration. 
Possible bug.", + type, + dimensionSpec.getClass().getName() + ); + + if (type == ValueType.LONG) { + return strategyFactory.makeLongStrategy( + selectorFactory.makeValueSelector(dimensionSpec.getDimension()) + ); + } else if (type == ValueType.FLOAT) { + return strategyFactory.makeFloatStrategy( + selectorFactory.makeValueSelector(dimensionSpec.getDimension()) + ); + } else if (type == ValueType.DOUBLE) { + return strategyFactory.makeDoubleStrategy( + selectorFactory.makeValueSelector(dimensionSpec.getDimension()) + ); + } else { + throw new ISE("Unsupported type[%s]", capabilities.getType()); + } + } + } + @Nullable public static String convertObjectToString(@Nullable Object valObj) { diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionSelector.java b/processing/src/main/java/org/apache/druid/segment/DimensionSelector.java index 1c9bf9765f20..8a0f2a75bddc 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionSelector.java @@ -36,12 +36,15 @@ import java.util.Arrays; /** + * Selector for a string-typed column, either single- or multi-valued. This is named a "dimension" selector for legacy + * reasons: in the past, all Druid dimensions were string-typed. + * + * @see org.apache.druid.segment.vector.SingleValueDimensionVectorSelector, a vectorized version + * @see org.apache.druid.segment.vector.MultiValueDimensionVectorSelector, another vectorized version */ @PublicApi -public interface DimensionSelector extends ColumnValueSelector, HotLoopCallee +public interface DimensionSelector extends ColumnValueSelector, DimensionDictionarySelector, HotLoopCallee { - int CARDINALITY_UNKNOWN = -1; - /** * Returns the indexed values at the current position in this DimensionSelector. 
* @@ -63,75 +66,6 @@ public interface DimensionSelector extends ColumnValueSelector, HotLoopC ValueMatcher makeValueMatcher(Predicate predicate); - /** - * Value cardinality is the cardinality of the different occurring values. If there were 4 rows: - * - * A,B - * A - * B - * A - * - * Value cardinality would be 2. - * - * Cardinality may be unknown (e.g. the selector used by IncrementalIndex while reading input rows), - * in which case this method will return -1. If cardinality is unknown, you should assume this - * dimension selector has no dictionary, and avoid storing ids, calling "lookupId", or calling "lookupName" - * outside of the context of operating on a single row. - * - * @return the value cardinality, or -1 if unknown. - */ - int getValueCardinality(); - - /** - * The Name is the String name of the actual field. It is assumed that storage layers convert names - * into id values which can then be used to get the string value. For example - * - * A,B - * A - * A,B - * B - * - * getRow() would return - * - * getRow(0) => [0 1] - * getRow(1) => [0] - * getRow(2) => [0 1] - * getRow(3) => [1] - * - * and then lookupName would return: - * - * lookupName(0) => A - * lookupName(1) => B - * - * @param id id to lookup the field name for - * @return the field name for the given id - */ - @CalledFromHotLoop - @Nullable - String lookupName(int id); - - /** - * Returns true if it is possible to {@link #lookupName(int)} by ids from 0 to {@link #getValueCardinality()} - * before the rows with those ids are returned. - * - *

Returns false if {@link #lookupName(int)} could be called with ids, returned from the most recent call of {@link - * #getRow()} on this DimensionSelector, but not earlier. If {@link #getValueCardinality()} of this DimensionSelector - * additionally returns {@link #CARDINALITY_UNKNOWN}, {@code lookupName()} couldn't be called with ids, returned by - * not the most recent call of {@link #getRow()}, i. e. names for ids couldn't be looked up "later". If {@link - * #getValueCardinality()} returns a non-negative number, {@code lookupName()} could be called with any ids, returned - * from {@code #getRow()} since the creation of this DimensionSelector. - * - *

If {@link #lookupName(int)} is called with an ineligible id, result is undefined: exception could be thrown, or - * null returned, or some other random value. - */ - boolean nameLookupPossibleInAdvance(); - - /** - * Returns {@link IdLookup} if available for this DimensionSelector, or null. - */ - @Nullable - IdLookup idLookup(); - /** * @deprecated This method is marked as deprecated in DimensionSelector to minimize the probability of accidental * calling. "Polymorphism" of DimensionSelector should be used only when operating on {@link ColumnValueSelector} diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java new file mode 100644 index 000000000000..4d0970297cc0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java @@ -0,0 +1,618 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment; + +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.guava.Sequences; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.query.BaseQuery; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.BaseColumn; +import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.NumericColumn; +import org.apache.druid.segment.data.Offset; +import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.historical.HistoricalCursor; +import org.apache.druid.segment.vector.BitmapVectorOffset; +import org.apache.druid.segment.vector.FilteredVectorOffset; +import org.apache.druid.segment.vector.NoFilterVectorOffset; +import org.apache.druid.segment.vector.QueryableIndexVectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorCursor; +import org.apache.druid.segment.vector.VectorOffset; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class QueryableIndexCursorSequenceBuilder +{ + // At this threshold, timestamp searches switch from binary to linear. The idea is to avoid too much decompression + // buffer thrashing. The default value is chosen to be similar to the typical number of timestamps per block. 
+ private static final int TOO_CLOSE_FOR_MISSILES = 15000; + + private final QueryableIndex index; + private final Interval interval; + private final VirtualColumns virtualColumns; + @Nullable + private final ImmutableBitmap filterBitmap; + private final long minDataTimestamp; + private final long maxDataTimestamp; + private final boolean descending; + @Nullable + private final Filter postFilter; + private final ColumnSelectorBitmapIndexSelector bitmapIndexSelector; + + public QueryableIndexCursorSequenceBuilder( + QueryableIndex index, + Interval interval, + VirtualColumns virtualColumns, + @Nullable ImmutableBitmap filterBitmap, + long minDataTimestamp, + long maxDataTimestamp, + boolean descending, + @Nullable Filter postFilter, + ColumnSelectorBitmapIndexSelector bitmapIndexSelector + ) + { + this.index = index; + this.interval = interval; + this.virtualColumns = virtualColumns; + this.filterBitmap = filterBitmap; + this.minDataTimestamp = minDataTimestamp; + this.maxDataTimestamp = maxDataTimestamp; + this.descending = descending; + this.postFilter = postFilter; + this.bitmapIndexSelector = bitmapIndexSelector; + } + + public Sequence build(final Granularity gran) + { + final Offset baseOffset; + + if (filterBitmap == null) { + baseOffset = descending + ? new SimpleDescendingOffset(index.getNumRows()) + : new SimpleAscendingOffset(index.getNumRows()); + } else { + baseOffset = BitmapOffset.of(filterBitmap, descending, index.getNumRows()); + } + + // Column caches shared amongst all cursors in this sequence. 
+ final Map columnCache = new HashMap<>(); + + final NumericColumn timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn(); + + final Closer closer = Closer.create(); + closer.register(timestamps); + + Iterable iterable = gran.getIterable(interval); + if (descending) { + iterable = Lists.reverse(ImmutableList.copyOf(iterable)); + } + + return Sequences.withBaggage( + Sequences.map( + Sequences.simple(iterable), + new Function() + { + @Override + public Cursor apply(final Interval inputInterval) + { + final long timeStart = Math.max(interval.getStartMillis(), inputInterval.getStartMillis()); + final long timeEnd = Math.min( + interval.getEndMillis(), + gran.increment(inputInterval.getStart()).getMillis() + ); + + if (descending) { + for (; baseOffset.withinBounds(); baseOffset.increment()) { + if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) < timeEnd) { + break; + } + } + } else { + for (; baseOffset.withinBounds(); baseOffset.increment()) { + if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) >= timeStart) { + break; + } + } + } + + final Offset offset = descending ? 
+ new DescendingTimestampCheckingOffset( + baseOffset, + timestamps, + timeStart, + minDataTimestamp >= timeStart + ) : + new AscendingTimestampCheckingOffset( + baseOffset, + timestamps, + timeEnd, + maxDataTimestamp < timeEnd + ); + + + final Offset baseCursorOffset = offset.clone(); + final ColumnSelectorFactory columnSelectorFactory = new QueryableIndexColumnSelectorFactory( + index, + virtualColumns, + descending, + closer, + baseCursorOffset.getBaseReadableOffset(), + columnCache + ); + final DateTime myBucket = gran.toDateTime(inputInterval.getStartMillis()); + + if (postFilter == null) { + return new QueryableIndexCursor(baseCursorOffset, columnSelectorFactory, myBucket); + } else { + FilteredOffset filteredOffset = new FilteredOffset( + baseCursorOffset, + columnSelectorFactory, + descending, + postFilter, + bitmapIndexSelector + ); + return new QueryableIndexCursor(filteredOffset, columnSelectorFactory, myBucket); + } + + } + } + ), + closer + ); + } + + public VectorCursor buildVectorized(final int vectorSize) + { + // Sanity check - matches QueryableIndexStorageAdapter.canVectorize + Preconditions.checkState(virtualColumns.size() == 0, "virtualColumns.size == 0"); + Preconditions.checkState(!descending, "!descending"); + + final Map columnCache = new HashMap<>(); + final Closer closer = Closer.create(); + + NumericColumn timestamps = null; + + final int startOffset; + final int endOffset; + + if (interval.getStartMillis() > minDataTimestamp) { + timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn(); + closer.register(timestamps); + + final int result = timeSearch(timestamps, interval.getStartMillis(), 0, index.getNumRows()); + if (result >= 0) { + startOffset = result; + } else { + startOffset = -(result + 1); + } + } else { + startOffset = 0; + } + + if (interval.getEndMillis() <= maxDataTimestamp) { + if (timestamps == null) { + timestamps = (NumericColumn) 
index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn(); + closer.register(timestamps); + } + + final int result = timeSearch(timestamps, interval.getEndMillis(), startOffset, index.getNumRows()); + if (result >= 0) { + endOffset = result; + } else { + endOffset = -(result + 1); + } + } else { + endOffset = index.getNumRows(); + } + + final VectorOffset baseOffset = + filterBitmap == null + ? new NoFilterVectorOffset(vectorSize, startOffset, endOffset) + : new BitmapVectorOffset(vectorSize, filterBitmap, startOffset, endOffset); + + if (postFilter == null) { + return new QueryableIndexVectorCursor(index, baseOffset, closer, columnCache, vectorSize); + } else { + // baseColumnSelectorFactory using baseOffset is the column selector for filtering. + final VectorColumnSelectorFactory baseColumnSelectorFactory = new QueryableIndexVectorColumnSelectorFactory( + index, + baseOffset, + closer, + columnCache + ); + + final VectorOffset filteredOffset = FilteredVectorOffset.create( + baseOffset, + baseColumnSelectorFactory, + postFilter + ); + + // Now create the cursor and column selector that will be returned to the caller. + // + // There is an inefficiency with how we do things here: this cursor (the one that will be provided to the + // caller) does share a columnCache with "baseColumnSelectorFactory", but it *doesn't* share vector data. This + // means that if the caller wants to read from a column that is also used for filtering, the underlying column + // object will get hit twice for some of the values (anything that matched the filter). This is probably most + // noticeable if it causes thrashing of decompression buffers due to out-of-order reads. I haven't observed + // this directly but it seems possible in principle. + return new QueryableIndexVectorCursor(index, filteredOffset, closer, columnCache, vectorSize); + } + } + + /** + * Search the time column. Uses a binary search that switches to linear when it gets close. 
+ * + * @param timeColumn the column + * @param timestamp the timestamp to search for + * @param startIndex first index to search, inclusive + * @param endIndex last index to search, exclusive + * + * @return index of timestamp, or negative number equal to (-(insertion point) - 1). + */ + private static int timeSearch( + final NumericColumn timeColumn, + final long timestamp, + final int startIndex, + final int endIndex + ) + { + final long prevTimestamp = timestamp - 1; + + // Binary search for prevTimestamp. + int minIndex = startIndex; + int maxIndex = endIndex - 1; + + while (minIndex <= maxIndex) { + if (maxIndex - minIndex < TOO_CLOSE_FOR_MISSILES) { + break; + } + + final int currIndex = (minIndex + maxIndex) >>> 1; + final long currValue = timeColumn.getLongSingleValueRow(currIndex); + + if (currValue < prevTimestamp) { + minIndex = currIndex + 1; + } else if (currValue > prevTimestamp) { + maxIndex = currIndex - 1; + } else { + // The value at currIndex is prevTimestamp. + minIndex = currIndex; + break; + } + } + + // Do linear search for the actual timestamp, then return. 
+ for (; minIndex < endIndex; minIndex++) { + final long currValue = timeColumn.getLongSingleValueRow(minIndex); + if (currValue == timestamp) { + return minIndex; + } else if (currValue > timestamp) { + return -(minIndex + 1); + } + } + + return -(endIndex + 1); + } + + private static class QueryableIndexVectorCursor implements VectorCursor + { + private final Closer closer; + private final int vectorSize; + private final VectorOffset offset; + private final VectorColumnSelectorFactory columnSelectorFactory; + + public QueryableIndexVectorCursor( + final QueryableIndex index, + final VectorOffset offset, + final Closer closer, + final Map columnCache, + final int vectorSize + ) + { + this.offset = offset; + this.closer = closer; + this.vectorSize = vectorSize; + this.columnSelectorFactory = new QueryableIndexVectorColumnSelectorFactory(index, offset, closer, columnCache); + } + + @Override + public int getMaxVectorSize() + { + return vectorSize; + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public VectorColumnSelectorFactory getColumnSelectorFactory() + { + return columnSelectorFactory; + } + + @Override + public void advance() + { + offset.advance(); + BaseQuery.checkInterrupted(); + } + + @Override + public boolean isDone() + { + return offset.isDone(); + } + + @Override + public void reset() + { + offset.reset(); + } + + @Override + public void close() + { + try { + closer.close(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + private static class QueryableIndexCursor implements HistoricalCursor + { + private final Offset cursorOffset; + private final ColumnSelectorFactory columnSelectorFactory; + private final DateTime bucketStart; + + QueryableIndexCursor(Offset cursorOffset, ColumnSelectorFactory columnSelectorFactory, DateTime bucketStart) + { + this.cursorOffset = cursorOffset; + this.columnSelectorFactory = columnSelectorFactory; + this.bucketStart = 
bucketStart; + } + + @Override + public Offset getOffset() + { + return cursorOffset; + } + + @Override + public ColumnSelectorFactory getColumnSelectorFactory() + { + return columnSelectorFactory; + } + + @Override + public DateTime getTime() + { + return bucketStart; + } + + @Override + public void advance() + { + cursorOffset.increment(); + // Must call BaseQuery.checkInterrupted() after cursorOffset.increment(), not before, because + // FilteredOffset.increment() is a potentially long, not an "instant" operation (unlike to all other subclasses + // of Offset) and it returns early on interruption, leaving itself in an illegal state. We should not let + // aggregators, etc. access this illegal state and throw a QueryInterruptedException by calling + // BaseQuery.checkInterrupted(). + BaseQuery.checkInterrupted(); + } + + @Override + public void advanceUninterruptibly() + { + cursorOffset.increment(); + } + + @Override + public void advanceTo(int offset) + { + int count = 0; + while (count < offset && !isDone()) { + advance(); + count++; + } + } + + @Override + public boolean isDone() + { + return !cursorOffset.withinBounds(); + } + + @Override + public boolean isDoneOrInterrupted() + { + return isDone() || Thread.currentThread().isInterrupted(); + } + + @Override + public void reset() + { + cursorOffset.reset(); + } + } + + + public abstract static class TimestampCheckingOffset extends Offset + { + final Offset baseOffset; + final NumericColumn timestamps; + final long timeLimit; + final boolean allWithinThreshold; + + TimestampCheckingOffset( + Offset baseOffset, + NumericColumn timestamps, + long timeLimit, + boolean allWithinThreshold + ) + { + this.baseOffset = baseOffset; + this.timestamps = timestamps; + this.timeLimit = timeLimit; + // checks if all the values are within the Threshold specified, skips timestamp lookups and checks if all values are within threshold. 
+ this.allWithinThreshold = allWithinThreshold; + } + + @Override + public int getOffset() + { + return baseOffset.getOffset(); + } + + @Override + public boolean withinBounds() + { + if (!baseOffset.withinBounds()) { + return false; + } + if (allWithinThreshold) { + return true; + } + return timeInRange(timestamps.getLongSingleValueRow(baseOffset.getOffset())); + } + + @Override + public void reset() + { + baseOffset.reset(); + } + + @Override + public ReadableOffset getBaseReadableOffset() + { + return baseOffset.getBaseReadableOffset(); + } + + protected abstract boolean timeInRange(long current); + + @Override + public void increment() + { + baseOffset.increment(); + } + + @SuppressWarnings("MethodDoesntCallSuperMethod") + @Override + public Offset clone() + { + throw new IllegalStateException("clone"); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("baseOffset", baseOffset); + inspector.visit("timestamps", timestamps); + inspector.visit("allWithinThreshold", allWithinThreshold); + } + } + + public static class AscendingTimestampCheckingOffset extends TimestampCheckingOffset + { + AscendingTimestampCheckingOffset( + Offset baseOffset, + NumericColumn timestamps, + long timeLimit, + boolean allWithinThreshold + ) + { + super(baseOffset, timestamps, timeLimit, allWithinThreshold); + } + + @Override + protected final boolean timeInRange(long current) + { + return current < timeLimit; + } + + @Override + public String toString() + { + return (baseOffset.withinBounds() ? 
timestamps.getLongSingleValueRow(baseOffset.getOffset()) : "OOB") + + "<" + timeLimit + "::" + baseOffset; + } + + @SuppressWarnings("MethodDoesntCallSuperMethod") + @Override + public Offset clone() + { + return new AscendingTimestampCheckingOffset(baseOffset.clone(), timestamps, timeLimit, allWithinThreshold); + } + } + + public static class DescendingTimestampCheckingOffset extends TimestampCheckingOffset + { + DescendingTimestampCheckingOffset( + Offset baseOffset, + NumericColumn timestamps, + long timeLimit, + boolean allWithinThreshold + ) + { + super(baseOffset, timestamps, timeLimit, allWithinThreshold); + } + + @Override + protected final boolean timeInRange(long current) + { + return current >= timeLimit; + } + + @Override + public String toString() + { + return timeLimit + ">=" + + (baseOffset.withinBounds() ? timestamps.getLongSingleValueRow(baseOffset.getOffset()) : "OOB") + + "::" + baseOffset; + } + + @SuppressWarnings("MethodDoesntCallSuperMethod") + @Override + public Offset clone() + { + return new DescendingTimestampCheckingOffset(baseOffset.clone(), timestamps, timeLimit, allWithinThreshold); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java index 40d89c7736ed..54dd92af3446 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java @@ -22,16 +22,18 @@ import org.joda.time.Interval; /** -*/ + */ public class QueryableIndexSegment extends AbstractSegment { private final QueryableIndex index; + private final QueryableIndexStorageAdapter storageAdapter; private final String identifier; public QueryableIndexSegment(final String segmentIdentifier, QueryableIndex index) { this.index = index; - identifier = segmentIdentifier; + this.storageAdapter = new QueryableIndexStorageAdapter(index); + this.identifier = 
segmentIdentifier; } @Override @@ -55,7 +57,7 @@ public QueryableIndex asQueryableIndex() @Override public StorageAdapter asStorageAdapter() { - return new QueryableIndexStorageAdapter(index); + return storageAdapter; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java index 779b06033ddf..6faacca88865 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java @@ -19,22 +19,18 @@ package org.apache.druid.segment; -import com.google.common.base.Function; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.query.BaseQuery; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.DefaultBitmapResultFactory; import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.BitmapIndex; import org.apache.druid.segment.column.ColumnCapabilities; @@ -43,10 +39,8 @@ import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.NumericColumn; import org.apache.druid.segment.data.Indexed; -import 
org.apache.druid.segment.data.Offset; -import org.apache.druid.segment.data.ReadableOffset; import org.apache.druid.segment.filter.AndFilter; -import org.apache.druid.segment.historical.HistoricalCursor; +import org.apache.druid.segment.vector.VectorCursor; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -55,18 +49,24 @@ import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Objects; /** */ public class QueryableIndexStorageAdapter implements StorageAdapter { + public static final int DEFAULT_VECTOR_SIZE = 512; + private final QueryableIndex index; + @Nullable + private volatile DateTime minTime; + + @Nullable + private volatile DateTime maxTime; + public QueryableIndexStorageAdapter(QueryableIndex index) { this.index = index; @@ -124,17 +124,23 @@ public int getNumRows() @Override public DateTime getMinTime() { - try (final NumericColumn column = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn()) { - return DateTimes.utc(column.getLongSingleValueRow(0)); + if (minTime == null) { + // May be called a few times in parallel when first populating minTime, but this is benign, so allow it. + populateMinMaxTime(); } + + return minTime; } @Override public DateTime getMaxTime() { - try (final NumericColumn column = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn()) { - return DateTimes.utc(column.getLongSingleValueRow(column.length() - 1)); + if (maxTime == null) { + // May be called a few times in parallel when first populating maxTime, but this is benign, so allow it. 
+ populateMinMaxTime(); } + + return maxTime; } @Override @@ -197,6 +203,71 @@ public DateTime getMaxIngestedEventTime() return getMaxTime(); } + @Override + public boolean canVectorize( + @Nullable final Filter filter, + final Interval interval, + final VirtualColumns virtualColumns, + final boolean descending + ) + { + if (filter != null) { + final boolean filterCanVectorize = + filter.supportsBitmapIndex(makeBitmapIndexSelector(virtualColumns)) + || filter.canVectorizeMatcher(); + + if (!filterCanVectorize) { + return false; + } + } + + // 1) Virtual columns can't vectorize yet + // 2) Vector cursors can't iterate backwards yet + return virtualColumns.size() == 0 && !descending; + } + + @Override + @Nullable + public VectorCursor makeVectorCursor( + @Nullable final Filter filter, + final Interval interval, + final VirtualColumns virtualColumns, + final boolean descending, + final int vectorSize, + @Nullable final QueryMetrics queryMetrics + ) + { + if (!canVectorize(filter, interval, virtualColumns, descending)) { + throw new ISE("Cannot vectorize. Check 'canVectorize' before calling 'makeVectorCursor'."); + } + + if (queryMetrics != null) { + queryMetrics.vectorized(true); + } + + final Interval actualInterval = computeCursorInterval(Granularities.ALL, interval); + + if (actualInterval == null) { + return null; + } + + final ColumnSelectorBitmapIndexSelector bitmapIndexSelector = makeBitmapIndexSelector(virtualColumns); + + final FilterAnalysis filterAnalysis = analyzeFilter(filter, bitmapIndexSelector, queryMetrics); + + return new QueryableIndexCursorSequenceBuilder( + index, + actualInterval, + virtualColumns, + filterAnalysis.getPreFilterBitmap(), + getMinTime().getMillis(), + getMaxTime().getMillis(), + descending, + filterAnalysis.getPostFilter(), + bitmapIndexSelector + ).buildVectorized(vectorSize > 0 ? 
vectorSize : DEFAULT_VECTOR_SIZE); + } + @Override public Sequence makeCursors( @Nullable Filter filter, @@ -207,25 +278,91 @@ public Sequence makeCursors( @Nullable QueryMetrics queryMetrics ) { + if (queryMetrics != null) { + queryMetrics.vectorized(false); + } + + final Interval actualInterval = computeCursorInterval(gran, interval); + + if (actualInterval == null) { + return Sequences.empty(); + } + + final ColumnSelectorBitmapIndexSelector bitmapIndexSelector = makeBitmapIndexSelector(virtualColumns); + + final FilterAnalysis filterAnalysis = analyzeFilter(filter, bitmapIndexSelector, queryMetrics); - DateTime minTime = getMinTime(); - long minDataTimestamp = minTime.getMillis(); - DateTime maxTime = getMaxTime(); - long maxDataTimestamp = maxTime.getMillis(); + return Sequences.filter( + new QueryableIndexCursorSequenceBuilder( + index, + actualInterval, + virtualColumns, + filterAnalysis.getPreFilterBitmap(), + getMinTime().getMillis(), + getMaxTime().getMillis(), + descending, + filterAnalysis.getPostFilter(), + bitmapIndexSelector + ).build(gran), + Objects::nonNull + ); + } + + @Nullable + public static ColumnCapabilities getColumnCapabilities(ColumnSelector index, String columnName) + { + final ColumnHolder columnHolder = index.getColumnHolder(columnName); + if (columnHolder == null) { + return null; + } + return columnHolder.getCapabilities(); + } + + @Override + public Metadata getMetadata() + { + return index.getMetadata(); + } + + private void populateMinMaxTime() + { + // Compute and cache minTime, maxTime. 
+ final ColumnHolder columnHolder = index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME); + try (final NumericColumn column = (NumericColumn) columnHolder.getColumn()) { + this.minTime = DateTimes.utc(column.getLongSingleValueRow(0)); + this.maxTime = DateTimes.utc(column.getLongSingleValueRow(column.length() - 1)); + } + } + + @Nullable + private Interval computeCursorInterval(final Granularity gran, final Interval interval) + { + final DateTime minTime = getMinTime(); + final DateTime maxTime = getMaxTime(); final Interval dataInterval = new Interval(minTime, gran.bucketEnd(maxTime)); if (!interval.overlaps(dataInterval)) { - return Sequences.empty(); + return null; } - final Interval actualInterval = interval.overlap(dataInterval); + return interval.overlap(dataInterval); + } - final ColumnSelectorBitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector( + private ColumnSelectorBitmapIndexSelector makeBitmapIndexSelector(final VirtualColumns virtualColumns) + { + return new ColumnSelectorBitmapIndexSelector( index.getBitmapFactoryForDimensions(), virtualColumns, index ); + } + private FilterAnalysis analyzeFilter( + @Nullable final Filter filter, + ColumnSelectorBitmapIndexSelector bitmapIndexSelector, + @Nullable QueryMetrics queryMetrics + ) + { final int totalRows = index.getNumRows(); /* @@ -242,20 +379,18 @@ public Sequence makeCursors( * * Any subfilters that cannot be processed entirely with bitmap indexes will be moved to the post-filtering stage. */ - final Offset offset; final List preFilters; final List postFilters = new ArrayList<>(); int preFilteredRows = totalRows; if (filter == null) { preFilters = Collections.emptyList(); - offset = descending ? 
new SimpleDescendingOffset(totalRows) : new SimpleAscendingOffset(totalRows); } else { preFilters = new ArrayList<>(); if (filter instanceof AndFilter) { // If we get an AndFilter, we can split the subfilters across both filtering stages for (Filter subfilter : ((AndFilter) filter).getFilters()) { - if (subfilter.supportsBitmapIndex(selector)) { + if (subfilter.supportsBitmapIndex(bitmapIndexSelector)) { preFilters.add(subfilter); } else { postFilters.add(subfilter); @@ -263,33 +398,29 @@ public Sequence makeCursors( } } else { // If we get an OrFilter or a single filter, handle the filter in one stage - if (filter.supportsBitmapIndex(selector)) { + if (filter.supportsBitmapIndex(bitmapIndexSelector)) { preFilters.add(filter); } else { postFilters.add(filter); } } + } - if (preFilters.size() == 0) { - offset = descending ? new SimpleDescendingOffset(totalRows) : new SimpleAscendingOffset(totalRows); + final ImmutableBitmap preFilterBitmap; + if (preFilters.isEmpty()) { + preFilterBitmap = null; + } else { + if (queryMetrics != null) { + BitmapResultFactory bitmapResultFactory = + queryMetrics.makeBitmapResultFactory(bitmapIndexSelector.getBitmapFactory()); + long bitmapConstructionStartNs = System.nanoTime(); + // Use AndFilter.getBitmapResult to intersect the preFilters to get its short-circuiting behavior. + preFilterBitmap = AndFilter.getBitmapIndex(bitmapIndexSelector, bitmapResultFactory, preFilters); + preFilteredRows = preFilterBitmap.size(); + queryMetrics.reportBitmapConstructionTime(System.nanoTime() - bitmapConstructionStartNs); } else { - if (queryMetrics != null) { - BitmapResultFactory bitmapResultFactory = - queryMetrics.makeBitmapResultFactory(selector.getBitmapFactory()); - long bitmapConstructionStartNs = System.nanoTime(); - // Use AndFilter.getBitmapResult to intersect the preFilters to get its short-circuiting behavior. 
- ImmutableBitmap bitmapIndex = AndFilter.getBitmapIndex(selector, bitmapResultFactory, preFilters); - preFilteredRows = bitmapIndex.size(); - offset = BitmapOffset.of(bitmapIndex, descending, totalRows); - queryMetrics.reportBitmapConstructionTime(System.nanoTime() - bitmapConstructionStartNs); - } else { - BitmapResultFactory bitmapResultFactory = new DefaultBitmapResultFactory(selector.getBitmapFactory()); - offset = BitmapOffset.of( - AndFilter.getBitmapIndex(selector, bitmapResultFactory, preFilters), - descending, - totalRows - ); - } + BitmapResultFactory bitmapResultFactory = new DefaultBitmapResultFactory(bitmapIndexSelector.getBitmapFactory()); + preFilterBitmap = AndFilter.getBitmapIndex(bitmapIndexSelector, bitmapResultFactory, preFilters); } } @@ -309,388 +440,33 @@ public Sequence makeCursors( queryMetrics.reportPreFilteredRows(preFilteredRows); } - return Sequences.filter( - new CursorSequenceBuilder( - this, - actualInterval, - virtualColumns, - gran, - offset, - minDataTimestamp, - maxDataTimestamp, - descending, - postFilter, - selector - ).build(), - Objects::nonNull - ); + return new FilterAnalysis(preFilterBitmap, postFilter); } - @Nullable - static ColumnCapabilities getColumnCapabilities(ColumnSelector index, String columnName) + private static class FilterAnalysis { - ColumnHolder columnHolder = index.getColumnHolder(columnName); - if (columnHolder == null) { - return null; - } - return columnHolder.getCapabilities(); - } - - private static class CursorSequenceBuilder - { - private final QueryableIndex index; - private final Interval interval; - private final VirtualColumns virtualColumns; - private final Granularity gran; - private final Offset offset; - private final long minDataTimestamp; - private final long maxDataTimestamp; - private final boolean descending; - @Nullable private final Filter postFilter; - private final ColumnSelectorBitmapIndexSelector bitmapIndexSelector; - - public CursorSequenceBuilder( - 
QueryableIndexStorageAdapter storageAdapter, - Interval interval, - VirtualColumns virtualColumns, - Granularity gran, - Offset offset, - long minDataTimestamp, - long maxDataTimestamp, - boolean descending, - @Nullable Filter postFilter, - ColumnSelectorBitmapIndexSelector bitmapIndexSelector - ) - { - this.index = storageAdapter.index; - this.interval = interval; - this.virtualColumns = virtualColumns; - this.gran = gran; - this.offset = offset; - this.minDataTimestamp = minDataTimestamp; - this.maxDataTimestamp = maxDataTimestamp; - this.descending = descending; - this.postFilter = postFilter; - this.bitmapIndexSelector = bitmapIndexSelector; - } + private final ImmutableBitmap preFilterBitmap; - public Sequence build() - { - final Offset baseOffset = offset.clone(); - - // Column caches shared amongst all cursors in this sequence. - final Map columnCache = new HashMap<>(); - - final NumericColumn timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn(); - - final Closer closer = Closer.create(); - closer.register(timestamps); - - Iterable iterable = gran.getIterable(interval); - if (descending) { - iterable = Lists.reverse(ImmutableList.copyOf(iterable)); - } - - return Sequences.withBaggage( - Sequences.map( - Sequences.simple(iterable), - new Function() - { - @Override - public Cursor apply(final Interval inputInterval) - { - final long timeStart = Math.max(interval.getStartMillis(), inputInterval.getStartMillis()); - final long timeEnd = Math.min( - interval.getEndMillis(), - gran.increment(inputInterval.getStart()).getMillis() - ); - - if (descending) { - for (; baseOffset.withinBounds(); baseOffset.increment()) { - if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) < timeEnd) { - break; - } - } - } else { - for (; baseOffset.withinBounds(); baseOffset.increment()) { - if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) >= timeStart) { - break; - } - } - } - - final Offset offset = descending ? 
- new DescendingTimestampCheckingOffset( - baseOffset, - timestamps, - timeStart, - minDataTimestamp >= timeStart - ) : - new AscendingTimestampCheckingOffset( - baseOffset, - timestamps, - timeEnd, - maxDataTimestamp < timeEnd - ); - - - final Offset baseCursorOffset = offset.clone(); - final ColumnSelectorFactory columnSelectorFactory = new QueryableIndexColumnSelectorFactory( - index, - virtualColumns, - descending, - closer, - baseCursorOffset.getBaseReadableOffset(), - columnCache - ); - final DateTime myBucket = gran.toDateTime(inputInterval.getStartMillis()); - - if (postFilter == null) { - return new QueryableIndexCursor(baseCursorOffset, columnSelectorFactory, myBucket); - } else { - FilteredOffset filteredOffset = new FilteredOffset( - baseCursorOffset, - columnSelectorFactory, - descending, - postFilter, - bitmapIndexSelector - ); - return new QueryableIndexCursor(filteredOffset, columnSelectorFactory, myBucket); - } - - } - } - ), - closer - ); - } - } - - private static class QueryableIndexCursor implements HistoricalCursor - { - private final Offset cursorOffset; - private final ColumnSelectorFactory columnSelectorFactory; - private final DateTime bucketStart; - - QueryableIndexCursor(Offset cursorOffset, ColumnSelectorFactory columnSelectorFactory, DateTime bucketStart) - { - this.cursorOffset = cursorOffset; - this.columnSelectorFactory = columnSelectorFactory; - this.bucketStart = bucketStart; - } - - @Override - public Offset getOffset() - { - return cursorOffset; - } - - @Override - public ColumnSelectorFactory getColumnSelectorFactory() - { - return columnSelectorFactory; - } - - @Override - public DateTime getTime() - { - return bucketStart; - } - - @Override - public void advance() - { - cursorOffset.increment(); - // Must call BaseQuery.checkInterrupted() after cursorOffset.increment(), not before, because - // FilteredOffset.increment() is a potentially long, not an "instant" operation (unlike to all other subclasses - // of Offset) and it 
returns early on interruption, leaving itself in an illegal state. We should not let - // aggregators, etc. access this illegal state and throw a QueryInterruptedException by calling - // BaseQuery.checkInterrupted(). - BaseQuery.checkInterrupted(); - } - - @Override - public void advanceUninterruptibly() - { - cursorOffset.increment(); - } - - @Override - public void advanceTo(int offset) - { - int count = 0; - while (count < offset && !isDone()) { - advance(); - count++; - } - } - - @Override - public boolean isDone() - { - return !cursorOffset.withinBounds(); - } - - @Override - public boolean isDoneOrInterrupted() - { - return isDone() || Thread.currentThread().isInterrupted(); - } - - @Override - public void reset() - { - cursorOffset.reset(); - } - } - - public abstract static class TimestampCheckingOffset extends Offset - { - final Offset baseOffset; - final NumericColumn timestamps; - final long timeLimit; - final boolean allWithinThreshold; - - TimestampCheckingOffset( - Offset baseOffset, - NumericColumn timestamps, - long timeLimit, - boolean allWithinThreshold - ) - { - this.baseOffset = baseOffset; - this.timestamps = timestamps; - this.timeLimit = timeLimit; - // checks if all the values are within the Threshold specified, skips timestamp lookups and checks if all values - // are within threshold. 
- this.allWithinThreshold = allWithinThreshold; - } - - @Override - public int getOffset() - { - return baseOffset.getOffset(); - } - - @Override - public boolean withinBounds() - { - if (!baseOffset.withinBounds()) { - return false; - } - if (allWithinThreshold) { - return true; - } - return timeInRange(timestamps.getLongSingleValueRow(baseOffset.getOffset())); - } - - @Override - public void reset() - { - baseOffset.reset(); - } - - @Override - public ReadableOffset getBaseReadableOffset() - { - return baseOffset.getBaseReadableOffset(); - } - - protected abstract boolean timeInRange(long current); - - @Override - public void increment() - { - baseOffset.increment(); - } - - @SuppressWarnings("MethodDoesntCallSuperMethod") - @Override - public Offset clone() - { - throw new IllegalStateException("clone"); - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("baseOffset", baseOffset); - inspector.visit("timestamps", timestamps); - inspector.visit("allWithinThreshold", allWithinThreshold); - } - } - - public static class AscendingTimestampCheckingOffset extends TimestampCheckingOffset - { - AscendingTimestampCheckingOffset( - Offset baseOffset, - NumericColumn timestamps, - long timeLimit, - boolean allWithinThreshold + public FilterAnalysis( + @Nullable final ImmutableBitmap preFilterBitmap, + @Nullable final Filter postFilter ) { - super(baseOffset, timestamps, timeLimit, allWithinThreshold); - } - - @Override - protected final boolean timeInRange(long current) - { - return current < timeLimit; - } - - @Override - public String toString() - { - return (baseOffset.withinBounds() ? 
timestamps.getLongSingleValueRow(baseOffset.getOffset()) : "OOB") + - "<" + timeLimit + "::" + baseOffset; - } - - @SuppressWarnings("MethodDoesntCallSuperMethod") - @Override - public Offset clone() - { - return new AscendingTimestampCheckingOffset(baseOffset.clone(), timestamps, timeLimit, allWithinThreshold); - } - } - - public static class DescendingTimestampCheckingOffset extends TimestampCheckingOffset - { - DescendingTimestampCheckingOffset( - Offset baseOffset, - NumericColumn timestamps, - long timeLimit, - boolean allWithinThreshold - ) - { - super(baseOffset, timestamps, timeLimit, allWithinThreshold); - } - - @Override - protected final boolean timeInRange(long current) - { - return current >= timeLimit; + this.preFilterBitmap = preFilterBitmap; + this.postFilter = postFilter; } - @Override - public String toString() + @Nullable + public ImmutableBitmap getPreFilterBitmap() { - return timeLimit + ">=" + - (baseOffset.withinBounds() ? timestamps.getLongSingleValueRow(baseOffset.getOffset()) : "OOB") + - "::" + baseOffset; + return preFilterBitmap; } - @SuppressWarnings("MethodDoesntCallSuperMethod") - @Override - public Offset clone() + @Nullable + public Filter getPostFilter() { - return new DescendingTimestampCheckingOffset(baseOffset.clone(), timestamps, timeLimit, allWithinThreshold); + return postFilter; } } - - @Override - public Metadata getMetadata() - { - return index.getMetadata(); - } } diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java index ce4dcaf60bfc..0d74b79b16a8 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java @@ -224,6 +224,11 @@ public VirtualColumn[] getVirtualColumns() return virtualColumns.toArray(new VirtualColumn[0]); } + public int size() + { + return virtualColumns.size(); + } + public ColumnSelectorFactory wrap(final 
ColumnSelectorFactory baseFactory) { return new VirtualizedColumnSelectorFactory(baseFactory, this); diff --git a/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java b/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java index d692725fd129..f22693365e13 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java @@ -19,12 +19,26 @@ package org.apache.druid.segment.column; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; import java.io.Closeable; public interface BaseColumn extends Closeable { ColumnValueSelector makeColumnValueSelector(ReadableOffset offset); + + default VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) + { + throw new UOE("Cannot make VectorValueSelector for column with class[%s]", getClass().getName()); + } + + default VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) + { + throw new UOE("Cannot make VectorObjectSelector for column with class[%s]", getClass().getName()); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 65c94ae091ab..ae36c672e9b9 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -38,6 +38,13 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities @JsonIgnore private boolean filterable; + public static ColumnCapabilitiesImpl copyOf(final ColumnCapabilities 
other) + { + final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); + capabilities.merge(other); + return capabilities; + } + @Override @JsonProperty public ValueType getType() diff --git a/processing/src/main/java/org/apache/druid/segment/column/ComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/column/ComplexColumn.java index 1a7981eec188..6c66021f2650 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ComplexColumn.java @@ -24,6 +24,8 @@ import org.apache.druid.segment.ObjectColumnSelector; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorObjectSelector; import javax.annotation.Nullable; @@ -82,6 +84,56 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) }; } + @Override + public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) + { + return new VectorObjectSelector() + { + final Object[] vector = new Object[offset.getMaxVectorSize()]; + + private int id = ReadableVectorOffset.NULL_ID; + + @Override + public Object[] getObjectVector() + { + if (id == offset.getId()) { + return vector; + } + + if (offset.isContiguous()) { + final int startOffset = offset.getStartOffset(); + final int vectorSize = offset.getCurrentVectorSize(); + + for (int i = 0; i < vectorSize; i++) { + vector[i] = getRowValue(startOffset + i); + } + } else { + final int[] offsets = offset.getOffsets(); + final int vectorSize = offset.getCurrentVectorSize(); + + for (int i = 0; i < vectorSize; i++) { + vector[i] = getRowValue(offsets[i]); + } + } + + id = offset.getId(); + return vector; + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return 
offset.getMaxVectorSize(); + } + }; + } + @Override public void close() { diff --git a/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedColumn.java index 2e67a44ad4ff..229d7e35b8d8 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedColumn.java @@ -24,6 +24,9 @@ import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import javax.annotation.Nullable; @@ -32,11 +35,17 @@ public interface DictionaryEncodedColumn> extends BaseColumn { int length(); + boolean hasMultipleValues(); + int getSingleValueRow(int rowNum); + IndexedInts getMultiValueRow(int rowNum); + ActualType lookupName(int id); + int lookupId(ActualType name); + int getCardinality(); DimensionSelector makeDimensionSelector(ReadableOffset offset, @Nullable ExtractionFn extractionFn); @@ -46,4 +55,8 @@ default ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) { return makeDimensionSelector(offset, null); } + + SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(ReadableVectorOffset vectorOffset); + + MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(ReadableVectorOffset vectorOffset); } diff --git a/processing/src/main/java/org/apache/druid/segment/column/DoublesColumn.java b/processing/src/main/java/org/apache/druid/segment/column/DoublesColumn.java index 80a7d2ffe410..34ab5bb0394b 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/DoublesColumn.java +++ 
b/processing/src/main/java/org/apache/druid/segment/column/DoublesColumn.java @@ -25,7 +25,8 @@ import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.data.ColumnarDoubles; import org.apache.druid.segment.data.ReadableOffset; - +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorValueSelector; /** */ @@ -62,6 +63,12 @@ public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) return column.makeColumnValueSelector(offset, IndexIO.LEGACY_FACTORY.getBitmapFactory().makeEmptyImmutableBitmap()); } + @Override + public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) + { + return column.makeVectorValueSelector(offset, IndexIO.LEGACY_FACTORY.getBitmapFactory().makeEmptyImmutableBitmap()); + } + @Override public long getLongSingleValueRow(int rowNum) { diff --git a/processing/src/main/java/org/apache/druid/segment/column/DoublesColumnWithNulls.java b/processing/src/main/java/org/apache/druid/segment/column/DoublesColumnWithNulls.java index ee188234d912..1e2e017fcb27 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/DoublesColumnWithNulls.java +++ b/processing/src/main/java/org/apache/druid/segment/column/DoublesColumnWithNulls.java @@ -24,6 +24,8 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.data.ColumnarDoubles; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorValueSelector; /** * DoublesColumn with null values. 
@@ -44,6 +46,12 @@ public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) return column.makeColumnValueSelector(offset, nullValueBitmap); } + @Override + public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) + { + return column.makeVectorValueSelector(offset, nullValueBitmap); + } + @Override public long getLongSingleValueRow(int rowNum) { diff --git a/processing/src/main/java/org/apache/druid/segment/column/FloatsColumn.java b/processing/src/main/java/org/apache/druid/segment/column/FloatsColumn.java index 2e42070df69b..6684e6a6b036 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/FloatsColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/FloatsColumn.java @@ -25,6 +25,8 @@ import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.data.ColumnarFloats; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorValueSelector; /** */ @@ -61,6 +63,12 @@ public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) return column.makeColumnValueSelector(offset, IndexIO.LEGACY_FACTORY.getBitmapFactory().makeEmptyImmutableBitmap()); } + @Override + public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) + { + return column.makeVectorValueSelector(offset, IndexIO.LEGACY_FACTORY.getBitmapFactory().makeEmptyImmutableBitmap()); + } + @Override public long getLongSingleValueRow(int rowNum) { diff --git a/processing/src/main/java/org/apache/druid/segment/column/FloatsColumnWithNulls.java b/processing/src/main/java/org/apache/druid/segment/column/FloatsColumnWithNulls.java index cdde6e83393d..38fab32a4e63 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/FloatsColumnWithNulls.java +++ b/processing/src/main/java/org/apache/druid/segment/column/FloatsColumnWithNulls.java @@ -24,6 +24,8 @@ import 
org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.data.ColumnarFloats; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorValueSelector; /** * FloatsColumn with null values. @@ -44,6 +46,12 @@ public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) return column.makeColumnValueSelector(offset, nullValueBitmap); } + @Override + public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) + { + return column.makeVectorValueSelector(offset, nullValueBitmap); + } + @Override public long getLongSingleValueRow(int rowNum) { diff --git a/processing/src/main/java/org/apache/druid/segment/column/LongsColumn.java b/processing/src/main/java/org/apache/druid/segment/column/LongsColumn.java index acff61d8a6e4..6f17dfb7c015 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/LongsColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/LongsColumn.java @@ -25,6 +25,8 @@ import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.data.ColumnarLongs; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorValueSelector; /** */ @@ -61,6 +63,12 @@ public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) return column.makeColumnValueSelector(offset, IndexIO.LEGACY_FACTORY.getBitmapFactory().makeEmptyImmutableBitmap()); } + @Override + public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) + { + return column.makeVectorValueSelector(offset, IndexIO.LEGACY_FACTORY.getBitmapFactory().makeEmptyImmutableBitmap()); + } + @Override public long getLongSingleValueRow(int rowNum) { diff --git a/processing/src/main/java/org/apache/druid/segment/column/LongsColumnWithNulls.java 
b/processing/src/main/java/org/apache/druid/segment/column/LongsColumnWithNulls.java index 19937e77d2be..0090cf7e99b1 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/LongsColumnWithNulls.java +++ b/processing/src/main/java/org/apache/druid/segment/column/LongsColumnWithNulls.java @@ -24,6 +24,8 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.data.ColumnarLongs; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorValueSelector; /** * LongsColumn with null values. @@ -44,6 +46,12 @@ public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) return column.makeColumnValueSelector(offset, nullValueBitmap); } + @Override + public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) + { + return column.makeVectorValueSelector(offset, nullValueBitmap); + } + @Override public long getLongSingleValueRow(int rowNum) { diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java index 7bddc80f37ed..a76cc8a21403 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java @@ -37,13 +37,16 @@ import org.apache.druid.segment.filter.BooleanValueMatcher; import org.apache.druid.segment.historical.HistoricalDimensionSelector; import org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import javax.annotation.Nullable; import java.io.IOException; import java.util.BitSet; /** 
-*/ + */ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn { @Nullable @@ -318,6 +321,164 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } } + @Override + public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset) + { + class QueryableSingleValueDimensionVectorSelector implements SingleValueDimensionVectorSelector, IdLookup + { + private final int[] vector = new int[offset.getMaxVectorSize()]; + private int id = ReadableVectorOffset.NULL_ID; + + @Override + public int[] getRowVector() + { + if (id == offset.getId()) { + return vector; + } + + if (offset.isContiguous()) { + column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize()); + } else { + column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize()); + } + + id = offset.getId(); + return vector; + } + + @Override + public int getValueCardinality() + { + return getCardinality(); + } + + @Nullable + @Override + public String lookupName(final int id) + { + return StringDictionaryEncodedColumn.this.lookupName(id); + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return true; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return this; + } + + @Override + public int lookupId(@Nullable final String name) + { + return StringDictionaryEncodedColumn.this.lookupId(name); + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + } + + return new QueryableSingleValueDimensionVectorSelector(); + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(final ReadableVectorOffset offset) + { + class QueryableMultiValueDimensionVectorSelector implements MultiValueDimensionVectorSelector, IdLookup + { + private final IndexedInts[] vector = new IndexedInts[offset.getMaxVectorSize()]; + 
private int id = ReadableVectorOffset.NULL_ID; + + @Override + public IndexedInts[] getRowVector() + { + if (id == offset.getId()) { + return vector; + } + + if (offset.isContiguous()) { + final int currentOffset = offset.getStartOffset(); + final int numRows = offset.getCurrentVectorSize(); + + for (int i = 0; i < numRows; i++) { + // Must use getUnshared, otherwise all elements in the vector could be the same shared object. + vector[i] = multiValueColumn.getUnshared(i + currentOffset); + } + } else { + final int[] offsets = offset.getOffsets(); + final int numRows = offset.getCurrentVectorSize(); + + for (int i = 0; i < numRows; i++) { + // Must use getUnshared, otherwise all elements in the vector could be the same shared object. + vector[i] = multiValueColumn.getUnshared(offsets[i]); + } + } + + id = offset.getId(); + return vector; + } + + @Override + public int getValueCardinality() + { + return getCardinality(); + } + + @Nullable + @Override + public String lookupName(final int id) + { + return StringDictionaryEncodedColumn.this.lookupName(id); + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return true; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return this; + } + + @Override + public int lookupId(@Nullable final String name) + { + return StringDictionaryEncodedColumn.this.lookupId(name); + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + } + + return new QueryableMultiValueDimensionVectorSelector(); + } + @Override public void close() throws IOException { diff --git a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarDoublesSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarDoublesSupplier.java index 8701ea970f85..82c7479dafc3 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarDoublesSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarDoublesSupplier.java @@ -80,7 +80,9 @@ private class BlockLayoutColumnarDoubles implements ColumnarDoubles int currBufferNum = -1; ResourceHolder holder; - /** doubleBuffer's position must be 0 */ + /** + * doubleBuffer's position must be 0 + */ DoubleBuffer doubleBuffer; @Override @@ -103,6 +105,63 @@ public double get(int index) return doubleBuffer.get(bufferIndex); } + @Override + public void get(final double[] out, final int start, final int length) + { + // division + remainder is optimized by the compiler so keep those together + int bufferNum = start / sizePer; + int bufferIndex = start % sizePer; + + int p = 0; + + while (p < length) { + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int limit = Math.min(length - p, sizePer - bufferIndex); + final int oldPosition = doubleBuffer.position(); + try { + doubleBuffer.position(bufferIndex); + doubleBuffer.get(out, p, limit); + } + finally { + doubleBuffer.position(oldPosition); + } + p += limit; + bufferNum++; + bufferIndex = 0; + } + } + + @Override + public void get(final double[] out, final int[] indexes, final int length) + { + int p = 0; + + while (p < length) { + int bufferNum = indexes[p] / sizePer; + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int indexOffset = bufferNum * sizePer; + + int i = p; + for (; i < length; i++) { + int index = indexes[i] - indexOffset; + if (index >= sizePer) { + break; + } + + out[i] = doubleBuffer.get(index); + } + + assert i > p; + p = i; + } + } + protected void loadBuffer(int bufferNum) { CloseQuietly.close(holder); diff --git a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java index 
5673164131e5..a0e4c3fbc308 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java @@ -21,7 +21,6 @@ import com.google.common.base.Supplier; import org.apache.druid.collections.ResourceHolder; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.guava.CloseQuietly; import java.nio.ByteBuffer; @@ -81,7 +80,9 @@ private class BlockLayoutColumnarFloats implements ColumnarFloats int currBufferNum = -1; ResourceHolder holder; - /** floatBuffer's position must be 0 */ + /** + * floatBuffer's position must be 0 + */ FloatBuffer floatBuffer; @Override @@ -105,17 +106,59 @@ public float get(int index) } @Override - public void fill(int index, float[] toFill) + public void get(final float[] out, final int start, final int length) { - if (totalSize - index < toFill.length) { - throw new IndexOutOfBoundsException( - StringUtils.format( - "Cannot fill array of size[%,d] at index[%,d]. 
Max size[%,d]", toFill.length, index, totalSize - ) - ); + // division + remainder is optimized by the compiler so keep those together + int bufferNum = start / sizePer; + int bufferIndex = start % sizePer; + + int p = 0; + + while (p < length) { + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int limit = Math.min(length - p, sizePer - bufferIndex); + final int oldPosition = floatBuffer.position(); + try { + floatBuffer.position(bufferIndex); + floatBuffer.get(out, p, limit); + } + finally { + floatBuffer.position(oldPosition); + } + p += limit; + bufferNum++; + bufferIndex = 0; } - for (int i = 0; i < toFill.length; i++) { - toFill[i] = get(index + i); + } + + @Override + public void get(final float[] out, final int[] indexes, final int length) + { + int p = 0; + + while (p < length) { + int bufferNum = indexes[p] / sizePer; + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int indexOffset = bufferNum * sizePer; + + int i = p; + for (; i < length; i++) { + int index = indexes[i] - indexOffset; + if (index >= sizePer) { + break; + } + + out[i] = floatBuffer.get(index); + } + + assert i > p; + p = i; } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java index b8be1de96276..64a2d8f2ccf8 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java @@ -21,7 +21,6 @@ import com.google.common.base.Supplier; import org.apache.druid.collections.ResourceHolder; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.guava.CloseQuietly; import java.nio.ByteBuffer; @@ -85,6 +84,7 @@ protected void loadBuffer(int bufferNum) buffer = holder.get(); // asLongBuffer() makes the longBuffer's position = 
0 longBuffer = buffer.asLongBuffer(); + reader.setBuffer(buffer); currBufferNum = bufferNum; } }; @@ -120,7 +120,9 @@ private class BlockLayoutColumnarLongs implements ColumnarLongs int currBufferNum = -1; ResourceHolder holder; ByteBuffer buffer; - /** longBuffer's position must be 0 */ + /** + * longBuffer's position must be 0 + */ LongBuffer longBuffer; @Override @@ -144,17 +146,41 @@ public long get(int index) } @Override - public void fill(int index, long[] toFill) + public void get(final long[] out, final int start, final int length) { - if (totalSize - index < toFill.length) { - throw new IndexOutOfBoundsException( - StringUtils.format( - "Cannot fill array of size[%,d] at index[%,d]. Max size[%,d]", toFill.length, index, totalSize - ) - ); + // division + remainder is optimized by the compiler so keep those together + int bufferNum = start / sizePer; + int bufferIndex = start % sizePer; + + int p = 0; + + while (p < length) { + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int limit = Math.min(length - p, sizePer - bufferIndex); + reader.read(out, p, bufferIndex, limit); + p += limit; + bufferNum++; + bufferIndex = 0; } - for (int i = 0; i < toFill.length; i++) { - toFill[i] = get(index + i); + } + + @Override + public void get(final long[] out, final int[] indexes, final int length) + { + int p = 0; + + while (p < length) { + int bufferNum = indexes[p] / sizePer; + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int numRead = reader.read(out, p, indexes, length - p, bufferNum * sizePer, sizePer); + assert numRead > 0; + p += numRead; } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/ColumnarDoubles.java b/processing/src/main/java/org/apache/druid/segment/data/ColumnarDoubles.java index 39e9cf03aba6..4f357af4ecb9 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ColumnarDoubles.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ColumnarDoubles.java @@ 
-25,7 +25,12 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DoubleColumnSelector; import org.apache.druid.segment.historical.HistoricalColumnSelector; +import org.apache.druid.segment.vector.BaseDoubleVectorValueSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorSelectorUtils; +import org.apache.druid.segment.vector.VectorValueSelector; +import javax.annotation.Nullable; import java.io.Closeable; /** @@ -38,6 +43,20 @@ public interface ColumnarDoubles extends Closeable double get(int index); + default void get(double[] out, int start, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(i + start); + } + } + + default void get(double[] out, int[] indexes, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(indexes[i]); + } + } + @Override void close(); @@ -106,5 +125,60 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) return new HistoricalDoubleColumnSelectorWithNulls(); } } -} + default VectorValueSelector makeVectorValueSelector( + final ReadableVectorOffset theOffset, + final ImmutableBitmap nullValueBitmap + ) + { + class ColumnarDoublesVectorValueSelector extends BaseDoubleVectorValueSelector + { + private final double[] doubleVector; + + private int id = ReadableVectorOffset.NULL_ID; + + @Nullable + private boolean[] nullVector = null; + + private ColumnarDoublesVectorValueSelector() + { + super(theOffset); + this.doubleVector = new double[offset.getMaxVectorSize()]; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + computeVectorsIfNeeded(); + return nullVector; + } + + @Override + public double[] getDoubleVector() + { + computeVectorsIfNeeded(); + return doubleVector; + } + + private void computeVectorsIfNeeded() + { + if (id == offset.getId()) { + return; + } + + if (offset.isContiguous()) { + ColumnarDoubles.this.get(doubleVector, offset.getStartOffset(), 
offset.getCurrentVectorSize()); + } else { + ColumnarDoubles.this.get(doubleVector, offset.getOffsets(), offset.getCurrentVectorSize()); + } + + nullVector = VectorSelectorUtils.populateNullVector(nullVector, offset, nullValueBitmap); + + id = offset.getId(); + } + } + + return new ColumnarDoublesVectorValueSelector(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/data/ColumnarFloats.java b/processing/src/main/java/org/apache/druid/segment/data/ColumnarFloats.java index f8441a27b688..adcffb4597a6 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ColumnarFloats.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ColumnarFloats.java @@ -25,7 +25,12 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.FloatColumnSelector; import org.apache.druid.segment.historical.HistoricalColumnSelector; +import org.apache.druid.segment.vector.BaseFloatVectorValueSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorSelectorUtils; +import org.apache.druid.segment.vector.VectorValueSelector; +import javax.annotation.Nullable; import java.io.Closeable; /** @@ -38,7 +43,19 @@ public interface ColumnarFloats extends Closeable float get(int index); - void fill(int index, float[] toFill); + default void get(float[] out, int start, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(i + start); + } + } + + default void get(float[] out, int[] indexes, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(indexes[i]); + } + } @Override void close(); @@ -108,4 +125,60 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) return new HistoricalFloatColumnSelectorwithNulls(); } } + + default VectorValueSelector makeVectorValueSelector( + final ReadableVectorOffset theOffset, + final ImmutableBitmap nullValueBitmap + ) + { + class ColumnarFloatsVectorValueSelector extends BaseFloatVectorValueSelector 
+ { + private final float[] floatVector; + + private int id = ReadableVectorOffset.NULL_ID; + + @Nullable + private boolean[] nullVector = null; + + private ColumnarFloatsVectorValueSelector() + { + super(theOffset); + this.floatVector = new float[offset.getMaxVectorSize()]; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + computeVectorsIfNeeded(); + return nullVector; + } + + @Override + public float[] getFloatVector() + { + computeVectorsIfNeeded(); + return floatVector; + } + + private void computeVectorsIfNeeded() + { + if (id == offset.getId()) { + return; + } + + if (offset.isContiguous()) { + ColumnarFloats.this.get(floatVector, offset.getStartOffset(), offset.getCurrentVectorSize()); + } else { + ColumnarFloats.this.get(floatVector, offset.getOffsets(), offset.getCurrentVectorSize()); + } + + nullVector = VectorSelectorUtils.populateNullVector(nullVector, offset, nullValueBitmap); + + id = offset.getId(); + } + } + + return new ColumnarFloatsVectorValueSelector(); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java b/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java index 00ff086ddd86..41a3ac104109 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java @@ -25,7 +25,12 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.LongColumnSelector; import org.apache.druid.segment.historical.HistoricalColumnSelector; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorSelectorUtils; +import org.apache.druid.segment.vector.VectorValueSelector; +import javax.annotation.Nullable; import java.io.Closeable; /** @@ -38,7 +43,19 @@ public interface ColumnarLongs extends Closeable long get(int index); - void fill(int 
index, long[] toFill); + default void get(long[] out, int start, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(i + start); + } + } + + default void get(long[] out, int[] indexes, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(indexes[i]); + } + } @Override void close(); @@ -108,4 +125,60 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) return new HistoricalLongColumnSelectorWithNulls(); } } + + default VectorValueSelector makeVectorValueSelector( + final ReadableVectorOffset theOffset, + final ImmutableBitmap nullValueBitmap + ) + { + class ColumnarLongsVectorValueSelector extends BaseLongVectorValueSelector + { + private final long[] longVector; + + private int id = ReadableVectorOffset.NULL_ID; + + @Nullable + private boolean[] nullVector = null; + + private ColumnarLongsVectorValueSelector() + { + super(theOffset); + this.longVector = new long[offset.getMaxVectorSize()]; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + computeVectorsIfNeeded(); + return nullVector; + } + + @Override + public long[] getLongVector() + { + computeVectorsIfNeeded(); + return longVector; + } + + private void computeVectorsIfNeeded() + { + if (id == offset.getId()) { + return; + } + + if (offset.isContiguous()) { + ColumnarLongs.this.get(longVector, offset.getStartOffset(), offset.getCurrentVectorSize()); + } else { + ColumnarLongs.this.get(longVector, offset.getOffsets(), offset.getCurrentVectorSize()); + } + + nullVector = VectorSelectorUtils.populateNullVector(nullVector, offset, nullValueBitmap); + + id = offset.getId(); + } + } + + return new ColumnarLongsVectorValueSelector(); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/ColumnarMultiInts.java b/processing/src/main/java/org/apache/druid/segment/data/ColumnarMultiInts.java index 8392e133ff78..0064e126cd4c 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ColumnarMultiInts.java +++ 
b/processing/src/main/java/org/apache/druid/segment/data/ColumnarMultiInts.java @@ -27,6 +27,16 @@ */ public interface ColumnarMultiInts extends Indexed, Closeable { + /** + * Returns the values at a given row index. The IndexedInts object may potentially be reused, so callers should + * not keep references to it. + */ @Override IndexedInts get(int index); + + /** + * Returns the values at a given row index. The IndexedInts object will not be reused. This method may be less + * efficient than plain "get". + */ + IndexedInts getUnshared(int index); } diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarIntsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarIntsSupplier.java index 335dabfe1540..7f10c103b290 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarIntsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarIntsSupplier.java @@ -289,7 +289,9 @@ private class CompressedVSizeColumnarInts implements ColumnarInts int currBufferNum = -1; ResourceHolder holder; - /** buffer's position must be 0 */ + /** + * buffer's position must be 0 + */ ByteBuffer buffer; boolean bigEndian; @@ -322,6 +324,66 @@ public int get(int index) return _get(buffer, bigEndian, bufferIndex); } + @Override + public void get(int[] out, int start, int length) + { + int p = 0; + + while (p < length) { + // assumes the number of entries in each buffer is a power of 2 + final int bufferNum = (start + p) >> div; + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int currBufferStart = bufferNum * sizePer; + final int nextBufferStart = currBufferStart + sizePer; + + int i; + for (i = p; i < length; i++) { + final int index = start + i; + if (index >= nextBufferStart) { + break; + } + + out[i] = _get(buffer, bigEndian, index - currBufferStart); + } + + assert i > p; + p = i; + } + } + + @Override + public void 
get(final int[] out, final int[] indexes, final int length) + { + int p = 0; + + while (p < length) { + // assumes the number of entries in each buffer is a power of 2 + final int bufferNum = indexes[p] >> div; + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int currBufferStart = bufferNum * sizePer; + final int nextBufferStart = currBufferStart + sizePer; + + int i; + for (i = p; i < length; i++) { + final int index = indexes[i]; + if (index >= nextBufferStart) { + break; + } + + out[i] = _get(buffer, bigEndian, index - currBufferStart); + } + + assert i > p; + p = i; + } + } + /** * Returns the value at the given bufferIndex in the current decompression buffer * diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java index 4df385606460..048f1fe99504 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java @@ -38,17 +38,21 @@ * Format - * byte 1 - version * offsets - {@link ColumnarInts} of length num of rows + 1 representing offsets of starting index of first element of - * each row in values index and last element equal to length of values column, the last element in the offsets - * represents the total length of values column. + * each row in values index and last element equal to length of values column, the last element in the offsets + * represents the total length of values column. 
* values - {@link ColumnarInts} representing concatenated values of all rows */ public class CompressedVSizeColumnarMultiIntsSupplier implements WritableSupplier { private static final byte version = 0x2; - /** See class-level comment */ + /** + * See class-level comment + */ private final CompressedVSizeColumnarIntsSupplier offsetSupplier; - /** See class-level comment */ + /** + * See class-level comment + */ private final CompressedVSizeColumnarIntsSupplier valueSupplier; private CompressedVSizeColumnarMultiIntsSupplier( @@ -178,6 +182,39 @@ public IndexedInts get(int index) return rowValues; } + @Override + public IndexedInts getUnshared(int index) + { + final int offset = offsets.get(index); + final int size = offsets.get(index + 1) - offset; + + class UnsharedIndexedInts implements IndexedInts + { + @Override + public int size() + { + return size; + } + + @Override + public int get(int index) + { + if (index >= size) { + throw new IAE("Index[%d] >= size[%d]", index, size); + } + return values.get(index + offset); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("values", values); + } + } + + return new UnsharedIndexedInts(); + } + @Override public int indexOf(IndexedInts value) { diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java b/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java index b204bfd97afb..b7e7c96b26b1 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java @@ -283,6 +283,27 @@ public interface LongEncodingReader long read(int index); + default void read(long[] out, int outPosition, int startIndex, int length) + { + for (int i = 0; i < length; i++) { + out[outPosition + i] = read(startIndex + i); + } + } + + default int read(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int 
limit) + { + for (int i = 0; i < length; i++) { + int index = indexes[outPosition + i] - indexOffset; + if (index >= limit) { + return i; + } + + out[outPosition + i] = read(index); + } + + return length; + } + LongEncodingReader duplicate(); } diff --git a/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarFloatsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarFloatsSupplier.java index 612f8b08ef56..05e35b539fbc 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarFloatsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarFloatsSupplier.java @@ -20,7 +20,6 @@ package org.apache.druid.segment.data; import com.google.common.base.Supplier; -import org.apache.druid.java.util.common.StringUtils; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -58,21 +57,6 @@ public float get(int index) return buffer.get(buffer.position() + index); } - @Override - public void fill(int index, float[] toFill) - { - if (totalSize - index < toFill.length) { - throw new IndexOutOfBoundsException( - StringUtils.format( - "Cannot fill array of size[%,d] at index[%,d]. 
Max size[%,d]", toFill.length, index, totalSize - ) - ); - } - for (int i = 0; i < toFill.length; i++) { - toFill[i] = get(index + i); - } - } - @Override public String toString() { diff --git a/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarLongsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarLongsSupplier.java index 84bb254406d9..6093c5347070 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarLongsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarLongsSupplier.java @@ -20,7 +20,6 @@ package org.apache.druid.segment.data; import com.google.common.base.Supplier; -import org.apache.druid.java.util.common.StringUtils; public class EntireLayoutColumnarLongsSupplier implements Supplier { @@ -55,21 +54,6 @@ public long get(int index) return reader.read(index); } - @Override - public void fill(int index, long[] toFill) - { - if (totalSize - index < toFill.length) { - throw new IndexOutOfBoundsException( - StringUtils.format( - "Cannot fill array of size[%,d] at index[%,d]. 
Max size[%,d]", toFill.length, index, totalSize - ) - ); - } - for (int i = 0; i < toFill.length; i++) { - toFill[i] = get(index + i); - } - } - @Override public String toString() { diff --git a/processing/src/main/java/org/apache/druid/segment/data/IndexedInts.java b/processing/src/main/java/org/apache/druid/segment/data/IndexedInts.java index 1844ac88357b..8fd27ec55e17 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/IndexedInts.java +++ b/processing/src/main/java/org/apache/druid/segment/data/IndexedInts.java @@ -41,9 +41,24 @@ static IndexedInts empty() @CalledFromHotLoop int size(); + @CalledFromHotLoop int get(int index); + default void get(int[] out, int start, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(i + start); + } + } + + default void get(int[] out, int[] indexes, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(indexes[i]); + } + } + default void forEach(IntConsumer action) { int size = size(); diff --git a/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java b/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java index 316cf78cc4c6..aaf2c0ef2025 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java +++ b/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java @@ -49,6 +49,19 @@ public long read(int index) return buffer.get(buffer.position() + index); } + @Override + public void read(final long[] out, final int outPosition, final int startIndex, final int length) + { + final int oldPosition = buffer.position(); + try { + buffer.position(oldPosition + startIndex); + buffer.get(out, outPosition, length); + } + finally { + buffer.position(oldPosition); + } + } + @Override public CompressionFactory.LongEncodingReader duplicate() { diff --git a/processing/src/main/java/org/apache/druid/segment/data/ReadableOffset.java 
b/processing/src/main/java/org/apache/druid/segment/data/ReadableOffset.java index e116aebb66f6..1373574c7e63 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ReadableOffset.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ReadableOffset.java @@ -28,6 +28,8 @@ * See the companion class Offset, for more context on how this could be useful. A ReadableOffset should be * given to classes (e.g. FloatColumnSelector objects) by something which keeps a reference to the base Offset object * and increments it. + * + * @see org.apache.druid.segment.vector.ReadableVectorOffset the vectorized version. */ public interface ReadableOffset extends HotLoopCallee { diff --git a/processing/src/main/java/org/apache/druid/segment/data/VSizeColumnarMultiInts.java b/processing/src/main/java/org/apache/druid/segment/data/VSizeColumnarMultiInts.java index 4d579e47b339..7a21817feea5 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/VSizeColumnarMultiInts.java +++ b/processing/src/main/java/org/apache/druid/segment/data/VSizeColumnarMultiInts.java @@ -138,6 +138,12 @@ public VSizeColumnarInts get(int index) return myBuffer.hasRemaining() ? 
new VSizeColumnarInts(myBuffer, numBytes) : null; } + @Override + public IndexedInts getUnshared(final int index) + { + return get(index); + } + @Override public int indexOf(IndexedInts value) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java index f7f598eb7f9c..76fd626c9d45 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java @@ -30,9 +30,13 @@ import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.RowOffsetMatcherFactory; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.BaseVectorValueMatcher; +import org.apache.druid.query.filter.vector.ReadableVectorMatch; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.util.ArrayList; import java.util.List; @@ -104,6 +108,23 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) return makeMatcher(matchers); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + final VectorValueMatcher[] matchers = new VectorValueMatcher[filters.size()]; + + for (int i = 0; i < filters.size(); i++) { + matchers[i] = filters.get(i).makeVectorMatcher(factory); + } + return makeVectorMatcher(matchers); + } + + @Override + public boolean canVectorizeMatcher() + { + return filters.stream().allMatch(Filter::canVectorizeMatcher); + } + @Override public ValueMatcher makeMatcher( BitmapIndexSelector selector, @@ -180,7 +201,7 @@ public String toString() return StringUtils.format("(%s)", AND_JOINER.join(filters)); } - private 
ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) + private static ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) { Preconditions.checkState(baseMatchers.length > 0); if (baseMatchers.length == 1) { @@ -211,5 +232,32 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) }; } + private static VectorValueMatcher makeVectorMatcher(final VectorValueMatcher[] baseMatchers) + { + Preconditions.checkState(baseMatchers.length > 0); + if (baseMatchers.length == 1) { + return baseMatchers[0]; + } + return new BaseVectorValueMatcher(baseMatchers[0]) + { + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + ReadableVectorMatch match = mask; + + for (VectorValueMatcher matcher : baseMatchers) { + if (match.isAllFalse()) { + // Short-circuit if the entire vector is false. + break; + } + + match = matcher.match(match); + } + + assert match.isValid(mask); + return match; + } + }; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java index 6b7b6fdc1a53..28b8548f9d73 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java @@ -35,11 +35,15 @@ import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnStrategizer; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.IntListUtils; import org.apache.druid.segment.column.BitmapIndex; +import 
org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.util.Comparator; @@ -124,6 +128,22 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) return Filters.makeValueMatcher(factory, boundDimFilter.getDimension(), getPredicateFactory()); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return DimensionHandlerUtils.makeVectorProcessor( + boundDimFilter.getDimension(), + VectorValueMatcherColumnStrategizer.instance(), + factory + ).makeMatcher(getPredicateFactory()); + } + + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java index fe2a6a647e1f..fbcff2a2dafa 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java @@ -31,8 +31,12 @@ import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnStrategizer; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; /** */ @@ -100,6 +104,22 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) return Filters.makeValueMatcher(factory, dimension, predicateFactory); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return 
DimensionHandlerUtils.makeVectorProcessor( + dimension, + VectorValueMatcherColumnStrategizer.instance(), + factory + ).makeMatcher(predicateFactory); + } + + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java index 18a2db358c28..8186781ee8a0 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java @@ -33,10 +33,14 @@ import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnStrategizer; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.IntIteratorUtils; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.util.Iterator; import java.util.Set; @@ -142,6 +146,22 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) return Filters.makeValueMatcher(factory, dimension, getPredicateFactory()); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return DimensionHandlerUtils.makeVectorProcessor( + dimension, + VectorValueMatcherColumnStrategizer.instance(), + factory + ).makeMatcher(getPredicateFactory()); + } + + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { diff --git 
a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java index 018102a1833a..765bcb430fdb 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java @@ -30,11 +30,15 @@ import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.LikeDimFilter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnStrategizer; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.column.BitmapIndex; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.io.IOException; import java.io.UncheckedIOException; @@ -75,6 +79,22 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) return Filters.makeValueMatcher(factory, dimension, likeMatcher.predicateFactory(extractionFn)); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return DimensionHandlerUtils.makeVectorProcessor( + dimension, + VectorValueMatcherColumnStrategizer.instance(), + factory + ).makeMatcher(likeMatcher.predicateFactory(extractionFn)); + } + + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java index 20ed4ffdc914..ef12e1693347 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java @@ -23,9 +23,14 @@ import org.apache.druid.query.filter.BitmapIndexSelector; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.BaseVectorValueMatcher; +import org.apache.druid.query.filter.vector.ReadableVectorMatch; +import org.apache.druid.query.filter.vector.VectorMatch; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; /** */ @@ -70,6 +75,34 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) }; } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + final VectorValueMatcher baseMatcher = baseFilter.makeVectorMatcher(factory); + + return new BaseVectorValueMatcher(baseMatcher) + { + final VectorMatch scratch = VectorMatch.wrap(new int[factory.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final ReadableVectorMatch baseMatch = baseMatcher.match(mask); + + scratch.copyFrom(mask); + scratch.removeAll(baseMatch); + assert scratch.isValid(mask); + return scratch; + } + }; + } + + @Override + public boolean canVectorizeMatcher() + { + return baseFilter.canVectorizeMatcher(); + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java index 70d0532d17bb..01d82298e401 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java +++ 
b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java @@ -29,9 +29,14 @@ import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.RowOffsetMatcherFactory; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.BaseVectorValueMatcher; +import org.apache.druid.query.filter.vector.ReadableVectorMatch; +import org.apache.druid.query.filter.vector.VectorMatch; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.util.ArrayList; import java.util.List; @@ -77,6 +82,23 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) return makeMatcher(matchers); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + final VectorValueMatcher[] matchers = new VectorValueMatcher[filters.size()]; + + for (int i = 0; i < filters.size(); i++) { + matchers[i] = filters.get(i).makeVectorMatcher(factory); + } + return makeVectorMatcher(matchers); + } + + @Override + public boolean canVectorizeMatcher() + { + return filters.stream().allMatch(Filter::canVectorizeMatcher); + } + @Override public ValueMatcher makeMatcher( BitmapIndexSelector selector, @@ -105,39 +127,6 @@ public ValueMatcher makeMatcher( return makeMatcher(matchers.toArray(AndFilter.EMPTY_VALUE_MATCHER_ARRAY)); } - - private ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) - { - Preconditions.checkState(baseMatchers.length > 0); - - if (baseMatchers.length == 1) { - return baseMatchers[0]; - } - - return new ValueMatcher() - { - @Override - public boolean matches() - { - for (ValueMatcher matcher : baseMatchers) { - if (matcher.matches()) { - return true; - } - } - return false; - } - - @Override - public 
void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("firstBaseMatcher", baseMatchers[0]); - inspector.visit("secondBaseMatcher", baseMatchers[1]); - // Don't inspect the 3rd and all consequent baseMatchers, cut runtime shape combinations at this point. - // Anyway if the filter is so complex, Hotspot won't inline all calls because of the inline limit. - } - }; - } - @Override public List getFilters() { @@ -182,4 +171,74 @@ public String toString() { return StringUtils.format("(%s)", OR_JOINER.join(filters)); } + + private static ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) + { + Preconditions.checkState(baseMatchers.length > 0); + + if (baseMatchers.length == 1) { + return baseMatchers[0]; + } + + return new ValueMatcher() + { + @Override + public boolean matches() + { + for (ValueMatcher matcher : baseMatchers) { + if (matcher.matches()) { + return true; + } + } + return false; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("firstBaseMatcher", baseMatchers[0]); + inspector.visit("secondBaseMatcher", baseMatchers[1]); + // Don't inspect the 3rd and all consequent baseMatchers, cut runtime shape combinations at this point. + // Anyway if the filter is so complex, Hotspot won't inline all calls because of the inline limit. 
+ } + }; + } + + private static VectorValueMatcher makeVectorMatcher(final VectorValueMatcher[] baseMatchers) + { + Preconditions.checkState(baseMatchers.length > 0); + if (baseMatchers.length == 1) { + return baseMatchers[0]; + } + + return new BaseVectorValueMatcher(baseMatchers[0]) + { + final VectorMatch currentMask = VectorMatch.wrap(new int[getMaxVectorSize()]); + final VectorMatch scratch = VectorMatch.wrap(new int[getMaxVectorSize()]); + final VectorMatch retVal = VectorMatch.wrap(new int[getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + ReadableVectorMatch currentMatch = baseMatchers[0].match(mask); + + currentMask.copyFrom(mask); + retVal.copyFrom(currentMatch); + + for (int i = 1; i < baseMatchers.length; i++) { + if (retVal.isAllTrue(getCurrentVectorSize())) { + // Short-circuit if the entire vector is true. + break; + } + + currentMask.removeAll(currentMatch); + currentMatch = baseMatchers[i].match(currentMask); + retVal.addAll(currentMatch, scratch); + } + + assert retVal.isValid(mask); + return retVal; + } + }; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java index fe83220fc358..3640b7d3a512 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java @@ -24,8 +24,12 @@ import org.apache.druid.query.filter.BitmapIndexSelector; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnStrategizer; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; +import 
org.apache.druid.segment.vector.VectorColumnSelectorFactory; /** */ @@ -55,6 +59,16 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) return Filters.makeValueMatcher(factory, dimension, value); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return DimensionHandlerUtils.makeVectorProcessor( + dimension, + VectorValueMatcherColumnStrategizer.instance(), + factory + ).makeMatcher(value); + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { @@ -73,6 +87,12 @@ public double estimateSelectivity(BitmapIndexSelector indexSelector) return (double) indexSelector.getBitmapIndex(dimension, value).size() / indexSelector.getNumRows(); } + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public String toString() { diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java index e1b04c39f58b..69c71641c858 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java @@ -38,19 +38,21 @@ */ class IncrementalIndexColumnSelectorFactory implements ColumnSelectorFactory { + private final IncrementalIndexStorageAdapter adapter; private final IncrementalIndex index; private final VirtualColumns virtualColumns; private final boolean descending; private final IncrementalIndexRowHolder rowHolder; IncrementalIndexColumnSelectorFactory( - IncrementalIndex index, + IncrementalIndexStorageAdapter adapter, VirtualColumns virtualColumns, boolean descending, IncrementalIndexRowHolder rowHolder ) { - this.index = index; + this.adapter = adapter; + this.index = adapter.index; this.virtualColumns = virtualColumns; this.descending = 
descending; this.rowHolder = rowHolder; @@ -126,6 +128,7 @@ public ColumnCapabilities getColumnCapabilities(String columnName) return virtualColumns.getColumnCapabilities(columnName); } - return index.getCapabilities(columnName); + // Use adapter.getColumnCapabilities instead of index.getCapabilities (see note in IncrementalIndexStorageAdapter) + return adapter.getColumnCapabilities(columnName); } } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexRowIterator.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexRowIterator.java index 627c63f2d8a7..639b6abe1750 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexRowIterator.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexRowIterator.java @@ -64,7 +64,12 @@ private static RowPointer makeRowPointer( ) { ColumnSelectorFactory columnSelectorFactory = - new IncrementalIndexColumnSelectorFactory(incrementalIndex, VirtualColumns.EMPTY, false, rowHolder); + new IncrementalIndexColumnSelectorFactory( + new IncrementalIndexStorageAdapter(incrementalIndex), + VirtualColumns.EMPTY, + false, + rowHolder + ); ColumnValueSelector[] dimensionSelectors = incrementalIndex .getDimensions() .stream() diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index ae8e14a2aad6..3284ff74af25 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -38,7 +38,9 @@ import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; 
import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.segment.filter.BooleanValueMatcher; @@ -52,7 +54,7 @@ */ public class IncrementalIndexStorageAdapter implements StorageAdapter { - private final IncrementalIndex index; + final IncrementalIndex index; public IncrementalIndexStorageAdapter(IncrementalIndex index) { @@ -154,7 +156,24 @@ public Capabilities getCapabilities() @Override public ColumnCapabilities getColumnCapabilities(String column) { - return index.getCapabilities(column); + // Different from index.getCapabilities because, in a way, IncrementalIndex's string-typed dimensions + // are always potentially multi-valued at query time. (Missing / null values for a row can potentially be + // represented by an empty array; see StringDimensionIndexer.IndexerDimensionSelector's getRow method.) + // + // We don't want to represent this as having-multiple-values in index.getCapabilities, because that's used + // at index-persisting time to determine if we need a multi-value column or not. However, that means we + // need to tweak the capabilities here in the StorageAdapter (a query-time construct), so at query time + // they appear multi-valued. 
+ + final ColumnCapabilities capabilitiesFromIndex = index.getCapabilities(column); + final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(column); + if (dimensionDesc != null && dimensionDesc.getCapabilities().getType() == ValueType.STRING) { + final ColumnCapabilitiesImpl retVal = ColumnCapabilitiesImpl.copyOf(capabilitiesFromIndex); + retVal.setHasMultipleValues(true); + return retVal; + } else { + return capabilitiesFromIndex; + } } @Override @@ -237,7 +256,12 @@ private class IncrementalIndexCursor implements Cursor ) { currEntry = new IncrementalIndexRowHolder(); - columnSelectorFactory = new IncrementalIndexColumnSelectorFactory(index, virtualColumns, descending, currEntry); + columnSelectorFactory = new IncrementalIndexColumnSelectorFactory( + IncrementalIndexStorageAdapter.this, + virtualColumns, + descending, + currEntry + ); // Set maxRowIndex before creating the filterMatcher. See https://github.com/apache/incubator-druid/pull/6340 maxRowIndex = index.getLastRowIndex(); filterMatcher = filter == null ? BooleanValueMatcher.of(true) : filter.makeMatcher(columnSelectorFactory); diff --git a/processing/src/main/java/org/apache/druid/segment/vector/BaseDoubleVectorValueSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/BaseDoubleVectorValueSelector.java new file mode 100644 index 000000000000..1e94e53c7282 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/BaseDoubleVectorValueSelector.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +public abstract class BaseDoubleVectorValueSelector implements VectorValueSelector +{ + protected final ReadableVectorOffset offset; + + private int longId = ReadableVectorOffset.NULL_ID; + private int floatId = ReadableVectorOffset.NULL_ID; + + private long[] longVector; + private float[] floatVector; + + public BaseDoubleVectorValueSelector(final ReadableVectorOffset offset) + { + this.offset = offset; + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public long[] getLongVector() + { + if (longId == offset.getId()) { + return longVector; + } + + if (longVector == null) { + longVector = new long[offset.getMaxVectorSize()]; + } + + final double[] doubleVector = getDoubleVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + longVector[i] = (long) doubleVector[i]; + } + + longId = offset.getId(); + return longVector; + } + + @Override + public float[] getFloatVector() + { + if (floatId == offset.getId()) { + return floatVector; + } + + if (floatVector == null) { + floatVector = new float[offset.getMaxVectorSize()]; + } + + final double[] doubleVector = getDoubleVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + floatVector[i] = (float) doubleVector[i]; + } + + floatId = offset.getId(); + return floatVector; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/BaseFloatVectorValueSelector.java 
b/processing/src/main/java/org/apache/druid/segment/vector/BaseFloatVectorValueSelector.java new file mode 100644 index 000000000000..0ec248f39c13 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/BaseFloatVectorValueSelector.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +public abstract class BaseFloatVectorValueSelector implements VectorValueSelector +{ + protected final ReadableVectorOffset offset; + + private int longId = ReadableVectorOffset.NULL_ID; + private int doubleId = ReadableVectorOffset.NULL_ID; + + private long[] longVector; + private double[] doubleVector; + + public BaseFloatVectorValueSelector(final ReadableVectorOffset offset) + { + this.offset = offset; + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public long[] getLongVector() + { + if (longId == offset.getId()) { + return longVector; + } + + if (longVector == null) { + longVector = new long[offset.getMaxVectorSize()]; + } + + final float[] floatVector = getFloatVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + longVector[i] = (long) floatVector[i]; + } + + longId = offset.getId(); + return longVector; + } + + @Override + public double[] getDoubleVector() + { + if (doubleId == offset.getId()) { + return doubleVector; + } + + if (doubleVector == null) { + doubleVector = new double[offset.getMaxVectorSize()]; + } + + final float[] floatVector = getFloatVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + doubleVector[i] = (double) floatVector[i]; + } + + doubleId = offset.getId(); + return doubleVector; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/BaseLongVectorValueSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/BaseLongVectorValueSelector.java new file mode 100644 index 000000000000..ddf486577444 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/BaseLongVectorValueSelector.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +public abstract class BaseLongVectorValueSelector implements VectorValueSelector +{ + protected final ReadableVectorOffset offset; + + private int floatId = ReadableVectorOffset.NULL_ID; + private int doubleId = ReadableVectorOffset.NULL_ID; + + private float[] floatVector; + private double[] doubleVector; + + public BaseLongVectorValueSelector(final ReadableVectorOffset offset) + { + this.offset = offset; + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public float[] getFloatVector() + { + if (floatId == offset.getId()) { + return floatVector; + } + + if (floatVector == null) { + floatVector = new float[offset.getMaxVectorSize()]; + } + + final long[] longVector = getLongVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + floatVector[i] = (float) longVector[i]; + } + + floatId = offset.getId(); + return floatVector; + } + + @Override + public double[] getDoubleVector() + { + if (doubleId == offset.getId()) { + return doubleVector; + } + + if (doubleVector == null) { + doubleVector = new double[offset.getMaxVectorSize()]; + } + + final long[] longVector 
= getLongVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + doubleVector[i] = (double) longVector[i]; + } + + doubleId = offset.getId(); + return doubleVector; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/BitmapVectorOffset.java b/processing/src/main/java/org/apache/druid/segment/vector/BitmapVectorOffset.java new file mode 100644 index 000000000000..7d26480938da --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/BitmapVectorOffset.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +import com.google.common.base.Preconditions; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.roaringbitmap.BatchIterator; + +public class BitmapVectorOffset implements VectorOffset +{ + private final ImmutableBitmap bitmap; + private final int[] offsets; + private final int startOffset; + private final int endOffset; + + private BatchIterator iterator; + private boolean pastEnd; + private int currentVectorSize; + + public BitmapVectorOffset( + final int vectorSize, + final ImmutableBitmap bitmap, + final int startOffset, + final int endOffset + ) + { + this.bitmap = bitmap; + this.offsets = new int[vectorSize]; + this.startOffset = startOffset; + this.endOffset = endOffset; + reset(); + } + + @Override + public int getId() + { + // Should not be called when the offset is empty. + Preconditions.checkState(currentVectorSize > 0, "currentVectorSize > 0"); + return offsets[0]; + } + + @Override + public void advance() + { + currentVectorSize = 0; + + if (pastEnd) { + return; + } + + while (currentVectorSize == 0 && iterator.hasNext()) { + final int numRead = iterator.nextBatch(offsets); + + int from = 0; + while (from < numRead && offsets[from] < startOffset) { + from++; + } + + if (from > 0) { + System.arraycopy(offsets, from, offsets, 0, numRead - from); + } + + int to = numRead - from; + while (to > 0 && offsets[to - 1] >= endOffset) { + pastEnd = true; + to--; + } + + currentVectorSize = to; + } + } + + @Override + public boolean isDone() + { + return currentVectorSize == 0; + } + + @Override + public boolean isContiguous() + { + return false; + } + + @Override + public int getMaxVectorSize() + { + return offsets.length; + } + + @Override + public int getCurrentVectorSize() + { + return currentVectorSize; + } + + @Override + public int getStartOffset() + { + throw new UnsupportedOperationException("not contiguous"); + } + + @Override + public int[] getOffsets() + { + return offsets; + } + 
+ @Override + public void reset() + { + iterator = bitmap.batchIterator(); + currentVectorSize = 0; + pastEnd = false; + advance(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/DimensionVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/DimensionVectorSelector.java new file mode 100644 index 000000000000..72384aec195a --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/DimensionVectorSelector.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.query.monomorphicprocessing.CalledFromHotLoop; +import org.apache.druid.segment.IdLookup; + +import javax.annotation.Nullable; + +/** + * Common interface for vectorized dimension selectors: value cardinality plus lookups between ids and names. + */ +public interface DimensionVectorSelector extends VectorSizeInspector +{ + int getValueCardinality(); + + @CalledFromHotLoop + @Nullable + String lookupName(int id); + + boolean nameLookupPossibleInAdvance(); + + @Nullable + IdLookup idLookup(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/FilteredVectorOffset.java b/processing/src/main/java/org/apache/druid/segment/vector/FilteredVectorOffset.java new file mode 100644 index 000000000000..a6313339c664 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/FilteredVectorOffset.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.vector.ReadableVectorMatch; +import org.apache.druid.query.filter.vector.VectorMatch; +import org.apache.druid.query.filter.vector.VectorValueMatcher; + +public class FilteredVectorOffset implements VectorOffset +{ + private final VectorOffset baseOffset; + private final VectorValueMatcher filterMatcher; + private final int[] offsets; + private int currentVectorSize = 0; + private boolean allTrue = false; + + private FilteredVectorOffset(final VectorOffset baseOffset, final VectorValueMatcher filterMatcher) + { + this.baseOffset = baseOffset; + this.filterMatcher = filterMatcher; + this.offsets = new int[baseOffset.getMaxVectorSize()]; + advanceWhileVectorIsEmptyAndPopulateOffsets(); + } + + public static FilteredVectorOffset create( + final VectorOffset baseOffset, + final VectorColumnSelectorFactory baseColumnSelectorFactory, + final Filter filter + ) + { + // This is not the same logic as the row-by-row FilteredOffset, which uses bitmaps whenever possible. + // I am not convinced that approach is best in all cases (it's potentially too eager) and also have not implemented + // it for vector matchers yet. So let's keep this method simple for now, and try to harmonize them in the future. + Preconditions.checkState(filter.canVectorizeMatcher(), "Cannot vectorize"); + final VectorValueMatcher filterMatcher = filter.makeVectorMatcher(baseColumnSelectorFactory); + return new FilteredVectorOffset(baseOffset, filterMatcher); + } + + @Override + public int getId() + { + // Should not be called when the offset is empty. 
+ Preconditions.checkState(currentVectorSize > 0, "currentVectorSize > 0"); + return baseOffset.getId(); + } + + @Override + public void advance() + { + baseOffset.advance(); + advanceWhileVectorIsEmptyAndPopulateOffsets(); + } + + @Override + public boolean isDone() + { + return currentVectorSize == 0; + } + + @Override + public boolean isContiguous() + { + return allTrue && baseOffset.isContiguous(); + } + + @Override + public int getMaxVectorSize() + { + return baseOffset.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return currentVectorSize; + } + + @Override + public int getStartOffset() + { + if (isContiguous()) { + return baseOffset.getStartOffset(); + } else { + throw new ISE("Cannot call getStartOffset when not contiguous!"); + } + } + + @Override + public int[] getOffsets() + { + if (isContiguous()) { + throw new ISE("Cannot call getOffsets when contiguous!"); + } else if (allTrue) { + return baseOffset.getOffsets(); + } else { + return offsets; + } + } + + private void advanceWhileVectorIsEmptyAndPopulateOffsets() + { + allTrue = false; + + int j = 0; + + while (j == 0) { + if (baseOffset.isDone()) { + currentVectorSize = 0; + return; + } + + final ReadableVectorMatch match = filterMatcher.match(VectorMatch.allTrue(baseOffset.getCurrentVectorSize())); + + if (match.isAllTrue(baseOffset.getCurrentVectorSize())) { + currentVectorSize = baseOffset.getCurrentVectorSize(); + allTrue = true; + return; + } else if (match.isAllFalse()) { + baseOffset.advance(); + } else { + final int[] selection = match.getSelection(); + final int selectionSize = match.getSelectionSize(); + + if (baseOffset.isContiguous()) { + final int startOffset = baseOffset.getStartOffset(); + + for (int i = 0; i < selectionSize; i++) { + offsets[j++] = startOffset + selection[i]; + } + } else { + final int[] baseOffsets = baseOffset.getOffsets(); + + for (int i = 0; i < selectionSize; i++) { + offsets[j++] = baseOffsets[selection[i]]; + } + } + + if (j 
== 0) { + baseOffset.advance(); + } + } + } + + currentVectorSize = j; + } + + @Override + public void reset() + { + currentVectorSize = 0; + allTrue = false; + baseOffset.reset(); + advanceWhileVectorIsEmptyAndPopulateOffsets(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/MultiValueDimensionVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/MultiValueDimensionVectorSelector.java new file mode 100644 index 000000000000..baacb3be28b0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/MultiValueDimensionVectorSelector.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.segment.DimensionDictionarySelector; +import org.apache.druid.segment.data.IndexedInts; + +/** + * Vectorized selector for a multi-valued string-typed column. + * + * @see org.apache.druid.segment.DimensionSelector, the non-vectorized version. + * @see SingleValueDimensionVectorSelector, the singly-valued version. + */ +public interface MultiValueDimensionVectorSelector extends DimensionDictionarySelector, VectorSizeInspector +{ + /** + * Get the current vector. 
The array will be reused, so it is not a good idea to retain a reference to it. + */ + IndexedInts[] getRowVector(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java new file mode 100644 index 000000000000..3aeb32c1f262 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.QueryableIndexStorageAdapter; + +import javax.annotation.Nullable; + +public class NilVectorSelector + implements VectorValueSelector, VectorObjectSelector, SingleValueDimensionVectorSelector, IdLookup +{ + private static final boolean[] DEFAULT_NULLS_VECTOR = new boolean[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + private static final int[] DEFAULT_INT_VECTOR = new int[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + private static final long[] DEFAULT_LONG_VECTOR = new long[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + private static final float[] DEFAULT_FLOAT_VECTOR = new float[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + private static final double[] DEFAULT_DOUBLE_VECTOR = new double[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + private static final Object[] DEFAULT_OBJECT_VECTOR = new Object[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + + static { + for (int i = 0; i < DEFAULT_NULLS_VECTOR.length; i++) { + DEFAULT_NULLS_VECTOR[i] = true; + } + } + + private final VectorSizeInspector vectorSizeInspector; + private final boolean[] nulls; + private final int[] ints; + private final long[] longs; + private final float[] floats; + private final double[] doubles; + private final Object[] objects; + + private NilVectorSelector( + final VectorSizeInspector vectorSizeInspector, + final boolean[] nulls, + final int[] ints, + final long[] longs, + final float[] floats, + final double[] doubles, + final Object[] objects + ) + { + this.vectorSizeInspector = vectorSizeInspector; + this.nulls = nulls; + this.ints = ints; + this.longs = longs; + this.floats = floats; + this.doubles = doubles; + this.objects = objects; + } + + public static NilVectorSelector create(final VectorSizeInspector vectorSizeInspector) + { + if 
(vectorSizeInspector.getMaxVectorSize() <= QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE) { + // Reuse static vars when possible. + return new NilVectorSelector( + vectorSizeInspector, + DEFAULT_NULLS_VECTOR, + DEFAULT_INT_VECTOR, + DEFAULT_LONG_VECTOR, + DEFAULT_FLOAT_VECTOR, + DEFAULT_DOUBLE_VECTOR, + DEFAULT_OBJECT_VECTOR + ); + } else { + // The null flags must be all true here too, matching DEFAULT_NULLS_VECTOR; a fresh boolean[] is all false. + final boolean[] nulls = new boolean[vectorSizeInspector.getMaxVectorSize()]; + for (int i = 0; i < nulls.length; i++) { + nulls[i] = true; + } + + return new NilVectorSelector( + vectorSizeInspector, + nulls, + new int[vectorSizeInspector.getMaxVectorSize()], + new long[vectorSizeInspector.getMaxVectorSize()], + new float[vectorSizeInspector.getMaxVectorSize()], + new double[vectorSizeInspector.getMaxVectorSize()], + new Object[vectorSizeInspector.getMaxVectorSize()] + ); + } + } + + @Override + public long[] getLongVector() + { + return longs; + } + + @Override + public float[] getFloatVector() + { + return floats; + } + + @Override + public double[] getDoubleVector() + { + return doubles; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + return nulls; + } + + @Override + public int[] getRowVector() + { + return ints; + } + + @Override + public int getValueCardinality() + { + return 1; + } + + @Nullable + @Override + public String lookupName(final int id) + { + assert id == 0 : "id = " + id; + return null; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return false; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return this; + } + + @Override + public int lookupId(@Nullable final String name) + { + return NullHandling.isNullOrEquivalent(name) ? 
0 : -1; + } + + @Override + public Object[] getObjectVector() + { + return objects; + } + + @Override + public int getCurrentVectorSize() + { + return vectorSizeInspector.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return vectorSizeInspector.getMaxVectorSize(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/NoFilterVectorOffset.java b/processing/src/main/java/org/apache/druid/segment/vector/NoFilterVectorOffset.java new file mode 100644 index 000000000000..a24fabcff0ee --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/NoFilterVectorOffset.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +public class NoFilterVectorOffset implements VectorOffset +{ + private final int maxVectorSize; + private final int start; + private final int end; + private int theOffset; + + public NoFilterVectorOffset(final int maxVectorSize, final int start, final int end) + { + this.maxVectorSize = maxVectorSize; + this.start = start; + this.end = end; + reset(); + } + + @Override + public int getId() + { + return theOffset; + } + + @Override + public void advance() + { + theOffset += maxVectorSize; + } + + @Override + public boolean isDone() + { + return theOffset >= end; + } + + @Override + public boolean isContiguous() + { + return true; + } + + @Override + public int getMaxVectorSize() + { + return maxVectorSize; + } + + @Override + public int getCurrentVectorSize() + { + return Math.min(maxVectorSize, end - theOffset); + } + + @Override + public int getStartOffset() + { + return theOffset; + } + + @Override + public int[] getOffsets() + { + throw new UnsupportedOperationException("no filter"); + } + + @Override + public void reset() + { + theOffset = start; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java new file mode 100644 index 000000000000..83110499862f --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexStorageAdapter; +import org.apache.druid.segment.column.BaseColumn; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.DictionaryEncodedColumn; +import org.apache.druid.segment.column.ValueType; + +import javax.annotation.Nullable; +import java.util.HashMap; +import java.util.Map; + +public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSelectorFactory +{ + private final QueryableIndex index; + private final ReadableVectorOffset offset; + private final Closer closer; + private final Map<String, BaseColumn> columnCache; + + // Shared selectors are useful, since they cache vectors internally, and we can avoid recomputation if the same + // selector is used by more than one part of a query. 
+ private final Map<DimensionSpec, SingleValueDimensionVectorSelector> singleValueDimensionSelectorCache; + private final Map<DimensionSpec, MultiValueDimensionVectorSelector> multiValueDimensionSelectorCache; + private final Map<String, VectorValueSelector> valueSelectorCache; + private final Map<String, VectorObjectSelector> objectSelectorCache; + + public QueryableIndexVectorColumnSelectorFactory( + final QueryableIndex index, + final ReadableVectorOffset offset, + final Closer closer, + final Map<String, BaseColumn> columnCache + ) + { + this.index = index; + this.offset = offset; + this.closer = closer; + this.columnCache = columnCache; + this.singleValueDimensionSelectorCache = new HashMap<>(); + this.multiValueDimensionSelectorCache = new HashMap<>(); + this.valueSelectorCache = new HashMap<>(); + this.objectSelectorCache = new HashMap<>(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final DimensionSpec dimensionSpec) + { + if (!dimensionSpec.canVectorize()) { + throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec); + } + + return multiValueDimensionSelectorCache.computeIfAbsent( + dimensionSpec, + spec -> { + final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); + if (holder == null + || !holder.getCapabilities().isDictionaryEncoded() + || holder.getCapabilities().getType() != ValueType.STRING + || !holder.getCapabilities().hasMultipleValues()) { + throw new ISE( + "Column[%s] is not a multi-value string column, do not ask for a multi-value selector", + spec.getDimension() + ); + } + + @SuppressWarnings("unchecked") + final DictionaryEncodedColumn<String> dictionaryEncodedColumn = (DictionaryEncodedColumn<String>) + getCachedColumn(spec.getDimension()); + + final MultiValueDimensionVectorSelector selector = dictionaryEncodedColumn.makeMultiValueDimensionVectorSelector( + offset + ); + + return spec.decorate(selector); + } + ); + } + + @Override + public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(final DimensionSpec dimensionSpec) + { + if 
(!dimensionSpec.canVectorize()) { + throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec); + } + + return singleValueDimensionSelectorCache.computeIfAbsent( + dimensionSpec, + spec -> { + final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); + if (holder == null + || !holder.getCapabilities().isDictionaryEncoded() + || holder.getCapabilities().getType() != ValueType.STRING) { + // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls. + return NilVectorSelector.create(offset); + } + + if (holder.getCapabilities().hasMultipleValues()) { + // Asking for a single-value dimension selector on a multi-value column gets you an error. + throw new ISE("Column[%s] is multi-value, do not ask for a single-value selector", spec.getDimension()); + } + + @SuppressWarnings("unchecked") + final DictionaryEncodedColumn<String> dictionaryEncodedColumn = (DictionaryEncodedColumn<String>) + getCachedColumn(spec.getDimension()); + + final SingleValueDimensionVectorSelector selector = + dictionaryEncodedColumn.makeSingleValueDimensionVectorSelector(offset); + + return spec.decorate(selector); + } + ); + } + + @Override + public VectorValueSelector makeValueSelector(final String columnName) + { + return valueSelectorCache.computeIfAbsent( + columnName, + name -> { + final BaseColumn column = getCachedColumn(name); + if (column == null) { + return NilVectorSelector.create(offset); + } else { + return column.makeVectorValueSelector(offset); + } + } + ); + } + + @Override + public VectorObjectSelector makeObjectSelector(final String columnName) + { + return objectSelectorCache.computeIfAbsent( + columnName, + name -> { + final BaseColumn column = getCachedColumn(name); + if (column == null) { + return NilVectorSelector.create(offset); + } else { + return column.makeVectorObjectSelector(offset); + } + } + ); + } + + @Nullable + private BaseColumn getCachedColumn(final String columnName) + { + return 
columnCache.computeIfAbsent(columnName, name -> { + ColumnHolder holder = index.getColumnHolder(name); + if (holder != null) { + return closer.register(holder.getColumn()); + } else { + return null; + } + }); + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(final String columnName) + { + return QueryableIndexStorageAdapter.getColumnCapabilities(index, columnName); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/ReadableVectorOffset.java b/processing/src/main/java/org/apache/druid/segment/vector/ReadableVectorOffset.java new file mode 100644 index 000000000000..4fb494f9bc62 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/ReadableVectorOffset.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +/** + * Provides a batch of offsets, ostensibly as indexes into an array. + * + * A ReadableVectorOffset should be given to classes (e.g. column selector objects) by something which keeps a + * reference to the base VectorOffset object and increments it. + * + * @see VectorOffset, the movable version. 
+ * @see org.apache.druid.segment.data.ReadableOffset, the non-vectorized version. + */ +public interface ReadableVectorOffset extends VectorSizeInspector +{ + /** + * A marker value that will never be returned by "getId". + */ + int NULL_ID = -1; + + /** + * Returns an integer that uniquely identifies the current position of the offset. Should *not* be construed as an + * actual offset; for that, use "getStartOffset" or "getOffsets". This is useful for caching: it is safe to assume + * nothing has changed in the offset so long as the id remains the same. + */ + int getId(); + + /** + * Checks if the current batch is a contiguous range or not. This is only good for one batch at a time, since the + * same object may return some contiguous batches and some non-contiguous batches. So, callers must check this method + * each time they want to retrieve the current batch of offsets. + */ + boolean isContiguous(); + + /** + * If "isContiguous" is true, this method returns the start offset of the range. The length of the range is + * given by "getCurrentVectorSize". + * + * Throws an exception if "isContiguous" is false. + */ + int getStartOffset(); + + /** + * If "isContiguous" is false, this method returns a batch of offsets. The array may be longer than the number of + * valid offsets, so callers need to check "getCurrentVectorSize" too. + * + * Throws an exception if "isContiguous" is true. + */ + int[] getOffsets(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/SingleValueDimensionVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/SingleValueDimensionVectorSelector.java new file mode 100644 index 000000000000..f9fd484ad388 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/SingleValueDimensionVectorSelector.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.segment.DimensionDictionarySelector; + +/** + * Vectorized selector for a singly-valued string-typed column. Unlike the non-vectorized version, this is done as + * a separate interface, which is useful since it allows "getRowVector" to be a primitive int array. + * + * @see org.apache.druid.segment.DimensionSelector, the non-vectorized version. + * @see MultiValueDimensionVectorSelector, the multi-valued version. + */ +public interface SingleValueDimensionVectorSelector extends DimensionDictionarySelector, VectorSizeInspector +{ + /** + * Get the current vector. The array will be reused, so it is not a good idea to retain a reference to it. + */ + int[] getRowVector(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java new file mode 100644 index 000000000000..1634cc6ab3a1 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.column.ColumnCapabilities; + +import javax.annotation.Nullable; + +/** + * Creates vectorized column selectors. Obtained from {@link VectorCursor#getColumnSelectorFactory()}. + * + * @see org.apache.druid.segment.ColumnSelectorFactory, the non-vectorized version. + */ +public interface VectorColumnSelectorFactory +{ + /** + * Returns the maximum vector size for the {@link VectorCursor} that generated this object. + * + * @see VectorCursor#getMaxVectorSize() + */ + int getMaxVectorSize(); + + /** + * Returns a string-typed, single-value-per-row column selector. + */ + SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(DimensionSpec dimensionSpec); + + /** + * Returns a string-typed, multi-value-per-row column selector. + */ + MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(DimensionSpec dimensionSpec); + + /** + * Returns a primitive column selector. + */ + VectorValueSelector makeValueSelector(String column); + + /** + * Returns an object selector, useful for complex columns. + */ + VectorObjectSelector makeObjectSelector(String column); + + /** + * Returns capabilities of a particular column, or null if the column doesn't exist. 
Unlike ColumnSelectorFactory, + * null does not potentially indicate a dynamically discovered column. + * + * @return capabilities, or null if the column doesn't exist. + */ + @Nullable + ColumnCapabilities getColumnCapabilities(String column); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorCursor.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorCursor.java new file mode 100644 index 000000000000..a8f5637b4f55 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorCursor.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import java.io.Closeable; + +/** + * Vectorized cursor used during query execution. VectorCursors are returned by + * {@link org.apache.druid.segment.StorageAdapter#makeVectorCursor} and are created by + * {@link org.apache.druid.segment.QueryableIndexCursorSequenceBuilder#buildVectorized}. + * + * Unlike the non-vectorized version, VectorCursor does not have a getTime() method. This is because we are trying to + * avoid creating needlessly-small vectors when the time granularity is very fine. 
See + * {@link org.apache.druid.query.vector.VectorCursorGranularizer} for a helper that makes it easier for query engines to + * do their own time granularization. + * + * An example of how to use the methods in this class: + * + * <pre>
+ *   try (VectorCursor cursor = adapter.makeVectorCursor(...)) {
+ *     // ProcessorClass is some vectorized processor class.
+ *     ProcessorClass o = makeProcessor(cursor.getColumnSelectorFactory());
+ *     for (; !cursor.isDone(); cursor.advance()) {
+ *       o.process();
+ *     }
+ *   }
+ * </pre>
+ * + * @see org.apache.druid.segment.Cursor, the non-vectorized version. + */ +public interface VectorCursor extends VectorSizeInspector, Closeable +{ + /** + * Returns a vectorized column selector factory. + */ + VectorColumnSelectorFactory getColumnSelectorFactory(); + + /** + * Advances the cursor, skipping forward a number of rows equal to the current vector size. + */ + void advance(); + + /** + * Returns false if the cursor is readable, true if it has nothing left to read. + */ + boolean isDone(); + + /** + * Resets the cursor back to its original state. Useful for query engines that want to make multiple passes. + */ + void reset(); + + /** + * Close the cursor and release its resources. + */ + @Override + void close(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorObjectSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorObjectSelector.java new file mode 100644 index 000000000000..f64e861396d1 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorObjectSelector.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +/** + * Vectorized object selector, useful for complex columns. + * + * @see org.apache.druid.segment.ColumnValueSelector, the non-vectorized version. + */ +public interface VectorObjectSelector extends VectorSizeInspector +{ + /** + * Get the current vector. Individual elements of the array may be null. + */ + Object[] getObjectVector(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorOffset.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorOffset.java new file mode 100644 index 000000000000..e2aefc8d97be --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorOffset.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +/** + * The movable version of ReadableVectorOffset. + * + * @see org.apache.druid.segment.data.Offset, the non-vectorized version. + */ +public interface VectorOffset extends ReadableVectorOffset +{ + /** + * Advances by one batch. + */ + void advance(); + + /** + * Checks if iteration is "done", meaning the current batch of offsets is empty, and there are no more coming. 
+ */ + boolean isDone(); + + /** + * Resets the object back to its original state. Once this is done, iteration can begin anew. + */ + void reset(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorSelectorUtils.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorSelectorUtils.java new file mode 100644 index 000000000000..112d1b119d51 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorSelectorUtils.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.collections.bitmap.ImmutableBitmap; + +import javax.annotation.Nullable; + +public class VectorSelectorUtils +{ + /** + * Helper used by ColumnarLongs, ColumnarDoubles, etc. to populate null-flag vectors. 
+ */ + @Nullable + public static boolean[] populateNullVector( + @Nullable final boolean[] nullVector, + final ReadableVectorOffset offset, + final ImmutableBitmap nullValueBitmap + ) + { + if (nullValueBitmap.isEmpty()) { + return null; + } + + // Reuse the caller's scratch array when provided; otherwise allocate one of the maximum vector size. + final boolean[] retVal = nullVector != null ? nullVector : new boolean[offset.getMaxVectorSize()]; + + // Read the loop-invariant accessors once rather than on every iteration. + final int currentVectorSize = offset.getCurrentVectorSize(); + + // Probably not super efficient to call "get" so much, but, no worse than the non-vectorized version. + if (offset.isContiguous()) { + final int startOffset = offset.getStartOffset(); + for (int i = 0; i < currentVectorSize; i++) { + retVal[i] = nullValueBitmap.get(i + startOffset); + } + } else { + final int[] offsets = offset.getOffsets(); + for (int i = 0; i < currentVectorSize; i++) { + retVal[i] = nullValueBitmap.get(offsets[i]); + } + } + + return retVal; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorSizeInspector.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorSizeInspector.java new file mode 100644 index 000000000000..a5d6c8a15082 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorSizeInspector.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.druid.segment.vector; + +/** + * Common interface for vectorized column selectors, matchers, etc, where callers are given the ability to inspect + * current and maximum vector sizes. + */ +public interface VectorSizeInspector +{ + /** + * Returns the maximum vector size for this object. It will not change for the lifetime of this object, and is + * generally used to allocate scratch arrays for later processing. Will always be greater than zero. + */ + int getMaxVectorSize(); + + /** + * Returns the current vector size for this object. Will never be larger than the max size returned by + * {@link #getMaxVectorSize()}. + */ + int getCurrentVectorSize(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorValueSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorValueSelector.java new file mode 100644 index 000000000000..216967cc9303 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorValueSelector.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import javax.annotation.Nullable; + +/** + * Vectorized selector for primitive columns.
+ * + * @see org.apache.druid.segment.ColumnValueSelector, the non-vectorized version. + */ +public interface VectorValueSelector extends VectorSizeInspector +{ + /** + * Get the current vector, casting to longs as necessary. The array will be reused, so it is not a good idea to + * retain a reference to it. + */ + long[] getLongVector(); + + /** + * Get the current vector, casting to floats as necessary. The array will be reused, so it is not a good idea to + * retain a reference to it. + */ + float[] getFloatVector(); + + /** + * Get the current vector, casting to doubles as necessary. The array will be reused, so it is not a good idea to + * retain a reference to it. + */ + double[] getDoubleVector(); + + /** + * Gets a vector of booleans signifying which rows are null and which are not (true for null). Returns null if it is + * known that there are no nulls in the vector, possibly because the column is non-nullable. + */ + @Nullable + boolean[] getNullVector(); +} diff --git a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java index cfc18e85b9f0..829ba8c1c3bb 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java @@ -326,6 +326,15 @@ public void remove() }; } + /** + * Check if a QueryRunner returned by {@link #makeQueryRunners(QueryRunnerFactory)} is vectorizable. 
+ */ + public static boolean isTestRunnerVectorizable(QueryRunner runner) + { + final String name = runner.toString(); + return !"rtIndex".equals(name) && !"noRollupRtIndex".equals(name); + } + public static > List> makeQueryRunners( QueryRunnerFactory factory ) diff --git a/processing/src/test/java/org/apache/druid/query/filter/vector/VectorMatchTest.java b/processing/src/test/java/org/apache/druid/query/filter/vector/VectorMatchTest.java new file mode 100644 index 000000000000..5a6754552820 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/filter/vector/VectorMatchTest.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.druid.query.filter.vector; + +import org.junit.Assert; +import org.junit.Test; + +public class VectorMatchTest +{ + private static final int VECTOR_SIZE = 10; + + @Test + public void testRemoveAll() + { + assertMatchEquals( + VectorMatch.allFalse(), + copy(VectorMatch.allTrue(VECTOR_SIZE)).removeAll(VectorMatch.allTrue(VECTOR_SIZE)) + ); + + assertMatchEquals( + VectorMatch.allTrue(VECTOR_SIZE), + copy(VectorMatch.allTrue(VECTOR_SIZE)).removeAll(VectorMatch.allFalse()) + ); + + assertMatchEquals( + createMatch(new int[]{3, 6, 7, 8, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).removeAll(createMatch(new int[]{4, 5, 9})) + ); + + assertMatchEquals( + createMatch(new int[]{3, 6, 7, 8, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).removeAll(createMatch(new int[]{2, 5, 9})) + ); + + assertMatchEquals( + createMatch(new int[]{6, 7, 8, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).removeAll(createMatch(new int[]{3, 5, 9})) + ); + + assertMatchEquals( + createMatch(new int[]{6, 7, 8}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).removeAll(createMatch(new int[]{3, 5, 10})) + ); + } + + @Test + public void testAddAll() + { + final VectorMatch scratch = VectorMatch.wrap(new int[VECTOR_SIZE]); + + assertMatchEquals( + VectorMatch.allTrue(VECTOR_SIZE), + copy(VectorMatch.allTrue(VECTOR_SIZE)).addAll(VectorMatch.allTrue(VECTOR_SIZE), scratch) + ); + + assertMatchEquals( + VectorMatch.allTrue(VECTOR_SIZE), + createMatch(new int[]{}).addAll(VectorMatch.allTrue(VECTOR_SIZE), scratch) + ); + + assertMatchEquals( + createMatch(new int[]{3, 4, 5, 6, 7, 8, 9, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).addAll(createMatch(new int[]{4, 5, 9}), scratch) + ); + + assertMatchEquals( + createMatch(new int[]{3, 4, 5, 6, 7, 8, 10}), + createMatch(new int[]{3, 5, 6, 7, 8}).addAll(createMatch(new int[]{4, 5, 10}), scratch) + ); + + assertMatchEquals( + createMatch(new int[]{2, 3, 5, 6, 7, 8, 9, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 
10}).addAll(createMatch(new int[]{2, 5, 9}), scratch) + ); + + assertMatchEquals( + createMatch(new int[]{3, 5, 6, 7, 8, 9, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).addAll(createMatch(new int[]{3, 5, 9}), scratch) + ); + + assertMatchEquals( + createMatch(new int[]{3, 5, 6, 7, 8, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).addAll(createMatch(new int[]{3, 5, 10}), scratch) + ); + } + + /** + * Useful because VectorMatch equality is based on identity, not value. (Since they are mutable.) + */ + private static void assertMatchEquals(ReadableVectorMatch expected, ReadableVectorMatch actual) + { + Assert.assertEquals(expected.toString(), actual.toString()); + } + + private static VectorMatch copy(final ReadableVectorMatch match) + { + final int[] selection = match.getSelection(); + final int[] newSelection = new int[selection.length]; + System.arraycopy(selection, 0, newSelection, 0, selection.length); + return VectorMatch.wrap(newSelection).setSelectionSize(match.getSelectionSize()); + } + + private static VectorMatch createMatch(final int[] selection) + { + final VectorMatch match = VectorMatch.wrap(new int[VECTOR_SIZE]); + System.arraycopy(selection, 0, match.getSelection(), 0, selection.length); + match.setSelectionSize(selection.length); + return match; + } +} diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java index 646e2d33a4e2..8f718a2e1cfe 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java @@ -188,6 +188,7 @@ public int getNumThreads() private final QueryRunner runner; private final GroupByQueryRunnerFactory factory; private final GroupByQueryConfig config; + private final boolean vectorize; @Rule public ExpectedException expectedException = ExpectedException.none(); @@ -236,6 +237,13 
@@ public String getDefaultStrategy() return GroupByStrategySelector.STRATEGY_V2; } + @Override + public int getBufferGrouperInitialBuckets() + { + // Small initial table to force some growing. + return 4; + } + @Override public String toString() { @@ -406,7 +414,7 @@ public ByteBuffer get() } @Parameterized.Parameters(name = "{0}") - public static Collection constructorFeeder() + public static Collection constructorFeeder() { final List constructors = new ArrayList<>(); for (GroupByQueryConfig config : testConfigs()) { @@ -414,12 +422,14 @@ public static Collection constructorFeeder() final GroupByQueryRunnerFactory factory = factoryAndCloser.lhs; resourceCloser.register(factoryAndCloser.rhs); for (QueryRunner runner : QueryRunnerTestHelper.makeQueryRunners(factory)) { - final String testName = StringUtils.format( - "config=%s, runner=%s", - config.toString(), - runner.toString() - ); - constructors.add(new Object[]{testName, config, factory, runner}); + for (boolean vectorize : ImmutableList.of(false, true)) { + final String testName = StringUtils.format("config=%s, runner=%s, vectorize=%s", config, runner, vectorize); + + // Add vectorization tests for any indexes that support it. 
+ if (!vectorize || QueryRunnerTestHelper.isTestRunnerVectorizable(runner)) { + constructors.add(new Object[]{testName, config, factory, runner, vectorize}); + } + } } } @@ -436,26 +446,29 @@ public GroupByQueryRunnerTest( String testName, GroupByQueryConfig config, GroupByQueryRunnerFactory factory, - QueryRunner runner + QueryRunner runner, + boolean vectorize ) { this.config = config; this.factory = factory; this.runner = factory.mergeRunners(MoreExecutors.sameThreadExecutor(), ImmutableList.of(runner)); + this.vectorize = vectorize; } @Test public void testGroupBy() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index"), - new FloatSumAggregatorFactory("idxFloat", "indexFloat"), - new DoubleSumAggregatorFactory("idxDouble", "index")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index"), + new FloatSumAggregatorFactory("idxFloat", "indexFloat"), + new DoubleSumAggregatorFactory("idxDouble", "index") + ) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); @@ -488,8 +501,10 @@ public void testGroupBy() @Test public void testGroupByOnMissingColumn() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to extraction dimension spec. 
+ cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -515,8 +530,7 @@ public void testGroupByOnMissingColumn() @Test public void testGroupByWithStringPostAggregator() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -566,8 +580,10 @@ public void testGroupByWithStringPostAggregator() @Test public void testGroupByWithStringVirtualColumn() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setVirtualColumns( @@ -628,8 +644,7 @@ public void testGroupByWithStringVirtualColumn() @Test public void testGroupByWithDurationGranularity() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -669,8 +684,7 @@ public void testGroupByWithOutputNameCollisions() expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("[alias] already defined"); - GroupByQuery - .builder() + makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -685,14 +699,13 @@ public void testGroupByWithSortDimsFirst() if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { return; } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = 
makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(QueryRunnerTestHelper.dayGran) - .setContext(ImmutableMap.of("sortByDimsFirst", true)) + .overrideContext(ImmutableMap.of("sortByDimsFirst", true)) .build(); List expectedResults = Arrays.asList( @@ -731,14 +744,13 @@ public void testGroupByWithSortDimsFirst() @Test public void testGroupByWithChunkPeriod() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(QueryRunnerTestHelper.allGran) - .setContext(ImmutableMap.of("chunkPeriod", "P1D")) + .overrideContext(ImmutableMap.of("chunkPeriod", "P1D")) .build(); List expectedResults = Arrays.asList( @@ -760,8 +772,7 @@ public void testGroupByWithChunkPeriod() @Test public void testGroupByNoAggregators() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -797,8 +808,10 @@ public void testGroupByNoAggregators() @Test public void testMultiValueDimension() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to multi-value dimensions. 
+ cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("placementish", "alias")) @@ -825,8 +838,10 @@ public void testMultiValueDimension() @Test public void testTwoMultiValueDimensions() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to multi-value dimensions. + cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimFilter(new SelectorDimFilter("placementish", "a", null)) @@ -851,8 +866,10 @@ public void testTwoMultiValueDimensions() @Test public void testMultipleDimensionsOneOfWhichIsMultiValue1() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to multi-value dimensions. + cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -1070,8 +1087,10 @@ public void testMultipleDimensionsOneOfWhichIsMultiValue1() @Test public void testMultipleDimensionsOneOfWhichIsMultiValueDifferentOrder() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to multi-value dimensions. 
+ cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -1289,14 +1308,13 @@ public void testMultipleDimensionsOneOfWhichIsMultiValueDifferentOrder() @Test public void testGroupByMaxRowsLimitContextOverride() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(QueryRunnerTestHelper.dayGran) - .setContext(ImmutableMap.of("maxResults", 1)) + .overrideContext(ImmutableMap.of("maxResults", 1)) .build(); List expectedResults = null; @@ -1333,14 +1351,13 @@ public void testGroupByMaxRowsLimitContextOverride() @Test public void testGroupByTimeoutContextOverride() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(QueryRunnerTestHelper.dayGran) - .setContext(ImmutableMap.of(QueryContexts.TIMEOUT_KEY, 60000)) + .overrideContext(ImmutableMap.of(QueryContexts.TIMEOUT_KEY, 60000)) .build(); List expectedResults = Arrays.asList( @@ -1372,14 +1389,13 @@ public void testGroupByTimeoutContextOverride() @Test public void testGroupByMaxOnDiskStorageContextOverride() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", 
"alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(QueryRunnerTestHelper.dayGran) - .setContext(ImmutableMap.of("maxOnDiskStorage", 0, "bufferGrouperMaxSize", 1)) + .overrideContext(ImmutableMap.of("maxOnDiskStorage", 0, "bufferGrouperMaxSize", 1)) .build(); List expectedResults = null; @@ -1390,7 +1406,15 @@ public void testGroupByMaxOnDiskStorageContextOverride() expectedResults = Arrays.asList( GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L), + GroupByQueryRunnerTestHelper.createExpectedRow( + "2011-04-01", + "alias", + "entertainment", + "rows", + 1L, + "idx", + 158L + ), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), @@ -1400,7 +1424,15 @@ public void testGroupByMaxOnDiskStorageContextOverride() GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L), + GroupByQueryRunnerTestHelper.createExpectedRow( + "2011-04-02", + "alias", + "entertainment", + "rows", + 1L, + "idx", + 166L + ), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", 
"rows", 3L, "idx", 2447L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L), @@ -1417,14 +1449,13 @@ public void testGroupByMaxOnDiskStorageContextOverride() @Test public void testNotEnoughDictionarySpaceThroughContextOverride() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(QueryRunnerTestHelper.dayGran) - .setContext(ImmutableMap.of("maxOnDiskStorage", 0, "maxMergingDictionarySize", 1)) + .overrideContext(ImmutableMap.of("maxOnDiskStorage", 0, "maxMergingDictionarySize", 1)) .build(); List expectedResults = null; @@ -1462,14 +1493,13 @@ public void testNotEnoughDictionarySpaceThroughContextOverride() @Test public void testNotEnoughDiskSpaceThroughContextOverride() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(QueryRunnerTestHelper.dayGran) - .setContext(ImmutableMap.of("maxOnDiskStorage", 1, "maxMergingDictionarySize", 1)) + .overrideContext(ImmutableMap.of("maxOnDiskStorage", 1, "maxMergingDictionarySize", 1)) .build(); List expectedResults = null; @@ -1512,8 +1542,7 @@ public void testNotEnoughDiskSpaceThroughContextOverride() @Test public void testSubqueryWithOuterMaxOnDiskStorageContextOverride() { - final GroupByQuery subquery = GroupByQuery - .builder() + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) 
.setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -1524,7 +1553,7 @@ public void testSubqueryWithOuterMaxOnDiskStorageContextOverride() null ) ) - .setContext( + .overrideContext( ImmutableMap.of( "maxOnDiskStorage", Integer.MAX_VALUE, "bufferGrouperMaxSize", Integer.MAX_VALUE @@ -1532,13 +1561,12 @@ public void testSubqueryWithOuterMaxOnDiskStorageContextOverride() ) .build(); - final GroupByQuery query = GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ArrayList<>()).setAggregatorSpecs(new CountAggregatorFactory("count")) .setGranularity(QueryRunnerTestHelper.allGran) - .setContext(ImmutableMap.of("maxOnDiskStorage", 0, "bufferGrouperMaxSize", 0)) + .overrideContext(ImmutableMap.of("maxOnDiskStorage", 0, "bufferGrouperMaxSize", 0)) .build(); // v1 strategy throws an exception for this query because it tries to merge the noop outer @@ -1560,6 +1588,9 @@ public void testSubqueryWithOuterMaxOnDiskStorageContextOverride() @Test public void testGroupByWithRebucketRename() { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + Map map = new HashMap<>(); map.put("automotive", "automotive0"); map.put("business", "business0"); @@ -1570,8 +1601,7 @@ public void testGroupByWithRebucketRename() map.put("premium", "premium0"); map.put("technology", "technology0"); map.put("travel", "travel0"); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec( "quality", @@ -1627,6 +1657,9 @@ public void testGroupByWithRebucketRename() @Test public void testGroupByWithSimpleRenameRetainMissingNonInjective() { + // Cannot vectorize due to extraction dimension spec. 
+ cannotVectorize(); + Map map = new HashMap<>(); map.put("automotive", "automotive0"); map.put("business", "business0"); @@ -1637,8 +1670,7 @@ public void testGroupByWithSimpleRenameRetainMissingNonInjective() map.put("premium", "premium0"); map.put("technology", "technology0"); map.put("travel", "travel0"); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec( "quality", @@ -1704,8 +1736,7 @@ public void testGroupByWithSimpleRenameRetainMissing() map.put("premium", "premium0"); map.put("technology", "technology0"); map.put("travel", "travel0"); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec( "quality", @@ -1771,8 +1802,7 @@ public void testGroupByWithSimpleRenameAndMissingString() map.put("premium", "premium0"); map.put("technology", "technology0"); map.put("travel", "travel0"); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec( "quality", @@ -1837,8 +1867,7 @@ public void testGroupByWithSimpleRename() map.put("premium", "premium0"); map.put("technology", "technology0"); map.put("travel", "travel0"); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec( "quality", @@ -1893,8 +1922,7 @@ public void testGroupByWithSimpleRename() @Test public void testGroupByWithUniques() { - GroupByQuery query = GroupByQuery - .builder() + 
GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, QueryRunnerTestHelper.qualityUniques) @@ -1918,8 +1946,7 @@ public void testGroupByWithUniques() @Test(expected = IllegalArgumentException.class) public void testGroupByWithUniquesAndPostAggWithSameName() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new HyperUniquesAggregatorFactory( @@ -1951,8 +1978,10 @@ public void testGroupByWithUniquesAndPostAggWithSameName() @Test public void testGroupByWithCardinality() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, QueryRunnerTestHelper.qualityCardinality) @@ -1976,13 +2005,17 @@ public void testGroupByWithCardinality() @Test public void testGroupByWithFirstLast() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to "first", "last" aggregators. 
+ cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new DefaultDimensionSpec("market", "market")) - .setAggregatorSpecs(new LongFirstAggregatorFactory("first", "index"), - new LongLastAggregatorFactory("last", "index")) + .setAggregatorSpecs( + new LongFirstAggregatorFactory("first", "index"), + new LongLastAggregatorFactory("last", "index") + ) .setGranularity(QueryRunnerTestHelper.monthGran) .build(); @@ -2008,16 +2041,20 @@ public void testGroupByWithFirstLast() @Test public void testGroupByWithNoResult() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.emptyInterval) .setDimensions(new DefaultDimensionSpec("market", "market")) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - QueryRunnerTestHelper.indexLongSum, - QueryRunnerTestHelper.qualityCardinality, - new LongFirstAggregatorFactory("first", "index"), - new LongLastAggregatorFactory("last", "index")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + QueryRunnerTestHelper.indexLongSum, + QueryRunnerTestHelper.qualityCardinality, + new LongFirstAggregatorFactory("first", "index"), + new LongLastAggregatorFactory("last", "index") + ) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); @@ -2029,6 +2066,9 @@ public void testGroupByWithNoResult() @Test public void testGroupByWithNullProducingDimExtractionFn() { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + final ExtractionFn nullExtractionFn = new RegexDimExtractionFn("(\\w{1})", false, null) { @Override @@ -2043,8 +2083,7 @@ public String apply(String dimValue) return "mezzanine".equals(dimValue) ? 
null : super.apply(dimValue); } }; - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) @@ -2103,8 +2142,7 @@ public String apply(String dimValue) } }; - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) @@ -2144,7 +2182,7 @@ public void testGroupByWithTimeZone() { DateTimeZone tz = DateTimes.inferTzfromString("America/Los_Angeles"); - GroupByQuery query = GroupByQuery.builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-03-31T00:00:00-07:00/2011-04-02T00:00:00-07:00") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -2334,8 +2372,7 @@ public void testGroupByWithTimeZone() @Test public void testMergeResults() { - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -2412,8 +2449,7 @@ public void testMergeResultsWithLimit() private void doTestMergeResultsWithValidLimit(final int limit) { - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -2449,8 +2485,7 @@ private void doTestMergeResultsWithValidLimit(final int limit) public void testMergeResultsAcrossMultipleDaysWithLimitAndOrderBy() { final int 
limit = 14; - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -2492,9 +2527,11 @@ public void testMergeResultsAcrossMultipleDaysWithLimitAndOrderBy() @Test public void testMergeResultsAcrossMultipleDaysWithLimitAndOrderByUsingMathExpressions() { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + final int limit = 14; - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval(QueryRunnerTestHelper.firstToThird) .setVirtualColumns( @@ -2544,8 +2581,7 @@ public void testMergeResultsAcrossMultipleDaysWithLimitAndOrderByUsingMathExpres @Test(expected = IllegalArgumentException.class) public void testMergeResultsWithNegativeLimit() { - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -2618,8 +2654,7 @@ public int compare(Row o1, Row o2) private void doTestMergeResultsWithOrderBy(LimitSpec orderBySpec, List expectedResults) { - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -2659,8 +2694,10 @@ public Sequence run(QueryPlus queryPlus, Map responseC @Test public void testGroupByOrderLimit() { - GroupByQuery.Builder builder = GroupByQuery - .builder() + // Cannot vectorize due to expression-based aggregator. 
+ cannotVectorize(); + + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -2716,7 +2753,8 @@ public void testGroupByOrderLimit() TestHelper.assertExpectedObjects( expectedResults, mergeRunner.run(QueryPlus.wrap(builder.build()), context), - "no-limit"); + "no-limit" + ); TestHelper.assertExpectedObjects( Iterables.limit(expectedResults, 5), mergeRunner.run(QueryPlus.wrap(builder.setLimit(5).build()), context), @@ -2751,8 +2789,7 @@ public void testGroupByOrderLimit() @Test public void testGroupByWithOrderLimit2() { - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -2788,8 +2825,7 @@ public void testGroupByWithOrderLimit2() @Test public void testGroupByWithOrderLimit3() { - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -2826,8 +2862,7 @@ public void testGroupByWithOrderLimit3() @Test public void testGroupByOrderLimitNumeric() { - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -2863,7 +2898,7 @@ public void testGroupByOrderLimitNumeric() @Test public void testGroupByWithSameCaseOrdering() { - GroupByQuery query = new GroupByQuery.Builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) 
.setGranularity(QueryRunnerTestHelper.allGran).setDimensions(new DefaultDimensionSpec( QueryRunnerTestHelper.marketDimension, @@ -2909,7 +2944,7 @@ public void testGroupByWithSameCaseOrdering() @Test public void testGroupByWithOrderLimit4() { - GroupByQuery query = new GroupByQuery.Builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.allGran) .setDimensions(new DefaultDimensionSpec( @@ -2946,7 +2981,7 @@ public void testGroupByWithOrderLimit4() @Test public void testGroupByWithOrderOnHyperUnique() { - GroupByQuery query = new GroupByQuery.Builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.allGran) .setDimensions(new DefaultDimensionSpec( @@ -3009,7 +3044,7 @@ public void testGroupByWithOrderOnHyperUnique() @Test public void testGroupByWithHavingOnHyperUnique() { - GroupByQuery query = new GroupByQuery.Builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.allGran) .setDimensions(new DefaultDimensionSpec( @@ -3056,7 +3091,7 @@ public void testGroupByWithHavingOnHyperUnique() @Test public void testGroupByWithHavingOnFinalizedHyperUnique() { - GroupByQuery query = new GroupByQuery.Builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.allGran) .setDimensions(new DefaultDimensionSpec( @@ -3106,7 +3141,7 @@ public void testGroupByWithHavingOnFinalizedHyperUnique() @Test public void testGroupByWithLimitOnFinalizedHyperUnique() { - GroupByQuery query = new GroupByQuery.Builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.allGran).setDimensions(new DefaultDimensionSpec( QueryRunnerTestHelper.marketDimension, @@ -3171,6 +3206,9 @@ public void 
testGroupByWithLimitOnFinalizedHyperUnique() @Test public void testGroupByWithAlphaNumericDimensionOrder() { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + Map map = new HashMap<>(); map.put("automotive", "health105"); map.put("business", "health20"); @@ -3182,8 +3220,7 @@ public void testGroupByWithAlphaNumericDimensionOrder() map.put("technology", "travel123"); map.put("travel", "travel555"); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec( "quality", @@ -3227,6 +3264,9 @@ public void testGroupByWithAlphaNumericDimensionOrder() @Test public void testGroupByWithLookupAndLimitAndSortByDimsFirst() { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + Map map = new HashMap<>(); map.put("automotive", "9"); map.put("business", "8"); @@ -3238,8 +3278,7 @@ public void testGroupByWithLookupAndLimitAndSortByDimsFirst() map.put("technology", "2"); map.put("travel", "1"); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec( "quality", @@ -3253,7 +3292,7 @@ public void testGroupByWithLookupAndLimitAndSortByDimsFirst() ) ) .setGranularity(QueryRunnerTestHelper.dayGran) - .setContext(ImmutableMap.of("sortByDimsFirst", true)) + .overrideContext(ImmutableMap.of("sortByDimsFirst", true)) .build(); List expectedResults = Arrays.asList( @@ -3285,7 +3324,7 @@ public void testGroupByWithLookupAndLimitAndSortByDimsFirst() // in time when Druid does support this, we can re-evaluate this test. 
public void testLimitPerGrouping() { - GroupByQuery query = new GroupByQuery.Builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.dayGran).setDimensions(new DefaultDimensionSpec( QueryRunnerTestHelper.marketDimension, @@ -3348,8 +3387,7 @@ public void testPostAggMergedHavingSpec() ) ); - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -3394,8 +3432,7 @@ public Sequence run(QueryPlus queryPlus, Map responseC @Test public void testGroupByWithOrderLimitHavingSpec() { - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-01-25/2011-01-28") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -3494,8 +3531,7 @@ public void testPostAggHavingSpec() ) ); - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -3522,14 +3558,14 @@ public void testPostAggHavingSpec() @Test public void testHavingSpec() { + List expectedResults = Arrays.asList( GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 2L, "idx", 217L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 4420L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 6L, "idx", 4416L) ); - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) 
.setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -3576,8 +3612,7 @@ public void testDimFilterHavingSpec() null ); - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -3618,8 +3653,7 @@ public void testDimFilterHavingSpecWithExtractionFns() null ); - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -3644,8 +3678,7 @@ public void testMergedHavingSpec() GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 6L, "idx", 4416L) ); - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -3728,8 +3761,7 @@ public void testMergedPostAggHavingSpec() ) ); - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -3895,8 +3927,7 @@ public void testCustomAggregatorHavingSpec() ) ); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -3925,8 +3956,7 @@ public void testCustomAggregatorHavingSpec() @Test public void testGroupByWithRegEx() { - GroupByQuery.Builder builder = 
GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimFilter(new RegexDimFilter("quality", "auto.*", null)) @@ -3948,8 +3978,7 @@ public void testGroupByWithRegEx() @Test public void testGroupByWithNonexistentDimension() { - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .addDimension("billy") @@ -4003,8 +4032,7 @@ public void testGroupByWithNonexistentDimension() @Test public void testIdenticalSubquery() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -4014,14 +4042,15 @@ public void testIdenticalSubquery() null, JavaScriptConfig.getEnabledInstance() )) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index"), - new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index"), + new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen") + ) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("alias", "alias")) @@ -4059,8 +4088,7 @@ public void testIdenticalSubquery() @Test public void testSubqueryWithMultipleIntervalsInOuterQuery() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) 
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -4070,14 +4098,15 @@ public void testSubqueryWithMultipleIntervalsInOuterQuery() null, JavaScriptConfig.getEnabledInstance() )) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index"), - new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index"), + new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen") + ) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec( new MultipleIntervalSegmentSpec( @@ -4122,8 +4151,7 @@ public void testSubqueryWithMultipleIntervalsInOuterQuery() @Test public void testSubqueryWithMultipleIntervalsInOuterQueryAndChunkPeriod() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -4133,15 +4161,16 @@ public void testSubqueryWithMultipleIntervalsInOuterQueryAndChunkPeriod() null, JavaScriptConfig.getEnabledInstance() )) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index"), - new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index"), + new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen") + ) .setGranularity(QueryRunnerTestHelper.dayGran) - .setContext(ImmutableMap.of("chunkPeriod", "P1D")) + .overrideContext(ImmutableMap.of("chunkPeriod", "P1D")) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = 
makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec( new MultipleIntervalSegmentSpec( @@ -4188,8 +4217,7 @@ public void testSubqueryWithExtractionFnInOuterQuery() { //https://github.com/apache/incubator-druid/issues/2556 - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -4199,14 +4227,15 @@ public void testSubqueryWithExtractionFnInOuterQuery() null, JavaScriptConfig.getEnabledInstance() )) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index"), - new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index"), + new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen") + ) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec( new MultipleIntervalSegmentSpec( @@ -4233,24 +4262,29 @@ public void testSubqueryWithExtractionFnInOuterQuery() @Test public void testDifferentGroupingSubquery() { - GroupByQuery subquery = GroupByQuery - .builder() + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index"), - new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index"), + new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen") + ) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new DoubleMaxAggregatorFactory("idx", "idx"), - new DoubleMaxAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new DoubleMaxAggregatorFactory("idx", "idx"), + new DoubleMaxAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen") + ) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); @@ -4265,13 +4299,15 @@ public void testDifferentGroupingSubquery() GroupByQueryRunnerTestHelper.runQuery(factory, runner, query), "subquery" ); - subquery = new GroupByQuery.Builder(subquery) + subquery = makeQueryBuilder(subquery) .setVirtualColumns( new ExpressionVirtualColumn("expr", "-index + 100", ValueType.FLOAT, TestExprMacroTable.INSTANCE) ) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "expr"), - new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "expr"), + new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen") + ) .build(); query = (GroupByQuery) query.withDataSource(new 
QueryDataSource(subquery)); @@ -4290,8 +4326,7 @@ public void testDifferentGroupingSubquery() @Test public void testDifferentGroupingSubqueryMultipleAggregatorsOnSameField() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -4311,14 +4346,15 @@ public void testDifferentGroupingSubqueryMultipleAggregatorsOnSameField() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setAggregatorSpecs(new DoubleMaxAggregatorFactory("idx1", "idx"), - new DoubleMaxAggregatorFactory("idx2", "idx"), - new DoubleMaxAggregatorFactory("idx3", "post_agg"), - new DoubleMaxAggregatorFactory("idx4", "post_agg")) + .setAggregatorSpecs( + new DoubleMaxAggregatorFactory("idx1", "idx"), + new DoubleMaxAggregatorFactory("idx2", "idx"), + new DoubleMaxAggregatorFactory("idx3", "post_agg"), + new DoubleMaxAggregatorFactory("idx4", "post_agg") + ) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); @@ -4339,8 +4375,7 @@ public void testDifferentGroupingSubqueryMultipleAggregatorsOnSameField() @Test public void testDifferentGroupingSubqueryWithFilter() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "quality")) @@ -4348,8 +4383,7 @@ public void testDifferentGroupingSubqueryWithFilter() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) 
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setAggregatorSpecs(new DoubleMaxAggregatorFactory("idx", "idx")) @@ -4383,8 +4417,7 @@ public void testDifferentGroupingSubqueryWithFilter() @Test public void testDifferentIntervalSubquery() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -4392,8 +4425,7 @@ public void testDifferentIntervalSubquery() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.secondOnly) .setAggregatorSpecs(new DoubleMaxAggregatorFactory("idx", "idx")) @@ -4416,8 +4448,7 @@ public void testGroupByTimeExtractionNamedUnderUnderTime() "'__time' cannot be used as an output name for dimensions, aggregators, or post-aggregators." ); - GroupByQuery - .builder() + makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions( @@ -4451,8 +4482,7 @@ public void testGroupByWithUnderUnderTimeAsDimensionNameWithHavingAndLimit() "'__time' cannot be used as an output name for dimensions, aggregators, or post-aggregators." 
); - GroupByQuery - .builder() + makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "__time")) @@ -4478,8 +4508,7 @@ public void testGroupByWithUnderUnderTimeAsDimensionNameWithHavingAndLimit() @Test public void testEmptySubquery() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.emptyInterval) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -4487,8 +4516,7 @@ public void testEmptySubquery() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setAggregatorSpecs(new DoubleMaxAggregatorFactory("idx", "idx")) @@ -4502,8 +4530,7 @@ public void testEmptySubquery() @Test public void testSubqueryWithPostAggregators() { - final GroupByQuery subquery = GroupByQuery - .builder() + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -4527,13 +4554,14 @@ public void testSubqueryWithPostAggregators() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - final GroupByQuery query = GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("alias", "alias")) - .setAggregatorSpecs(new LongSumAggregatorFactory("rows", "rows"), - new LongSumAggregatorFactory("idx", "idx_subpostagg")) + .setAggregatorSpecs( + new LongSumAggregatorFactory("rows", "rows"), + new LongSumAggregatorFactory("idx", "idx_subpostagg") + ) 
.setPostAggregatorSpecs( Collections.singletonList( new ArithmeticPostAggregator( @@ -4758,17 +4786,11 @@ public void testSubqueryWithPostAggregators() @Test public void testSubqueryWithPostAggregatorsAndHaving() { - final GroupByQuery subquery = GroupByQuery - .builder() + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) - .setDimFilter(new JavaScriptDimFilter( - "quality", - "function(dim){ return true; }", - null, - JavaScriptConfig.getEnabledInstance() - )).setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx_subagg", "index")) + .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx_subagg", "index")) .setPostAggregatorSpecs( Collections.singletonList( new ArithmeticPostAggregator( @@ -4796,13 +4818,14 @@ public boolean eval(Row row) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - final GroupByQuery query = GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("alias", "alias")) - .setAggregatorSpecs(new LongSumAggregatorFactory("rows", "rows"), - new LongSumAggregatorFactory("idx", "idx_subpostagg")) + .setAggregatorSpecs( + new LongSumAggregatorFactory("rows", "rows"), + new LongSumAggregatorFactory("idx", "idx_subpostagg") + ) .setPostAggregatorSpecs( Collections.singletonList( new ArithmeticPostAggregator( @@ -5005,8 +5028,10 @@ public boolean eval(Row row) @Test public void testSubqueryWithMultiColumnAggregators() { - final GroupByQuery subquery = GroupByQuery - .builder() + // Cannot vectorize due to javascript functionality. 
+ cannotVectorize(); + + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -5016,16 +5041,18 @@ public void testSubqueryWithMultiColumnAggregators() null, JavaScriptConfig.getEnabledInstance() )) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new DoubleSumAggregatorFactory("idx_subagg", "index"), - new JavaScriptAggregatorFactory( - "js_agg", - Arrays.asList("index", "market"), - "function(current, index, dim){return current + index + dim.length;}", - "function(){return 0;}", - "function(a,b){return a + b;}", - JavaScriptConfig.getEnabledInstance() - )) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new DoubleSumAggregatorFactory("idx_subagg", "index"), + new JavaScriptAggregatorFactory( + "js_agg", + Arrays.asList("index", "market"), + "function(current, index, dim){return current + index + dim.length;}", + "function(){return 0;}", + "function(a,b){return a + b;}", + JavaScriptConfig.getEnabledInstance() + ) + ) .setPostAggregatorSpecs( Collections.singletonList( new ArithmeticPostAggregator( @@ -5053,14 +5080,15 @@ public boolean eval(Row row) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - final GroupByQuery query = GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("alias", "alias")) - .setAggregatorSpecs(new LongSumAggregatorFactory("rows", "rows"), - new LongSumAggregatorFactory("idx", "idx_subpostagg"), - new DoubleSumAggregatorFactory("js_outer_agg", "js_agg")) + .setAggregatorSpecs( + new LongSumAggregatorFactory("rows", "rows"), + new LongSumAggregatorFactory("idx", "idx_subpostagg"), + new DoubleSumAggregatorFactory("js_outer_agg", "js_agg") + ) .setPostAggregatorSpecs( Collections.singletonList( new 
ArithmeticPostAggregator( @@ -5162,8 +5190,7 @@ public boolean eval(Row row) @Test public void testSubqueryWithOuterFilterAggregator() { - final GroupByQuery subquery = GroupByQuery - .builder() + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new DefaultDimensionSpec("market", "market"), new DefaultDimensionSpec("quality", "quality")) @@ -5172,8 +5199,7 @@ public void testSubqueryWithOuterFilterAggregator() .build(); final DimFilter filter = new SelectorDimFilter("market", "spot", null); - final GroupByQuery query = GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new ArrayList<>()) @@ -5191,8 +5217,7 @@ public void testSubqueryWithOuterFilterAggregator() @Test public void testSubqueryWithOuterTimeFilter() { - final GroupByQuery subquery = GroupByQuery - .builder() + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new DefaultDimensionSpec("market", "market"), new DefaultDimensionSpec("quality", "quality")) @@ -5202,8 +5227,7 @@ public void testSubqueryWithOuterTimeFilter() final DimFilter fridayFilter = new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "Friday", new TimeFormatExtractionFn("EEEE", null, null, null, false)); final DimFilter firstDaysFilter = new InDimFilter(ColumnHolder.TIME_COLUMN_NAME, ImmutableList.of("1", "2", "3"), new TimeFormatExtractionFn("d", null, null, null, false)); - final GroupByQuery query = GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new ArrayList<>()) @@ -5230,21 +5254,19 @@ public void testSubqueryWithOuterTimeFilter() @Test public void 
testSubqueryWithContextTimeout() { - final GroupByQuery subquery = GroupByQuery - .builder() + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - final GroupByQuery query = GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ArrayList<>()).setAggregatorSpecs(new CountAggregatorFactory("count")) .setGranularity(QueryRunnerTestHelper.allGran) - .setContext(ImmutableMap.of(QueryContexts.TIMEOUT_KEY, 10000)) + .overrideContext(ImmutableMap.of(QueryContexts.TIMEOUT_KEY, 10000)) .build(); List expectedResults = Collections.singletonList( @@ -5257,16 +5279,14 @@ public void testSubqueryWithContextTimeout() @Test public void testSubqueryWithOuterVirtualColumns() { - final GroupByQuery subquery = GroupByQuery - .builder() + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - final GroupByQuery query = GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setVirtualColumns(new ExpressionVirtualColumn("expr", "1", ValueType.FLOAT, TestExprMacroTable.INSTANCE)) @@ -5284,17 +5304,15 @@ public void testSubqueryWithOuterVirtualColumns() @Test public void testSubqueryWithOuterCardinalityAggregator() { - final GroupByQuery subquery = GroupByQuery - .builder() + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) 
.setDimensions(new DefaultDimensionSpec("market", "market"), new DefaultDimensionSpec("quality", "quality")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("index", "index")) - .setGranularity(QueryRunnerTestHelper.dayGran) + .setGranularity(QueryRunnerTestHelper.allGran) .build(); - final GroupByQuery query = GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new ArrayList<>()).setAggregatorSpecs(new CardinalityAggregatorFactory( @@ -5318,8 +5336,7 @@ public void testSubqueryWithOuterCardinalityAggregator() @Test public void testSubqueryWithOuterCountAggregator() { - final GroupByQuery subquery = GroupByQuery - .builder() + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -5332,8 +5349,7 @@ public void testSubqueryWithOuterCountAggregator() ) .build(); - final GroupByQuery query = GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ArrayList<>()).setAggregatorSpecs(new CountAggregatorFactory("count")) @@ -5361,8 +5377,7 @@ public void testSubqueryWithOuterCountAggregator() @Test public void testSubqueryWithOuterDimJavascriptAggregators() { - final GroupByQuery subquery = GroupByQuery - .builder() + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("market", "market"), new DefaultDimensionSpec("quality", "quality")) @@ -5370,8 +5385,7 @@ public void testSubqueryWithOuterDimJavascriptAggregators() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - final GroupByQuery query 
= GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "quality")) @@ -5414,8 +5428,7 @@ public void testSubqueryWithOuterDimJavascriptAggregators() @Test public void testSubqueryWithOuterJavascriptAggregators() { - final GroupByQuery subquery = GroupByQuery - .builder() + final GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("market", "market"), new DefaultDimensionSpec("quality", "quality")) @@ -5423,8 +5436,7 @@ public void testSubqueryWithOuterJavascriptAggregators() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - final GroupByQuery query = GroupByQuery - .builder() + final GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "quality")) @@ -5467,25 +5479,27 @@ public void testSubqueryWithOuterJavascriptAggregators() @Test public void testSubqueryWithHyperUniques() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index"), - new HyperUniquesAggregatorFactory("quality_uniques", "quality_uniques")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index"), + new HyperUniquesAggregatorFactory("quality_uniques", "quality_uniques") + ) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() 
.setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("alias", "alias")) - .setAggregatorSpecs(new LongSumAggregatorFactory("rows", "rows"), - new LongSumAggregatorFactory("idx", "idx"), - new HyperUniquesAggregatorFactory("uniq", "quality_uniques")) + .setAggregatorSpecs( + new LongSumAggregatorFactory("rows", "rows"), + new LongSumAggregatorFactory("idx", "idx"), + new HyperUniquesAggregatorFactory("uniq", "quality_uniques") + ) .setGranularity(QueryRunnerTestHelper.allGran) .build(); @@ -5599,8 +5613,7 @@ public void testSubqueryWithHyperUniques() @Test public void testSubqueryWithHyperUniquesPostAggregator() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ArrayList<>()) @@ -5615,8 +5628,7 @@ public void testSubqueryWithHyperUniquesPostAggregator() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ArrayList<>()) @@ -5653,8 +5665,10 @@ public void testSubqueryWithHyperUniquesPostAggregator() @Test public void testSubqueryWithFirstLast() { - GroupByQuery subquery = GroupByQuery - .builder() + // Cannot vectorize due to "first", "last" aggregators. 
+ cannotVectorize(); + + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new DefaultDimensionSpec("market", "market")) @@ -5662,11 +5676,10 @@ public void testSubqueryWithFirstLast() new LongFirstAggregatorFactory("innerfirst", "index"), new LongLastAggregatorFactory("innerlast", "index")) .setGranularity(QueryRunnerTestHelper.dayGran) - .setContext(ImmutableMap.of("finalize", true)) + .overrideContext(ImmutableMap.of("finalize", true)) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions(new ArrayList<>()) @@ -5693,8 +5706,7 @@ public void testGroupByWithSubtotalsSpec() return; } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"), new DefaultDimensionSpec("market", "market"))) @@ -5766,8 +5778,7 @@ public void testGroupByWithSubtotalsSpecWithLongDimensionColumn() return; } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("qualityLong", "ql", ValueType.LONG), new DefaultDimensionSpec("market", "market"))) @@ -5827,10 +5838,6 @@ public void testGroupByWithSubtotalsSpecWithLongDimensionColumn() ); Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); - - for (Row row : results) { - System.out.println(row); - } TestHelper.assertExpectedObjects(expectedResults, results, "subtotal-long-dim"); } @@ -5841,8 +5848,7 @@ public void 
testGroupByWithSubtotalsSpecWithOrderLimit() return; } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"), new DefaultDimensionSpec("market", "market"))) @@ -5877,8 +5883,10 @@ public void testGroupByWithSubtotalsSpecWithOrderLimit() @Test public void testGroupByWithTimeColumn() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to javascript aggregator. + cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, @@ -5906,8 +5914,10 @@ public void testGroupByWithTimeColumn() @Test public void testGroupByTimeExtraction() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions( @@ -6124,6 +6134,9 @@ public void testGroupByTimeExtraction() @Test public void testGroupByTimeExtractionWithNulls() { + // Cannot vectorize due to extraction dimension specs. 
+ cannotVectorize(); + final DimExtractionFn nullWednesdays = new DimExtractionFn() { @Override @@ -6155,8 +6168,7 @@ public ExtractionType getExtractionType() } }; - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) .setDimensions( @@ -6395,15 +6407,14 @@ public void testBySegmentResults() for (int i = 0; i < segmentCount; i++) { bySegmentResults.add(singleSegmentResult); } - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(new PeriodGranularity(new Period("P1M"), null, null)) .setDimFilter(new SelectorDimFilter("quality", "mezzanine", null)) - .setContext(ImmutableMap.of("bySegment", true)); + .overrideContext(ImmutableMap.of("bySegment", true)); final GroupByQuery fullQuery = builder.build(); QueryToolChest toolChest = factory.getToolchest(); @@ -6431,6 +6442,9 @@ public void testBySegmentResults() @Test public void testBySegmentResultsUnOptimizedDimextraction() { + // Cannot vectorize due to extraction dimension spec. 
+ cannotVectorize(); + int segmentCount = 32; Result singleSegmentResult = new Result( DateTimes.of("2011-01-12T00:00:00.000Z"), @@ -6452,8 +6466,7 @@ public void testBySegmentResultsUnOptimizedDimextraction() for (int i = 0; i < segmentCount; i++) { bySegmentResults.add(singleSegmentResult); } - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04").setDimensions(new ExtractionDimensionSpec( "quality", @@ -6468,7 +6481,7 @@ public void testBySegmentResultsUnOptimizedDimextraction() )).setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(new PeriodGranularity(new Period("P1M"), null, null)) .setDimFilter(new SelectorDimFilter("quality", "mezzanine", null)) - .setContext(ImmutableMap.of("bySegment", true)); + .overrideContext(ImmutableMap.of("bySegment", true)); final GroupByQuery fullQuery = builder.build(); QueryToolChest toolChest = factory.getToolchest(); @@ -6484,7 +6497,11 @@ public void testBySegmentResultsUnOptimizedDimextraction() ) ); - TestHelper.assertExpectedObjects(bySegmentResults, theRunner.run(QueryPlus.wrap(fullQuery), new HashMap<>()), "bySegment"); + TestHelper.assertExpectedObjects( + bySegmentResults, + theRunner.run(QueryPlus.wrap(fullQuery), new HashMap<>()), + "bySegment" + ); exec.shutdownNow(); } @@ -6512,8 +6529,7 @@ public void testBySegmentResultsOptimizedDimextraction() for (int i = 0; i < segmentCount; i++) { bySegmentResults.add(singleSegmentResult); } - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04").setDimensions(new ExtractionDimensionSpec( "quality", @@ -6528,7 +6544,7 @@ public void testBySegmentResultsOptimizedDimextraction() )).setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new 
LongSumAggregatorFactory("idx", "index")) .setGranularity(new PeriodGranularity(new Period("P1M"), null, null)) .setDimFilter(new SelectorDimFilter("quality", "mezzanine", null)) - .setContext(ImmutableMap.of("bySegment", true)); + .overrideContext(ImmutableMap.of("bySegment", true)); final GroupByQuery fullQuery = builder.build(); QueryToolChest toolChest = factory.getToolchest(); @@ -6544,7 +6560,11 @@ public void testBySegmentResultsOptimizedDimextraction() ) ); - TestHelper.assertExpectedObjects(bySegmentResults, theRunner.run(QueryPlus.wrap(fullQuery), new HashMap<>()), "bySegment-dim-extraction"); + TestHelper.assertExpectedObjects( + bySegmentResults, + theRunner.run(QueryPlus.wrap(fullQuery), new HashMap<>()), + "bySegment-dim-extraction" + ); exec.shutdownNow(); } @@ -6571,15 +6591,17 @@ public void testGroupByWithExtractionDimFilter() new SelectorDimFilter("quality", "travel", null) ); - GroupByQuery query = GroupByQuery.builder() - .setDataSource(QueryRunnerTestHelper.dataSource) - .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setDimensions(new DefaultDimensionSpec("quality", "alias")) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index")) - .setGranularity(QueryRunnerTestHelper.dayGran) - .setDimFilter(new OrDimFilter(dimFilters)) - .build(); + GroupByQuery query = makeQueryBuilder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(new DefaultDimensionSpec("quality", "alias")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter(new OrDimFilter(dimFilters)) + .build(); List expectedResults = Arrays.asList( GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", 
"rows", 1L, "idx", 118L), @@ -6623,12 +6645,14 @@ public void testGroupByWithExtractionDimFilterCaseMappingValueIsNullOrEmpty() MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false); LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, false); - GroupByQuery query = GroupByQuery.builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) .setGranularity(QueryRunnerTestHelper.dayGran) .setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null)) .build(); @@ -6661,17 +6685,14 @@ public void testGroupByWithExtractionDimFilterWhenSearchValueNotInTheMap() MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false); LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, false); - GroupByQuery query = GroupByQuery.builder() - .setDataSource(QueryRunnerTestHelper.dataSource) - .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setDimensions(new DefaultDimensionSpec("quality", "alias")) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index")) - .setGranularity(QueryRunnerTestHelper.dayGran) - .setDimFilter( - new ExtractionDimFilter("quality", "NOT_THERE", lookupExtractionFn, null) - ) - .build(); + GroupByQuery query = makeQueryBuilder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(new DefaultDimensionSpec("quality", "alias")) + .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new 
LongSumAggregatorFactory("idx", "index")) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter(new ExtractionDimFilter("quality", "NOT_THERE", lookupExtractionFn, null)) + .build(); List expectedResults = Collections.emptyList(); Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); @@ -6695,21 +6716,23 @@ public void testGroupByWithExtractionDimFilterKeyisNull() extractionMap.put("", "NOT_USED"); } - GroupByQuery query = GroupByQuery.builder() - .setDataSource(QueryRunnerTestHelper.dataSource) - .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setDimensions(new DefaultDimensionSpec("null_column", "alias")) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index")) - .setGranularity(QueryRunnerTestHelper.dayGran) - .setDimFilter( - new ExtractionDimFilter( - "null_column", - "REPLACED_VALUE", - lookupExtractionFn, - null - ) - ).build(); + GroupByQuery query = makeQueryBuilder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(new DefaultDimensionSpec("null_column", "alias")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter( + new ExtractionDimFilter( + "null_column", + "REPLACED_VALUE", + lookupExtractionFn, + null + ) + ).build(); List expectedResults = Arrays .asList( @@ -6738,19 +6761,16 @@ public void testGroupByWithAggregatorFilterAndExtractionFunction() MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false); LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, "missing", true, false); DimFilter filter = new ExtractionDimFilter("quality", "mezzanineANDnews", lookupExtractionFn, null); - GroupByQuery query = GroupByQuery.builder() - .setDataSource(QueryRunnerTestHelper.dataSource) - 
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setDimensions(new DefaultDimensionSpec("quality", "alias")) - .setAggregatorSpecs(new FilteredAggregatorFactory( - QueryRunnerTestHelper.rowsCount, - filter - ), new FilteredAggregatorFactory( - new LongSumAggregatorFactory("idx", "index"), - filter - )) - .setGranularity(QueryRunnerTestHelper.dayGran) - .build(); + GroupByQuery query = makeQueryBuilder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(new DefaultDimensionSpec("quality", "alias")) + .setAggregatorSpecs( + new FilteredAggregatorFactory(QueryRunnerTestHelper.rowsCount, filter), + new FilteredAggregatorFactory(new LongSumAggregatorFactory("idx", "index"), filter) + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .build(); List expectedResults = Arrays.asList( GroupByQueryRunnerTestHelper.createExpectedRow( "2011-04-01", @@ -6899,12 +6919,14 @@ public void testGroupByWithExtractionDimFilterOptimazitionManyToOne() MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false); LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, true); - GroupByQuery query = GroupByQuery.builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) .setGranularity(QueryRunnerTestHelper.dayGran) .setDimFilter( new ExtractionDimFilter( @@ -6943,8 +6965,7 @@ public void testGroupByWithExtractionDimFilterNullDims() lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, "EMPTY", true, true); } - GroupByQuery query = GroupByQuery - 
.builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("null_column", "alias")) @@ -6989,7 +7010,11 @@ public void testBySegmentResultsWithAllFiltersWithExtractionFns() String extractionJsFn = "function(str) { return 'super-' + str; }"; String jsFn = "function(x) { return(x === 'super-mezzanine') }"; - ExtractionFn extractionFn = new JavaScriptExtractionFn(extractionJsFn, false, JavaScriptConfig.getEnabledInstance()); + ExtractionFn extractionFn = new JavaScriptExtractionFn( + extractionJsFn, + false, + JavaScriptConfig.getEnabledInstance() + ); List superFilterList = new ArrayList<>(); superFilterList.add(new SelectorDimFilter("quality", "super-mezzanine", extractionFn)); @@ -7017,15 +7042,14 @@ public void testBySegmentResultsWithAllFiltersWithExtractionFns() superFilterList.add(new JavaScriptDimFilter("quality", jsFn, extractionFn, JavaScriptConfig.getEnabledInstance())); DimFilter superFilter = new AndDimFilter(superFilterList); - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(new PeriodGranularity(new Period("P1M"), null, null)) .setDimFilter(superFilter) - .setContext(ImmutableMap.of("bySegment", true)); + .overrideContext(ImmutableMap.of("bySegment", true)); final GroupByQuery fullQuery = builder.build(); QueryToolChest toolChest = factory.getToolchest(); @@ -7041,7 +7065,11 @@ public void testBySegmentResultsWithAllFiltersWithExtractionFns() ) ); - TestHelper.assertExpectedObjects(bySegmentResults, theRunner.run(QueryPlus.wrap(fullQuery), new HashMap<>()), "bySegment-filter"); + 
TestHelper.assertExpectedObjects( + bySegmentResults, + theRunner.run(QueryPlus.wrap(fullQuery), new HashMap<>()), + "bySegment-filter" + ); exec.shutdownNow(); } @@ -7066,10 +7094,12 @@ public void testGroupByWithAllFiltersOnNullDimsWithExtractionFns() superFilterList.add( new SearchQueryDimFilter("null_column", new ContainsSearchQuerySpec("EMPTY", true), extractionFn) ); - superFilterList.add(new JavaScriptDimFilter("null_column", jsFn, extractionFn, JavaScriptConfig.getEnabledInstance())); + superFilterList.add( + new JavaScriptDimFilter("null_column", jsFn, extractionFn, JavaScriptConfig.getEnabledInstance()) + ); DimFilter superFilter = new AndDimFilter(superFilterList); - GroupByQuery query = GroupByQuery.builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("null_column", "alias")) @@ -7092,11 +7122,13 @@ public void testGroupByWithAllFiltersOnNullDimsWithExtractionFns() @Test public void testGroupByCardinalityAggWithExtractionFn() { + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + String helloJsFn = "function(str) { return 'hello' }"; ExtractionFn helloFn = new JavaScriptExtractionFn(helloJsFn, false, JavaScriptConfig.getEnabledInstance()); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("market", "alias")) @@ -7128,8 +7160,10 @@ public void testGroupByCardinalityAggWithExtractionFn() @Test public void testGroupByCardinalityAggOnFloat() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to "cardinality" aggregator. 
+ cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("market", "alias")) @@ -7165,8 +7199,7 @@ public void testGroupByLongColumn() expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("qualityLong", "ql_alias", ValueType.LONG)) @@ -7223,8 +7256,7 @@ public void testGroupByLongColumnDescending() expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("qualityLong", "ql_alias", ValueType.LONG)) @@ -7276,6 +7308,9 @@ public void testGroupByLongColumnDescending() @Test public void testGroupByLongColumnWithExFn() { + // Cannot vectorize due to extraction dimension spec. 
+ cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); @@ -7284,8 +7319,7 @@ public void testGroupByLongColumnWithExFn() String jsFn = "function(str) { return 'super-' + str; }"; ExtractionFn jsExtractionFn = new JavaScriptExtractionFn(jsFn, false, JavaScriptConfig.getEnabledInstance()); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ExtractionDimensionSpec("qualityLong", "ql_alias", jsExtractionFn)) @@ -7326,8 +7360,7 @@ public void testGroupByLongTimeColumn() expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("__time", "time_alias", ValueType.LONG)) @@ -7363,11 +7396,13 @@ public void testGroupByLongTimeColumn() @Test public void testGroupByLongTimeColumnWithExFn() { + // Cannot vectorize due to extraction dimension spec. 
+ cannotVectorize(); + String jsFn = "function(str) { return 'super-' + str; }"; ExtractionFn jsExtractionFn = new JavaScriptExtractionFn(jsFn, false, JavaScriptConfig.getEnabledInstance()); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ExtractionDimensionSpec("__time", "time_alias", jsExtractionFn)) @@ -7408,8 +7443,7 @@ public void testGroupByFloatColumn() expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("index", "index_alias", ValueType.FLOAT)) @@ -7467,8 +7501,7 @@ public void testGroupByFloatColumnDescending() expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("qualityFloat", "qf_alias", ValueType.FLOAT)) @@ -7525,8 +7558,7 @@ public void testGroupByDoubleColumnDescending() expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("qualityDouble", "alias", ValueType.DOUBLE)) @@ -7578,6 +7610,9 @@ public void testGroupByDoubleColumnDescending() @Test public void testGroupByFloatColumnWithExFn() { + // Cannot vectorize due to extraction dimension 
spec. + cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); @@ -7586,8 +7621,7 @@ public void testGroupByFloatColumnWithExFn() String jsFn = "function(str) { return 'super-' + str; }"; ExtractionFn jsExtractionFn = new JavaScriptExtractionFn(jsFn, false, JavaScriptConfig.getEnabledInstance()); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ExtractionDimensionSpec("index", "index_alias", jsExtractionFn)) @@ -7631,8 +7665,7 @@ public void testGroupByWithHavingSpecOnLongAndFloat() expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -7688,8 +7721,7 @@ public void testGroupByLongAndFloatOutputAsString() expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -7737,8 +7769,7 @@ public void testGroupByNumericStringsAsNumeric() expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); } - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ 
-7751,8 +7782,7 @@ public void testGroupByNumericStringsAsNumeric() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery outerQuery = GroupByQuery - .builder() + GroupByQuery outerQuery = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -7790,6 +7820,9 @@ public void testGroupByNumericStringsAsNumeric() @Test public void testGroupByNumericStringsAsNumericWithDecoration() { + // Cannot vectorize due to regex-filtered dimension spec. + cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); @@ -7807,8 +7840,7 @@ public void testGroupByNumericStringsAsNumericWithDecoration() true ); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(regexSpec, listFilteredSpec) @@ -7842,6 +7874,9 @@ public void testGroupByNumericStringsAsNumericWithDecoration() @Test public void testGroupByDecorationOnNumerics() { + // Cannot vectorize due to filtered dimension spec. 
+ cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); @@ -7858,8 +7893,7 @@ public void testGroupByDecorationOnNumerics() true ); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(regexSpec, listFilteredSpec) @@ -7912,8 +7946,7 @@ public void testGroupByNestedWithInnerQueryNumerics() expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -7931,8 +7964,7 @@ public void testGroupByNestedWithInnerQueryNumerics() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery outerQuery = GroupByQuery - .builder() + GroupByQuery outerQuery = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -7956,8 +7988,10 @@ public void testGroupByNestedWithInnerQueryNumerics() ) ) ) - .setAggregatorSpecs(new LongSumAggregatorFactory("ql_alias_sum", "ql_alias"), - new DoubleSumAggregatorFactory("qf_alias_sum", "qf_alias")) + .setAggregatorSpecs( + new LongSumAggregatorFactory("ql_alias_sum", "ql_alias"), + new DoubleSumAggregatorFactory("qf_alias_sum", "qf_alias") + ) .setGranularity(QueryRunnerTestHelper.allGran) .build(); @@ -7983,8 +8017,7 @@ public void testGroupByNestedWithInnerQueryNumericsWithLongTime() expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery subQuery = GroupByQuery - .builder() + 
GroupByQuery subQuery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -7995,16 +8028,17 @@ public void testGroupByNestedWithInnerQueryNumericsWithLongTime() .setGranularity(QueryRunnerTestHelper.allGran) .build(); - GroupByQuery outerQuery = GroupByQuery - .builder() + GroupByQuery outerQuery = makeQueryBuilder() .setDataSource(subQuery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( new DefaultDimensionSpec("alias", "market"), new DefaultDimensionSpec("time_alias", "time_alias2", ValueType.LONG) ) - .setAggregatorSpecs(new LongMaxAggregatorFactory("time_alias_max", "time_alias"), - new DoubleMaxAggregatorFactory("index_alias_max", "index_alias")) + .setAggregatorSpecs( + new LongMaxAggregatorFactory("time_alias_max", "time_alias"), + new DoubleMaxAggregatorFactory("index_alias_max", "index_alias") + ) .setGranularity(QueryRunnerTestHelper.allGran) .build(); @@ -8060,6 +8094,9 @@ public void testGroupByNestedWithInnerQueryNumericsWithLongTime() @Test public void testGroupByStringOutputAsLong() { + // Cannot vectorize due to extraction dimension spec. 
+ cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); @@ -8067,8 +8104,7 @@ public void testGroupByStringOutputAsLong() ExtractionFn strlenFn = StrlenExtractionFn.instance(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ExtractionDimensionSpec( @@ -8110,33 +8146,37 @@ public void testGroupByStringOutputAsLong() @Test public void testGroupByWithAggsOnNumericDimensions() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to javascript aggregators. + cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setDimFilter(new SelectorDimFilter("quality", "technology", null)) - .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("qlLong", "qualityLong"), - new DoubleSumAggregatorFactory("qlFloat", "qualityLong"), - new JavaScriptAggregatorFactory( - "qlJs", - ImmutableList.of("qualityLong"), - "function(a,b) { return a + b; }", - "function() { return 0; }", - "function(a,b) { return a + b }", - JavaScriptConfig.getEnabledInstance() - ), - new DoubleSumAggregatorFactory("qfFloat", "qualityFloat"), - new LongSumAggregatorFactory("qfLong", "qualityFloat"), - new JavaScriptAggregatorFactory( - "qfJs", - ImmutableList.of("qualityFloat"), - "function(a,b) { return a + b; }", - "function() { return 0; }", - "function(a,b) { return a + b }", - JavaScriptConfig.getEnabledInstance() - )) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new 
LongSumAggregatorFactory("qlLong", "qualityLong"), + new DoubleSumAggregatorFactory("qlFloat", "qualityLong"), + new JavaScriptAggregatorFactory( + "qlJs", + ImmutableList.of("qualityLong"), + "function(a,b) { return a + b; }", + "function() { return 0; }", + "function(a,b) { return a + b }", + JavaScriptConfig.getEnabledInstance() + ), + new DoubleSumAggregatorFactory("qfFloat", "qualityFloat"), + new LongSumAggregatorFactory("qfLong", "qualityFloat"), + new JavaScriptAggregatorFactory( + "qfJs", + ImmutableList.of("qualityFloat"), + "function(a,b) { return a + b; }", + "function() { return 0; }", + "function(a,b) { return a + b }", + JavaScriptConfig.getEnabledInstance() + ) + ) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); @@ -8172,6 +8212,9 @@ public void testGroupByWithAggsOnNumericDimensions() @Test public void testGroupByNestedOuterExtractionFnOnFloatInner() { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); @@ -8180,8 +8223,7 @@ public void testGroupByNestedOuterExtractionFnOnFloatInner() String jsFn = "function(obj) { return obj; }"; ExtractionFn jsExtractionFn = new JavaScriptExtractionFn(jsFn, false, JavaScriptConfig.getEnabledInstance()); - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias"), new ExtractionDimensionSpec( @@ -8195,8 +8237,7 @@ public void testGroupByNestedOuterExtractionFnOnFloatInner() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery outerQuery = GroupByQuery - .builder() + GroupByQuery outerQuery = makeQueryBuilder() 
.setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("alias", "alias"), new ExtractionDimensionSpec( @@ -8224,13 +8265,15 @@ public void testGroupByNestedOuterExtractionFnOnFloatInner() @Test public void testGroupByNestedDoubleTimeExtractionFnWithLongOutputTypes() { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -8247,8 +8290,7 @@ public void testGroupByNestedDoubleTimeExtractionFnWithLongOutputTypes() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery outerQuery = GroupByQuery - .builder() + GroupByQuery outerQuery = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("alias", "alias"), new ExtractionDimensionSpec( @@ -8279,7 +8321,7 @@ public void testGroupByLimitPushDown() if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } - GroupByQuery query = new GroupByQuery.Builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.allGran).setDimensions(new DefaultDimensionSpec( QueryRunnerTestHelper.marketDimension, @@ -8295,7 +8337,7 @@ public void testGroupByLimitPushDown() 2 ) ).setAggregatorSpecs(QueryRunnerTestHelper.rowsCount) - .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) + 
.overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) .build(); List expectedResults = Arrays.asList( @@ -8325,8 +8367,7 @@ public void testMergeResultsWithLimitPushDown() if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -8337,7 +8378,7 @@ public void testMergeResultsWithLimitPushDown() 5 ) ) - .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) + .overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) .setGranularity(Granularities.ALL); final GroupByQuery allGranQuery = builder.build(); @@ -8392,8 +8433,7 @@ public void testMergeResultsWithLimitPushDownSortByAgg() if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -8404,7 +8444,7 @@ public void testMergeResultsWithLimitPushDownSortByAgg() 5 ) ) - .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) + .overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) .setGranularity(Granularities.ALL); final GroupByQuery allGranQuery = builder.build(); @@ -8457,8 +8497,7 @@ public void testMergeResultsWithLimitPushDownSortByDimDim() if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() 
.setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias"), new DefaultDimensionSpec("market", "market")) @@ -8472,7 +8511,7 @@ public void testMergeResultsWithLimitPushDownSortByDimDim() 5 ) ) - .setContext( + .overrideContext( ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true) ) .setGranularity(Granularities.ALL); @@ -8527,8 +8566,7 @@ public void testMergeResultsWithLimitPushDownSortByDimAggDim() if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias"), new DefaultDimensionSpec("market", "market")) @@ -8552,7 +8590,7 @@ public void testMergeResultsWithLimitPushDownSortByDimAggDim() 5 ) ) - .setContext( + .overrideContext( ImmutableMap.of( GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true @@ -8610,7 +8648,7 @@ public void testGroupByLimitPushDownPostAggNotSupported() expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("Limit push down when sorting by a post aggregator is not supported."); - GroupByQuery query = new GroupByQuery.Builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.allGran).setDimensions(new DefaultDimensionSpec( QueryRunnerTestHelper.marketDimension, @@ -8629,7 +8667,7 @@ public void testGroupByLimitPushDownPostAggNotSupported() .setPostAggregatorSpecs( Collections.singletonList(new ConstantPostAggregator("constant", 1)) ) - .setContext( + .overrideContext( ImmutableMap.of( GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true @@ -8643,8 +8681,7 @@ public void testGroupByLimitPushDownPostAggNotSupported() @Test public void 
testEmptySubqueryWithLimitPushDown() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.emptyInterval) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -8652,17 +8689,16 @@ public void testEmptySubqueryWithLimitPushDown() .setLimitSpec( new DefaultLimitSpec( Collections.singletonList(new OrderByColumnSpec( - "alias", - OrderByColumnSpec.Direction.DESCENDING - )), + "alias", + OrderByColumnSpec.Direction.DESCENDING + )), 5 ) ) .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setAggregatorSpecs(new DoubleMaxAggregatorFactory("idx", "idx")) @@ -8678,8 +8714,7 @@ public void testEmptySubqueryWithLimitPushDown() @Test public void testSubqueryWithMultipleIntervalsInOuterQueryWithLimitPushDown() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -8703,8 +8738,7 @@ public void testSubqueryWithMultipleIntervalsInOuterQueryWithLimitPushDown() .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec( new MultipleIntervalSegmentSpec( @@ -8754,8 +8788,7 @@ public void testRejectForceLimitPushDownWithHaving() expectedException.expect(IAE.class); expectedException.expectMessage("Cannot force limit push down when a having spec is present."); - GroupByQuery - .builder() + makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.allGran) .setDimensions(new 
DefaultDimensionSpec(QueryRunnerTestHelper.marketDimension, "marketalias")) @@ -8767,7 +8800,7 @@ public void testRejectForceLimitPushDownWithHaving() ) ) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount) - .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) + .overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) .setHavingSpec(new GreaterThanHavingSpec("rows", 10)) .build(); } @@ -8775,13 +8808,15 @@ public void testRejectForceLimitPushDownWithHaving() @Test public void testTypeConversionWithMergingChainedExecutionRunner() { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -8811,4 +8846,38 @@ public void testTypeConversionWithMergingChainedExecutionRunner() Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, mergingRunner, query); TestHelper.assertExpectedObjects(expectedResults, results, "type-conversion"); } + + /** + * Use this method instead of GroupByQuery.builder() to make sure the context is set properly. Also, avoid + * setContext in tests. Only use overrideContext. + */ + private GroupByQuery.Builder makeQueryBuilder() + { + return GroupByQuery.builder().overrideContext(makeContext()); + } + + /** + * Use this method instead of new GroupByQuery.Builder(query) to make sure the context is set properly. Also, avoid + * setContext in tests. Only use overrideContext. 
+ */ + private GroupByQuery.Builder makeQueryBuilder(final GroupByQuery query) + { + return new GroupByQuery.Builder(query).overrideContext(makeContext()); + } + + private Map makeContext() + { + return ImmutableMap.builder() + .put("vectorize", vectorize ? "force" : "false") + .put("vectorSize", 16) // Small vector size to ensure we use more than one. + .build(); + } + + private void cannotVectorize() + { + if (vectorize && config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { + expectedException.expect(IllegalStateException.class); + expectedException.expectMessage("Cannot vectorize!"); + } + } } diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByTimeseriesQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByTimeseriesQueryRunnerTest.java index 06a3e5eec037..2953c3a7df70 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByTimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByTimeseriesQueryRunnerTest.java @@ -21,7 +21,6 @@ import com.google.common.base.Function; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; import com.google.common.util.concurrent.MoreExecutors; import org.apache.druid.data.input.MapBasedRow; import org.apache.druid.data.input.Row; @@ -52,6 +51,8 @@ import org.junit.runners.Parameterized; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Map; /** @@ -68,7 +69,7 @@ public static void teardown() throws IOException } @SuppressWarnings("unchecked") - @Parameterized.Parameters(name = "{0}") + @Parameterized.Parameters(name = "{0}, vectorize = {1}") public static Iterable constructorFeeder() { GroupByQueryConfig config = new GroupByQueryConfig(); @@ -78,74 +79,76 @@ public static Iterable constructorFeeder() ); final GroupByQueryRunnerFactory factory = factoryAndCloser.lhs; 
resourceCloser.register(factoryAndCloser.rhs); - return QueryRunnerTestHelper.transformToConstructionFeeder( - Lists.transform( - QueryRunnerTestHelper.makeQueryRunners(factory), - new Function, Object>() - { - @Override - public Object apply(final QueryRunner input) + + final List constructors = new ArrayList<>(); + + for (QueryRunner runner : QueryRunnerTestHelper.makeQueryRunners(factory)) { + final QueryRunner modifiedRunner = new QueryRunner() + { + @Override + public Sequence run(QueryPlus queryPlus, Map responseContext) + { + TimeseriesQuery tsQuery = (TimeseriesQuery) queryPlus.getQuery(); + QueryRunner newRunner = factory.mergeRunners( + MoreExecutors.sameThreadExecutor(), ImmutableList.of(runner) + ); + QueryToolChest toolChest = factory.getToolchest(); + + newRunner = new FinalizeResultsQueryRunner<>( + toolChest.mergeResults(toolChest.preMergeQueryDecoration(newRunner)), + toolChest + ); + + GroupByQuery newQuery = GroupByQuery + .builder() + .setDataSource(tsQuery.getDataSource()) + .setQuerySegmentSpec(tsQuery.getQuerySegmentSpec()) + .setGranularity(tsQuery.getGranularity()) + .setDimFilter(tsQuery.getDimensionsFilter()) + .setAggregatorSpecs(tsQuery.getAggregatorSpecs()) + .setPostAggregatorSpecs(tsQuery.getPostAggregatorSpecs()) + .setVirtualColumns(tsQuery.getVirtualColumns()) + .setContext(tsQuery.getContext()) + .build(); + + return Sequences.map( + newRunner.run(queryPlus.withQuery(newQuery), responseContext), + new Function>() { - return new QueryRunner() + @Override + public Result apply(final Row input) { - @Override - public Sequence run(QueryPlus queryPlus, Map responseContext) - { - TimeseriesQuery tsQuery = (TimeseriesQuery) queryPlus.getQuery(); - QueryRunner newRunner = factory.mergeRunners( - MoreExecutors.sameThreadExecutor(), ImmutableList.of(input) - ); - QueryToolChest toolChest = factory.getToolchest(); - - newRunner = new FinalizeResultsQueryRunner<>( - toolChest.mergeResults(toolChest.preMergeQueryDecoration(newRunner)), - 
toolChest - ); - - GroupByQuery newQuery = GroupByQuery - .builder() - .setDataSource(tsQuery.getDataSource()) - .setQuerySegmentSpec(tsQuery.getQuerySegmentSpec()) - .setGranularity(tsQuery.getGranularity()) - .setDimFilter(tsQuery.getDimensionsFilter()) - .setAggregatorSpecs(tsQuery.getAggregatorSpecs()) - .setPostAggregatorSpecs(tsQuery.getPostAggregatorSpecs()) - .setVirtualColumns(tsQuery.getVirtualColumns()) - .setContext(tsQuery.getContext()) - .build(); - - return Sequences.map( - newRunner.run(queryPlus.withQuery(newQuery), responseContext), - new Function>() - { - @Override - public Result apply(final Row input) - { - MapBasedRow row = (MapBasedRow) input; - - return new Result<>( - row.getTimestamp(), new TimeseriesResultValue(row.getEvent()) - ); - } - } - ); - } - - @Override - public String toString() - { - return input.toString(); - } - }; + MapBasedRow row = (MapBasedRow) input; + + return new Result<>( + row.getTimestamp(), new TimeseriesResultValue(row.getEvent()) + ); + } } - } - ) - ); + ); + } + + @Override + public String toString() + { + return runner.toString(); + } + }; + + for (boolean vectorize : ImmutableList.of(false, true)) { + // Add vectorization tests for any indexes that support it. 
+ if (!vectorize || QueryRunnerTestHelper.isTestRunnerVectorizable(runner)) { + constructors.add(new Object[]{modifiedRunner, vectorize}); + } + } + } + + return constructors; } - public GroupByTimeseriesQueryRunnerTest(QueryRunner runner) + public GroupByTimeseriesQueryRunnerTest(QueryRunner runner, boolean vectorize) { - super(runner, false, QueryRunnerTestHelper.commonDoubleAggregators); + super(runner, false, vectorize, QueryRunnerTestHelper.commonDoubleAggregators); } // GroupBy handles timestamps differently when granularity is ALL diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java index 8f2d3f69a571..98d46fc9d126 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java @@ -27,6 +27,7 @@ import com.google.common.primitives.Ints; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.MapBasedRow; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; @@ -44,7 +45,7 @@ public class BufferArrayGrouperTest public void testAggregate() { final TestColumnSelectorFactory columnSelectorFactory = GrouperTestUtil.newColumnSelectorFactory(); - final IntGrouper grouper = newGrouper(columnSelectorFactory, 1024); + final IntGrouper grouper = newGrouper(columnSelectorFactory, 32768); columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L))); grouper.aggregate(12); @@ -77,11 +78,13 @@ private BufferArrayGrouper newGrouper( final BufferArrayGrouper grouper = new BufferArrayGrouper( Suppliers.ofInstance(buffer), - 
columnSelectorFactory, - new AggregatorFactory[]{ - new LongSumAggregatorFactory("valueSum", "value"), - new CountAggregatorFactory("count") - }, + AggregatorAdapters.factorizeBuffered( + columnSelectorFactory, + ImmutableList.of( + new LongSumAggregatorFactory("valueSum", "value"), + new CountAggregatorFactory("count") + ) + ), 1000 ); grouper.init(); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java index 64b270d0f4d3..a2275f7f32f7 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java @@ -27,7 +27,7 @@ import com.google.common.primitives.Ints; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.MapBasedRow; -import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.segment.CloserRule; @@ -56,11 +56,13 @@ public void testSimple() final Grouper grouper = new BufferHashGrouper<>( Suppliers.ofInstance(ByteBuffer.allocate(1000)), GrouperTestUtil.intKeySerde(), - columnSelectorFactory, - new AggregatorFactory[]{ - new LongSumAggregatorFactory("valueSum", "value"), - new CountAggregatorFactory("count") - }, + AggregatorAdapters.factorizeBuffered( + columnSelectorFactory, + ImmutableList.of( + new LongSumAggregatorFactory("valueSum", "value"), + new CountAggregatorFactory("count") + ) + ), Integer.MAX_VALUE, 0, 0, @@ -187,11 +189,13 @@ private BufferHashGrouper makeGrouper( final BufferHashGrouper grouper = new BufferHashGrouper<>( Suppliers.ofInstance(buffer), GrouperTestUtil.intKeySerde(), - 
columnSelectorFactory, - new AggregatorFactory[]{ - new LongSumAggregatorFactory("valueSum", "value"), - new CountAggregatorFactory("count") - }, + AggregatorAdapters.factorizeBuffered( + columnSelectorFactory, + ImmutableList.of( + new LongSumAggregatorFactory("valueSum", "value"), + new CountAggregatorFactory("count") + ) + ), Integer.MAX_VALUE, maxLoadFactor, initialBuckets, diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java index 4ce6f4593eaf..70e1abcde4a3 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java @@ -20,12 +20,13 @@ package org.apache.druid.query.groupby.epinephelinae; import com.google.common.base.Suppliers; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.MapBasedRow; import org.apache.druid.java.util.common.IAE; -import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.junit.Assert; @@ -202,11 +203,13 @@ private static LimitedBufferHashGrouper makeGrouper( LimitedBufferHashGrouper grouper = new LimitedBufferHashGrouper<>( Suppliers.ofInstance(ByteBuffer.allocate(bufferSize)), GrouperTestUtil.intKeySerde(), - columnSelectorFactory, - new AggregatorFactory[]{ - new LongSumAggregatorFactory("valueSum", "value"), - new CountAggregatorFactory("count") - }, + AggregatorAdapters.factorizeBuffered( + columnSelectorFactory, + 
ImmutableList.of( + new LongSumAggregatorFactory("valueSum", "value"), + new CountAggregatorFactory("count") + ) + ), Integer.MAX_VALUE, 0.5f, initialBuckets, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java index 13e1d3ec4d94..3f9ebc919315 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java @@ -185,7 +185,7 @@ public SegmentMetadataQueryTest( "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1, mmap1 ? 10881 : 10764, 1, "preferred", @@ -226,7 +226,7 @@ public SegmentMetadataQueryTest( "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap2, mmap2 ? 10881 : 0, 1, null, @@ -272,7 +272,7 @@ public void testSegmentMetadataQueryWithRollupMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 0, null, @@ -340,7 +340,7 @@ public void testSegmentMetadataQueryWithHasMultipleValuesMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 1, null, @@ -408,7 +408,7 @@ public void testSegmentMetadataQueryWithComplexColumnMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 1, null, @@ -471,7 +471,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge() { ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, (mmap1 ? 10881 : 10764) + (mmap2 ? 10881 : 10764), 1, "preferred", @@ -486,7 +486,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge2() { ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, (mmap1 ? 6882 : 6808) + (mmap2 ? 
6882 : 6808), 3, "spot", @@ -501,7 +501,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge3() { ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, (mmap1 ? 9765 : 9660) + (mmap2 ? 9765 : 9660), 9, "automotive", @@ -587,7 +587,7 @@ public void testSegmentMetadataQueryWithNoAnalysisTypesMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 0, null, @@ -649,7 +649,7 @@ public void testSegmentMetadataQueryWithAggregatorsMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 0, null, @@ -707,7 +707,7 @@ public void testSegmentMetadataQueryWithTimestampSpecMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 0, null, @@ -765,7 +765,7 @@ public void testSegmentMetadataQueryWithQueryGranularityMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 0, null, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java index e98f1d11f6a5..1e5b3b33db3e 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java @@ -101,7 +101,7 @@ public void testSegmentMetadataUnionQuery() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap, mmap ? 
43524 : 43056, 1, "preferred", diff --git a/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java index 930e32175031..3c481a50fce2 100644 --- a/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java @@ -143,7 +143,7 @@ public void testRetry2() throws Exception ); CountAggregator rows = new CountAggregator(); rows.aggregate(); - builder.addMetric("rows", rows); + builder.addMetric("rows", rows.get()); final Result value = builder.build(); final SpecificSegmentQueryRunner queryRunner = new SpecificSegmentQueryRunner( diff --git a/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java index 668ec933e138..38a172e7575e 100644 --- a/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java @@ -66,7 +66,9 @@ import org.joda.time.Interval; import org.joda.time.Period; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -76,19 +78,23 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; /** */ @RunWith(Parameterized.class) public class TimeseriesQueryRunnerTest { - public static final Map CONTEXT = ImmutableMap.of(); - @Parameterized.Parameters(name = "{0}:descending={1}") + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + @Parameterized.Parameters(name = "{0}:descending={1},vectorize={2}") public static 
Iterable constructorFeeder() { - return QueryRunnerTestHelper.cartesian( + final Iterable baseConstructors = QueryRunnerTestHelper.cartesian( // runners QueryRunnerTestHelper.makeQueryRunners( new TimeseriesQueryRunnerFactory( @@ -101,8 +107,25 @@ public static Iterable constructorFeeder() ), // descending? Arrays.asList(false, true), + // vectorize? + Arrays.asList(false, true), + // double vs. float Arrays.asList(QueryRunnerTestHelper.commonDoubleAggregators, QueryRunnerTestHelper.commonFloatAggregators) ); + + // Add vectorization tests for any indexes that support it. + return StreamSupport + .stream(baseConstructors.spliterator(), false) + .filter( + constructor -> { + boolean canVectorize = + QueryRunnerTestHelper.isTestRunnerVectorizable((QueryRunner) constructor[0]) + && !(boolean) constructor[1] /* descending */; + final boolean vectorize = (boolean) constructor[2]; /* vectorize */ + return !vectorize || canVectorize; + } + ) + .collect(Collectors.toList()); } private void assertExpectedResults(Iterable> expectedResults, Iterable> results) @@ -115,22 +138,28 @@ private void assertExpectedResults(Iterable> expectedResults, Iter protected final QueryRunner runner; protected final boolean descending; + protected final boolean vectorize; private final List aggregatorFactoryList; public TimeseriesQueryRunnerTest( QueryRunner runner, boolean descending, + boolean vectorize, List aggregatorFactoryList ) { this.runner = runner; this.descending = descending; + this.vectorize = vectorize; this.aggregatorFactoryList = aggregatorFactoryList; } @Test public void testEmptyTimeseries() { + // Cannot vectorize due to "doubleFirst" aggregator. 
+ cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.allGran) @@ -144,6 +173,7 @@ public void testEmptyTimeseries() ) ) .descending(descending) + .context(makeContext()) .build(); Map resultMap = new HashMap<>(); resultMap.put("rows", 0L); @@ -177,6 +207,7 @@ public void testFullOnTimeseries() ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> results = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -277,6 +308,7 @@ public void testTimeseriesNoAggregators() .granularity(gran) .intervals(QueryRunnerTestHelper.fullOnInterval) .descending(descending) + .context(makeContext()) .build(); Iterable> results = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -302,6 +334,9 @@ public void testTimeseriesNoAggregators() @Test public void testFullOnTimeseriesMaxMin() { + // Cannot vectorize due to "doubleMin", "doubleMax" aggregators. + cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(Granularities.ALL) @@ -313,6 +348,7 @@ public void testFullOnTimeseriesMaxMin() ) ) .descending(descending) + .context(makeContext()) .build(); DateTime expectedEarliest = DateTimes.of("2011-01-12"); @@ -349,6 +385,7 @@ public void testFullOnTimeseriesWithFilter() ) ) .descending(descending) + .context(makeContext()) .build(); Assert.assertEquals( @@ -405,6 +442,7 @@ public void testTimeseries() ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -575,6 +613,9 @@ public void testTimeseriesIntervalOutOfRanges() @Test public void testTimeseriesWithVirtualColumn() { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.dayGran) @@ -595,6 +636,7 @@ public void testTimeseriesWithVirtualColumn() TestExprMacroTable.INSTANCE ) ) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -640,6 +682,7 @@ public void testTimeseriesWithTimeZone() ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -684,6 +727,7 @@ public void testTimeseriesWithVaryingGran() ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults1 = Collections.singletonList( @@ -716,6 +760,7 @@ public void testTimeseriesWithVaryingGran() QueryRunnerTestHelper.qualityUniques ) ) + .context(makeContext()) .build(); List> expectedResults2 = Collections.singletonList( @@ -759,6 +804,7 @@ public void testTimeseriesGranularityNotAlignedOnSegmentBoundariesWithFilter() ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults1 = Arrays.asList( @@ -802,6 +848,7 @@ public void testTimeseriesQueryZeroFilling() ) ) .descending(descending) + .context(makeContext()) .build(); List> lotsOfZeroes = new ArrayList<>(); @@ -870,6 +917,7 @@ public void testTimeseriesQueryGranularityNotAlignedWithRollupGranularity() ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults1 = Collections.singletonList( @@ -908,6 +956,7 @@ public void testTimeseriesWithVaryingGranWithFilter() ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults1 = Collections.singletonList( @@ -940,6 +989,7 @@ public void testTimeseriesWithVaryingGranWithFilter() QueryRunnerTestHelper.qualityUniques ) ) + .context(makeContext()) .build(); List> expectedResults2 = Collections.singletonList( @@ -976,6 +1026,7 @@ public void testTimeseriesQueryBeyondTimeRangeOfData() ) ) .descending(descending) + .context(makeContext()) .build(); 
List> expectedResults = Collections.emptyList(); @@ -999,6 +1050,7 @@ public void testTimeseriesWithOrFilter() ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1036,16 +1088,21 @@ public void testTimeseriesWithRegexFilter() TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.dayGran) - .filters(new RegexDimFilter(QueryRunnerTestHelper.marketDimension, "^.p.*$", null)) // spot and upfront - .intervals(QueryRunnerTestHelper.firstToThird) - .aggregators( - QueryRunnerTestHelper.rowsCount, - QueryRunnerTestHelper.indexLongSum, - QueryRunnerTestHelper.qualityUniques - ) - .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) - .descending(descending) - .build(); + .filters(new RegexDimFilter( + QueryRunnerTestHelper.marketDimension, + "^.p.*$", + null + )) // spot and upfront + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators( + QueryRunnerTestHelper.rowsCount, + QueryRunnerTestHelper.indexLongSum, + QueryRunnerTestHelper.qualityUniques + ) + .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) + .descending(descending) + .context(makeContext()) + .build(); List> expectedResults = Arrays.asList( new Result<>( @@ -1091,6 +1148,7 @@ public void testTimeseriesWithFilter1() ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1137,6 +1195,7 @@ public void testTimeseriesWithFilter2() ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1183,6 +1242,7 @@ public void testTimeseriesWithFilter3() ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> 
expectedResults = Arrays.asList( @@ -1229,6 +1289,7 @@ public void testTimeseriesWithMultiDimFilterAndOr() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1275,6 +1336,7 @@ public void testTimeseriesWithMultiDimFilter() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1321,6 +1383,7 @@ public void testTimeseriesWithOtherMultiDimFilter() .aggregators(QueryRunnerTestHelper.commonDoubleAggregators) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1373,6 +1436,7 @@ public void testTimeseriesWithNonExistentFilterInOr() ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1431,6 +1495,7 @@ public void testTimeseriesWithInFilter() ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1477,6 +1542,7 @@ public void testTimeseriesWithNonExistentFilterAndMultiDimAndOr() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1519,6 +1585,7 @@ public void testTimeseriesWithFilterOnNonExistentDimension() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Map resultMap = new HashMap<>(); @@ -1558,11 +1625,13 @@ public void testTimeseriesWithFilterOnNonExistentDimensionSkipBuckets() 
.postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .context(ImmutableMap.of("skipEmptyBuckets", "true")) .descending(descending) + .context(makeContext(ImmutableMap.of("skipEmptyBuckets", "true"))) .build(); List> expectedResults = Collections.emptyList(); - Iterable> results = runner.run(QueryPlus.wrap(query), new HashMap()).toList(); + Iterable> results = runner.run(QueryPlus.wrap(query), new HashMap()) + .toList(); assertExpectedResults(expectedResults, results); } @@ -1577,6 +1646,7 @@ public void testTimeseriesWithNullFilterOnNonExistentDimension() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1604,7 +1674,8 @@ public void testTimeseriesWithNullFilterOnNonExistentDimension() ) ); - Iterable> results = runner.run(QueryPlus.wrap(query), new HashMap()).toList(); + Iterable> results = runner.run(QueryPlus.wrap(query), new HashMap()) + .toList(); assertExpectedResults(expectedResults, results); } @@ -1619,6 +1690,7 @@ public void testTimeseriesWithInvertedFilterOnNonExistentDimension() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1646,7 +1718,8 @@ public void testTimeseriesWithInvertedFilterOnNonExistentDimension() ) ); - Iterable> results = runner.run(QueryPlus.wrap(query), new HashMap()).toList(); + Iterable> results = runner.run(QueryPlus.wrap(query), new HashMap()) + .toList(); assertExpectedResults(expectedResults, results); } @@ -1661,6 +1734,7 @@ public void testTimeseriesWithNonExistentFilter() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Map resultMap = new HashMap<>(); resultMap.put("rows", 0L); @@ -1702,6 +1776,7 @@ public void 
testTimeseriesWithNonExistentFilterAndMultiDim() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Map resultMap = new HashMap<>(); resultMap.put("rows", 0L); @@ -1731,6 +1806,9 @@ public void testTimeseriesWithNonExistentFilterAndMultiDim() @Test public void testTimeseriesWithMultiValueFilteringJavascriptAggregator() { + // Cannot vectorize due to JavaScript aggregators. + cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.allGran) @@ -1743,6 +1821,7 @@ public void testTimeseriesWithMultiValueFilteringJavascriptAggregator() ) ) .descending(descending) + .context(makeContext()) .build(); Iterable> expectedResults = ImmutableList.of( @@ -1765,6 +1844,9 @@ public void testTimeseriesWithMultiValueFilteringJavascriptAggregator() @Test public void testTimeseriesWithMultiValueFilteringJavascriptAggregatorAndAlsoRegularFilters() { + // Cannot vectorize due to JavaScript aggregators. + cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.allGran) @@ -1778,6 +1860,7 @@ public void testTimeseriesWithMultiValueFilteringJavascriptAggregatorAndAlsoRegu ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = ImmutableList.of( @@ -1800,6 +1883,9 @@ public void testTimeseriesWithMultiValueFilteringJavascriptAggregatorAndAlsoRegu @Test public void testTimeseriesWithFirstLastAggregator() { + // Cannot vectorize due to "doubleFirst", "doubleLast" aggregators. 
+ cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.monthGran) @@ -1811,6 +1897,7 @@ public void testTimeseriesWithFirstLastAggregator() ) ) .descending(descending) + .context(makeContext()) .build(); // There's a difference between ascending and descending results since granularity of druid.sample.tsv is days, @@ -1914,6 +2001,7 @@ public void testTimeseriesWithMultiValueDimFilter1() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); TimeseriesQuery query1 = Druids @@ -1924,6 +2012,7 @@ public void testTimeseriesWithMultiValueDimFilter1() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> expectedResults = runner.run(QueryPlus.wrap(query1), CONTEXT).toList(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -1942,6 +2031,7 @@ public void testTimeseriesWithMultiValueDimFilter2() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); TimeseriesQuery query1 = Druids @@ -1953,6 +2043,7 @@ public void testTimeseriesWithMultiValueDimFilter2() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> expectedResults = runner.run(QueryPlus.wrap(query1), CONTEXT).toList(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -1975,6 +2066,7 @@ public void testTimeseriesWithMultiValueDimFilterAndOr1() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); AndDimFilter 
andDimFilter2 = new AndDimFilter( @@ -1991,6 +2083,7 @@ public void testTimeseriesWithMultiValueDimFilterAndOr1() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> expectedResults = runner.run(QueryPlus.wrap(query2), CONTEXT).toList(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2013,6 +2106,7 @@ public void testTimeseriesWithMultiValueDimFilterAndOr2() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); AndDimFilter andDimFilter2 = new AndDimFilter( @@ -2029,6 +2123,7 @@ public void testTimeseriesWithMultiValueDimFilterAndOr2() .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> expectedResults = runner.run(QueryPlus.wrap(query2), CONTEXT).toList(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2048,14 +2143,15 @@ public void testTimeSeriesWithFilteredAgg() Iterables.concat( aggregatorFactoryList, Collections.singletonList(new FilteredAggregatorFactory( - new CountAggregatorFactory("filteredAgg"), - new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot", null) - )) + new CountAggregatorFactory("filteredAgg"), + new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot", null) + )) ) ) ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2100,6 +2196,7 @@ public void testTimeSeriesWithFilteredAggDimensionNotPresentNotNullValue() ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> actualResults = 
runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2145,6 +2242,7 @@ public void testTimeSeriesWithFilteredAggDimensionNotPresentNullValue() ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2192,6 +2290,7 @@ public void testTimeSeriesWithFilteredAggValueNotPresent() ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2236,6 +2335,7 @@ public void testTimeSeriesWithFilteredAggInvertedNullValue() ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2260,6 +2360,9 @@ public void testTimeSeriesWithFilteredAggInvertedNullValue() @Test public void testTimeseriesWithTimeColumn() { + // Cannot vectorize due to JavaScript aggregators. 
+ cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .intervals(QueryRunnerTestHelper.firstToThird) @@ -2270,6 +2373,7 @@ public void testTimeseriesWithTimeColumn() ) .granularity(QueryRunnerTestHelper.allGran) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Collections.singletonList( @@ -2342,6 +2446,7 @@ public void testTimeseriesWithBoundFilter1() QueryRunnerTestHelper.qualityUniques ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -2385,7 +2490,11 @@ public void testTimeSeriesWithSelectionFilterLookupExtractionFn() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.dayGran) .filters( - new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "upfront", lookupExtractionFn) + new SelectorDimFilter( + QueryRunnerTestHelper.marketDimension, + "upfront", + lookupExtractionFn + ) ) .intervals(QueryRunnerTestHelper.firstToThird) .aggregators( @@ -2394,6 +2503,7 @@ public void testTimeSeriesWithSelectionFilterLookupExtractionFn() QueryRunnerTestHelper.qualityUniques ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -2451,6 +2561,7 @@ public void testTimeseriesWithLimit() ) .descending(descending) .limit(10) + .context(makeContext()) .build(); // Must create a toolChest so we can run mergeResults. @@ -2467,4 +2578,26 @@ public void testTimeseriesWithLimit() final List list = finalRunner.run(QueryPlus.wrap(query), CONTEXT).toList(); Assert.assertEquals(10, list.size()); } + + private Map makeContext() + { + return makeContext(ImmutableMap.of()); + } + + private Map makeContext(final Map myContext) + { + final Map context = new HashMap<>(); + context.put("vectorize", vectorize ? 
"force" : "false"); + context.put("vectorSize", 16); // Small vector size to ensure we use more than one. + context.putAll(myContext); + return context; + } + + private void cannotVectorize() + { + if (vectorize) { + expectedException.expect(IllegalStateException.class); + expectedException.expectMessage("Cannot vectorize!"); + } + } } diff --git a/processing/src/test/java/org/apache/druid/segment/data/CompressedFloatsSerdeTest.java b/processing/src/test/java/org/apache/druid/segment/data/CompressedFloatsSerdeTest.java index 7d7bd6f851e7..1c9d1dc7c617 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/CompressedFloatsSerdeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/CompressedFloatsSerdeTest.java @@ -144,7 +144,7 @@ public void testWithValues(float[] values) throws Exception private void tryFill(ColumnarFloats indexed, float[] vals, final int startIndex, final int size) { float[] filled = new float[size]; - indexed.fill(startIndex, filled); + indexed.get(filled, startIndex, filled.length); for (int i = startIndex; i < filled.length; i++) { Assert.assertEquals(vals[i + startIndex], filled[i], DELTA); diff --git a/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java b/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java index 0f65d6515b83..f8341ed51b06 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java @@ -167,7 +167,7 @@ public void testValues(long[] values) throws Exception private void tryFill(ColumnarLongs indexed, long[] vals, final int startIndex, final int size) { long[] filled = new long[size]; - indexed.fill(startIndex, filled); + indexed.get(filled, startIndex, size); for (int i = startIndex; i < filled.length; i++) { Assert.assertEquals(vals[i + startIndex], filled[i]); diff --git 
a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index 9200ec48b921..335ca490319e 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -26,6 +26,7 @@ import com.google.common.collect.Iterables; import org.apache.druid.common.guava.SettableSupplier; import org.apache.druid.data.input.InputRow; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; @@ -36,12 +37,14 @@ import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.FilteredAggregatorFactory; +import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.filter.BitmapIndexSelector; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.groupby.RowBasedColumnSelectorFactory; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; @@ -60,6 +63,9 @@ import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorCursor; import 
org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; @@ -71,7 +77,9 @@ import org.junit.runners.Parameterized; import java.io.Closeable; +import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.List; @@ -94,11 +102,12 @@ public abstract class BaseFilterTest protected final IndexBuilder indexBuilder; protected final Function> finisher; - protected StorageAdapter adapter; - protected boolean cnf; - protected boolean optimize; + protected final boolean cnf; + protected final boolean optimize; protected final String testName; + protected StorageAdapter adapter; + // JUnit creates a new test instance for every test method call. // For filter tests, the test setup creates a segment. // Creating a new segment for every test method call is pretty slow, so cache the StorageAdapters. @@ -204,10 +213,11 @@ public static Collection makeConstructors() for (boolean cnf : ImmutableList.of(false, true)) { for (boolean optimize : ImmutableList.of(false, true)) { final String testName = StringUtils.format( - "bitmaps[%s], indexMerger[%s], finisher[%s], optimize[%s]", + "bitmaps[%s], indexMerger[%s], finisher[%s], cnf[%s], optimize[%s]", bitmapSerdeFactoryEntry.getKey(), segmentWriteOutMediumFactoryEntry.getKey(), finisherEntry.getKey(), + cnf, optimize ); final IndexBuilder indexBuilder = IndexBuilder @@ -256,6 +266,20 @@ private Sequence makeCursorSequence(final Filter filter) ); } + private VectorCursor makeVectorCursor(final Filter filter) + { + return adapter.makeVectorCursor( + filter, + Intervals.ETERNITY, + // VirtualColumns do not support vectorization yet. Avoid passing them in, and any tests that need virtual + // columns should skip vectorization tests. 
+ VirtualColumns.EMPTY, + false, + 3, // Vector size smaller than the number of rows, to ensure we use more than one. + null + ); + } + /** * Selects elements from "selectColumn" from rows matching a filter. selectColumn must be a single valued dimension. */ @@ -291,30 +315,66 @@ public List apply(Cursor input) private long selectCountUsingFilteredAggregator(final DimFilter filter) { - final Sequence cursors = makeCursorSequence(makeFilter(filter)); + final Sequence cursors = makeCursorSequence(null); Sequence aggSeq = Sequences.map( cursors, - new Function() - { - @Override - public Aggregator apply(Cursor input) - { - Aggregator agg = new FilteredAggregatorFactory( - new CountAggregatorFactory("count"), - maybeOptimize(filter) - ).factorize(input.getColumnSelectorFactory()); - - for (; !input.isDone(); input.advance()) { - agg.aggregate(); - } - - return agg; + cursor -> { + Aggregator agg = new FilteredAggregatorFactory( + new CountAggregatorFactory("count"), + maybeOptimize(filter) + ).factorize(cursor.getColumnSelectorFactory()); + + for (; !cursor.isDone(); cursor.advance()) { + agg.aggregate(); } + + return agg; } ); return aggSeq.toList().get(0).getLong(); } + private long selectCountUsingVectorizedFilteredAggregator(final DimFilter dimFilter) + { + Preconditions.checkState(makeFilter(dimFilter).canVectorizeMatcher(), "Cannot vectorize filter: %s", dimFilter); + + try (final VectorCursor cursor = makeVectorCursor(null)) { + final FilteredAggregatorFactory aggregatorFactory = new FilteredAggregatorFactory( + new CountAggregatorFactory("count"), + maybeOptimize(dimFilter) + ); + final VectorAggregator aggregator = aggregatorFactory.factorizeVector(cursor.getColumnSelectorFactory()); + final ByteBuffer buf = ByteBuffer.allocate(aggregatorFactory.getMaxIntermediateSizeWithNulls() * 2); + + // Use two slots: one for each form of aggregate. 
+ aggregator.init(buf, 0); + aggregator.init(buf, aggregatorFactory.getMaxIntermediateSizeWithNulls()); + + for (; !cursor.isDone(); cursor.advance()) { + aggregator.aggregate(buf, 0, 0, cursor.getCurrentVectorSize()); + + final int[] positions = new int[cursor.getCurrentVectorSize()]; + Arrays.fill(positions, aggregatorFactory.getMaxIntermediateSizeWithNulls()); + + final int[] allRows = new int[cursor.getCurrentVectorSize()]; + for (int i = 0; i < allRows.length; i++) { + allRows[i] = i; + } + + aggregator.aggregate(buf, cursor.getCurrentVectorSize(), positions, allRows, 0); + } + + final long val1 = (long) aggregator.get(buf, 0); + final long val2 = (long) aggregator.get(buf, aggregatorFactory.getMaxIntermediateSizeWithNulls()); + + if (val1 != val2) { + throw new ISE("Oh no, val1[%d] != val2[%d]", val1, val2); + } + + return val1; + } + } + private List selectColumnValuesMatchingFilterUsingPostFiltering( final DimFilter filter, final String selectColumn @@ -382,6 +442,100 @@ public List apply(Cursor input) return seq.toList().get(0); } + private List selectColumnValuesMatchingFilterUsingVectorizedPostFiltering( + final DimFilter filter, + final String selectColumn + ) + { + final Filter theFilter = makeFilter(filter); + final Filter postFilteringFilter = new Filter() + { + @Override + public T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory bitmapResultFactory) + { + throw new UnsupportedOperationException(); + } + + @Override + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + { + return theFilter.makeMatcher(factory); + } + + @Override + public boolean supportsBitmapIndex(BitmapIndexSelector selector) + { + return false; + } + + @Override + public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) + { + return theFilter.makeVectorMatcher(factory); + } + + @Override + public boolean canVectorizeMatcher() + { + return theFilter.canVectorizeMatcher(); + } + + @Override + public boolean 
supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + { + return false; + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + return 1.0; + } + }; + + try (final VectorCursor cursor = makeVectorCursor(postFilteringFilter)) { + final SingleValueDimensionVectorSelector selector = cursor + .getColumnSelectorFactory() + .makeSingleValueDimensionSelector(new DefaultDimensionSpec(selectColumn, selectColumn)); + + final List values = new ArrayList<>(); + + while (!cursor.isDone()) { + final int[] rowVector = selector.getRowVector(); + for (int i = 0; i < cursor.getCurrentVectorSize(); i++) { + values.add(selector.lookupName(rowVector[i])); + } + cursor.advance(); + } + + return values; + } + } + + private List selectColumnValuesMatchingFilterUsingVectorCursor( + final DimFilter filter, + final String selectColumn + ) + { + try (final VectorCursor cursor = makeVectorCursor(makeFilter(filter))) { + final SingleValueDimensionVectorSelector selector = cursor + .getColumnSelectorFactory() + .makeSingleValueDimensionSelector(new DefaultDimensionSpec(selectColumn, selectColumn)); + + final List values = new ArrayList<>(); + + while (!cursor.isDone()) { + final int[] rowVector = selector.getRowVector(); + for (int i = 0; i < cursor.getCurrentVectorSize(); i++) { + values.add(selector.lookupName(rowVector[i])); + } + cursor.advance(); + } + + return values; + } + } + private List selectColumnValuesMatchingFilterUsingRowBasedColumnSelectorFactory( final DimFilter filter, final String selectColumn @@ -412,22 +566,68 @@ protected void assertFilterMatches( final DimFilter filter, final List expectedRows ) + { + // IncrementalIndex cannot ever vectorize. 
+ final boolean testVectorized = !(adapter instanceof IncrementalIndexStorageAdapter); + assertFilterMatches(filter, expectedRows, testVectorized); + } + + protected void assertFilterMatchesSkipVectorize( + final DimFilter filter, + final List expectedRows + ) + { + assertFilterMatches(filter, expectedRows, false); + } + + private void assertFilterMatches( + final DimFilter filter, + final List expectedRows, + final boolean testVectorized + ) { Assert.assertEquals( "Cursor: " + filter, expectedRows, selectColumnValuesMatchingFilter(filter, "dim0") ); + + if (testVectorized) { + Assert.assertEquals( + "Cursor (vectorized): " + filter, + expectedRows, + selectColumnValuesMatchingFilterUsingVectorCursor(filter, "dim0") + ); + } + Assert.assertEquals( "Cursor with postFiltering: " + filter, expectedRows, selectColumnValuesMatchingFilterUsingPostFiltering(filter, "dim0") ); + + if (testVectorized) { + Assert.assertEquals( + "Cursor with postFiltering (vectorized): " + filter, + expectedRows, + selectColumnValuesMatchingFilterUsingVectorizedPostFiltering(filter, "dim0") + ); + } + Assert.assertEquals( "Filtered aggregator: " + filter, expectedRows.size(), selectCountUsingFilteredAggregator(filter) ); + + if (testVectorized) { + Assert.assertEquals( + "Filtered aggregator (vectorized): " + filter, + expectedRows.size(), + selectCountUsingVectorizedFilteredAggregator(filter) + ); + } + Assert.assertEquals( "RowBasedColumnSelectorFactory: " + filter, expectedRows, diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java index 6e0a3cf050c0..0da79d0fd9ff 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java @@ -436,12 +436,12 @@ public void testNumericMatchTooStrict() @Test public void testNumericMatchVirtualColumn() { - assertFilterMatches( + 
assertFilterMatchesSkipVectorize( new BoundDimFilter("expr", "1", "2", false, false, false, null, StringComparators.NUMERIC), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") ); - assertFilterMatches( + assertFilterMatchesSkipVectorize( new BoundDimFilter("expr", "2", "3", false, false, false, null, StringComparators.NUMERIC), ImmutableList.of() ); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java index a57d36f731eb..676fa4d4743a 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java @@ -45,6 +45,7 @@ import org.junit.runners.Parameterized; import java.io.Closeable; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -97,19 +98,19 @@ public static void tearDown() throws Exception @Test public void testColumnsWithoutNulls() { - assertFilterMatches(new ColumnComparisonDimFilter(ImmutableList.of( + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( DefaultDimensionSpec.of("dim0"), DefaultDimensionSpec.of("dim1") )), ImmutableList.of("2", "5", "8")); - assertFilterMatches(new ColumnComparisonDimFilter(ImmutableList.of( + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( DefaultDimensionSpec.of("dim0"), DefaultDimensionSpec.of("dim2") )), ImmutableList.of("3", "4", "5")); - assertFilterMatches(new ColumnComparisonDimFilter(ImmutableList.of( + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( DefaultDimensionSpec.of("dim1"), DefaultDimensionSpec.of("dim2") )), ImmutableList.of("5", "9")); - assertFilterMatches(new ColumnComparisonDimFilter(ImmutableList.of( + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( 
DefaultDimensionSpec.of("dim0"), DefaultDimensionSpec.of("dim1"), DefaultDimensionSpec.of("dim2") @@ -119,35 +120,56 @@ public void testColumnsWithoutNulls() @Test public void testMissingColumnNotSpecifiedInDimensionList() { - assertFilterMatches( - new ColumnComparisonDimFilter( - ImmutableList.of(DefaultDimensionSpec.of("dim6"), DefaultDimensionSpec.of("dim7")) - ), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") - ); + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( + DefaultDimensionSpec.of("dim6"), + DefaultDimensionSpec.of("dim7") + )), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); + if (NullHandling.replaceWithDefault()) { - assertFilterMatches( + // "" is equivalent to null which is equivalent to a missing dimension + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( + DefaultDimensionSpec.of("dim1"), + DefaultDimensionSpec.of("dim6") + )), ImmutableList.of("0")); + + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( + DefaultDimensionSpec.of("dim2"), + DefaultDimensionSpec.of("dim6") + )), ImmutableList.of("1", "2", "6", "7", "8")); + + assertFilterMatchesSkipVectorize( new ColumnComparisonDimFilter( ImmutableList.of(DefaultDimensionSpec.of("dim1"), DefaultDimensionSpec.of("dim6")) ), ImmutableList.of("0") ); - assertFilterMatches( + assertFilterMatchesSkipVectorize( new ColumnComparisonDimFilter( ImmutableList.of(DefaultDimensionSpec.of("dim2"), DefaultDimensionSpec.of("dim6")) ), ImmutableList.of("1", "2", "6", "7", "8") ); } else { - assertFilterMatches( + // "" is not equivalent to a missing dimension + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( + DefaultDimensionSpec.of("dim1"), + DefaultDimensionSpec.of("dim6") + )), Collections.emptyList()); + + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( + DefaultDimensionSpec.of("dim2"), + 
DefaultDimensionSpec.of("dim6") + )), ImmutableList.of("1", "6", "7", "8")); + + assertFilterMatchesSkipVectorize( new ColumnComparisonDimFilter( ImmutableList.of(DefaultDimensionSpec.of("dim1"), DefaultDimensionSpec.of("dim6")) ), ImmutableList.of() ); - assertFilterMatches( + assertFilterMatchesSkipVectorize( new ColumnComparisonDimFilter( ImmutableList.of(DefaultDimensionSpec.of("dim2"), DefaultDimensionSpec.of("dim6")) ), @@ -165,7 +187,7 @@ public void testSelectorWithLookupExtractionFn() LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, true, null, false, true); - assertFilterMatches(new ColumnComparisonDimFilter(ImmutableList.of( + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( new ExtractionDimensionSpec("dim0", "dim0", lookupFn), new ExtractionDimensionSpec("dim1", "dim1", lookupFn) )), ImmutableList.of("2", "5", "7", "8")); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java index 3d3d50c971a2..1cfd2718146d 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java @@ -117,22 +117,22 @@ public static void tearDown() throws Exception @Test public void testOneSingleValuedStringColumn() { - assertFilterMatches(EDF("dim3 == ''"), ImmutableList.of("0")); - assertFilterMatches(EDF("dim3 == '1'"), ImmutableList.of("3", "4", "6")); - assertFilterMatches(EDF("dim3 == 'a'"), ImmutableList.of("7")); - assertFilterMatches(EDF("dim3 == 1"), ImmutableList.of("3", "4", "6")); - assertFilterMatches(EDF("dim3 == 1.0"), ImmutableList.of("3", "4", "6")); - assertFilterMatches(EDF("dim3 == 1.234"), ImmutableList.of("9")); - assertFilterMatches(EDF("dim3 < '2'"), ImmutableList.of("0", "1", "3", "4", "6", 
"9")); + assertFilterMatchesSkipVectorize(EDF("dim3 == ''"), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(EDF("dim3 == '1'"), ImmutableList.of("3", "4", "6")); + assertFilterMatchesSkipVectorize(EDF("dim3 == 'a'"), ImmutableList.of("7")); + assertFilterMatchesSkipVectorize(EDF("dim3 == 1"), ImmutableList.of("3", "4", "6")); + assertFilterMatchesSkipVectorize(EDF("dim3 == 1.0"), ImmutableList.of("3", "4", "6")); + assertFilterMatchesSkipVectorize(EDF("dim3 == 1.234"), ImmutableList.of("9")); + assertFilterMatchesSkipVectorize(EDF("dim3 < '2'"), ImmutableList.of("0", "1", "3", "4", "6", "9")); if (NullHandling.replaceWithDefault()) { - assertFilterMatches(EDF("dim3 < 2"), ImmutableList.of("0", "3", "4", "6", "7", "9")); - assertFilterMatches(EDF("dim3 < 2.0"), ImmutableList.of("0", "3", "4", "6", "7", "9")); + assertFilterMatchesSkipVectorize(EDF("dim3 < 2"), ImmutableList.of("0", "3", "4", "6", "7", "9")); + assertFilterMatchesSkipVectorize(EDF("dim3 < 2.0"), ImmutableList.of("0", "3", "4", "6", "7", "9")); } else { // Empty String and "a" will not match - assertFilterMatches(EDF("dim3 < 2"), ImmutableList.of("3", "4", "6", "9")); - assertFilterMatches(EDF("dim3 < 2.0"), ImmutableList.of("3", "4", "6", "9")); + assertFilterMatchesSkipVectorize(EDF("dim3 < 2"), ImmutableList.of("3", "4", "6", "9")); + assertFilterMatchesSkipVectorize(EDF("dim3 < 2.0"), ImmutableList.of("3", "4", "6", "9")); } - assertFilterMatches(EDF("like(dim3, '1%')"), ImmutableList.of("1", "3", "4", "6", "9")); + assertFilterMatchesSkipVectorize(EDF("like(dim3, '1%')"), ImmutableList.of("1", "3", "4", "6", "9")); } @Test @@ -141,109 +141,121 @@ public void testOneMultiValuedStringColumn() // Expressions currently treat multi-valued arrays as nulls. // This test is just documenting the current behavior, not necessarily saying it makes sense. 
if (NullHandling.replaceWithDefault()) { - assertFilterMatches(EDF("dim4 == ''"), ImmutableList.of("0", "1", "2", "4", "5", "6", "7", "8")); + assertFilterMatchesSkipVectorize(EDF("dim4 == ''"), ImmutableList.of("0", "1", "2", "4", "5", "6", "7", "8")); } else { - assertFilterMatches(EDF("dim4 == ''"), ImmutableList.of("2")); + assertFilterMatchesSkipVectorize(EDF("dim4 == ''"), ImmutableList.of("2")); // AS per SQL standard null == null returns false. - assertFilterMatches(EDF("dim4 == null"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("dim4 == null"), ImmutableList.of()); } - assertFilterMatches(EDF("dim4 == '1'"), ImmutableList.of()); - assertFilterMatches(EDF("dim4 == '3'"), ImmutableList.of("3")); + assertFilterMatchesSkipVectorize(EDF("dim4 == '1'"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("dim4 == '3'"), ImmutableList.of("3")); } @Test public void testOneLongColumn() { if (NullHandling.replaceWithDefault()) { - assertFilterMatches(EDF("dim1 == ''"), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(EDF("dim1 == ''"), ImmutableList.of("0")); } else { // A long does not match empty string - assertFilterMatches(EDF("dim1 == ''"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("dim1 == ''"), ImmutableList.of()); } - assertFilterMatches(EDF("dim1 == '1'"), ImmutableList.of("1")); - assertFilterMatches(EDF("dim1 == 2"), ImmutableList.of("2")); - assertFilterMatches(EDF("dim1 < '2'"), ImmutableList.of("0", "1")); - assertFilterMatches(EDF("dim1 < 2"), ImmutableList.of("0", "1")); - assertFilterMatches(EDF("dim1 < 2.0"), ImmutableList.of("0", "1")); - assertFilterMatches(EDF("like(dim1, '1%')"), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(EDF("dim1 == '1'"), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(EDF("dim1 == 2"), ImmutableList.of("2")); + assertFilterMatchesSkipVectorize(EDF("dim1 < '2'"), ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(EDF("dim1 < 2"), 
ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(EDF("dim1 < 2.0"), ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(EDF("like(dim1, '1%')"), ImmutableList.of("1")); } @Test public void testOneFloatColumn() { if (NullHandling.replaceWithDefault()) { - assertFilterMatches(EDF("dim2 == ''"), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(EDF("dim2 == ''"), ImmutableList.of("0")); } else { // A float does not match empty string - assertFilterMatches(EDF("dim2 == ''"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("dim2 == ''"), ImmutableList.of()); } - assertFilterMatches(EDF("dim2 == '1'"), ImmutableList.of("1")); - assertFilterMatches(EDF("dim2 == 2"), ImmutableList.of("2")); - assertFilterMatches(EDF("dim2 < '2'"), ImmutableList.of("0", "1")); - assertFilterMatches(EDF("dim2 < 2"), ImmutableList.of("0", "1")); - assertFilterMatches(EDF("dim2 < 2.0"), ImmutableList.of("0", "1")); - assertFilterMatches(EDF("like(dim2, '1%')"), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(EDF("dim2 == '1'"), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(EDF("dim2 == 2"), ImmutableList.of("2")); + assertFilterMatchesSkipVectorize(EDF("dim2 < '2'"), ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(EDF("dim2 < 2"), ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(EDF("dim2 < 2.0"), ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(EDF("like(dim2, '1%')"), ImmutableList.of("1")); } @Test public void testConstantExpression() { - assertFilterMatches(EDF("1 + 1"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); - assertFilterMatches(EDF("0 + 0"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("1 + 1"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); + assertFilterMatchesSkipVectorize(EDF("0 + 0"), ImmutableList.of()); } @Test public void testCompareColumns() { // String vs string - assertFilterMatches(EDF("dim0 
== dim3"), ImmutableList.of("2", "5", "8")); + assertFilterMatchesSkipVectorize(EDF("dim0 == dim3"), ImmutableList.of("2", "5", "8")); if (NullHandling.replaceWithDefault()) { // String vs long - assertFilterMatches(EDF("dim1 == dim3"), ImmutableList.of("0", "2", "5", "8")); + assertFilterMatchesSkipVectorize(EDF("dim1 == dim3"), ImmutableList.of("0", "2", "5", "8")); // String vs float - assertFilterMatches(EDF("dim2 == dim3"), ImmutableList.of("0", "2", "5", "8")); + assertFilterMatchesSkipVectorize(EDF("dim2 == dim3"), ImmutableList.of("0", "2", "5", "8")); } else { // String vs long - assertFilterMatches(EDF("dim1 == dim3"), ImmutableList.of("2", "5", "8")); + assertFilterMatchesSkipVectorize(EDF("dim1 == dim3"), ImmutableList.of("2", "5", "8")); // String vs float - assertFilterMatches(EDF("dim2 == dim3"), ImmutableList.of("2", "5", "8")); + assertFilterMatchesSkipVectorize(EDF("dim2 == dim3"), ImmutableList.of("2", "5", "8")); } // String vs. multi-value string // Expressions currently treat multi-valued arrays as nulls. // This test is just documenting the current behavior, not necessarily saying it makes sense. - assertFilterMatches(EDF("dim0 == dim4"), ImmutableList.of("3")); + assertFilterMatchesSkipVectorize(EDF("dim0 == dim4"), ImmutableList.of("3")); } @Test public void testMissingColumn() { if (NullHandling.replaceWithDefault()) { - assertFilterMatches(EDF("missing == ''"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); + assertFilterMatchesSkipVectorize( + EDF("missing == ''"), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") + ); } else { // AS per SQL standard null == null returns false. 
- assertFilterMatches(EDF("missing == null"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("missing == null"), ImmutableList.of()); } - assertFilterMatches(EDF("missing == '1'"), ImmutableList.of()); - assertFilterMatches(EDF("missing == 2"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("missing == '1'"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("missing == 2"), ImmutableList.of()); if (NullHandling.replaceWithDefault()) { // missing equivaluent to 0 - assertFilterMatches(EDF("missing < '2'"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); - assertFilterMatches(EDF("missing < 2"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); - assertFilterMatches(EDF("missing < 2.0"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); + assertFilterMatchesSkipVectorize( + EDF("missing < '2'"), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") + ); + assertFilterMatchesSkipVectorize( + EDF("missing < 2"), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") + ); + assertFilterMatchesSkipVectorize( + EDF("missing < 2.0"), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") + ); } else { // missing equivalent to null - assertFilterMatches(EDF("missing < '2'"), ImmutableList.of()); - assertFilterMatches(EDF("missing < 2"), ImmutableList.of()); - assertFilterMatches(EDF("missing < 2.0"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("missing < '2'"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("missing < 2"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("missing < 2.0"), ImmutableList.of()); } - assertFilterMatches(EDF("missing > '2'"), ImmutableList.of()); - assertFilterMatches(EDF("missing > 2"), ImmutableList.of()); - assertFilterMatches(EDF("missing > 2.0"), ImmutableList.of()); - assertFilterMatches(EDF("like(missing, '1%')"), ImmutableList.of()); + 
assertFilterMatchesSkipVectorize(EDF("missing > '2'"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("missing > 2"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("missing > 2.0"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(EDF("like(missing, '1%')"), ImmutableList.of()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/filter/FloatAndDoubleFilteringTest.java b/processing/src/test/java/org/apache/druid/segment/filter/FloatAndDoubleFilteringTest.java index 797ffe68c316..1ef93152f361 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/FloatAndDoubleFilteringTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/FloatAndDoubleFilteringTest.java @@ -211,13 +211,13 @@ private void doTestFloatColumnFiltering(final String columnName) String jsFn = "function(x) { return(x === 3 || x === 5) }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(columnName, jsFn, null, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "5") ); String jsFn2 = "function(x) { return(x === 3.0 || x === 5.0) }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(columnName, jsFn2, null, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "5") ); @@ -338,7 +338,7 @@ private void doTestFloatFilterWithExtractionFn(final String columnName) ); String jsFn = "function(x) { return(x === 'Wednesday' || x === 'Thursday') }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(columnName, jsFn, exfn, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "4") ); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java index f06da4f4494b..2786edbb3246 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java +++ 
b/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java @@ -101,27 +101,27 @@ private String jsValueFilter(String value) @Test public void testSingleValueStringColumnWithoutNulls() { - assertFilterMatches(newJavaScriptDimFilter("dim0", jsNullFilter, null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim0", jsValueFilter(""), null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim0", jsValueFilter("0"), null), ImmutableList.of("0")); - assertFilterMatches(newJavaScriptDimFilter("dim0", jsValueFilter("1"), null), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim0", jsNullFilter, null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim0", jsValueFilter(""), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim0", jsValueFilter("0"), null), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim0", jsValueFilter("1"), null), ImmutableList.of("1")); } @Test public void testSingleValueStringColumnWithNulls() { if (NullHandling.replaceWithDefault()) { - assertFilterMatches(newJavaScriptDimFilter("dim1", jsNullFilter, null), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsNullFilter, null), ImmutableList.of("0")); } else { - assertFilterMatches(newJavaScriptDimFilter("dim1", jsNullFilter, null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter(""), null), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsNullFilter, null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter(""), null), ImmutableList.of("0")); } - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("10"), null), ImmutableList.of("1")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("2"), null), 
ImmutableList.of("2")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("1"), null), ImmutableList.of("3")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("def"), null), ImmutableList.of("4")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("abc"), null), ImmutableList.of("5")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("ab"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("10"), null), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("2"), null), ImmutableList.of("2")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("1"), null), ImmutableList.of("3")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("def"), null), ImmutableList.of("4")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("abc"), null), ImmutableList.of("5")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("ab"), null), ImmutableList.of()); } @Test @@ -129,33 +129,45 @@ public void testMultiValueStringColumn() { // multi-val null...... 
if (NullHandling.replaceWithDefault()) { - assertFilterMatches(newJavaScriptDimFilter("dim2", jsNullFilter, null), ImmutableList.of("1", "2", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim2", jsNullFilter, null), + ImmutableList.of("1", "2", "5") + ); } else { - assertFilterMatches(newJavaScriptDimFilter("dim2", jsNullFilter, null), ImmutableList.of("1", "5")); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter(""), null), ImmutableList.of("2")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim2", jsNullFilter, null), ImmutableList.of("1", "5")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim2", jsValueFilter(""), null), ImmutableList.of("2")); } - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("a"), null), ImmutableList.of("0", "3")); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("b"), null), ImmutableList.of("0")); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("c"), null), ImmutableList.of("4")); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("d"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim2", jsValueFilter("a"), null), + ImmutableList.of("0", "3") + ); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim2", jsValueFilter("b"), null), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim2", jsValueFilter("c"), null), ImmutableList.of("4")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim2", jsValueFilter("d"), null), ImmutableList.of()); } @Test public void testMissingColumnSpecifiedInDimensionList() { - assertFilterMatches(newJavaScriptDimFilter("dim3", jsNullFilter, null), ImmutableList.of("0", "1", "2", "3", "4", "5")); - assertFilterMatches(newJavaScriptDimFilter("dim3", jsValueFilter("a"), null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim3", jsValueFilter("b"), null), 
ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim3", jsValueFilter("c"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim3", jsNullFilter, null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim3", jsValueFilter("a"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim3", jsValueFilter("b"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim3", jsValueFilter("c"), null), ImmutableList.of()); } @Test public void testMissingColumnNotSpecifiedInDimensionList() { - assertFilterMatches(newJavaScriptDimFilter("dim4", jsNullFilter, null), ImmutableList.of("0", "1", "2", "3", "4", "5")); - assertFilterMatches(newJavaScriptDimFilter("dim4", jsValueFilter("a"), null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim4", jsValueFilter("b"), null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim4", jsValueFilter("c"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim4", jsNullFilter, null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim4", jsValueFilter("a"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim4", jsValueFilter("b"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim4", jsValueFilter("c"), null), ImmutableList.of()); } @Test @@ -170,20 +182,50 @@ public void testJavascriptFilterWithLookupExtractionFn() LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); - assertFilterMatches(newJavaScriptDimFilter("dim0", jsValueFilter("HELLO"), lookupFn), ImmutableList.of("1")); - 
assertFilterMatches(newJavaScriptDimFilter("dim0", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "2", "3", "4", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim0", jsValueFilter("HELLO"), lookupFn), + ImmutableList.of("1") + ); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim0", jsValueFilter("UNKNOWN"), lookupFn), + ImmutableList.of("0", "2", "3", "4", "5") + ); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("HELLO"), lookupFn), ImmutableList.of("3", "4")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim1", jsValueFilter("HELLO"), lookupFn), + ImmutableList.of("3", "4") + ); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim1", jsValueFilter("UNKNOWN"), lookupFn), + ImmutableList.of("0", "1", "2", "5") + ); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("HELLO"), lookupFn), ImmutableList.of("0", "3")); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "4", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim2", jsValueFilter("HELLO"), lookupFn), + ImmutableList.of("0", "3") + ); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim2", jsValueFilter("UNKNOWN"), lookupFn), + ImmutableList.of("0", "1", "2", "4", "5") + ); - assertFilterMatches(newJavaScriptDimFilter("dim3", jsValueFilter("HELLO"), lookupFn), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim3", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim3", jsValueFilter("HELLO"), lookupFn), + ImmutableList.of() + ); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim3", jsValueFilter("UNKNOWN"), lookupFn), + 
ImmutableList.of("0", "1", "2", "3", "4", "5") + ); - assertFilterMatches(newJavaScriptDimFilter("dim4", jsValueFilter("HELLO"), lookupFn), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim4", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim4", jsValueFilter("HELLO"), lookupFn), + ImmutableList.of() + ); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim4", jsValueFilter("UNKNOWN"), lookupFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); } private JavaScriptDimFilter newJavaScriptDimFilter( diff --git a/processing/src/test/java/org/apache/druid/segment/filter/LongFilteringTest.java b/processing/src/test/java/org/apache/druid/segment/filter/LongFilteringTest.java index 94c5fbde17cb..1cee93ba72ea 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/LongFilteringTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/LongFilteringTest.java @@ -255,7 +255,7 @@ public void testLongColumnFiltering() ); String jsFn = "function(x) { return(x === 3 || x === 5) }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(LONG_COLUMN, jsFn, null, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "5") ); @@ -363,7 +363,7 @@ public void testLongFilterWithExtractionFn() ); String jsFn = "function(x) { return(x === 'Wednesday' || x === 'Thursday') }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(LONG_COLUMN, jsFn, exfn, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "4") ); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java index 55f8c09b9c94..a36aeb910a95 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java +++ 
b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java @@ -69,9 +69,20 @@ public class SelectorFilterTest extends BaseFilterTest ); private static final List ROWS = ImmutableList.of( - PARSER.parseBatch(ImmutableMap.of("dim0", "0", "dim1", "", "dim2", ImmutableList.of("a", "b"), "dim6", "2017-07-25")).get(0), - PARSER.parseBatch(ImmutableMap.of("dim0", "1", "dim1", "10", "dim2", ImmutableList.of(), "dim6", "2017-07-25")).get(0), - PARSER.parseBatch(ImmutableMap.of("dim0", "2", "dim1", "2", "dim2", ImmutableList.of(""), "dim6", "2017-05-25")).get(0), + PARSER.parseBatch(ImmutableMap.of( + "dim0", + "0", + "dim1", + "", + "dim2", + ImmutableList.of("a", "b"), + "dim6", + "2017-07-25" + )).get(0), + PARSER.parseBatch(ImmutableMap.of("dim0", "1", "dim1", "10", "dim2", ImmutableList.of(), "dim6", "2017-07-25")) + .get(0), + PARSER.parseBatch(ImmutableMap.of("dim0", "2", "dim1", "2", "dim2", ImmutableList.of(""), "dim6", "2017-05-25")) + .get(0), PARSER.parseBatch(ImmutableMap.of("dim0", "3", "dim1", "1", "dim2", ImmutableList.of("a"))).get(0), PARSER.parseBatch(ImmutableMap.of("dim0", "4", "dim1", "def", "dim2", ImmutableList.of("c"))).get(0), PARSER.parseBatch(ImmutableMap.of("dim0", "5", "dim1", "abc")).get(0) @@ -107,10 +118,24 @@ public static void tearDown() throws Exception @Test public void testWithTimeExtractionFnNull() { - assertFilterMatches(new SelectorDimFilter("dim0", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim6", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("3", "4", "5")); - assertFilterMatches(new SelectorDimFilter("dim6", "2017-07", new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("0", "1")); - assertFilterMatches(new SelectorDimFilter("dim6", "2017-05", new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("2")); + assertFilterMatches( + new 
SelectorDimFilter("dim0", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), + ImmutableList.of() + ); + assertFilterMatches( + new SelectorDimFilter("dim6", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), + ImmutableList.of("3", "4", "5") + ); + assertFilterMatches(new SelectorDimFilter( + "dim6", + "2017-07", + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true) + ), ImmutableList.of("0", "1")); + assertFilterMatches(new SelectorDimFilter( + "dim6", + "2017-05", + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true) + ), ImmutableList.of("2")); } @Test @@ -187,8 +212,11 @@ public void testMissingColumnNotSpecifiedInDimensionList() @Test public void testExpressionVirtualColumn() { - assertFilterMatches(new SelectorDimFilter("expr", "1.1", null), ImmutableList.of("0", "1", "2", "3", "4", "5")); - assertFilterMatches(new SelectorDimFilter("expr", "1.2", null), ImmutableList.of()); + assertFilterMatchesSkipVectorize( + new SelectorDimFilter("expr", "1.1", null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + assertFilterMatchesSkipVectorize(new SelectorDimFilter("expr", "1.2", null), ImmutableList.of()); } @Test @@ -213,10 +241,16 @@ public void testSelectorWithLookupExtractionFn() assertFilterMatches(new SelectorDimFilter("dim2", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "4", "5")); assertFilterMatches(new SelectorDimFilter("dim3", "HELLO", lookupFn), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim3", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches( + new SelectorDimFilter("dim3", "UNKNOWN", lookupFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); assertFilterMatches(new SelectorDimFilter("dim4", "HELLO", lookupFn), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim4", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches( + new SelectorDimFilter("dim4", "UNKNOWN", lookupFn), + 
ImmutableList.of("0", "1", "2", "3", "4", "5") + ); final Map stringMap2 = ImmutableMap.of( "2", "5" @@ -299,7 +333,10 @@ public void testSelectorWithLookupExtractionFn() // tests that ExtractionDimFilter (identical to SelectorDimFilter now) optimize() with lookup works // remove these when ExtractionDimFilter is removed. - assertFilterMatches(new ExtractionDimFilter("dim1", "UNKNOWN", lookupFn, null), ImmutableList.of("0", "1", "2", "5")); + assertFilterMatches( + new ExtractionDimFilter("dim1", "UNKNOWN", lookupFn, null), + ImmutableList.of("0", "1", "2", "5") + ); assertFilterMatches(new ExtractionDimFilter("dim0", "5", lookupFn2, null), ImmutableList.of("2", "5")); if (NullHandling.replaceWithDefault()) { assertFilterMatches( diff --git a/processing/src/test/java/org/apache/druid/segment/filter/TimeFilteringTest.java b/processing/src/test/java/org/apache/druid/segment/filter/TimeFilteringTest.java index 417256a0501e..6f7cc6731f26 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/TimeFilteringTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/TimeFilteringTest.java @@ -142,7 +142,7 @@ public void testTimeFilterAsLong() ); String jsFn = "function(x) { return(x === 3 || x === 5) }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(ColumnHolder.TIME_COLUMN_NAME, jsFn, null, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "5") ); @@ -206,7 +206,7 @@ public void testTimeFilterWithExtractionFn() ); String jsFn = "function(x) { return(x === 'Wednesday' || x === 'Thursday') }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(ColumnHolder.TIME_COLUMN_NAME, jsFn, exfn, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("2", "3") ); @@ -337,7 +337,7 @@ public void testIntervalFilterOnStringDimension() // increment timestamp by 2 hours String timeBoosterJsFn = "function(x) { return(Number(x) + 7200000) }"; ExtractionFn exFn = new 
JavaScriptExtractionFn(timeBoosterJsFn, true, JavaScriptConfig.getEnabledInstance()); - assertFilterMatches( + assertFilterMatchesSkipVectorize( new IntervalDimFilter( "dim0", Collections.singletonList(Intervals.of("1970-01-01T02:00:00.001Z/1970-01-01T02:00:00.005Z")), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index 816a910328e6..4e3ada665998 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -34,6 +34,7 @@ import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.Query; import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryDataSource; import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.post.ExpressionPostAggregator; @@ -89,6 +90,7 @@ import org.junit.rules.TemporaryFolder; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -216,6 +218,10 @@ public int getMaxSemiJoinRowsInMemory() @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + public boolean cannotVectorize = false; + public boolean skipVectorize = false; + public SpecificSegmentsQuerySegmentWalker walker = null; public QueryLogHook queryLogHook; @@ -486,6 +492,19 @@ public void testQuery( testQuery(plannerConfig, QUERY_CONTEXT_DEFAULT, sql, authenticationResult, expectedQueries, expectedResults); } + private Query recursivelyOverrideContext(final Query q, final Map context) + { + final Query q2; + if (q.getDataSource() instanceof QueryDataSource) { + final Query subQuery = ((QueryDataSource) q.getDataSource()).getQuery(); + q2 = q.withDataSource(new QueryDataSource(recursivelyOverrideContext(subQuery, context))); + 
} else { + q2 = q; + } + + return q2.withOverriddenContext(context); + } + public void testQuery( final PlannerConfig plannerConfig, final Map queryContext, @@ -496,9 +515,38 @@ public void testQuery( ) throws Exception { log.info("SQL: %s", sql); - queryLogHook.clearRecordedQueries(); - final List plannerResults = getResults(plannerConfig, queryContext, sql, authenticationResult); - verifyResults(sql, expectedQueries, expectedResults, plannerResults); + + final List vectorizeValues = new ArrayList<>(); + + vectorizeValues.add("false"); + + if (!skipVectorize) { + vectorizeValues.add("force"); + } + + for (final String vectorize : vectorizeValues) { + queryLogHook.clearRecordedQueries(); + + final Map theQueryContext = new HashMap<>(queryContext); + theQueryContext.put("vectorize", vectorize); + + if (!"false".equals(vectorize)) { + theQueryContext.put("vectorSize", 2); // Small vector size to ensure we use more than one. + } + + final List theQueries = new ArrayList<>(); + for (Query query : expectedQueries) { + theQueries.add(recursivelyOverrideContext(query, theQueryContext)); + } + + if (cannotVectorize && "force".equals(vectorize)) { + expectedException.expect(IllegalStateException.class); + expectedException.expectMessage("Cannot vectorize"); + } + + final List plannerResults = getResults(plannerConfig, theQueryContext, sql, authenticationResult); + verifyResults(sql, theQueries, expectedResults, plannerResults); + } } public List getResults( @@ -603,4 +651,14 @@ public void verifyResults( } } } + + protected void cannotVectorize() + { + cannotVectorize = true; + } + + protected void skipVectorize() + { + skipVectorize = true; + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 2bacbe085150..8641f15efd84 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -500,12 +500,15 @@ public void testUnqualifiedTableName() throws Exception @Test public void testExplainSelectStar() throws Exception { + // Skip vectorization since otherwise the "context" will change for each subtest. + skipVectorize(); + testQuery( "EXPLAIN PLAN FOR SELECT * FROM druid.foo", ImmutableList.of(), ImmutableList.of( new Object[]{ - "DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"filter\":null,\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, cnt:LONG, dim1:STRING, dim2:STRING, dim3:STRING, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX}])\n" + "DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"filter\":null,\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, cnt:LONG, dim1:STRING, dim2:STRING, dim3:STRING, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX}])\n" } ) ); @@ -749,13 
+752,16 @@ public void testSelfJoinWithFallback() throws Exception @Test public void testExplainSelfJoinWithFallback() throws Exception { + // Skip vectorization since otherwise the "context" will change for each subtest. + skipVectorize(); + String emptyStringEq = NullHandling.replaceWithDefault() ? null : "\"\""; final String explanation = "BindableJoin(condition=[=($0, $2)], joinType=[inner])\n" + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":" + emptyStringEq - + ",\"extractionFn\":null}},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n" - + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"filter\":null,\"columns\":[\"dim1\",\"dim2\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING, dim2:STRING}])\n"; + + 
",\"extractionFn\":null}},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n" + + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"filter\":null,\"columns\":[\"dim1\",\"dim2\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING, dim2:STRING}])\n"; testQuery( PLANNER_CONFIG_FALLBACK, @@ -1064,6 +1070,9 @@ public void testHavingOnDoubleSum() throws Exception @Test public void testHavingOnApproximateCountDistinct() throws Exception { + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + testQuery( "SELECT dim2, COUNT(DISTINCT m1) FROM druid.foo GROUP BY dim2 HAVING COUNT(DISTINCT m1) > 1", ImmutableList.of( @@ -1214,6 +1223,9 @@ public void testHavingOnFloatSum() throws Exception @Test public void testColumnComparison() throws Exception { + // Cannot vectorize due to expression filter. + cannotVectorize(); + testQuery( "SELECT dim1, m1, COUNT(*) FROM druid.foo WHERE m1 - 1 = dim1 GROUP BY dim1, m1", ImmutableList.of( @@ -1579,6 +1591,9 @@ public void testPruneDeadAggregatorsThroughHaving() throws Exception @Test public void testGroupByCaseWhen() throws Exception { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + testQuery( "SELECT\n" + " CASE EXTRACT(DAY FROM __time)\n" @@ -1629,6 +1644,9 @@ public void testGroupByCaseWhen() throws Exception @Test public void testGroupByCaseWhenOfTripleAnd() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " CASE WHEN m1 > 1 AND m1 < 5 AND cnt = 1 THEN 'x' ELSE NULL END," @@ -1749,6 +1767,9 @@ public void testCoalesceColumns() throws Exception // Doesn't conform to the SQL standard, but it's how we do it. // This example is used in the sql.md doc. + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT COALESCE(dim2, dim1), COUNT(*) FROM druid.foo GROUP BY COALESCE(dim2, dim1)\n", ImmutableList.of( @@ -1901,6 +1922,9 @@ public void testGroupByWithFilterMatchingNothing() throws Exception // This query should actually return [0, null] rather than an empty result set, but it doesn't. // This test just "documents" the current behavior. + // Cannot vectorize due to "longMax" aggregator. + cannotVectorize(); + testQuery( "SELECT COUNT(*), MAX(cnt) FROM druid.foo WHERE dim1 = 'foobar'", ImmutableList.of( @@ -1923,6 +1947,9 @@ public void testGroupByWithFilterMatchingNothing() throws Exception @Test public void testGroupByWithFilterMatchingNothingWithGroupByLiteral() throws Exception { + // Cannot vectorize due to "longMax" aggregator. + cannotVectorize(); + testQuery( "SELECT COUNT(*), MAX(cnt) FROM druid.foo WHERE dim1 = 'foobar' GROUP BY 'dummy'", ImmutableList.of( @@ -1994,6 +2021,9 @@ public void testCountNullableColumn() throws Exception @Test public void testCountNullableExpression() throws Exception { + // Cannot vectorize due to expression filter. 
+ cannotVectorize(); + testQuery( "SELECT COUNT(CASE WHEN dim2 = 'abc' THEN 'yes' WHEN dim2 = 'def' THEN 'yes' END) FROM druid.foo", ImmutableList.of( @@ -2092,6 +2122,9 @@ public void testCountStarOnView() throws Exception @Test public void testExplainCountStarOnView() throws Exception { + // Skip vectorization since otherwise the "context" will change for each subtest. + skipVectorize(); + final String explanation = "DruidQueryRel(query=[{" + "\"queryType\":\"timeseries\"," @@ -2104,7 +2137,7 @@ public void testExplainCountStarOnView() throws Exception + "\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}]," + "\"postAggregations\":[]," + "\"limit\":2147483647," - + "\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"}}]" + + "\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"}}]" + ", signature=[{a0:LONG}])\n"; testQuery( @@ -2299,6 +2332,9 @@ public void testFilterOnStringAsNumber() throws Exception @Test public void testSimpleAggregations() throws Exception { + // Cannot vectorize due to "longMax" aggregator. + cannotVectorize(); + testQuery( "SELECT COUNT(*), COUNT(cnt), COUNT(dim1), AVG(cnt), SUM(cnt), SUM(cnt) + MIN(cnt) + MAX(cnt), COUNT(dim2) FROM druid.foo", ImmutableList.of( @@ -2388,6 +2424,9 @@ public void testGroupByWithSortOnPostAggregationNoTopNConfig() throws Exception { // Use PlannerConfig to disable topN, so this query becomes a groupBy. + // Cannot vectorize due to "floatMin", "floatMax" aggregators. 
+ cannotVectorize(); + testQuery( PLANNER_CONFIG_NO_TOPN, "SELECT dim1, MIN(m1) + MAX(m1) AS x FROM druid.foo GROUP BY dim1 ORDER BY x LIMIT 3", @@ -2431,6 +2470,9 @@ public void testGroupByWithSortOnPostAggregationNoTopNContext() throws Exception { // Use context to disable topN, so this query becomes a groupBy. + // Cannot vectorize due to "floatMin", "floatMax" aggregators. + cannotVectorize(); + testQuery( PLANNER_CONFIG_DEFAULT, QUERY_CONTEXT_NO_TOPN, @@ -2477,6 +2519,9 @@ public void testGroupByWithSortOnPostAggregationNoTopNContext() throws Exception @Test public void testFilteredAggregations() throws Exception { + // Cannot vectorize due to "cardinality", "longMax" aggregators. + cannotVectorize(); + testQuery( "SELECT " + "SUM(case dim1 when 'abc' then cnt end), " @@ -2648,6 +2693,9 @@ public void testFilteredAggregationWithNotIn() throws Exception @Test public void testExpressionAggregations() throws Exception { + // Cannot vectorize due to "doubleMax" aggregator. + cannotVectorize(); + final ExprMacroTable macroTable = CalciteTests.createExprMacroTable(); testQuery( @@ -2686,6 +2734,9 @@ public void testExpressionAggregations() throws Exception @Test public void testExpressionFilteringAndGrouping() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " FLOOR(m1 / 2) * 2,\n" @@ -2732,6 +2783,9 @@ public void testExpressionFilteringAndGrouping() throws Exception @Test public void testExpressionFilteringAndGroupingUsingCastToLong() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " CAST(m1 AS BIGINT) / 2 * 2,\n" @@ -2780,6 +2834,9 @@ public void testExpressionFilteringAndGroupingUsingCastToLong() throws Exception @Test public void testExpressionFilteringAndGroupingOnStringCastToNumber() throws Exception { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + testQuery( "SELECT\n" + " FLOOR(CAST(dim1 AS FLOAT) / 2) * 2,\n" @@ -3468,6 +3525,9 @@ public void testCountStarWithTimeFilterOnLongColumnUsingTimestampToMillis() thro @Test public void testSumOfString() throws Exception { + // Cannot vectorize due to expressions in aggregators. + cannotVectorize(); + testQuery( "SELECT SUM(CAST(dim1 AS INTEGER)) FROM druid.foo", ImmutableList.of( @@ -3495,6 +3555,9 @@ public void testSumOfString() throws Exception @Test public void testSumOfExtractionFn() throws Exception { + // Cannot vectorize due to expressions in aggregators. + cannotVectorize(); + testQuery( "SELECT SUM(CAST(SUBSTRING(dim1, 1, 10) AS INTEGER)) FROM druid.foo", ImmutableList.of( @@ -3522,6 +3585,9 @@ public void testSumOfExtractionFn() throws Exception @Test public void testTimeseriesWithTimeFilterOnLongColumnUsingMillisToTimestamp() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " FLOOR(MILLIS_TO_TIMESTAMP(cnt) TO YEAR),\n" @@ -3767,6 +3833,9 @@ public void testSelectDistinctWithSortAsOuterQuery4() throws Exception @Test public void testCountDistinct() throws Exception { + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + testQuery( "SELECT SUM(cnt), COUNT(distinct dim2), COUNT(distinct unique_dim1) FROM druid.foo", ImmutableList.of( @@ -3799,6 +3868,9 @@ public void testCountDistinct() throws Exception @Test public void testCountDistinctOfCaseWhen() throws Exception { + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + testQuery( "SELECT\n" + "COUNT(DISTINCT CASE WHEN m1 >= 4 THEN m1 END),\n" @@ -3891,6 +3963,9 @@ public void testApproxCountDistinctWhenHllDisabled() throws Exception { // When HLL is disabled, APPROX_COUNT_DISTINCT is still approximate. + // Cannot vectorize due to "cardinality" aggregator. 
+ cannotVectorize(); + testQuery( PLANNER_CONFIG_NO_HLL, "SELECT APPROX_COUNT_DISTINCT(dim2) FROM druid.foo", @@ -3977,6 +4052,9 @@ public void testExactCountDistinctWithGroupingAndOtherAggregators() throws Excep @Test public void testApproxCountDistinct() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " SUM(cnt),\n" @@ -4054,6 +4132,9 @@ public void testApproxCountDistinct() throws Exception @Test public void testNestedGroupBy() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " FLOOR(__time to hour) AS __time,\n" @@ -4172,10 +4253,13 @@ public void testDoubleNestedGroupBy() throws Exception @Test public void testExplainDoubleNestedGroupBy() throws Exception { + // Skip vectorization since otherwise the "context" will change for each subtest. + skipVectorize(); + final String explanation = - "DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"longSum\",\"name\":\"a0\",\"fieldName\":\"cnt\",\"expression\":null},{\"type\":\"count\",\"name\":\"a1\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"}}], signature=[{a0:LONG, a1:LONG}])\n" - + " 
DruidOuterQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"aggregations\":[{\"type\":\"longSum\",\"name\":\"a0\",\"fieldName\":\"cnt\",\"expression\":null}],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"},\"descending\":false}], signature=[{d0:STRING, a0:LONG}])\n" - + " DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\"},{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d1\",\"outputType\":\"STRING\"}],\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"},\"descending\":false}], signature=[{d0:STRING, d1:STRING, a0:LONG}])\n"; + 
"DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"longSum\",\"name\":\"a0\",\"fieldName\":\"cnt\",\"expression\":null},{\"type\":\"count\",\"name\":\"a1\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"}}], signature=[{a0:LONG, a1:LONG}])\n" + + " DruidOuterQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"aggregations\":[{\"type\":\"longSum\",\"name\":\"a0\",\"fieldName\":\"cnt\",\"expression\":null}],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"},\"descending\":false}], signature=[{d0:STRING, a0:LONG}])\n" + + " 
DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\"},{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d1\",\"outputType\":\"STRING\"}],\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"},\"descending\":false}], signature=[{d0:STRING, d1:STRING, a0:LONG}])\n"; testQuery( "EXPLAIN PLAN FOR SELECT SUM(cnt), COUNT(*) FROM (\n" @@ -4242,6 +4326,9 @@ public void testExactCountDistinctUsingSubquery() throws Exception @Test public void testMinMaxAvgDailyCountWithLimit() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT * FROM (" + " SELECT max(cnt), min(cnt), avg(cnt), TIME_EXTRACT(max(t), 'EPOCH') last_time, count(1) num_days FROM (\n" @@ -4305,6 +4392,9 @@ public void testMinMaxAvgDailyCountWithLimit() throws Exception @Test public void testAvgDailyCountDistinct() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " AVG(u)\n" @@ -4556,6 +4646,9 @@ public void testRemovableLeftJoin() throws Exception @Test public void testExactCountDistinctOfSemiJoinResult() throws Exception { + // Cannot vectorize due to extraction dimension spec. 
+ cannotVectorize(); + testQuery( "SELECT COUNT(*)\n" + "FROM (\n" @@ -4630,10 +4723,13 @@ public void testMaxSemiJoinRowsInMemory() throws Exception @Test public void testExplainExactCountDistinctOfSemiJoinResult() throws Exception { + // Skip vectorization since otherwise the "context" will change for each subtest. + skipVectorize(); + final String explanation = - "DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"}}], signature=[{a0:LONG}])\n" - + " DruidSemiJoin(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"},\"descending\":false}], leftExpressions=[[SUBSTRING($3, 1, 1)]], rightKeys=[[0]])\n" - + " 
DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":null,\"extractionFn\":null}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"},\"descending\":false}], signature=[{d0:STRING}])\n"; + "DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"}}], signature=[{a0:LONG}])\n" + + " 
DruidSemiJoin(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"},\"descending\":false}], leftExpressions=[[SUBSTRING($3, 1, 1)]], rightKeys=[[0]])\n" + + " DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":null,\"extractionFn\":null}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"},\"descending\":false}], signature=[{d0:STRING}])\n"; testQuery( "EXPLAIN PLAN FOR SELECT COUNT(*)\n" @@ -4926,6 +5022,9 @@ public void testHistogramUsingSubqueryWithSort() throws Exception @Test public void testCountDistinctArithmetic() throws Exception { + // Cannot vectorize due to "cardinality" aggregator. 
+ cannotVectorize(); + testQuery( "SELECT\n" + " SUM(cnt),\n" @@ -4970,6 +5069,9 @@ public void testCountDistinctArithmetic() throws Exception @Test public void testCountDistinctOfSubstring() throws Exception { + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + testQuery( "SELECT COUNT(DISTINCT SUBSTRING(dim1, 1, 1)) FROM druid.foo WHERE dim1 <> ''", ImmutableList.of( @@ -5009,6 +5111,9 @@ public void testCountDistinctOfTrim() throws Exception { // Test a couple different syntax variants of TRIM. + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT COUNT(DISTINCT TRIM(BOTH ' ' FROM dim1)) FROM druid.foo WHERE TRIM(dim1) <> ''", ImmutableList.of( @@ -5044,6 +5149,9 @@ public void testSillyQuarters() throws Exception { // Like FLOOR(__time TO QUARTER) but silly. + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT CAST((EXTRACT(MONTH FROM __time) - 1 ) / 3 + 1 AS INTEGER) AS quarter, COUNT(*)\n" + "FROM foo\n" @@ -5072,6 +5180,9 @@ public void testSillyQuarters() throws Exception @Test public void testRegexpExtract() throws Exception { + // Cannot vectorize due to extractionFn in dimension spec. + cannotVectorize(); + String nullValue = NullHandling.replaceWithDefault() ? "" : null; testQuery( "SELECT DISTINCT\n" @@ -5418,6 +5529,9 @@ public void testFilterOnTimeFloorComparisonMisaligned() throws Exception @Test public void testFilterOnTimeExtract() throws Exception { + // Cannot vectorize due to expression filter. + cannotVectorize(); + testQuery( "SELECT COUNT(*) FROM druid.foo\n" + "WHERE EXTRACT(YEAR FROM __time) = 2000\n" @@ -5446,6 +5560,9 @@ public void testFilterOnTimeExtract() throws Exception @Test public void testFilterOnTimeExtractWithMultipleDays() throws Exception { + // Cannot vectorize due to expression filters. 
+ cannotVectorize(); + testQuery( "SELECT COUNT(*) FROM druid.foo\n" + "WHERE EXTRACT(YEAR FROM __time) = 2000\n" @@ -5497,6 +5614,9 @@ public void testFilterOnTimeFloorMisaligned() throws Exception @Test public void testGroupByFloor() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( PLANNER_CONFIG_NO_SUBQUERIES, // Sanity check; this simple query should work with subqueries disabled. "SELECT floor(CAST(dim1 AS float)), COUNT(*) FROM druid.foo GROUP BY floor(CAST(dim1 AS float))", @@ -5526,6 +5646,9 @@ public void testGroupByFloor() throws Exception @Test public void testGroupByFloorWithOrderBy() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT floor(CAST(dim1 AS float)) AS fl, COUNT(*) FROM druid.foo GROUP BY floor(CAST(dim1 AS float)) ORDER BY fl DESC", ImmutableList.of( @@ -5577,6 +5700,9 @@ public void testGroupByFloorWithOrderBy() throws Exception @Test public void testGroupByFloorTimeAndOneOtherDimensionWithOrderBy() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT floor(__time TO year), dim2, COUNT(*)" + " FROM druid.foo" @@ -5652,6 +5778,9 @@ public void testGroupByFloorTimeAndOneOtherDimensionWithOrderBy() throws Excepti @Test public void testGroupByStringLength() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT CHARACTER_LENGTH(dim1), COUNT(*) FROM druid.foo GROUP BY CHARACTER_LENGTH(dim1)", ImmutableList.of( @@ -5677,6 +5806,9 @@ public void testGroupByStringLength() throws Exception @Test public void testFilterAndGroupByLookup() throws Exception { + // Cannot vectorize due to extraction dimension specs. + cannotVectorize(); + String nullValue = NullHandling.replaceWithDefault() ? 
"" : null; final RegisteredLookupExtractionFn extractionFn = new RegisteredLookupExtractionFn( null, @@ -5731,6 +5863,9 @@ public void testFilterAndGroupByLookup() throws Exception @Test public void testCountDistinctOfLookup() throws Exception { + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + final RegisteredLookupExtractionFn extractionFn = new RegisteredLookupExtractionFn( null, "lookyloo", @@ -5935,6 +6070,9 @@ public void testTimeseriesUsingTimeFloor() throws Exception @Test public void testTimeseriesUsingTimeFloorWithTimeShift() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT SUM(cnt), gran FROM (\n" + " SELECT TIME_FLOOR(TIME_SHIFT(__time, 'P1D', -1), 'P1M') AS gran,\n" @@ -5983,6 +6121,9 @@ public void testTimeseriesUsingTimeFloorWithTimeShift() throws Exception @Test public void testTimeseriesUsingTimeFloorWithTimestampAdd() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT SUM(cnt), gran FROM (\n" + " SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'P1M') AS gran,\n" @@ -6233,6 +6374,9 @@ public void testTimeseriesUsingFloorPlusCastAsDate() throws Exception @Test public void testTimeseriesDescending() throws Exception { + // Cannot vectorize due to descending order. + cannotVectorize(); + testQuery( "SELECT gran, SUM(cnt) FROM (\n" + " SELECT floor(__time TO month) AS gran,\n" @@ -6260,6 +6404,9 @@ public void testTimeseriesDescending() throws Exception @Test public void testGroupByExtractYear() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " EXTRACT(YEAR FROM __time) AS \"year\",\n" @@ -6306,6 +6453,9 @@ public void testGroupByExtractYear() throws Exception @Test public void testGroupByFormatYearAndMonth() throws Exception { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + testQuery( "SELECT\n" + " TIME_FORMAT(__time, 'yyyy MM') AS \"year\",\n" @@ -6352,6 +6502,9 @@ public void testGroupByFormatYearAndMonth() throws Exception @Test public void testGroupByExtractFloorTime() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + "EXTRACT(YEAR FROM FLOOR(__time TO YEAR)) AS \"year\", SUM(cnt)\n" @@ -6384,6 +6537,9 @@ public void testGroupByExtractFloorTime() throws Exception @Test public void testGroupByExtractFloorTimeLosAngeles() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( PLANNER_CONFIG_DEFAULT, QUERY_CONTEXT_LOS_ANGELES, @@ -6505,6 +6661,9 @@ public void testTimeseriesWithOrderByAndLimit() throws Exception @Test public void testGroupByTimeAndOtherDimension() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, dim2, cnt FROM druid.foo) AS x\n" @@ -6621,15 +6780,18 @@ public void testUsingSubqueryAsPartOfOrFilter() throws Exception // This query should ideally be plannable without fallback, but it's not. The "OR" means it isn't really // a semiJoin and so the filter condition doesn't get converted. + // Skip vectorization since otherwise the "context" will change for each subtest. 
+ skipVectorize(); + final String explanation = "BindableSort(sort0=[$1], dir0=[ASC])\n" + " BindableAggregate(group=[{0, 1}], EXPR$2=[COUNT()])\n" + " BindableFilter(condition=[OR(=($0, 'xxx'), CAST(AND(IS NOT NULL($4), <>($2, 0), IS NOT NULL($1))):BOOLEAN)])\n" + " BindableJoin(condition=[=($1, $3)], joinType=[left])\n" + " BindableJoin(condition=[true], joinType=[inner])\n" - + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"filter\":null,\"columns\":[\"dim1\",\"dim2\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING, dim2:STRING}])\n" - + " DruidQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":{\"type\":\"like\",\"dimension\":\"dim1\",\"pattern\":\"%bc\",\"escape\":null,\"extractionFn\":null},\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"}}], signature=[{a0:LONG}])\n" - + " 
DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[{\"type\":\"expression\",\"name\":\"d1:v\",\"expression\":\"1\",\"outputType\":\"LONG\"}],\"filter\":{\"type\":\"like\",\"dimension\":\"dim1\",\"pattern\":\"%bc\",\"escape\":null,\"extractionFn\":null},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\"},{\"type\":\"default\",\"dimension\":\"d1:v\",\"outputName\":\"d1\",\"outputType\":\"LONG\"}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\"},\"descending\":false}], signature=[{d0:STRING, d1:LONG}])\n"; + + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"filter\":null,\"columns\":[\"dim1\",\"dim2\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING, dim2:STRING}])\n" + + " 
DruidQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":{\"type\":\"like\",\"dimension\":\"dim1\",\"pattern\":\"%bc\",\"escape\":null,\"extractionFn\":null},\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"}}], signature=[{a0:LONG}])\n" + + " DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[{\"type\":\"expression\",\"name\":\"d1:v\",\"expression\":\"1\",\"outputType\":\"LONG\"}],\"filter\":{\"type\":\"like\",\"dimension\":\"dim1\",\"pattern\":\"%bc\",\"escape\":null,\"extractionFn\":null},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\"},{\"type\":\"default\",\"dimension\":\"d1:v\",\"outputName\":\"d1\",\"outputType\":\"LONG\"}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"vectorize\":\"false\"},\"descending\":false}], signature=[{d0:STRING, d1:LONG}])\n"; final String theQuery = "SELECT dim1, dim2, COUNT(*) FROM druid.foo\n" + "WHERE dim1 = 'xxx' OR dim2 IN (SELECT dim1 FROM druid.foo WHERE dim1 LIKE '%bc')\n" @@ -6811,6 +6973,9 @@ public void testSemiJoinWithOuterTimeExtract() throws Exception @Test public 
void testUsingSubqueryWithExtractionFns() throws Exception { + // Cannot vectorize due to extraction dimension specs. + cannotVectorize(); + testQuery( "SELECT dim2, COUNT(*) FROM druid.foo " + "WHERE substring(dim2, 1, 1) IN (SELECT substring(dim1, 1, 1) FROM druid.foo WHERE dim1 <> '')" @@ -7077,6 +7242,9 @@ public void testSortProjectAfterNestedGroupBy() throws Exception @Test public void testPostAggWithTimeseries() throws Exception { + // Cannot vectorize due to descending order. + cannotVectorize(); + testQuery( "SELECT " + " FLOOR(__time TO YEAR), " @@ -7346,6 +7514,9 @@ public void testRequireTimeConditionPositive() throws Exception ) ); + // Cannot vectorize next test due to "cardinality" aggregator. + cannotVectorize(); + // semi-join requires time condition on both left and right query testQuery( PLANNER_CONFIG_REQUIRE_TIME_CONDITION, From f872d70fa728dfe093ba02bda4deec5545ef8502 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 2 Jan 2019 09:38:26 -0800 Subject: [PATCH 03/20] WIP --- .../src/main/java/org/apache/druid/query/QueryContexts.java | 1 + 1 file changed, 1 insertion(+) diff --git a/processing/src/main/java/org/apache/druid/query/QueryContexts.java b/processing/src/main/java/org/apache/druid/query/QueryContexts.java index c4cd74b7241b..d5003e26cca3 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryContexts.java +++ b/processing/src/main/java/org/apache/druid/query/QueryContexts.java @@ -52,6 +52,7 @@ public class QueryContexts public static final long DEFAULT_TIMEOUT_MILLIS = TimeUnit.MINUTES.toMillis(5); public static final long NO_TIMEOUT = 0; + @SuppressWarnings("unused") // Used by Jackson serialization public enum Vectorize { FALSE { From 8ebe2683e1f38192a21f371b74541c98852e46d8 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 2 Jan 2019 11:06:53 -0800 Subject: [PATCH 04/20] Adjustments for unused things. 
--- .../NullableAggregatorFactory.java | 11 ++--- .../epinephelinae/GroupByQueryEngineV2.java | 2 +- .../vector/VectorGroupByEngine.java | 7 ++-- .../timeseries/TimeseriesQueryEngine.java | 2 +- .../apache/druid/segment/CursorFactory.java | 1 - .../segment/QueryableIndexStorageAdapter.java | 3 +- .../vector/DimensionVectorSelector.java | 42 ------------------- .../druid/segment/vector/VectorCursor.java | 1 + 8 files changed, 13 insertions(+), 56 deletions(-) delete mode 100644 processing/src/main/java/org/apache/druid/segment/vector/DimensionVectorSelector.java diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java index ad8c0a5c043f..e88eac3428e5 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java @@ -107,7 +107,7 @@ protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnS /** * Creates an {@link BufferAggregator} to aggregate values from several rows into a ByteBuffer. * - * @param columnSelectorFactory columnSelectorFactory + * @param columnSelectorFactory columnSelectorFactory in case any other columns are needed. * @param selector {@link ColumnValueSelector} for the column to aggregate. * * @see BufferAggregator @@ -118,15 +118,16 @@ protected abstract BufferAggregator factorizeBuffered( ); /** - * Creates an {@link BufferAggregator} to aggregate values from several rows into a ByteBuffer. + * Creates a {@link VectorAggregator} to aggregate values from several rows into a ByteBuffer. * - * @param columnSelectorFactory columnSelectorFactory - * @param selector {@link ColumnValueSelector} for the column to aggregate. + * @param columnSelectorFactory columnSelectorFactory in case any other columns are needed. 
+ * @param selector {@link VectorValueSelector} for the column to aggregate. * * @see BufferAggregator */ protected VectorAggregator factorizeVector( - VectorColumnSelectorFactory columnSelectorFactory, + // Not used by current aggregators, but here for parity with "factorizeBuffered". + @SuppressWarnings("unused") VectorColumnSelectorFactory columnSelectorFactory, VectorValueSelector selector ) { diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 17098fcdce9e..e6ef6483335b 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -125,7 +125,7 @@ public static Sequence process( final Interval interval = Iterables.getOnlyElement(query.getIntervals()); final boolean doVectorize = QueryContexts.getVectorize(query).shouldVectorize( - VectorGroupByEngine.canVectorize(query, storageAdapter, filter, interval) + VectorGroupByEngine.canVectorize(query, storageAdapter, filter) ); final Sequence result; diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java index 98d9a465358b..a65ae0099510 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -71,8 +71,7 @@ private VectorGroupByEngine() public static boolean canVectorize( final GroupByQuery query, final StorageAdapter adapter, - @Nullable final Filter filter, - final Interval interval + @Nullable final Filter filter ) { // Not yet supported: @@ -81,7 +80,7 @@ public static boolean 
canVectorize( return GroupByQueryEngineV2.isAllSingleValueDims(adapter, query.getDimensions()) && query.getDimensions().stream().allMatch(DimensionSpec::canVectorize) && query.getAggregatorSpecs().stream().allMatch(AggregatorFactory::canVectorize) - && adapter.canVectorize(filter, interval, query.getVirtualColumns(), false); + && adapter.canVectorize(filter, query.getVirtualColumns(), false); } public static Sequence process( @@ -94,7 +93,7 @@ public static Sequence process( final GroupByQueryConfig config ) { - if (!canVectorize(query, storageAdapter, filter, interval)) { + if (!canVectorize(query, storageAdapter, filter)) { throw new ISE("Cannot vectorize"); } diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java index d24822acc631..612452261200 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java @@ -85,7 +85,7 @@ public Sequence> process(final TimeseriesQuery que final boolean descending = query.isDescending(); final boolean doVectorize = QueryContexts.getVectorize(query).shouldVectorize( - adapter.canVectorize(filter, interval, query.getVirtualColumns(), descending) + adapter.canVectorize(filter, query.getVirtualColumns(), descending) && query.getAggregatorSpecs().stream().allMatch(AggregatorFactory::canVectorize) ); diff --git a/processing/src/main/java/org/apache/druid/segment/CursorFactory.java b/processing/src/main/java/org/apache/druid/segment/CursorFactory.java index 6a5d7cf7400d..809e7b85b063 100644 --- a/processing/src/main/java/org/apache/druid/segment/CursorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/CursorFactory.java @@ -43,7 +43,6 @@ public interface CursorFactory */ default boolean canVectorize( @Nullable Filter filter, - Interval interval, VirtualColumns 
virtualColumns, boolean descending ) diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java index 6faacca88865..d055b4bb2f64 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java @@ -206,7 +206,6 @@ public DateTime getMaxIngestedEventTime() @Override public boolean canVectorize( @Nullable final Filter filter, - final Interval interval, final VirtualColumns virtualColumns, final boolean descending ) @@ -237,7 +236,7 @@ public VectorCursor makeVectorCursor( @Nullable final QueryMetrics queryMetrics ) { - if (!canVectorize(filter, interval, virtualColumns, descending)) { + if (!canVectorize(filter, virtualColumns, descending)) { throw new ISE("Cannot vectorize. Check 'canVectorize' before calling 'makeVectorCursor'."); } diff --git a/processing/src/main/java/org/apache/druid/segment/vector/DimensionVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/DimensionVectorSelector.java deleted file mode 100644 index 72384aec195a..000000000000 --- a/processing/src/main/java/org/apache/druid/segment/vector/DimensionVectorSelector.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.vector; - -import org.apache.druid.query.monomorphicprocessing.CalledFromHotLoop; -import org.apache.druid.segment.IdLookup; - -import javax.annotation.Nullable; - -/** - * Common interf - */ -public interface DimensionVectorSelector extends VectorSizeInspector -{ - int getValueCardinality(); - - @CalledFromHotLoop - @Nullable - String lookupName(int id); - - boolean nameLookupPossibleInAdvance(); - - @Nullable - IdLookup idLookup(); -} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorCursor.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorCursor.java index a8f5637b4f55..fde58855a53e 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/VectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorCursor.java @@ -65,6 +65,7 @@ public interface VectorCursor extends VectorSizeInspector, Closeable /** * Resets the cursor back to its original state. Useful for query engines that want to make multiple passes. */ + @SuppressWarnings("unused") /* Not currently used, but anticipated to be used by topN in the future. */ void reset(); /** From 76c573aa8a5dc52eb52aa31d4d7fa035cd3d13a9 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 2 Jan 2019 12:51:21 -0800 Subject: [PATCH 05/20] Adjust javadocs. 
--- .../query/groupby/epinephelinae/VectorGrouper.java | 2 +- .../druid/segment/DimensionDictionarySelector.java | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/VectorGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/VectorGrouper.java index e802a637006d..1da43f8a99a6 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/VectorGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/VectorGrouper.java @@ -34,7 +34,7 @@ public interface VectorGrouper extends Closeable { /** - * Initialize the grouper. This method needs to be called before calling {@link #aggregateVector(int[], int)}. + * Initialize the grouper. This method needs to be called before calling {@link #aggregateVector}. */ void initVectorized(int maxVectorSize); diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java b/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java index 03783aade0cf..402380096fbc 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java @@ -82,12 +82,12 @@ public interface DimensionDictionarySelector * Returns true if it is possible to {@link #lookupName(int)} by ids from 0 to {@link #getValueCardinality()} * before the rows with those ids are returned. * - *

<p>Returns false if {@link #lookupName(int)} could be called with ids, returned from the most recent call of {@link - * #getRow()} on this DimensionSelector, but not earlier. If {@link #getValueCardinality()} of this DimensionSelector - * additionally returns {@link #CARDINALITY_UNKNOWN}, {@code lookupName()} couldn't be called with ids, returned by - * not the most recent call of {@link #getRow()}, i. e. names for ids couldn't be looked up "later". If {@link - * #getValueCardinality()} returns a non-negative number, {@code lookupName()} could be called with any ids, returned - * from {@code #getRow()} since the creation of this DimensionSelector. + *

<p>Returns false if {@link #lookupName(int)} could be called with ids, returned from the most recent row (or row + * vector) returned by this DimensionSelector, but not earlier. If {@link #getValueCardinality()} of this + * DimensionSelector additionally returns {@link #CARDINALITY_UNKNOWN}, {@code lookupName()} couldn't be called with + * ids, returned by not the most recent row (or row vector), i. e. names for ids couldn't be looked up "later". If + * {@link #getValueCardinality()} returns a non-negative number, {@code lookupName()} could be called with any ids, + * returned from rows (or row vectors) returned since the creation of this DimensionSelector. * *

<p>If {@link #lookupName(int)} is called with an ineligible id, result is undefined: exception could be thrown, or * null returned, or some other random value. From 27af43d2a3092187a38f3514a0f1f7bd68c0be76 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 2 Jan 2019 18:20:42 -0800 Subject: [PATCH 06/20] DimensionDictionarySelector adjustments. --- .../org/apache/druid/query/groupby/GroupByQueryEngine.java | 3 ++- .../druid/query/groupby/RowBasedColumnSelectorFactory.java | 3 ++- .../query/topn/types/StringTopNColumnSelectorStrategy.java | 3 ++- .../apache/druid/segment/DimensionDictionarySelector.java | 6 ++++-- .../org/apache/druid/segment/DoubleDimensionIndexer.java | 2 +- .../org/apache/druid/segment/FloatDimensionIndexer.java | 2 +- .../java/org/apache/druid/segment/LongDimensionIndexer.java | 2 +- .../segment/incremental/IncrementalIndexStorageAdapter.java | 4 ++-- ...ngleStringInputCachingExpressionColumnValueSelector.java | 3 ++- .../segment/virtual/SingleStringInputDimensionSelector.java | 3 ++- .../apache/druid/segment/virtual/VirtualColumnsTest.java | 3 ++- 11 files changed, 21 insertions(+), 13 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryEngine.java index c8dcef3857c2..1383b202d357 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryEngine.java @@ -44,6 +44,7 @@ import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.filter.Filter; import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.column.ValueType; @@ -331,7 +332,7 @@ public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBu }
final DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(dimSpec); - if (selector.getValueCardinality() == DimensionSelector.CARDINALITY_UNKNOWN) { + if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN) { throw new UnsupportedOperationException( "GroupBy v1 does not support dimension selectors with unknown cardinality."); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/RowBasedColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/query/groupby/RowBasedColumnSelectorFactory.java index 081b48191de1..34e5596b1e8c 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/RowBasedColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/RowBasedColumnSelectorFactory.java @@ -31,6 +31,7 @@ import org.apache.druid.segment.BaseSingleValueDimensionSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.IdLookup; @@ -242,7 +243,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public int getValueCardinality() { - return DimensionSelector.CARDINALITY_UNKNOWN; + return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } @Override diff --git a/processing/src/main/java/org/apache/druid/query/topn/types/StringTopNColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/topn/types/StringTopNColumnSelectorStrategy.java index dd30c369fcb9..f5e838d42b60 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/types/StringTopNColumnSelectorStrategy.java +++ b/processing/src/main/java/org/apache/druid/query/topn/types/StringTopNColumnSelectorStrategy.java @@ -25,6 +25,7 @@ import org.apache.druid.query.topn.TopNQuery; 
import org.apache.druid.query.topn.TopNResultBuilder; import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.StorageAdapter; @@ -87,7 +88,7 @@ public long dimExtractionScanAndAggregate( Map, Aggregator[]> aggregatesStore ) { - if (selector.getValueCardinality() != DimensionSelector.CARDINALITY_UNKNOWN) { + if (selector.getValueCardinality() != DimensionDictionarySelector.CARDINALITY_UNKNOWN) { return dimExtractionScanAndAggregateWithCardinalityKnown(query, cursor, selector, rowSelector, aggregatesStore); } else { return dimExtractionScanAndAggregateWithCardinalityUnknown(query, cursor, selector, aggregatesStore); diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java b/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java index 402380096fbc..a02c025b4b8d 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java @@ -24,7 +24,9 @@ import javax.annotation.Nullable; /** - * Interface containing + * Interface containing dictionary-related methods common to {@link DimensionSelector}, + * {@link org.apache.druid.segment.vector.SingleValueDimensionVectorSelector}, and + * {@link org.apache.druid.segment.vector.MultiValueDimensionVectorSelector}. */ public interface DimensionDictionarySelector { @@ -84,7 +86,7 @@ public interface DimensionDictionarySelector * *

Returns false if {@link #lookupName(int)} could be called with ids, returned from the most recent row (or row * vector) returned by this DimensionSelector, but not earlier. If {@link #getValueCardinality()} of this - * DimensionSelector additionally returns {@link #CARDINALITY_UNKNOWN}, {@code lookupName()} couldn't be called with + * selector additionally returns {@link #CARDINALITY_UNKNOWN}, {@code lookupName()} couldn't be called with * ids, returned by not the most recent row (or row vector), i. e. names for ids couldn't be looked up "later". If * {@link #getValueCardinality()} returns a non-negative number, {@code lookupName()} could be called with any ids, * returned from rows (or row vectors) returned since the creation of this DimensionSelector. diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index 0ddacfb781b0..55c0a2c2d110 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -80,7 +80,7 @@ public Double getMaxValue() @Override public int getCardinality() { - return DimensionSelector.CARDINALITY_UNKNOWN; + return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java index a17ebea883d4..d68856317345 100644 --- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java @@ -81,7 +81,7 @@ public Float getMaxValue() @Override public int getCardinality() { - return DimensionSelector.CARDINALITY_UNKNOWN; + return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } @Override diff --git 
a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java index 6c69735046a3..c43945a9b772 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java @@ -81,7 +81,7 @@ public Long getMaxValue() @Override public int getCardinality() { - return DimensionSelector.CARDINALITY_UNKNOWN; + return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 3284ff74af25..26835cc2cb2a 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -32,8 +32,8 @@ import org.apache.druid.segment.Capabilities; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionIndexer; -import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.Metadata; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.VirtualColumns; @@ -99,7 +99,7 @@ public int getDimensionCardinality(String dimension) DimensionIndexer indexer = desc.getIndexer(); int cardinality = indexer.getCardinality(); - return cardinality != DimensionSelector.CARDINALITY_UNKNOWN ? cardinality : Integer.MAX_VALUE; + return cardinality != DimensionDictionarySelector.CARDINALITY_UNKNOWN ? 
cardinality : Integer.MAX_VALUE; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java index 87c5df19d1f6..39573ab7d6aa 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java @@ -28,6 +28,7 @@ import org.apache.druid.math.expr.Parser; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.data.IndexedInts; @@ -65,7 +66,7 @@ public SingleStringInputCachingExpressionColumnValueSelector( final Supplier inputSupplier = ExpressionSelectors.supplierFromDimensionSelector(selector); this.bindings = name -> inputSupplier.get(); - if (selector.getValueCardinality() == DimensionSelector.CARDINALITY_UNKNOWN) { + if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN) { throw new ISE("Selector must have a dictionary"); } else if (selector.getValueCardinality() <= CACHE_SIZE) { arrayEvalCache = new ExprEval[selector.getValueCardinality()]; diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java index ce49901553b3..f189c087b502 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java @@ -26,6 +26,7 @@ import 
org.apache.druid.math.expr.Parser; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.DimensionSelectorUtils; import org.apache.druid.segment.IdLookup; @@ -61,7 +62,7 @@ public SingleStringInputDimensionSelector( } // Verify selector has a working dictionary. - if (selector.getValueCardinality() == DimensionSelector.CARDINALITY_UNKNOWN + if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN || !selector.nameLookupPossibleInAdvance()) { throw new ISE("Selector of class[%s] does not have a dictionary, cannot use it.", selector.getClass().getName()); } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java index 3363a570110b..2b42c0350e72 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java @@ -36,6 +36,7 @@ import org.apache.druid.segment.BaseObjectColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.DimensionSelectorUtils; import org.apache.druid.segment.IdLookup; @@ -319,7 +320,7 @@ public IndexedInts getRow() @Override public int getValueCardinality() { - return DimensionSelector.CARDINALITY_UNKNOWN; + return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } @Override From 7137f8b95c5a45fc35b17e571399a21c8c92b1a6 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 14 Jan 2019 14:47:37 -0800 Subject: [PATCH 07/20] Add "clone" to 
BatchIteratorAdapter. --- .../druid/collections/bitmap/BatchIteratorAdapter.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/collections/bitmap/BatchIteratorAdapter.java b/processing/src/main/java/org/apache/druid/collections/bitmap/BatchIteratorAdapter.java index dd43263bda8e..225999f02c07 100644 --- a/processing/src/main/java/org/apache/druid/collections/bitmap/BatchIteratorAdapter.java +++ b/processing/src/main/java/org/apache/druid/collections/bitmap/BatchIteratorAdapter.java @@ -23,7 +23,7 @@ import org.roaringbitmap.BatchIterator; import org.roaringbitmap.IntIterator; -public class BatchIteratorAdapter implements BatchIterator +public final class BatchIteratorAdapter implements BatchIterator { private final IntIterator iterator; @@ -48,4 +48,11 @@ public boolean hasNext() { return iterator.hasNext(); } + + @Override + public BatchIterator clone() + { + // It's okay to make a "new BatchIteratorAdapter" instead of calling super.clone(), since this class is final. + return new BatchIteratorAdapter(iterator.clone()); + } } From 5a7f6c2d09b6fb740b0ebb018991d0b8deaf2e4e Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 18 Feb 2019 12:50:11 -0800 Subject: [PATCH 08/20] ValueMatcher javadocs. 
--- .../java/org/apache/druid/query/filter/ValueMatcher.java | 5 +++++ .../apache/druid/query/filter/vector/VectorValueMatcher.java | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/ValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/ValueMatcher.java index be36c4d760b1..a800ec7e39e7 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/ValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/ValueMatcher.java @@ -25,6 +25,11 @@ import org.apache.druid.segment.BaseNullableColumnValueSelector; /** + * An object that returns a boolean indicating if the "current" row should be selected or not. The most prominent use + * of this interface is that it is returned by the {@link Filter} "makeMatcher" method, where it is used to identify + * selected rows for filtered cursors and filtered aggregators. + * + * @see org.apache.druid.query.filter.vector.VectorValueMatcher, the vectorized version */ public interface ValueMatcher extends HotLoopCallee { diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java index f274ca3dfae4..242166115b99 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java @@ -19,10 +19,13 @@ package org.apache.druid.query.filter.vector; +import org.apache.druid.query.filter.Filter; import org.apache.druid.segment.vector.VectorSizeInspector; /** - * A vectorized value matcher. + * An object that returns a boolean indicating if the "current" row should be selected or not. 
The most prominent use + * of this interface is that it is returned by the {@link Filter} "makeVectorMatcher" method, where it is used to + * identify selected rows for filtered cursors and filtered aggregators. * * @see org.apache.druid.query.filter.ValueMatcher, the non-vectorized version */ From 94b903ec44a2d7d0c0733e9a84e859f068606d10 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 18 Feb 2019 13:30:28 -0800 Subject: [PATCH 09/20] Fix benchmark. --- .../org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java index 1606aba876f3..fdac5ffcdeef 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java @@ -111,7 +111,7 @@ public void setup() this.walker = closer.register(new SpecificSegmentsQuerySegmentWalker(conglomerate).add(dataSegment, index)); final DruidSchema druidSchema = CalciteTests.createMockSchema(conglomerate, walker, plannerConfig); - final SystemSchema systemSchema = CalciteTests.createMockSystemSchema(druidSchema, walker); + final SystemSchema systemSchema = CalciteTests.createMockSystemSchema(druidSchema, walker, plannerConfig); plannerFactory = new PlannerFactory( druidSchema, From 7395a0ac850764369fc26985f2add2050e3b40c1 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 4 Jul 2019 23:44:46 -0700 Subject: [PATCH 10/20] Fixups post-merge. 
--- .../epinephelinae/GroupByQueryEngineV2.java | 28 ++++++++--- .../vector/VectorGroupByEngine.java | 17 +++++-- .../virtual/ExpressionVirtualColumn.java | 5 +- .../query/groupby/GroupByQueryRunnerTest.java | 5 +- .../timeseries/TimeseriesQueryRunnerTest.java | 2 +- .../druid/sql/calcite/CalciteQueryTest.java | 50 ++++++++++++++++++- 6 files changed, 91 insertions(+), 16 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 6dcd41918c65..d0c75c0dc8cb 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -53,6 +53,7 @@ import org.apache.druid.query.groupby.epinephelinae.column.StringGroupByColumnSelectorStrategy; import org.apache.druid.query.groupby.epinephelinae.vector.VectorGroupByEngine; import org.apache.druid.query.groupby.strategy.GroupByStrategyV2; +import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.Cursor; import org.apache.druid.segment.DimensionHandlerUtils; @@ -72,6 +73,7 @@ import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.function.Function; public class GroupByQueryEngineV2 { @@ -181,13 +183,15 @@ private static Sequence processNonVectorized( @Override public GroupByEngineIterator make() { - ColumnSelectorPlus[] selectorPlus = DimensionHandlerUtils + final ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); + final ColumnSelectorPlus[] selectorPlus = DimensionHandlerUtils .createColumnSelectorPluses( STRATEGY_FACTORY, query.getDimensions(), - cursor.getColumnSelectorFactory() + columnSelectorFactory ); - GroupByColumnSelectorPlus[] dims = 
createGroupBySelectorPlus(selectorPlus); + + final GroupByColumnSelectorPlus[] dims = createGroupBySelectorPlus(selectorPlus); final int cardinalityForArrayAggregation = getCardinalityForArrayAggregation( querySpecificConfig, @@ -204,7 +208,7 @@ public GroupByEngineIterator make() processingBuffer, fudgeTimestamp, dims, - isAllSingleValueDims(storageAdapter, query.getDimensions()), + isAllSingleValueDims(columnSelectorFactory::getColumnCapabilities, query.getDimensions()), cardinalityForArrayAggregation ); } else { @@ -215,7 +219,7 @@ public GroupByEngineIterator make() processingBuffer, fudgeTimestamp, dims, - isAllSingleValueDims(storageAdapter, query.getDimensions()) + isAllSingleValueDims(columnSelectorFactory::getColumnCapabilities, query.getDimensions()) ); } } @@ -278,10 +282,11 @@ public static int getCardinalityForArrayAggregation( } /** - * Checks whether all "dimensions" are either single-valued or nonexistent. + * Checks whether all "dimensions" are either single-valued or nonexistent (which is just as good as single-valued, + * since their selectors will show up as full of nulls). */ public static boolean isAllSingleValueDims( - final StorageAdapter adapter, + final Function capabilitiesFunction, final List dimensions ) { @@ -289,7 +294,14 @@ public static boolean isAllSingleValueDims( .stream() .allMatch( dimension -> { - final ColumnCapabilities columnCapabilities = adapter.getColumnCapabilities(dimension.getDimension()); + if (dimension.mustDecorate()) { + // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. + // To be safe, we must return false here. + return false; + } + + // Now check column capabilities. 
+ final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); return columnCapabilities == null || !columnCapabilities.hasMultipleValues(); }); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java index a65ae0099510..615444e57357 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -74,10 +74,19 @@ public static boolean canVectorize( @Nullable final Filter filter ) { - // Not yet supported: - // 1) Granularities other than ALL. - // 2) Multi-value dimensions. - return GroupByQueryEngineV2.isAllSingleValueDims(adapter, query.getDimensions()) + // Multi-value dimensions are not yet supported. + // + // Two notes here about how we're handling this check: + // 1) After multi-value dimensions are supported, we could alter "GroupByQueryEngineV2.isAllSingleValueDims" + // to accept a ColumnSelectorFactory, which makes more sense than using a StorageAdapter (see #8013). + // 2) Technically using StorageAdapter here is bad since it only looks at real columns, but they might + // be shadowed by virtual columns (again, see #8013). But it's fine for now since adapter.canVectorize + // always returns false if there are any virtual columns. + // + // This situation should sort itself out pretty well once this engine supports multi-valued columns. Then we + // won't have to worry about having this all-single-value-dims check here. 
+ + return GroupByQueryEngineV2.isAllSingleValueDims(adapter::getColumnCapabilities, query.getDimensions()) && query.getDimensions().stream().allMatch(DimensionSpec::canVectorize) && query.getAggregatorSpecs().stream().allMatch(AggregatorFactory::canVectorize) && adapter.canVectorize(filter, query.getVirtualColumns(), false); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java index 871682d4982c..c322fff9d2bf 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java @@ -105,7 +105,10 @@ public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnS @Override public ColumnCapabilities capabilities(String columnName) { - return new ColumnCapabilitiesImpl().setType(outputType); + // Note: Ideally we would only "setHasMultipleValues(true)" if the expression in question could potentially return + // multiple values. However, we don't currently have a good way of determining this, so to be safe we always + // set the flag. + return new ColumnCapabilitiesImpl().setType(outputType).setHasMultipleValues(true); } @Override diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java index 00002016de46..7c818a996bd3 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java @@ -6270,6 +6270,9 @@ public void testSubqueryWithFirstLast() @Test public void testGroupByWithSubtotalsSpec() { + // Cannot vectorize due to usage of expressions. 
+ cannotVectorize(); + if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } @@ -10613,7 +10616,7 @@ private Map makeContext() private void cannotVectorize() { if (vectorize && config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { - expectedException.expect(IllegalStateException.class); + expectedException.expect(RuntimeException.class); expectedException.expectMessage("Cannot vectorize!"); } } diff --git a/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java index 2f235ac54b40..4c8d91c0fe52 100644 --- a/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java @@ -2596,7 +2596,7 @@ private Map makeContext(final Map myContext) private void cannotVectorize() { if (vectorize) { - expectedException.expect(IllegalStateException.class); + expectedException.expect(RuntimeException.class); expectedException.expectMessage("Cannot vectorize!"); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 54140ec4efcf..009c8ec103bf 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -565,7 +565,7 @@ public void testExplainSelectStar() throws Exception ImmutableList.of(), ImmutableList.of( new Object[]{ - 
"DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"order\":\"none\",\"filter\":null,\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":false},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, cnt:LONG, dim1:STRING, dim2:STRING, dim3:STRING, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX}])\n" + "DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"order\":\"none\",\"filter\":null,\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, cnt:LONG, dim1:STRING, dim2:STRING, dim3:STRING, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX}])\n" } ) ); @@ -2435,6 +2435,9 @@ public void testCountStarWithLongColumnFiltersOnTwoPoints() throws Exception @Test public void testFilterOnStringAsNumber() throws Exception { + // Cannot vectorize due to usage of expressions. 
+ cannotVectorize(); + testQuery( "SELECT distinct dim1 FROM druid.foo WHERE " + "dim1 = 10 OR " @@ -3808,6 +3811,9 @@ public void testSelectDistinctWithCascadeExtractionFilter() throws Exception @Test public void testSelectDistinctWithStrlenFilter() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT distinct dim1 FROM druid.foo " + "WHERE CHARACTER_LENGTH(dim1) = 3 OR CAST(CHARACTER_LENGTH(dim1) AS varchar) = 3", @@ -8124,6 +8130,9 @@ public void testTimestampCeil() throws Exception @Test public void testNvlColumns() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT NVL(dim2, dim1), COUNT(*) FROM druid.foo GROUP BY NVL(dim2, dim1)\n", ImmutableList.of( @@ -8162,6 +8171,9 @@ public void testNvlColumns() throws Exception @Test public void testMultiValueStringWorksLikeStringGroupBy() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + List expected; if (NullHandling.replaceWithDefault()) { expected = ImmutableList.of( @@ -8213,6 +8225,9 @@ public void testMultiValueStringWorksLikeStringGroupBy() throws Exception @Test public void testMultiValueStringWorksLikeStringGroupByWithFilter() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT concat(dim3, 'foo'), SUM(cnt) FROM druid.numfoo where concat(dim3, 'foo') = 'bfoo' GROUP BY 1 ORDER BY 2 DESC", ImmutableList.of( @@ -8493,6 +8508,9 @@ public void testMultiValueStringSlice() throws Exception @Test public void testMultiValueStringLength() throws Exception { + // Cannot vectorize due to usage of expressions. 
+ cannotVectorize(); + testQuery( "SELECT dim1, MV_LENGTH(dim3), SUM(cnt) FROM druid.numfoo GROUP BY 1, 2 ORDER BY 2 DESC", ImmutableList.of( @@ -8533,6 +8551,9 @@ public void testMultiValueStringLength() throws Exception @Test public void testMultiValueStringAppend() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of( @@ -8586,6 +8607,9 @@ public void testMultiValueStringAppend() throws Exception @Test public void testMultiValueStringPrepend() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of( @@ -8639,6 +8663,9 @@ public void testMultiValueStringPrepend() throws Exception @Test public void testMultiValueStringPrependAppend() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of( @@ -8692,6 +8719,9 @@ public void testMultiValueStringPrependAppend() throws Exception @Test public void testMultiValueStringConcat() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of( @@ -8743,6 +8773,9 @@ public void testMultiValueStringConcat() throws Exception @Test public void testMultiValueStringOffset() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT MV_OFFSET(dim3, 1), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC", ImmutableList.of( @@ -8779,6 +8812,9 @@ public void testMultiValueStringOffset() throws Exception @Test public void testMultiValueStringOrdinal() throws Exception { + // Cannot vectorize due to usage of expressions. 
+ cannotVectorize(); + testQuery( "SELECT MV_ORDINAL(dim3, 2), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC", ImmutableList.of( @@ -8815,6 +8851,9 @@ public void testMultiValueStringOrdinal() throws Exception @Test public void testMultiValueStringOffsetOf() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT MV_OFFSET_OF(dim3, 'b'), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC", ImmutableList.of( @@ -8851,6 +8890,9 @@ public void testMultiValueStringOffsetOf() throws Exception @Test public void testMultiValueStringOrdinalOf() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT MV_ORDINAL_OF(dim3, 'b'), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC", ImmutableList.of( @@ -8887,6 +8929,9 @@ public void testMultiValueStringOrdinalOf() throws Exception @Test public void testMultiValueStringToString() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of( @@ -8936,6 +8981,9 @@ public void testMultiValueStringToString() throws Exception @Test public void testMultiValueStringToStringToMultiValueString() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of( From 360bb6d132bc7da9df8683a8a4b59d119ef6c627 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 4 Jul 2019 23:55:04 -0700 Subject: [PATCH 11/20] Expect exception on testGroupByWithStringVirtualColumn for IncrementalIndex. 
--- .../druid/query/groupby/GroupByQueryRunnerTest.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java index 7c818a996bd3..3eb69c0dda41 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java @@ -25,6 +25,7 @@ import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Ordering; @@ -186,6 +187,7 @@ public int getNumThreads() private static final Closer resourceCloser = Closer.create(); private final QueryRunner runner; + private final String runnerName; private final GroupByQueryRunnerFactory factory; private final GroupByQueryConfig config; private final boolean vectorize; @@ -447,6 +449,7 @@ public GroupByQueryRunnerTest( this.config = config; this.factory = factory; this.runner = factory.mergeRunners(Execs.directExecutor(), ImmutableList.of(runner)); + this.runnerName = runner.toString(); this.vectorize = vectorize; } @@ -973,6 +976,13 @@ public void testGroupByWithStringVirtualColumn() // Cannot vectorize due to virtual columns. cannotVectorize(); + // Cannot run with groupBy v1 on IncrementalIndex, because expressions would turn multi-value inputs + // into cardinalityless selectors, and groupBy v1 requires selectors that have a cardinality. 
+ if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1) + && ImmutableSet.of("rtIndex", "noRollupRtIndex").contains(runnerName)) { + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) From ec3abeec1a52fba2192d24085cc4d3d7e934fdae Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 5 Jul 2019 01:29:55 -0700 Subject: [PATCH 12/20] BloomDimFilterSqlTest: Tag two non-vectorizable tests. --- .../druid/query/filter/sql/BloomDimFilterSqlTest.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java index be32f01b78f6..fd9f5f5b667a 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java @@ -169,6 +169,9 @@ public void testBloomFilterExprFilter() throws Exception @Test public void testBloomFilterVirtualColumn() throws Exception { + // Cannot vectorize due to expression virtual columns. + cannotVectorize(); + BloomKFilter filter = new BloomKFilter(1500); filter.addString("def-foo"); byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter); @@ -199,6 +202,9 @@ public void testBloomFilterVirtualColumn() throws Exception @Test public void testBloomFilterVirtualColumnNumber() throws Exception { + // Cannot vectorize due to expression virtual columns. 
+ cannotVectorize(); + BloomKFilter filter = new BloomKFilter(1500); filter.addFloat(20.2f); byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter); From 8d4f3498256d06931196c08fd34664fa4b2c1fb2 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 5 Jul 2019 01:32:18 -0700 Subject: [PATCH 13/20] Minor adjustments. --- .../epinephelinae/vector/VectorGroupByEngine.java | 4 ++-- .../query/timeseries/TimeseriesQueryEngine.java | 15 ++++++++++----- .../query/vector/VectorCursorGranularizer.java | 14 ++++++++------ .../data/BlockLayoutColumnarDoublesSupplier.java | 4 ++++ .../data/BlockLayoutColumnarFloatsSupplier.java | 4 ++++ .../data/BlockLayoutColumnarLongsSupplier.java | 4 ++++ .../CompressedVSizeColumnarMultiIntsSupplier.java | 4 ++-- 7 files changed, 34 insertions(+), 15 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java index 615444e57357..96b9988e5ec9 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -225,7 +225,7 @@ private static class VectorGroupByEngineIterator implements CloseableIterator selectors, final ByteBuffer processingBuffer, @Nullable final DateTime fudgeTimestamp @@ -242,7 +242,7 @@ private static class VectorGroupByEngineIterator implements CloseableIterator bufferPoo this.bufferPool = bufferPool; } + /** + * Run a single-segment, single-interval timeseries query on a particular adapter. The query must have been + * scoped down to a single interval before calling this method. 
+ */ public Sequence> process(final TimeseriesQuery query, final StorageAdapter adapter) { if (adapter == null) { @@ -109,7 +114,7 @@ private Sequence> processVectorized( final TimeseriesQuery query, final StorageAdapter adapter, @Nullable final Filter filter, - final Interval interval, + final Interval queryInterval, final Granularity gran, final boolean descending ) @@ -119,7 +124,7 @@ private Sequence> processVectorized( final VectorCursor cursor = adapter.makeVectorCursor( filter, - interval, + queryInterval, query.getVirtualColumns(), descending, QueryContexts.getVectorSize(query), @@ -138,7 +143,7 @@ private Sequence> processVectorized( adapter, cursor, gran, - interval + queryInterval ); if (granularizer == null) { @@ -235,7 +240,7 @@ private Sequence> processNonVectorized( final TimeseriesQuery query, final StorageAdapter adapter, @Nullable final Filter filter, - final Interval interval, + final Interval queryInterval, final Granularity gran, final boolean descending ) @@ -245,7 +250,7 @@ private Sequence> processNonVectorized( return QueryRunnerHelper.makeCursorBasedQuery( adapter, - Collections.singletonList(interval), + Collections.singletonList(queryInterval), filter, query.getVirtualColumns(), descending, diff --git a/processing/src/main/java/org/apache/druid/query/vector/VectorCursorGranularizer.java b/processing/src/main/java/org/apache/druid/query/vector/VectorCursorGranularizer.java index 987a993be578..163befcf2817 100644 --- a/processing/src/main/java/org/apache/druid/query/vector/VectorCursorGranularizer.java +++ b/processing/src/main/java/org/apache/druid/query/vector/VectorCursorGranularizer.java @@ -73,22 +73,24 @@ public static VectorCursorGranularizer create( final StorageAdapter storageAdapter, final VectorCursor cursor, final Granularity granularity, - final Interval cursorInterval + final Interval queryInterval ) { final DateTime minTime = storageAdapter.getMinTime(); final DateTime maxTime = storageAdapter.getMaxTime(); - final Interval 
actualInterval = cursorInterval.overlap(new Interval(minTime, granularity.bucketEnd(maxTime))); - if (actualInterval == null) { + final Interval storageAdapterInterval = new Interval(minTime, granularity.bucketEnd(maxTime)); + final Interval clippedQueryInterval = queryInterval.overlap(storageAdapterInterval); + + if (clippedQueryInterval == null) { return null; } - final Iterable bucketIterable = granularity.getIterable(actualInterval); - final Interval firstBucket = granularity.bucket(actualInterval.getStart()); + final Iterable bucketIterable = granularity.getIterable(clippedQueryInterval); + final Interval firstBucket = granularity.bucket(clippedQueryInterval.getStart()); final VectorValueSelector timeSelector; - if (firstBucket.contains(actualInterval)) { + if (firstBucket.contains(clippedQueryInterval)) { // Only one bucket, no need to read the time column. assert Iterables.size(bucketIterable) == 1; timeSelector = null; diff --git a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarDoublesSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarDoublesSupplier.java index 82c7479dafc3..adae6d391b2f 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarDoublesSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarDoublesSupplier.java @@ -30,7 +30,11 @@ public class BlockLayoutColumnarDoublesSupplier implements Supplier { private final GenericIndexed> baseDoubleBuffers; + + // The number of rows in this column. private final int totalSize; + + // The number of doubles per buffer. 
private final int sizePer; public BlockLayoutColumnarDoublesSupplier( diff --git a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java index a0e4c3fbc308..a7a8deaec1c5 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java @@ -30,7 +30,11 @@ public class BlockLayoutColumnarFloatsSupplier implements Supplier { private final GenericIndexed> baseFloatBuffers; + + // The number of rows in this column. private final int totalSize; + + // The number of floats per buffer. private final int sizePer; public BlockLayoutColumnarFloatsSupplier( diff --git a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java index 64a2d8f2ccf8..808e7bedce71 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java @@ -30,7 +30,11 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier { private final GenericIndexed> baseLongBuffers; + + // The number of rows in this column. private final int totalSize; + + // The number of longs per buffer. 
private final int sizePer; private final CompressionFactory.LongEncodingReader baseReader; diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java index 048f1fe99504..0017e8d95e3b 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java @@ -38,8 +38,8 @@ * Format - * byte 1 - version * offsets - {@link ColumnarInts} of length num of rows + 1 representing offsets of starting index of first element of - * each row in values index and last element equal to length of values column, the last element in the offsets - * represents the total length of values column. + * each row in values index and last element equal to length of values column, the last element in the offsets + * represents the total length of values column. * values - {@link ColumnarInts} representing concatenated values of all rows */ public class CompressedVSizeColumnarMultiIntsSupplier implements WritableSupplier From c74177741e18d8ffa162b6119ebcd586d75ef0e6 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 5 Jul 2019 12:16:49 -0700 Subject: [PATCH 14/20] Update surefire, bump up Xmx in Travis. 
--- .travis.yml | 12 ++++++------ pom.xml | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2a2b08d325fb..c7c798f88836 100644 --- a/.travis.yml +++ b/.travis.yml @@ -60,7 +60,7 @@ matrix: before_script: unset _JAVA_OPTIONS script: # Set MAVEN_OPTS for Surefire launcher - - MAVEN_OPTS='-Xmx512m' mvn test -B -pl processing + - MAVEN_OPTS='-Xmx800m' mvn test -B -pl processing - sh -c "dmesg | egrep -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0" - free -m @@ -71,7 +71,7 @@ matrix: before_script: unset _JAVA_OPTIONS script: # Set MAVEN_OPTS for Surefire launcher - - MAVEN_OPTS='-Xmx512m' mvn test -B -Ddruid.generic.useDefaultValueForNull=false -pl processing + - MAVEN_OPTS='-Xmx800m' mvn test -B -Ddruid.generic.useDefaultValueForNull=false -pl processing - sh -c "dmesg | egrep -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0" - free -m @@ -82,7 +82,7 @@ matrix: before_script: unset _JAVA_OPTIONS script: # Set MAVEN_OPTS for Surefire launcher - - MAVEN_OPTS='-Xmx512m' mvn test -B -pl server + - MAVEN_OPTS='-Xmx800m' mvn test -B -pl server # server module test with SQL Compatibility enabled - env: @@ -91,7 +91,7 @@ matrix: before_script: unset _JAVA_OPTIONS script: # Set MAVEN_OPTS for Surefire launcher - - MAVEN_OPTS='-Xmx512m' mvn test -B -pl server -Ddruid.generic.useDefaultValueForNull=false + - MAVEN_OPTS='-Xmx800m' mvn test -B -pl server -Ddruid.generic.useDefaultValueForNull=false # other modules test @@ -101,7 +101,7 @@ matrix: before_script: unset _JAVA_OPTIONS script: # Set MAVEN_OPTS for Surefire launcher - - MAVEN_OPTS='-Xmx512m' mvn test -B -pl '!processing,!server' + - MAVEN_OPTS='-Xmx800m' mvn test -B -pl '!processing,!server' - sh -c "dmesg | egrep -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0" - free -m @@ -112,7 +112,7 @@ matrix: before_script: unset _JAVA_OPTIONS script: # Set MAVEN_OPTS for Surefire launcher - - MAVEN_OPTS='-Xmx512m' mvn test -B 
-Ddruid.generic.useDefaultValueForNull=false -pl '!processing,!server' + - MAVEN_OPTS='-Xmx800m' mvn test -B -Ddruid.generic.useDefaultValueForNull=false -pl '!processing,!server' - sh -c "dmesg | egrep -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0" - free -m diff --git a/pom.xml b/pom.xml index 836307aa802b..1a802c9f5e15 100644 --- a/pom.xml +++ b/pom.xml @@ -1227,7 +1227,7 @@ org.apache.maven.plugins maven-surefire-plugin - 2.19.1 + 2.22.2 From e633cd56c7efc06742bab820adfc6d2be660cff0 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 8 Jul 2019 18:21:11 -0700 Subject: [PATCH 15/20] Some more adjustments. --- .../filter/vector/ReadableVectorMatch.java | 3 +- .../timeseries/TimeseriesQueryEngine.java | 2 + .../QueryableIndexCursorSequenceBuilder.java | 54 ++++--- ...eryableIndexCursorSequenceBuilderTest.java | 139 ++++++++++++++++++ 4 files changed, 180 insertions(+), 18 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilderTest.java diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java index 42e185146f40..73cfade5f4e1 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java @@ -32,7 +32,8 @@ public interface ReadableVectorMatch * Returns an array of indexes into the current batch. Only the first "getSelectionSize" are valid. * * Even though this array is technically mutable, it is very poor form to mutate it if you are not the owner of the - * VectorMatch object. + * VectorMatch object. The reason we use a mutable array here instead of positional getter methods, by the way, is in + * the hopes of keeping access to the selection vector as low-level and optimizable as possible. 
*/ int[] getSelection(); diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java index 4ae782ca5af2..87c24ffb11ff 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java @@ -19,6 +19,7 @@ package org.apache.druid.query.timeseries; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Iterables; import com.google.inject.Inject; import org.apache.druid.collections.NonBlockingPool; @@ -61,6 +62,7 @@ public class TimeseriesQueryEngine /** * Constructor for tests. In production, the @Inject constructor is used instead. */ + @VisibleForTesting public TimeseriesQueryEngine() { this.bufferPool = new StupidPool<>("dummy", () -> ByteBuffer.allocate(1000000)); diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java index 4d0970297cc0..7ec0632f2722 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java @@ -19,6 +19,7 @@ package org.apache.druid.segment; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -54,8 +55,10 @@ public class QueryableIndexCursorSequenceBuilder { - // At this threshold, timestamp searches switch from binary to linear. The idea is to avoid too much decompression - // buffer thrashing. The default value is chosen to be similar to the typical number of timestamps per block. 
+ /** + * At this threshold, timestamp searches switch from binary to linear. See + * {@link #timeSearch(NumericColumn, long, int, int, int)} for more details. + */ private static final int TOO_CLOSE_FOR_MISSILES = 15000; private final QueryableIndex index; @@ -210,7 +213,13 @@ public VectorCursor buildVectorized(final int vectorSize) timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn(); closer.register(timestamps); - final int result = timeSearch(timestamps, interval.getStartMillis(), 0, index.getNumRows()); + final int result = timeSearch( + timestamps, + interval.getStartMillis(), + 0, + index.getNumRows(), + TOO_CLOSE_FOR_MISSILES + ); if (result >= 0) { startOffset = result; } else { @@ -226,7 +235,13 @@ public VectorCursor buildVectorized(final int vectorSize) closer.register(timestamps); } - final int result = timeSearch(timestamps, interval.getEndMillis(), startOffset, index.getNumRows()); + final int result = timeSearch( + timestamps, + interval.getEndMillis(), + startOffset, + index.getNumRows(), + TOO_CLOSE_FOR_MISSILES + ); if (result >= 0) { endOffset = result; } else { @@ -271,20 +286,26 @@ public VectorCursor buildVectorized(final int vectorSize) } /** - * Search the time column. Uses a binary search that switches to linear when it gets close. + * Search the time column. Uses a binary search that switches to linear when it gets close, based on + * the value of "tooCloseForMissiles". The idea is to avoid too much decompression buffer thrashing. The + * default value {@link #TOO_CLOSE_FOR_MISSILES} is chosen to be similar to the typical number of timestamps + * per block. It is parameterizable to make unit testing easier. 
* - * @param timeColumn the column - * @param timestamp the timestamp to search for - * @param startIndex first index to search, inclusive - * @param endIndex last index to search, exclusive + * @param timeColumn the column + * @param timestamp the timestamp to search for + * @param startIndex first index to search, inclusive + * @param endIndex last index to search, exclusive + * @param tooCloseForMissiles switch to linear search when we are this close to the target index * - * @return index of timestamp, or negative number equal to (-(insertion point) - 1). + * @return first index that has a timestamp equal to, or greater than, "timestamp", or "endIndex" if there is none */ - private static int timeSearch( + @VisibleForTesting + static int timeSearch( final NumericColumn timeColumn, final long timestamp, final int startIndex, - final int endIndex + final int endIndex, + final int tooCloseForMissiles ) { final long prevTimestamp = timestamp - 1; @@ -294,7 +315,7 @@ private static int timeSearch( int maxIndex = endIndex - 1; while (minIndex <= maxIndex) { - if (maxIndex - minIndex < TOO_CLOSE_FOR_MISSILES) { + if (maxIndex - minIndex < tooCloseForMissiles) { break; } @@ -315,14 +336,13 @@ private static int timeSearch( // Do linear search for the actual timestamp, then return. for (; minIndex < endIndex; minIndex++) { final long currValue = timeColumn.getLongSingleValueRow(minIndex); - if (currValue == timestamp) { + if (currValue >= timestamp) { return minIndex; - } else if (currValue > timestamp) { - return -(minIndex + 1); } } - return -(endIndex + 1); + // Not found. 
+ return endIndex; } private static class QueryableIndexVectorCursor implements VectorCursor diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilderTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilderTest.java new file mode 100644 index 000000000000..abb7132373c4 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilderTest.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.NumericColumn; +import org.apache.druid.segment.data.ReadableOffset; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Map; + +public class QueryableIndexCursorSequenceBuilderTest +{ + @Test + public void testTimeSearch() + { + final int[] values = new int[]{0, 1, 1, 1, 1, 1, 1, 1, 5, 7, 10}; + final NumericColumn column = new NumericColumn() + { + @Override + public int length() + { + return values.length; + } + + @Override + public long getLongSingleValueRow(int rowNum) + { + return values[rowNum]; + } + + @Override + public void close() + { + throw new UnsupportedOperationException(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + throw new UnsupportedOperationException(); + } + + @Override + public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) + { + throw new UnsupportedOperationException(); + } + }; + + // Thresholds covering each search mode; the map key names the mode and doubles as the assertion message. 
+ final Map closenessThresholds = ImmutableMap.of( + "binary search only", 0, + "linear search only", Integer.MAX_VALUE, + "switching search", 3 + ); + + for (Map.Entry entry : closenessThresholds.entrySet()) { + Assert.assertEquals( + entry.getKey(), + 0, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 0, values.length, entry.getValue()) + ); + + Assert.assertEquals( + entry.getKey(), + 2, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 2, values.length, entry.getValue()) + ); + + Assert.assertEquals( + entry.getKey(), + 0, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 0, values.length / 2, entry.getValue()) + ); + + Assert.assertEquals( + entry.getKey(), + 1, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 0, values.length, entry.getValue()) + ); + + Assert.assertEquals( + entry.getKey(), + 2, + 
QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 2, values.length, entry.getValue()) + ); + + Assert.assertEquals( + entry.getKey(), + 1, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 0, values.length / 2, entry.getValue()) + ); + + Assert.assertEquals( + entry.getKey(), + 1, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 1, 8, entry.getValue()) + ); + + Assert.assertEquals( + entry.getKey(), + 8, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 2, 0, values.length, entry.getValue()) + ); + + Assert.assertEquals( + entry.getKey(), + 10, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 10, 0, values.length, entry.getValue()) + ); + + Assert.assertEquals( + entry.getKey(), + 11, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 15, 0, values.length, entry.getValue()) + ); + } + } +} From 496484b6a3028158471834f3fd93004fbf9919fe Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 8 Jul 2019 19:46:10 -0700 Subject: [PATCH 16/20] Javadoc adjustments --- .../src/main/java/org/apache/druid/query/QueryMetrics.java | 2 +- .../query/groupby/epinephelinae/ByteBufferHashTable.java | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/QueryMetrics.java b/processing/src/main/java/org/apache/druid/query/QueryMetrics.java index 3058f464e701..e34d66196878 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryMetrics.java +++ b/processing/src/main/java/org/apache/druid/query/QueryMetrics.java @@ -237,7 +237,7 @@ public interface QueryMetrics> void identity(String identity); /** - * Sets whether are not a segment scan has been vectorized. Generally expected to only be attached to segment-level + * Sets whether or not a segment scan has been vectorized. Generally expected to only be attached to segment-level * metrics, since at whole-query level we might have a mix of vectorized and non-vectorized segment scans. 
*/ void vectorized(boolean vectorized); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java index cdead589f392..6ba201e1183d 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java @@ -250,10 +250,11 @@ protected void initializeNewBucketKey( } /** - * Find a bucket for a key, attempting to resize the table with adjustTableWhenFull() if possible. + * Find a bucket for a key, attempting to grow the table with adjustTableWhenFull() if possible. * - * @param keyBuffer buffer containing the key - * @param keyHash hash of the key + * @param keyBuffer buffer containing the key + * @param keyHash hash of the key + * @param preTableGrowthRunnable runnable that executes before the table grows * * @return bucket number of the found bucket or -1 if a bucket could not be allocated after resizing. */ From f1a4667e62c3fe160beb2e9b6233a0955f203350 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 8 Jul 2019 23:06:10 -0700 Subject: [PATCH 17/20] AggregatorAdapters adjustments. 
--- .../query/aggregation/AggregatorAdapter.java | 43 -------- .../query/aggregation/AggregatorAdapters.java | 102 +++++++++++++++--- 2 files changed, 89 insertions(+), 56 deletions(-) delete mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapter.java diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapter.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapter.java deleted file mode 100644 index 8e4c492b2095..000000000000 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapter.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.query.aggregation; - -import javax.annotation.Nullable; -import java.io.Closeable; -import java.nio.ByteBuffer; - -public interface AggregatorAdapter extends Closeable -{ - void init(ByteBuffer buf, int position); - - @Nullable - Object get(ByteBuffer buf, int position); - - void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer); - - @Override - void close(); - - AggregatorFactory getFactory(); - - BufferAggregator asBufferAggregator(); - - VectorAggregator asVectorAggregator(); -} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java index b914bb562966..8ae7a33b08d6 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java @@ -46,15 +46,15 @@ public class AggregatorAdapters implements Closeable { private static final Logger log = new Logger(AggregatorAdapters.class); - private final List adapters; + private final List adapters; private final List factories; private final int[] aggregatorPositions; private final int spaceNeeded; - private AggregatorAdapters(final List adapters) + private AggregatorAdapters(final List adapters) { this.adapters = adapters; - this.factories = adapters.stream().map(AggregatorAdapter::getFactory).collect(Collectors.toList()); + this.factories = adapters.stream().map(Adapter::getFactory).collect(Collectors.toList()); this.aggregatorPositions = new int[adapters.size()]; long nextPosition = 0; @@ -67,12 +67,15 @@ private AggregatorAdapters(final List adapters) this.spaceNeeded = Ints.checkedCast(nextPosition); } + /** + * Create an adapters object based on {@link VectorAggregator}. 
+ */ public static AggregatorAdapters factorizeVector( final VectorColumnSelectorFactory columnSelectorFactory, final List aggregatorFactories ) { - final AggregatorAdapter[] adapters = new AggregatorAdapter[aggregatorFactories.size()]; + final Adapter[] adapters = new Adapter[aggregatorFactories.size()]; for (int i = 0; i < aggregatorFactories.size(); i++) { final AggregatorFactory aggregatorFactory = aggregatorFactories.get(i); adapters[i] = new VectorAggregatorAdapter( @@ -84,12 +87,15 @@ public static AggregatorAdapters factorizeVector( return new AggregatorAdapters(Arrays.asList(adapters)); } + /** + * Create an adapters object based on {@link BufferAggregator}. + */ public static AggregatorAdapters factorizeBuffered( final ColumnSelectorFactory columnSelectorFactory, final List aggregatorFactories ) { - final AggregatorAdapter[] adapters = new AggregatorAdapter[aggregatorFactories.size()]; + final Adapter[] adapters = new Adapter[aggregatorFactories.size()]; for (int i = 0; i < aggregatorFactories.size(); i++) { final AggregatorFactory aggregatorFactory = aggregatorFactories.get(i); adapters[i] = new BufferAggregatorAdapter( @@ -101,26 +107,44 @@ public static AggregatorAdapters factorizeBuffered( return new AggregatorAdapters(Arrays.asList(adapters)); } + /** + * Return the amount of buffer bytes needed by all aggregators wrapped up in this object. + */ public int spaceNeeded() { return spaceNeeded; } + /** + * Return the {@link AggregatorFactory} objects that were used to create this object. + */ public List factories() { return factories; } + /** + * Return the individual positions of each aggregator within a hypothetical buffer of size {@link #spaceNeeded()}. + */ public int[] aggregatorPositions() { return aggregatorPositions; } + /** + * Return the number of aggregators in this object. + */ public int size() { return adapters.size(); } + /** + * Initialize all aggregators. 
+ * + * @param buf aggregation buffer + * @param position position in buffer where our block of size {@link #spaceNeeded()} starts + */ public void init(final ByteBuffer buf, final int position) { for (int i = 0; i < adapters.size(); i++) { @@ -128,14 +152,24 @@ public void init(final ByteBuffer buf, final int position) } } + /** + * Call {@link BufferAggregator#aggregate(ByteBuffer, int)} on all of our aggregators. + * + * This method is only valid if the underlying aggregators are {@link BufferAggregator}. + */ public void aggregateBuffered(final ByteBuffer buf, final int position) { for (int i = 0; i < adapters.size(); i++) { - final AggregatorAdapter adapter = adapters.get(i); + final Adapter adapter = adapters.get(i); adapter.asBufferAggregator().aggregate(buf, position + aggregatorPositions[i]); } } + /** + * Call {@link VectorAggregator#aggregate(ByteBuffer, int, int, int)} on all of our aggregators. + * + * This method is only valid if the underlying aggregators are {@link VectorAggregator}. + */ public void aggregateVector( final ByteBuffer buf, final int position, @@ -144,11 +178,16 @@ public void aggregateVector( ) { for (int i = 0; i < adapters.size(); i++) { - final AggregatorAdapter adapter = adapters.get(i); + final Adapter adapter = adapters.get(i); adapter.asVectorAggregator().aggregate(buf, position + aggregatorPositions[i], start, end); } } + /** + * Call {@link VectorAggregator#aggregate(ByteBuffer, int, int[], int[], int)} on all of our aggregators. + * + * This method is only valid if the underlying aggregators are {@link VectorAggregator}. 
+ */ public void aggregateVector( final ByteBuffer buf, final int numRows, @@ -157,17 +196,27 @@ public void aggregateVector( ) { for (int i = 0; i < adapters.size(); i++) { - final AggregatorAdapter adapter = adapters.get(i); + final Adapter adapter = adapters.get(i); adapter.asVectorAggregator().aggregate(buf, numRows, positions, rows, aggregatorPositions[i]); } } + /** + * Retrieve aggregation state from one of our aggregators. + * + * @param buf aggregation buffer + * @param position position in buffer where our block of size {@link #spaceNeeded()} starts + * @param aggregatorNumber which aggregator to retrieve state, from 0 to {@link #size()} - 1 + */ @Nullable public Object get(final ByteBuffer buf, final int position, final int aggregatorNumber) { return adapters.get(aggregatorNumber).get(buf, position + aggregatorPositions[aggregatorNumber]); } + /** + * Inform all of our aggregators that they are being relocated. + */ public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) { for (int i = 0; i < adapters.size(); i++) { @@ -180,10 +229,13 @@ public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, Byt } } + /** + * Close all of our aggregators. + */ @Override public void close() { - for (AggregatorAdapter adapter : adapters) { + for (Adapter adapter : adapters) { try { adapter.close(); } @@ -193,12 +245,36 @@ public void close() } } - private static class VectorAggregatorAdapter implements AggregatorAdapter + /** + * The interface that allows this class to achieve its goals of partially unifying handling of + * BufferAggregator and VectorAggregator. Private, since it doesn't escape this class and the + * only two implementations are private static classes below. 
+ */ + private interface Adapter extends Closeable + { + void init(ByteBuffer buf, int position); + + @Nullable + Object get(ByteBuffer buf, int position); + + void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer); + + @Override + void close(); + + AggregatorFactory getFactory(); + + BufferAggregator asBufferAggregator(); + + VectorAggregator asVectorAggregator(); + } + + private static class VectorAggregatorAdapter implements Adapter { private final AggregatorFactory factory; private final VectorAggregator aggregator; - public VectorAggregatorAdapter(final AggregatorFactory factory, final VectorAggregator aggregator) + VectorAggregatorAdapter(final AggregatorFactory factory, final VectorAggregator aggregator) { this.factory = factory; this.aggregator = aggregator; @@ -252,12 +328,12 @@ public VectorAggregator asVectorAggregator() } } - private static class BufferAggregatorAdapter implements AggregatorAdapter + private static class BufferAggregatorAdapter implements Adapter { private final AggregatorFactory factory; private final BufferAggregator aggregator; - public BufferAggregatorAdapter(final AggregatorFactory factory, final BufferAggregator aggregator) + BufferAggregatorAdapter(final AggregatorFactory factory, final BufferAggregator aggregator) { this.factory = factory; this.aggregator = aggregator; From 791c3c9d61f948a58490938f10dab7baf8c832db Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 10 Jul 2019 08:44:01 -0700 Subject: [PATCH 18/20] Additional comments. 
--- .../filter/vector/ReadableVectorMatch.java | 3 ++- .../QueryableIndexCursorSequenceBuilder.java | 22 +++++-------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java index 73cfade5f4e1..88cbbf3aaa07 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java @@ -33,7 +33,8 @@ public interface ReadableVectorMatch * * Even though this array is technically mutable, it is very poor form to mutate it if you are not the owner of the * VectorMatch object. The reason we use a mutable array here instead of positional getter methods, by the way, is in - * the hopes of keeping access to the selection vector as low-level and optimizable as possible. + * the hopes of keeping access to the selection vector as low-level and optimizable as possible. Potential + * optimizations could include making it easier for the JVM to use CPU-level vectorization, avoiding method calls, etc. */ int[] getSelection(); diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java index 7ec0632f2722..c1a0dc157126 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java @@ -56,7 +56,8 @@ public class QueryableIndexCursorSequenceBuilder { /** - * At this threshold, timestamp searches switch from binary to linear. See + * At this threshold, timestamp searches switch from binary to linear. This default value is chosen to be similar to + * the typical number of timestamps per block.
See * {@link #timeSearch(NumericColumn, long, int, int, int)} for more details. */ private static final int TOO_CLOSE_FOR_MISSILES = 15000; @@ -213,18 +214,13 @@ public VectorCursor buildVectorized(final int vectorSize) timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn(); closer.register(timestamps); - final int result = timeSearch( + startOffset = timeSearch( timestamps, interval.getStartMillis(), 0, index.getNumRows(), TOO_CLOSE_FOR_MISSILES ); - if (result >= 0) { - startOffset = result; - } else { - startOffset = -(result + 1); - } } else { startOffset = 0; } @@ -235,18 +231,13 @@ public VectorCursor buildVectorized(final int vectorSize) closer.register(timestamps); } - final int result = timeSearch( + endOffset = timeSearch( timestamps, interval.getEndMillis(), startOffset, index.getNumRows(), TOO_CLOSE_FOR_MISSILES ); - if (result >= 0) { - endOffset = result; - } else { - endOffset = -(result + 1); - } } else { endOffset = index.getNumRows(); } @@ -287,9 +278,8 @@ public VectorCursor buildVectorized(final int vectorSize) /** * Search the time column. Uses a binary search that switches to linear when it gets close, based on - * the value of "tooCloseForMissiles". The idea is to avoid too much decompression buffer thrashing. The - * default value {@link #TOO_CLOSE_FOR_MISSILES} is chosen to be similar to the typical number of timestamps - * per block. It is parameterizable to make unit testing easier. + * the value of "tooCloseForMissiles". The idea is to avoid thrashing between adjacent blocks (yielding excessive + * decompressions) as the search gets close to the row. A reasonable default value is {@link #TOO_CLOSE_FOR_MISSILES}. * * @param timeColumn the column * @param timestamp the timestamp to search for From 8156fd32c49aa306b0e3fc8ecdd25aae3544ac87 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 11 Jul 2019 09:27:19 -0700 Subject: [PATCH 19/20] Remove switching search. 
--- .../QueryableIndexCursorSequenceBuilder.java | 35 +++---------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java index c1a0dc157126..95bb3d3f6685 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java @@ -55,13 +55,6 @@ public class QueryableIndexCursorSequenceBuilder { - /** - * At this threshold, timestamp searches switch from binary to linear. This default value is chosen to be similar to - * the typical number of timestamps per block. See - * {@link #timeSearch(NumericColumn, long, int, int, int)} for more details. - */ - private static final int TOO_CLOSE_FOR_MISSILES = 15000; - private final QueryableIndex index; private final Interval interval; private final VirtualColumns virtualColumns; @@ -214,13 +207,7 @@ public VectorCursor buildVectorized(final int vectorSize) timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn(); closer.register(timestamps); - startOffset = timeSearch( - timestamps, - interval.getStartMillis(), - 0, - index.getNumRows(), - TOO_CLOSE_FOR_MISSILES - ); + startOffset = timeSearch(timestamps, interval.getStartMillis(), 0, index.getNumRows()); } else { startOffset = 0; } @@ -231,13 +218,7 @@ public VectorCursor buildVectorized(final int vectorSize) closer.register(timestamps); } - endOffset = timeSearch( - timestamps, - interval.getEndMillis(), - startOffset, - index.getNumRows(), - TOO_CLOSE_FOR_MISSILES - ); + endOffset = timeSearch(timestamps, interval.getEndMillis(), startOffset, index.getNumRows()); } else { endOffset = index.getNumRows(); } @@ -277,15 +258,12 @@ public VectorCursor buildVectorized(final int vectorSize) } /** - * Search the time 
column. Uses a binary search that switches to linear when it gets close, based on - * the value of "tooCloseForMissiles". The idea is to avoid thrashing between adjacent blocks (yielding excessive - * decompressions) as the search gets close to the row. A reasonable default value is {@link #TOO_CLOSE_FOR_MISSILES}. + * Search the time column using binary search. * * @param timeColumn the column * @param timestamp the timestamp to search for * @param startIndex first index to search, inclusive * @param endIndex last index to search, exclusive - * @param tooCloseForMissiles switch to linear search when we are this close to the target index * * @return first index that has a timestamp equal to, or greater, than "timestamp" */ @@ -294,8 +272,7 @@ static int timeSearch( final NumericColumn timeColumn, final long timestamp, final int startIndex, - final int endIndex, - final int tooCloseForMissiles + final int endIndex ) { final long prevTimestamp = timestamp - 1; @@ -305,10 +282,6 @@ static int timeSearch( int maxIndex = endIndex - 1; while (minIndex <= maxIndex) { - if (maxIndex - minIndex < tooCloseForMissiles) { - break; - } - final int currIndex = (minIndex + maxIndex) >>> 1; final long currValue = timeColumn.getLongSingleValueRow(currIndex); From f2e6392bc643404b67e97eb3b38dc6f0ba2aa61c Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 11 Jul 2019 10:05:08 -0700 Subject: [PATCH 20/20] Only missiles. 
--- .../QueryableIndexCursorSequenceBuilder.java | 3 +- ...eryableIndexCursorSequenceBuilderTest.java | 116 +++++++----------- 2 files changed, 49 insertions(+), 70 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java index 95bb3d3f6685..ba30649e6448 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java @@ -258,7 +258,8 @@ public VectorCursor buildVectorized(final int vectorSize) } /** - * Search the time column using binary search. + * Search the time column using binary search. Benchmarks on various other approaches (linear search, binary + * search that switches to linear at various closeness thresholds) indicated that a pure binary search worked best. * * @param timeColumn the column * @param timestamp the timestamp to search for diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilderTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilderTest.java index abb7132373c4..ea93e1b75c2e 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilderTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilderTest.java @@ -19,15 +19,12 @@ package org.apache.druid.segment; -import com.google.common.collect.ImmutableMap; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.NumericColumn; import org.apache.druid.segment.data.ReadableOffset; import org.junit.Assert; import org.junit.Test; -import java.util.Map; - public class QueryableIndexCursorSequenceBuilderTest { @Test @@ -67,73 +64,54 @@ public ColumnValueSelector makeColumnValueSelector(ReadableOffset 
offset) } }; - // Binary search only - final Map closenessThresholds = ImmutableMap.of( - "binary search only", 0, - "linear search only", Integer.MAX_VALUE, - "switching search", 3 + Assert.assertEquals( + 0, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 0, values.length) + ); + + Assert.assertEquals( + 2, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 2, values.length) + ); + + Assert.assertEquals( + 0, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 0, values.length / 2) + ); + + Assert.assertEquals( + 1, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 0, values.length) + ); + + Assert.assertEquals( + 2, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 2, values.length) ); - for (Map.Entry entry : closenessThresholds.entrySet()) { - Assert.assertEquals( - entry.getKey(), - 0, - QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 0, values.length, entry.getValue()) - ); - - Assert.assertEquals( - entry.getKey(), - 2, - QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 2, values.length, entry.getValue()) - ); - - Assert.assertEquals( - entry.getKey(), - 0, - QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 0, values.length / 2, entry.getValue()) - ); - - Assert.assertEquals( - entry.getKey(), - 1, - QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 0, values.length, entry.getValue()) - ); - - Assert.assertEquals( - entry.getKey(), - 2, - QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 2, values.length, entry.getValue()) - ); - - Assert.assertEquals( - entry.getKey(), - 1, - QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 0, values.length / 2, entry.getValue()) - ); - - Assert.assertEquals( - entry.getKey(), - 1, - QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 1, 8, entry.getValue()) - ); - - Assert.assertEquals( - entry.getKey(), - 8, - QueryableIndexCursorSequenceBuilder.timeSearch(column, 2, 0, values.length, entry.getValue()) - ); - - 
Assert.assertEquals( - entry.getKey(), - 10, - QueryableIndexCursorSequenceBuilder.timeSearch(column, 10, 0, values.length, entry.getValue()) - ); - - Assert.assertEquals( - entry.getKey(), - 11, - QueryableIndexCursorSequenceBuilder.timeSearch(column, 15, 0, values.length, entry.getValue()) - ); - } + Assert.assertEquals( + 1, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 0, values.length / 2) + ); + + Assert.assertEquals( + 1, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 1, 8) + ); + + Assert.assertEquals( + 8, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 2, 0, values.length) + ); + + Assert.assertEquals( + 10, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 10, 0, values.length) + ); + + Assert.assertEquals( + 11, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 15, 0, values.length) + ); } }