diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/query/ScanBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/query/ScanBenchmark.java index 511de6bca9ec..876aa95b6751 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/query/ScanBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/query/ScanBenchmark.java @@ -107,12 +107,15 @@ public class ScanBenchmark @Param({"200000"}) private int rowsPerSegment; - @Param({"basic.A"}) + @Param({"basic.A", "basic.B", "basic.C", "basic.D"}) private String schemaAndQuery; @Param({"1000", "99999"}) private int limit; + @Param({"NONE", "DESCENDING", "ASCENDING"}) + private static ScanQuery.Order ordering; + private static final Logger log = new Logger(ScanBenchmark.class); private static final ObjectMapper JSON_MAPPER; private static final IndexMergerV9 INDEX_MERGER_V9; @@ -178,7 +181,8 @@ private static Druids.ScanQueryBuilder basicA(final BenchmarkSchemaInfo basicSch return Druids.newScanQueryBuilder() .dataSource("blah") - .intervals(intervalSpec); + .intervals(intervalSpec) + .order(ordering); } private static Druids.ScanQueryBuilder basicB(final BenchmarkSchemaInfo basicSchema) @@ -197,7 +201,9 @@ private static Druids.ScanQueryBuilder basicB(final BenchmarkSchemaInfo basicSch return Druids.newScanQueryBuilder() .filters(filter) - .intervals(intervalSpec); + .dataSource("blah") + .intervals(intervalSpec) + .order(ordering); } private static Druids.ScanQueryBuilder basicC(final BenchmarkSchemaInfo basicSchema) @@ -207,8 +213,10 @@ private static Druids.ScanQueryBuilder basicC(final BenchmarkSchemaInfo basicSch final String dimName = "dimUniform"; return Druids.newScanQueryBuilder() - .filters(new SelectorDimFilter(dimName, "3", StrlenExtractionFn.instance())) - .intervals(intervalSpec); + .filters(new SelectorDimFilter(dimName, "3", StrlenExtractionFn.instance())) + .intervals(intervalSpec) + .dataSource("blah") + .order(ordering); } private static Druids.ScanQueryBuilder basicD(final BenchmarkSchemaInfo basicSchema) @@ -220,8 +228,10 @@ private static Druids.ScanQueryBuilder basicD(final BenchmarkSchemaInfo basicSch final String dimName = "dimUniform"; return Druids.newScanQueryBuilder() - .filters(new BoundDimFilter(dimName, "100", "10000", true, true, true, null, null)) - .intervals(intervalSpec); + .filters(new BoundDimFilter(dimName, "100", "10000", true, true, true, null, null)) + .intervals(intervalSpec) + .dataSource("blah") + .order(ordering); } @Setup @@ -289,7 +299,8 @@ public void setup() throws IOException config, DefaultGenericQueryMetricsFactory.instance() ), - new ScanQueryEngine() + new ScanQueryEngine(), + new ScanQueryConfig() ); } diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/LimitedSequence.java b/core/src/main/java/org/apache/druid/java/util/common/guava/LimitedSequence.java index 9f76e46c8d31..784161dd0edc 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/guava/LimitedSequence.java +++ b/core/src/main/java/org/apache/druid/java/util/common/guava/LimitedSequence.java @@ -30,11 +30,11 @@ final class LimitedSequence extends YieldingSequenceBase { private final Sequence baseSequence; - private final int limit; + private final long limit; LimitedSequence( Sequence baseSequence, - int limit + long limit ) { Preconditions.checkNotNull(baseSequence); @@ -106,7 +106,7 @@ public void close() throws IOException private class LimitedYieldingAccumulator extends DelegatingYieldingAccumulator { - int count; + long count; boolean 
interruptYield = false; LimitedYieldingAccumulator(YieldingAccumulator accumulator) diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/MergeSequence.java b/core/src/main/java/org/apache/druid/java/util/common/guava/MergeSequence.java index 3624969a045b..912775cba8ed 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/guava/MergeSequence.java +++ b/core/src/main/java/org/apache/druid/java/util/common/guava/MergeSequence.java @@ -27,6 +27,7 @@ import java.util.PriorityQueue; /** + * Used to perform an n-way merge on n ordered sequences */ public class MergeSequence extends YieldingSequenceBase { @@ -42,20 +43,18 @@ public MergeSequence( this.baseSequences = (Sequence>) baseSequences; } + /* + Note: the yielder for MergeSequence returns elements from the priority queue in order of increasing priority. + This is due to the fact that PriorityQueue#remove() polls from the head of the queue which is, according to + the PriorityQueue javadoc, "the least element with respect to the specified ordering" + */ @Override public Yielder toYielder(OutType initValue, YieldingAccumulator accumulator) { PriorityQueue> pQueue = new PriorityQueue<>( 32, ordering.onResultOf( - new Function, T>() - { - @Override - public T apply(Yielder input) - { - return input.get(); - } - } + (Function, T>) input -> input.get() ) ); diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java b/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java index c5eded22d0f5..555c768df68a 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java +++ b/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java @@ -54,13 +54,13 @@ public interface Sequence OutType accumulate(OutType initValue, Accumulator accumulator); /** - * Return an Yielder for accumulated sequence. + * Return a Yielder for accumulated sequence. * * @param initValue the initial value to pass along to start the accumulation. * @param accumulator the accumulator which is responsible for accumulating input values. * @param the type of accumulated value. * - * @return an Yielder for accumulated sequence. + * @return a Yielder for accumulated sequence. * * @see Yielder */ @@ -72,6 +72,8 @@ default Sequence map(Function mapper) } /** + * This will materialize the entire sequence. Use at your own risk. + * * Several benchmarks rely on this method to eagerly accumulate Sequences to ArrayLists. e.g. * GroupByBenchmark. */ @@ -80,7 +82,7 @@ default List toList() return accumulate(new ArrayList<>(), Accumulators.list()); } - default Sequence limit(int limit) + default Sequence limit(long limit) { return new LimitedSequence<>(this, limit); } diff --git a/docs/content/querying/scan-query.md b/docs/content/querying/scan-query.md index 462d14fde90b..ee44681b0253 100644 --- a/docs/content/querying/scan-query.md +++ b/docs/content/querying/scan-query.md @@ -24,7 +24,16 @@ title: "Scan query" # Scan query -Scan query returns raw Druid rows in streaming mode. +The Scan query returns raw Druid rows in streaming mode. The biggest difference between the Select query and the Scan +query is that the Scan query does not retain all the returned rows in memory before they are returned to the client. +The Select query _will_ retain the rows in memory, causing memory pressure if too many rows are returned. +The Scan query can return all the rows without issuing another pagination query. 
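For JVM-embedded callers such as the tests and benchmarks in this patch, an equivalent query can also be assembled programmatically. The sketch below (not part of the patch itself) uses the `Druids.ScanQueryBuilder` setters this patch adds or retypes (`order()`, `resultFormat()`); the class name `ScanQueryBuilderExample` is hypothetical, and the datasource and interval are placeholders mirroring the JSON example further down:

```java
import java.util.Collections;

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.Druids;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;

public class ScanQueryBuilderExample
{
  public static ScanQuery buildExampleQuery()
  {
    // Mirrors the JSON example below, plus the order() setter introduced by this patch.
    return Druids.newScanQueryBuilder()
                 .dataSource("wikipedia")
                 .intervals(new MultipleIntervalSegmentSpec(
                     Collections.singletonList(Intervals.of("2013-01-01/2013-01-02"))))
                 .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_LIST)
                 .batchSize(20480)
                 .limit(3)
                 .order(ScanQuery.Order.ASCENDING)
                 .build();
  }
}
```

Setting `order` to ascending here assumes the limit stays under `druid.query.scan.maxRowsQueuedForOrdering`, per the parameter table below.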
+
+In addition to straightforward usage where a Scan query is issued to the Broker, the Scan query can also be issued
+directly to Historical processes or streaming ingestion tasks. This can be useful if you want to retrieve large
+amounts of data in parallel.
+
+An example Scan query object is shown below:
 
 ```json
  {
    "queryType": "scan",
    "dataSource": "wikipedia",
    "resultFormat": "list",
    "columns":[],
    "intervals": [
      "2013-01-01/2013-01-02"
    ],
    "batchSize":20480,
-   "limit":5
+   "limit":3
  }
 ```
 
-There are several main parts to a scan query:
+The following are the main parameters for Scan queries:
 
 |property|description|required?|
 |--------|-----------|---------|
 |queryType|This String should always be "scan"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
 |dataSource|A String or Object defining the data source to query, very similar to a table in a relational database. See [DataSource](../querying/datasource.html) for more information.|yes|
 |intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
-|resultFormat|How result represented, list or compactedList or valueVector. Currently only `list` and `compactedList` are supported. Default is `list`|no|
+|resultFormat|How the results are represented: list, compactedList or valueVector. Currently only `list` and `compactedList` are supported. Default is `list`|no|
 |filter|See [Filters](../querying/filters.html)|no|
 |columns|A String array of dimensions and metrics to scan. If left empty, all dimensions and metrics are returned.|no|
 |batchSize|How many rows buffered before return to client. Default is `20480`|no|
 |limit|How many rows to return. If not specified, all rows will be returned.|no|
+|order|The ordering of returned rows based on timestamp. "ascending", "descending", and "none" (default) are supported. Currently, "ascending" and "descending" are only supported for queries where the limit is less than `druid.query.scan.maxRowsQueuedForOrdering`. Scan queries that are either in legacy mode or have a limit greater than `druid.query.scan.maxRowsQueuedForOrdering` will not be time-ordered and default to an order of "none".|no|
 |legacy|Return results consistent with the legacy "scan-query" contrib extension. Defaults to the value set by `druid.query.scan.legacy`, which in turn defaults to false.
See [Legacy mode](#legacy-mode) for details.|no| |context|An additional JSON Object which can be used to specify certain flags.|no| ## Example results -The format of the result when resultFormat equals to `list`: +The format of the result when resultFormat equals `list`: ```json [{ @@ -123,41 +133,11 @@ The format of the result when resultFormat equals to `list`: "delta" : 77.0, "variation" : 77.0, "deleted" : 0.0 - }, { - "timestamp" : "2013-01-01T00:00:00.000Z", - "robot" : "0", - "namespace" : "article", - "anonymous" : "0", - "unpatrolled" : "0", - "page" : "113_U.S._73", - "language" : "en", - "newpage" : "1", - "user" : "MZMcBride", - "count" : 1.0, - "added" : 70.0, - "delta" : 70.0, - "variation" : 70.0, - "deleted" : 0.0 - }, { - "timestamp" : "2013-01-01T00:00:00.000Z", - "robot" : "0", - "namespace" : "article", - "anonymous" : "0", - "unpatrolled" : "0", - "page" : "113_U.S._756", - "language" : "en", - "newpage" : "1", - "user" : "MZMcBride", - "count" : 1.0, - "added" : 68.0, - "delta" : 68.0, - "variation" : 68.0, - "deleted" : 0.0 } ] } ] ``` -The format of the result when resultFormat equals to `compactedList`: +The format of the result when resultFormat equals `compactedList`: ```json [{ @@ -168,18 +148,39 @@ The format of the result when resultFormat equals to `compactedList`: "events" : [ ["2013-01-01T00:00:00.000Z", "1", "article", "0", "0", "11._korpus_(NOVJ)", "sl", "0", "EmausBot", 1.0, 39.0, 39.0, 39.0, 0.0], ["2013-01-01T00:00:00.000Z", "0", "article", "0", "0", "112_U.S._580", "en", "1", "MZMcBride", 1.0, 70.0, 70.0, 70.0, 0.0], - ["2013-01-01T00:00:00.000Z", "0", "article", "0", "0", "113_U.S._243", "en", "1", "MZMcBride", 1.0, 77.0, 77.0, 77.0, 0.0], - ["2013-01-01T00:00:00.000Z", "0", "article", "0", "0", "113_U.S._73", "en", "1", "MZMcBride", 1.0, 70.0, 70.0, 70.0, 0.0], - ["2013-01-01T00:00:00.000Z", "0", "article", "0", "0", "113_U.S._756", "en", "1", "MZMcBride", 1.0, 68.0, 68.0, 68.0, 0.0] + ["2013-01-01T00:00:00.000Z", "0", "article", "0", "0", "113_U.S._243", "en", "1", "MZMcBride", 1.0, 77.0, 77.0, 77.0, 0.0] ] } ] ``` -The biggest difference between select query and scan query is that, scan query doesn't retain all rows in memory before rows can be returned to client. -It will cause memory pressure if too many rows required by select query. -Scan query doesn't have this issue. -Scan query can return all rows without issuing another pagination query, which is extremely useful when query against Historical or realtime process directly. - +## Time Ordering + +The Scan query currently supports ordering based on timestamp for non-legacy queries. Note that using time ordering +will yield results that do not indicate which segment rows are from (`segmentId` will show up as `null`). Furthermore, +time ordering is only supported where the result set limit is less than `druid.query.scan.maxRowsQueuedForOrdering` +rows **or** all segments scanned have fewer than `druid.query.scan.maxSegmentPartitionsOrderedInMemory` partitions. Also, +time ordering is not supported for queries issued directly to historicals unless a list of segments is specified. The +reasoning behind these limitations is that the implementation of time ordering uses two strategies that can consume too +much heap memory if left unbounded. These strategies (listed below) are chosen on a per-Historical basis depending on +query result set limit and the number of segments being scanned. + +1. Priority Queue: Each segment on a Historical is opened sequentially. 
Every row is added to a bounded priority
+queue which is ordered by timestamp. For every row above the result set limit, the row with the earliest (if descending)
+or latest (if ascending) timestamp will be dequeued. After every row has been processed, the sorted contents of the
+priority queue are streamed back to the Broker(s) in batches. Attempting to load too many rows into memory runs the
+risk of Historical nodes running out of memory. The `druid.query.scan.maxRowsQueuedForOrdering` property protects
+from this by limiting the number of rows in the query result set when time ordering is used. A standalone sketch of
+this bounded-queue idea is shown below, after the [Legacy mode](#legacy-mode) section.
+
+2. N-Way Merge: For each segment, each partition is opened in parallel. Since each partition's rows are already
+time-ordered, an n-way merge can be performed on the results from each partition. Unlike the Priority Queue strategy,
+this approach does not persist the entire result set in memory; it streams back batches as they are returned from the
+merge function. However, attempting to query too many partitions could also result in high memory usage due to the need
+to open decompression and decoding buffers for each. The `druid.query.scan.maxSegmentPartitionsOrderedInMemory` limit
+protects from this by capping the number of partitions opened at any time when time ordering is used.
+
+Both `druid.query.scan.maxRowsQueuedForOrdering` and `druid.query.scan.maxSegmentPartitionsOrderedInMemory` are
+configurable and can be tuned based on hardware specs and the number of dimensions being queried.
+
 ## Legacy mode
 
 The Scan query supports a legacy mode designed for protocol compatibility with the former scan-query contrib extension.
@@ -194,3 +195,11 @@
 Legacy mode can be triggered either by passing `"legacy" : true` in your query JSON, or by setting
 `druid.query.scan.legacy = true` on your Druid processes. If you were previously using the scan-query contrib extension,
 the best way to migrate is to activate legacy mode during a rolling upgrade, then switch it off after the upgrade is
 complete.
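As referenced in the Time Ordering section above, here is a minimal, self-contained sketch of the bounded priority-queue strategy for a descending query. It is an illustration only: plain `long` timestamps stand in for ScanResultValues, and `latestN` is a hypothetical helper, not the patch's `priorityQueueSortAndLimit` method.

```java
import java.util.ArrayDeque;
import java.util.Comparator;
import java.util.Deque;
import java.util.PriorityQueue;

public class BoundedTimeOrderingSketch
{
  /**
   * Keeps only the `limit` latest timestamps, returned in descending order. The queue's
   * head is the earliest timestamp retained so far, so once the queue grows past `limit`,
   * poll() evicts the one row that can no longer appear in the result.
   */
  public static Deque<Long> latestN(Iterable<Long> timestamps, int limit)
  {
    PriorityQueue<Long> queue = new PriorityQueue<>(limit, Comparator.<Long>naturalOrder());
    for (long timestamp : timestamps) {
      queue.offer(timestamp);
      if (queue.size() > limit) {
        queue.poll();
      }
    }
    // poll() drains earliest-first; addFirst() reverses that into descending order,
    // the same trick the patch uses when draining its queue into a Deque.
    Deque<Long> sorted = new ArrayDeque<>(queue.size());
    while (!queue.isEmpty()) {
      sorted.addFirst(queue.poll());
    }
    return sorted;
  }
}
```

With input timestamps 5, 1, 9, 3 and `limit` = 2, `latestN` returns [9, 5]. An ascending query would flip the comparator so that the latest retained row is evicted instead, matching the eviction rule described in strategy 1.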
+ +## Configuration Properties + +|property|description|values|default| +|--------|-----------|------|-------| +|druid.query.scan.maxRowsQueuedForOrdering|The maximum number of rows returned when time ordering is used|An integer in [0, 2147483647]|100000| +|druid.query.scan.maxSegmentPartitionsOrderedInMemory|The maximum number of segments scanned per historical when time ordering is used|An integer in [0, 2147483647]|50| +|druid.query.scan.legacy|Whether legacy mode should be turned on for Scan queries|true or false|false| \ No newline at end of file diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java index 24573da46a29..6b9a20365b55 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -2586,7 +2586,8 @@ public void registerQuery(Query query, ListenableFuture future) new ScanQueryConfig(), new DefaultGenericQueryMetricsFactory(TestHelper.makeJsonMapper()) ), - new ScanQueryEngine() + new ScanQueryEngine(), + new ScanQueryConfig() ) ) .build() diff --git a/processing/src/main/java/org/apache/druid/query/Druids.java b/processing/src/main/java/org/apache/druid/query/Druids.java index 6ef7f93afb61..47e3ede9a339 100644 --- a/processing/src/main/java/org/apache/druid/query/Druids.java +++ b/processing/src/main/java/org/apache/druid/query/Druids.java @@ -918,12 +918,13 @@ public static class ScanQueryBuilder private QuerySegmentSpec querySegmentSpec; private VirtualColumns virtualColumns; private Map context; - private String resultFormat; + private ScanQuery.ResultFormat resultFormat; private int batchSize; private long limit; private DimFilter dimFilter; private List columns; private Boolean legacy; + private ScanQuery.Order order; public ScanQueryBuilder() { @@ -937,6 +938,7 @@ public ScanQueryBuilder() dimFilter = null; columns = new ArrayList<>(); legacy = null; + order = null; } public ScanQuery build() @@ -948,6 +950,7 @@ public ScanQuery build() resultFormat, batchSize, limit, + order, dimFilter, columns, legacy, @@ -967,7 +970,8 @@ public static ScanQueryBuilder copy(ScanQuery query) .filters(query.getFilter()) .columns(query.getColumns()) .legacy(query.isLegacy()) - .context(query.getContext()); + .context(query.getContext()) + .order(query.getOrder()); } public ScanQueryBuilder dataSource(String ds) @@ -1005,7 +1009,7 @@ public ScanQueryBuilder context(Map c) return this; } - public ScanQueryBuilder resultFormat(String r) + public ScanQueryBuilder resultFormat(ScanQuery.ResultFormat r) { resultFormat = r; return this; @@ -1046,6 +1050,12 @@ public ScanQueryBuilder legacy(Boolean legacy) this.legacy = legacy; return this; } + + public ScanQueryBuilder order(ScanQuery.Order order) + { + this.order = order; + return this; + } } public static ScanQueryBuilder newScanQueryBuilder() diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanQuery.java b/processing/src/main/java/org/apache/druid/query/scan/ScanQuery.java index c6d1583b59c2..d68f2c82ba1b 100644 --- a/processing/src/main/java/org/apache/druid/query/scan/ScanQuery.java +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanQuery.java @@ -21,7 +21,11 @@ import com.fasterxml.jackson.annotation.JsonCreator; import 
com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonValue; import com.google.common.base.Preconditions; +import com.google.common.collect.Ordering; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.DataSource; import org.apache.druid.query.Druids; @@ -36,26 +40,88 @@ public class ScanQuery extends BaseQuery { - public static final String RESULT_FORMAT_LIST = "list"; - public static final String RESULT_FORMAT_COMPACTED_LIST = "compactedList"; - public static final String RESULT_FORMAT_VALUE_VECTOR = "valueVector"; + public enum ResultFormat + { + RESULT_FORMAT_LIST, + RESULT_FORMAT_COMPACTED_LIST, + RESULT_FORMAT_VALUE_VECTOR; + + @JsonValue + @Override + public String toString() + { + switch (this) { + case RESULT_FORMAT_LIST: + return "list"; + case RESULT_FORMAT_COMPACTED_LIST: + return "compactedList"; + case RESULT_FORMAT_VALUE_VECTOR: + return "valueVector"; + default: + return ""; + } + } + + @JsonCreator + public static ResultFormat fromString(String name) + { + switch (name) { + case "compactedList": + return RESULT_FORMAT_COMPACTED_LIST; + case "valueVector": + return RESULT_FORMAT_VALUE_VECTOR; + case "list": + return RESULT_FORMAT_LIST; + default: + throw new UOE("Scan query result format [%s] is not supported.", name); + } + } + } + + public enum Order + { + ASCENDING, + DESCENDING, + NONE; + + @JsonValue + @Override + public String toString() + { + return StringUtils.toLowerCase(this.name()); + } + + @JsonCreator + public static Order fromString(String name) + { + return valueOf(StringUtils.toUpperCase(name)); + } + } + + /** + * This context flag corresponds to whether the query is running on the "outermost" process (i.e. the process + * the query is sent to). + */ + public static final String CTX_KEY_OUTERMOST = "scanOutermost"; private final VirtualColumns virtualColumns; - private final String resultFormat; + private final ResultFormat resultFormat; private final int batchSize; private final long limit; private final DimFilter dimFilter; private final List columns; private final Boolean legacy; + private final Order order; @JsonCreator public ScanQuery( @JsonProperty("dataSource") DataSource dataSource, @JsonProperty("intervals") QuerySegmentSpec querySegmentSpec, @JsonProperty("virtualColumns") VirtualColumns virtualColumns, - @JsonProperty("resultFormat") String resultFormat, + @JsonProperty("resultFormat") ResultFormat resultFormat, @JsonProperty("batchSize") int batchSize, @JsonProperty("limit") long limit, + @JsonProperty("order") Order order, @JsonProperty("filter") DimFilter dimFilter, @JsonProperty("columns") List columns, @JsonProperty("legacy") Boolean legacy, @@ -64,7 +130,7 @@ public ScanQuery( { super(dataSource, querySegmentSpec, false, context); this.virtualColumns = VirtualColumns.nullToEmpty(virtualColumns); - this.resultFormat = resultFormat == null ? RESULT_FORMAT_LIST : resultFormat; + this.resultFormat = (resultFormat == null) ? ResultFormat.RESULT_FORMAT_LIST : resultFormat; this.batchSize = (batchSize == 0) ? 4096 * 5 : batchSize; this.limit = (limit == 0) ? Long.MAX_VALUE : limit; Preconditions.checkArgument(this.batchSize > 0, "batchSize must be greater than 0"); @@ -72,6 +138,7 @@ public ScanQuery( this.dimFilter = dimFilter; this.columns = columns; this.legacy = legacy; + this.order = order == null ? 
Order.NONE : order; } @JsonProperty @@ -81,7 +148,7 @@ public VirtualColumns getVirtualColumns() } @JsonProperty - public String getResultFormat() + public ResultFormat getResultFormat() { return resultFormat; } @@ -98,6 +165,12 @@ public long getLimit() return limit; } + @JsonProperty + public Order getOrder() + { + return order; + } + @Override public boolean hasFilters() { @@ -132,6 +205,12 @@ public Boolean isLegacy() return legacy; } + @Override + public Ordering getResultOrdering() + { + return Ordering.from(new ScanResultValueTimestampComparator(this)).reverse(); + } + public ScanQuery withNonNullLegacy(final ScanQueryConfig scanQueryConfig) { return Druids.ScanQueryBuilder.copy(this).legacy(legacy != null ? legacy : scanQueryConfig.isLegacy()).build(); diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryConfig.java b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryConfig.java index 272bddbeaecc..8121f47b48ac 100644 --- a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryConfig.java +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryConfig.java @@ -40,6 +40,22 @@ public ScanQueryConfig setLegacy(final boolean legacy) return this; } + @JsonProperty + private int maxRowsQueuedForOrdering = 100000; + + public int getMaxRowsQueuedForOrdering() + { + return maxRowsQueuedForOrdering; + } + + @JsonProperty + private int maxSegmentPartitionsOrderedInMemory = 50; + + public int getMaxSegmentPartitionsOrderedInMemory() + { + return maxSegmentPartitionsOrderedInMemory; + } + @Override public boolean equals(final Object o) { diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryEngine.java b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryEngine.java index ba98ad956cf3..2a5edbd14d34 100644 --- a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryEngine.java @@ -68,7 +68,7 @@ public Sequence process( if (responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) != null) { long count = (long) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT); - if (count >= query.getLimit()) { + if (count >= query.getLimit() && query.getOrder().equals(ScanQuery.Order.NONE)) { return Sequences.empty(); } } @@ -123,7 +123,7 @@ public Sequence process( if (responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) == null) { responseContext.put(ScanQueryRunnerFactory.CTX_COUNT, 0L); } - final long limit = query.getLimit() - (long) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT); + final long limit = calculateLimit(query, responseContext); return Sequences.concat( adapter .makeCursors( @@ -131,7 +131,8 @@ public Sequence process( intervals.get(0), query.getVirtualColumns(), Granularities.ALL, - query.isDescending(), + query.getOrder().equals(ScanQuery.Order.DESCENDING) || + (query.getOrder().equals(ScanQuery.Order.NONE) && query.isDescending()), null ) .map(cursor -> new BaseSequence<>( @@ -177,13 +178,13 @@ public ScanResultValue next() } final long lastOffset = offset; final Object events; - final String resultFormat = query.getResultFormat(); - if (ScanQuery.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) { + final ScanQuery.ResultFormat resultFormat = query.getResultFormat(); + if (ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) { events = rowsToCompactedList(); - } else if (ScanQuery.RESULT_FORMAT_LIST.equals(resultFormat)) { + } else if 
(ScanQuery.ResultFormat.RESULT_FORMAT_LIST.equals(resultFormat)) { events = rowsToList(); } else { - throw new UOE("resultFormat[%s] is not supported", resultFormat); + throw new UOE("resultFormat[%s] is not supported", resultFormat.toString()); } responseContext.put( ScanQueryRunnerFactory.CTX_COUNT, @@ -256,4 +257,16 @@ public void cleanup(Iterator iterFromMake) )) ); } + + /** + * If we're performing time-ordering, we want to scan through the first `limit` rows in each segment ignoring the number + * of rows already counted on other segments. + */ + private long calculateLimit(ScanQuery query, Map responseContext) + { + if (query.getOrder().equals(ScanQuery.Order.NONE)) { + return query.getLimit() - (long) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT); + } + return query.getLimit(); + } } diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryLimitRowIterator.java b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryLimitRowIterator.java index 3f56054b5f49..f246d7c5fcf3 100644 --- a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryLimitRowIterator.java +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryLimitRowIterator.java @@ -19,23 +19,43 @@ package org.apache.druid.query.scan; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Yielder; import org.apache.druid.java.util.common.guava.YieldingAccumulator; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.query.Query; import org.apache.druid.query.QueryPlus; import org.apache.druid.query.QueryRunner; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Map; +/** + * This iterator supports iteration through a Sequence returned by a ScanResultValue QueryRunner. Its behaviour + * varies depending on whether the query is returning time-ordered values and whether the CTX_KEY_OUTERMOST flag is + * set as false. + * + * Behaviours: + * 1) No time ordering: expects a Sequence of ScanResultValues which each contain up to query.batchSize events. + * The iterator will be "done" when the limit of events is reached. The final ScanResultValue might contain + * fewer than batchSize events so that the limit number of events is returned. + * 2) Time Ordering, CTX_KEY_OUTERMOST false: Same behaviour as no time ordering + * 3) Time Ordering, CTX_KEY_OUTERMOST=true or null: The Sequence processed in this case should contain ScanResultValues + * that contain only one event each for the CachingClusteredClient n-way merge. This iterator will perform + * batching according to query batch size until the limit is reached. 
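+ *
+ * For example, in case 3 with batchSize = 3 and limit = 7, a stream of ten single-event ScanResultValues is
+ * regrouped into batches of 3, 3, and 1 events, at which point the limit is reached and iteration stops.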
+ */ public class ScanQueryLimitRowIterator implements CloseableIterator { private Yielder yielder; - private String resultFormat; + private ScanQuery.ResultFormat resultFormat; private long limit; private long count = 0; + private ScanQuery query; public ScanQueryLimitRowIterator( QueryRunner baseRunner, @@ -43,11 +63,13 @@ public ScanQueryLimitRowIterator( Map responseContext ) { - ScanQuery query = (ScanQuery) queryPlus.getQuery(); - resultFormat = query.getResultFormat(); - limit = query.getLimit(); - Sequence baseSequence = baseRunner.run(queryPlus, responseContext); - yielder = baseSequence.toYielder( + this.query = (ScanQuery) queryPlus.getQuery(); + this.resultFormat = query.getResultFormat(); + this.limit = query.getLimit(); + Query historicalQuery = + queryPlus.getQuery().withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false)); + Sequence baseSequence = baseRunner.run(QueryPlus.wrap(historicalQuery), responseContext); + this.yielder = baseSequence.toYielder( null, new YieldingAccumulator() { @@ -70,9 +92,15 @@ public boolean hasNext() @Override public ScanResultValue next() { - ScanResultValue batch = yielder.get(); - if (ScanQuery.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat) || - ScanQuery.RESULT_FORMAT_LIST.equals(resultFormat)) { + if (ScanQuery.ResultFormat.RESULT_FORMAT_VALUE_VECTOR.equals(resultFormat)) { + throw new UOE(ScanQuery.ResultFormat.RESULT_FORMAT_VALUE_VECTOR + " is not supported yet"); + } + + // We want to perform multi-event ScanResultValue limiting if we are not time-ordering or are at the + // inner-level if we are time-ordering + if (query.getOrder() == ScanQuery.Order.NONE || + !query.getContextBoolean(ScanQuery.CTX_KEY_OUTERMOST, true)) { + ScanResultValue batch = yielder.get(); List events = (List) batch.getEvents(); if (events.size() <= limit - count) { count += events.size(); @@ -81,12 +109,26 @@ public ScanResultValue next() } else { // last batch // single batch length is <= Integer.MAX_VALUE, so this should not overflow - int left = (int) (limit - count); + int numLeft = (int) (limit - count); count = limit; - return new ScanResultValue(batch.getSegmentId(), batch.getColumns(), events.subList(0, left)); + return new ScanResultValue(batch.getSegmentId(), batch.getColumns(), events.subList(0, numLeft)); + } + } else { + // Perform single-event ScanResultValue batching at the outer level. Each scan result value from the yielder + // in this case will only have one event so there's no need to iterate through events. + int batchSize = query.getBatchSize(); + List eventsToAdd = new ArrayList<>(batchSize); + List columns = new ArrayList<>(); + while (eventsToAdd.size() < batchSize && !yielder.isDone() && count < limit) { + ScanResultValue srv = yielder.get(); + // Only replace once using the columns from the first event + columns = columns.isEmpty() ? 
srv.getColumns() : columns; + eventsToAdd.add(Iterables.getOnlyElement((List) srv.getEvents())); + yielder = yielder.next(null); + count++; } + return new ScanResultValue(null, columns, eventsToAdd); } - throw new UnsupportedOperationException(ScanQuery.RESULT_FORMAT_VALUE_VECTOR + " is not supported yet"); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java index b031a84a82c5..6d6758b19260 100644 --- a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java @@ -25,7 +25,6 @@ import com.google.inject.Inject; import org.apache.druid.java.util.common.guava.BaseSequence; import org.apache.druid.java.util.common.guava.CloseQuietly; -import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.GenericQueryMetricsFactory; import org.apache.druid.query.Query; import org.apache.druid.query.QueryMetrics; @@ -34,8 +33,6 @@ import org.apache.druid.query.QueryToolChest; import org.apache.druid.query.aggregation.MetricManipulationFn; -import java.util.Map; - public class ScanQueryQueryToolChest extends QueryToolChest { private static final TypeReference TYPE_REFERENCE = new TypeReference() @@ -58,38 +55,30 @@ public ScanQueryQueryToolChest( @Override public QueryRunner mergeResults(final QueryRunner runner) { - return new QueryRunner() - { - @Override - public Sequence run( - final QueryPlus queryPlus, final Map responseContext - ) - { - // Ensure "legacy" is a non-null value, such that all other nodes this query is forwarded to will treat it - // the same way, even if they have different default legacy values. - final ScanQuery scanQuery = ((ScanQuery) queryPlus.getQuery()).withNonNullLegacy(scanQueryConfig); - final QueryPlus queryPlusWithNonNullLegacy = queryPlus.withQuery(scanQuery); - - if (scanQuery.getLimit() == Long.MAX_VALUE) { - return runner.run(queryPlusWithNonNullLegacy, responseContext); - } - return new BaseSequence<>( - new BaseSequence.IteratorMaker() + return (queryPlus, responseContext) -> { + // Ensure "legacy" is a non-null value, such that all other nodes this query is forwarded to will treat it + // the same way, even if they have different default legacy values. 
+ final ScanQuery scanQuery = ((ScanQuery) (queryPlus.getQuery())) + .withNonNullLegacy(scanQueryConfig); + final QueryPlus queryPlusWithNonNullLegacy = queryPlus.withQuery(scanQuery); + if (scanQuery.getLimit() == Long.MAX_VALUE) { + return runner.run(queryPlusWithNonNullLegacy, responseContext); + } + return new BaseSequence<>( + new BaseSequence.IteratorMaker() + { + @Override + public ScanQueryLimitRowIterator make() { - @Override - public ScanQueryLimitRowIterator make() - { - return new ScanQueryLimitRowIterator(runner, queryPlusWithNonNullLegacy, responseContext); - } + return new ScanQueryLimitRowIterator(runner, queryPlusWithNonNullLegacy, responseContext); + } - @Override - public void cleanup(ScanQueryLimitRowIterator iterFromMake) - { - CloseQuietly.close(iterFromMake); - } + @Override + public void cleanup(ScanQueryLimitRowIterator iterFromMake) + { + CloseQuietly.close(iterFromMake); } - ); - } + }); }; } @@ -117,18 +106,13 @@ public TypeReference getResultTypeReference() @Override public QueryRunner preMergeQueryDecoration(final QueryRunner runner) { - return new QueryRunner() - { - @Override - public Sequence run(QueryPlus queryPlus, Map responseContext) - { - ScanQuery scanQuery = (ScanQuery) queryPlus.getQuery(); - if (scanQuery.getFilter() != null) { - scanQuery = scanQuery.withDimFilter(scanQuery.getFilter().optimize()); - queryPlus = queryPlus.withQuery(scanQuery); - } - return runner.run(queryPlus, responseContext); + return (queryPlus, responseContext) -> { + ScanQuery scanQuery = (ScanQuery) queryPlus.getQuery(); + if (scanQuery.getFilter() != null) { + scanQuery = scanQuery.withDimFilter(scanQuery.getFilter().optimize()); + queryPlus = queryPlus.withQuery(scanQuery); } + return runner.run(queryPlus, responseContext); }; } } diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryRunnerFactory.java b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryRunnerFactory.java index 73d9f680239c..41729f786386 100644 --- a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryRunnerFactory.java +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryRunnerFactory.java @@ -19,22 +19,40 @@ package org.apache.druid.query.scan; -import com.google.common.base.Function; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Ordering; import com.google.inject.Inject; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.JodaUtils; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; +import org.apache.druid.java.util.common.guava.Yielder; +import org.apache.druid.java.util.common.guava.YieldingAccumulator; import org.apache.druid.query.Query; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryPlus; import org.apache.druid.query.QueryRunner; import org.apache.druid.query.QueryRunnerFactory; import org.apache.druid.query.QueryToolChest; +import org.apache.druid.query.SegmentDescriptor; +import org.apache.druid.query.spec.MultipleSpecificSegmentSpec; import org.apache.druid.segment.Segment; +import org.joda.time.Interval; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Deque; +import java.util.LinkedHashMap; +import java.util.List; import 
java.util.Map; +import java.util.PriorityQueue; import java.util.concurrent.ExecutorService; +import java.util.stream.Collectors; public class ScanQueryRunnerFactory implements QueryRunnerFactory { @@ -44,15 +62,18 @@ public class ScanQueryRunnerFactory implements QueryRunnerFactory mergeRunners( ) { // in single thread and in jetty thread instead of processing thread - return new QueryRunner() - { - @Override - public Sequence run( - final QueryPlus queryPlus, final Map responseContext - ) - { - // Note: this variable is effective only when queryContext has a timeout. - // See the comment of CTX_TIMEOUT_AT. - final long timeoutAt = System.currentTimeMillis() + QueryContexts.getTimeout(queryPlus.getQuery()); - responseContext.put(CTX_TIMEOUT_AT, timeoutAt); - return Sequences.concat( + return (queryPlus, responseContext) -> { + ScanQuery query = (ScanQuery) queryPlus.getQuery(); + + // Note: this variable is effective only when queryContext has a timeout. + // See the comment of CTX_TIMEOUT_AT. + final long timeoutAt = System.currentTimeMillis() + QueryContexts.getTimeout(queryPlus.getQuery()); + responseContext.put(CTX_TIMEOUT_AT, timeoutAt); + + if (query.getOrder().equals(ScanQuery.Order.NONE)) { + // Use normal strategy + Sequence returnedRows = Sequences.concat( Sequences.map( Sequences.simple(queryRunners), - new Function, Sequence>() - { - @Override - public Sequence apply(final QueryRunner input) - { - return input.run(queryPlus, responseContext); - } - } + input -> input.run(queryPlus, responseContext) ) ); + if (query.getLimit() <= Integer.MAX_VALUE) { + return returnedRows.limit(Math.toIntExact(query.getLimit())); + } else { + return returnedRows; + } + } else { + // Query segment spec must be an instance of MultipleSpecificSegmentSpec because segment descriptors need + // to be present for a 1:1 matching of intervals with query runners. The other types of segment spec condense + // the intervals (i.e. merge neighbouring intervals), eliminating the 1:1 relationship between intervals + // and query runners. + if (!(query.getQuerySegmentSpec() instanceof MultipleSpecificSegmentSpec)) { + throw new UOE("Time-ordering on scan queries is only supported for queries with segment specs" + + "of type MultipleSpecificSegmentSpec"); + } + // Ascending time order for both descriptors and query runners by default + List descriptorsOrdered = + ((MultipleSpecificSegmentSpec) query.getQuerySegmentSpec()).getDescriptors(); + List> queryRunnersOrdered = Lists.newArrayList(queryRunners); + + if (query.getOrder().equals(ScanQuery.Order.DESCENDING)) { + descriptorsOrdered = Lists.reverse(descriptorsOrdered); + queryRunnersOrdered = Lists.reverse(queryRunnersOrdered); + } + + if (query.getLimit() <= scanQueryConfig.getMaxRowsQueuedForOrdering()) { + // Use priority queue strategy + return priorityQueueSortAndLimit( + Sequences.concat(Sequences.map( + Sequences.simple(queryRunnersOrdered), + input -> input.run(queryPlus, responseContext) + )), + query, + descriptorsOrdered + ); + } else { + Preconditions.checkState( + descriptorsOrdered.size() == queryRunnersOrdered.size(), + "Number of segment descriptors does not equal number of " + + "query runners...something went wrong!" + ); + + // Combine the two lists of segment descriptors and query runners into a single list of + // segment descriptors - query runner pairs. This makes it easier to use stream operators. 
+ List>> descriptorsAndRunnersOrdered = new ArrayList<>(); + for (int i = 0; i < queryRunnersOrdered.size(); i++) { + descriptorsAndRunnersOrdered.add(new Pair<>(descriptorsOrdered.get(i), queryRunnersOrdered.get(i))); + } + + // Group the list of pairs by interval. The LinkedHashMap will have an interval paired with a list of all the + // query runners for that segment + LinkedHashMap>>> partitionsGroupedByInterval = + descriptorsAndRunnersOrdered.stream() + .collect(Collectors.groupingBy( + x -> x.lhs.getInterval(), + LinkedHashMap::new, + Collectors.toList() + )); + + // Find the segment with the largest numbers of partitions. This will be used to compare with the + // maxSegmentPartitionsOrderedInMemory limit to determine if the query is at risk of consuming too much memory. + int maxNumPartitionsInSegment = + partitionsGroupedByInterval.values() + .stream() + .map(x -> x.size()) + .max(Comparator.comparing(Integer::valueOf)) + .get(); + + if (maxNumPartitionsInSegment <= scanQueryConfig.getMaxSegmentPartitionsOrderedInMemory()) { + // Use n-way merge strategy + + // Create a list of grouped runner lists (i.e. each sublist/"runner group" corresponds to an interval) -> + // there should be no interval overlap. We create a list of lists so we can create a sequence of sequences. + // There's no easy way to convert a LinkedHashMap to a sequence because it's non-iterable. + List>> groupedRunners = + partitionsGroupedByInterval.entrySet() + .stream() + .map(entry -> entry.getValue() + .stream() + .map(segQueryRunnerPair -> segQueryRunnerPair.rhs) + .collect(Collectors.toList())) + .collect(Collectors.toList()); + + return nWayMergeAndLimit(groupedRunners, queryPlus, responseContext); + } + throw new UOE( + "Time ordering for queries of %,d partitions per segment and a row limit of %,d is not supported." 
+ + " Try reducing the scope of the query to scan fewer partitions than the configurable limit of" + + " %,d partitions or lower the row limit below %,d.", + maxNumPartitionsInSegment, + query.getLimit(), + scanQueryConfig.getMaxSegmentPartitionsOrderedInMemory(), + scanQueryConfig.getMaxRowsQueuedForOrdering() + ); + } } }; } + @VisibleForTesting + Sequence priorityQueueSortAndLimit( + Sequence inputSequence, + ScanQuery scanQuery, + List descriptorsOrdered + ) + { + Comparator priorityQComparator = new ScanResultValueTimestampComparator(scanQuery); + + if (scanQuery.getLimit() > Integer.MAX_VALUE) { + throw new UOE( + "Limit of %,d rows not supported for priority queue strategy of time-ordering scan results", + scanQuery.getLimit() + ); + } + + // Converting the limit from long to int could theoretically throw an ArithmeticException but this branch + // only runs if limit < MAX_LIMIT_FOR_IN_MEMORY_TIME_ORDERING (which should be < Integer.MAX_VALUE) + int limit = Math.toIntExact(scanQuery.getLimit()); + + PriorityQueue q = new PriorityQueue<>(limit, priorityQComparator); + + Yielder yielder = inputSequence.toYielder( + null, + new YieldingAccumulator() + { + @Override + public ScanResultValue accumulate(ScanResultValue accumulated, ScanResultValue in) + { + yield(); + return in; + } + } + ); + boolean doneScanning = yielder.isDone(); + // We need to scan limit elements and anything else in the last segment + int numRowsScanned = 0; + Interval finalInterval = null; + while (!doneScanning) { + ScanResultValue next = yielder.get(); + List singleEventScanResultValues = next.toSingleEventScanResultValues(); + for (ScanResultValue srv : singleEventScanResultValues) { + numRowsScanned++; + // Using an intermediate unbatched ScanResultValue is not that great memory-wise, but the column list + // needs to be preserved for queries using the compactedList result format + q.offer(srv); + if (q.size() > limit) { + q.poll(); + } + + // Finish scanning the interval containing the limit row + if (numRowsScanned > limit && finalInterval == null) { + long timestampOfLimitRow = srv.getFirstEventTimestamp(scanQuery.getResultFormat()); + for (SegmentDescriptor descriptor : descriptorsOrdered) { + if (descriptor.getInterval().contains(timestampOfLimitRow)) { + finalInterval = descriptor.getInterval(); + } + } + if (finalInterval == null) { + throw new ISE("WTH??? Row came from an unscanned interval?"); + } + } + } + yielder = yielder.next(null); + doneScanning = yielder.isDone() || + (finalInterval != null && + !finalInterval.contains(next.getFirstEventTimestamp(scanQuery.getResultFormat()))); + } + // Need to convert to a Deque because Priority Queue's iterator doesn't guarantee that the sorted order + // will be maintained. Deque was chosen over list because its addFirst is O(1). + final Deque sortedElements = new ArrayDeque<>(q.size()); + while (q.size() != 0) { + // addFirst is used since PriorityQueue#poll() dequeues the low-priority (timestamp-wise) events first. 
+ sortedElements.addFirst(q.poll()); + } + return Sequences.simple(sortedElements); + } + + @VisibleForTesting + Sequence nWayMergeAndLimit( + List>> groupedRunners, + QueryPlus queryPlus, + Map responseContext + ) + { + // Starting from the innermost Sequences.map: + // (1) Deaggregate each ScanResultValue returned by the query runners + // (2) Combine the deaggregated ScanResultValues into a single sequence + // (3) Create a sequence of results from each runner in the group and flatmerge based on timestamp + // (4) Create a sequence of results from each runner group + // (5) Join all the results into a single sequence + Sequence resultSequence = + Sequences.concat( + Sequences.map( + Sequences.simple(groupedRunners), + runnerGroup -> + Sequences.map( + Sequences.simple(runnerGroup), + (input) -> Sequences.concat( + Sequences.map( + input.run(queryPlus, responseContext), + srv -> Sequences.simple(srv.toSingleEventScanResultValues()) + ) + ) + ).flatMerge( + seq -> seq, + Ordering.from(new ScanResultValueTimestampComparator( + (ScanQuery) queryPlus.getQuery() + )).reverse() + ) + ) + ); + long limit = ((ScanQuery) (queryPlus.getQuery())).getLimit(); + if (limit == Long.MAX_VALUE) { + return resultSequence; + } + return resultSequence.limit(limit); + } + @Override public QueryToolChest getToolchest() { diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanResultValue.java b/processing/src/main/java/org/apache/druid/query/scan/ScanResultValue.java index 8fc151965478..7bfcf025e1bc 100644 --- a/processing/src/main/java/org/apache/druid/query/scan/ScanResultValue.java +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanResultValue.java @@ -21,8 +21,14 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.java.util.common.UOE; +import org.apache.druid.segment.column.ColumnHolder; +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Map; public class ScanResultValue implements Comparable { @@ -40,7 +46,7 @@ public class ScanResultValue implements Comparable @JsonCreator public ScanResultValue( - @JsonProperty("segmentId") String segmentId, + @Nullable @JsonProperty("segmentId") String segmentId, @JsonProperty("columns") List columns, @JsonProperty("events") Object events ) @@ -50,6 +56,7 @@ public ScanResultValue( this.events = events; } + @Nullable @JsonProperty public String getSegmentId() { @@ -68,6 +75,29 @@ public Object getEvents() return events; } + public long getFirstEventTimestamp(ScanQuery.ResultFormat resultFormat) + { + if (resultFormat.equals(ScanQuery.ResultFormat.RESULT_FORMAT_LIST)) { + return (Long) ((Map) ((List) this.getEvents()).get(0)).get(ColumnHolder.TIME_COLUMN_NAME); + } else if (resultFormat.equals(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)) { + int timeColumnIndex = this.getColumns().indexOf(ColumnHolder.TIME_COLUMN_NAME); + List firstEvent = (List) ((List) this.getEvents()).get(0); + return (Long) firstEvent.get(timeColumnIndex); + } + throw new UOE("Unable to get first event timestamp using result format of [%s]", resultFormat.toString()); + } + + public List toSingleEventScanResultValues() + { + List singleEventScanResultValues = new ArrayList<>(); + List events = (List) this.getEvents(); + for (Object event : events) { + singleEventScanResultValues.add(new ScanResultValue(segmentId, columns, Collections.singletonList(event))); + } + return singleEventScanResultValues; 
+ } + + @Override public boolean equals(Object o) { diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanResultValueTimestampComparator.java b/processing/src/main/java/org/apache/druid/query/scan/ScanResultValueTimestampComparator.java new file mode 100644 index 000000000000..dcf3bade136f --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanResultValueTimestampComparator.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.scan; + +import com.google.common.primitives.Longs; + +import java.util.Comparator; + +/** + * This comparator class supports comparisons of ScanResultValues based on the timestamp of their first event. Since + * only the first event is looked at, this Comparator is especially useful for unbatched ScanResultValues (such as in + * {@link ScanQueryQueryToolChest#mergeResults(org.apache.druid.query.QueryRunner )}. The comparator takes a scanQuery as + * a parameter so that it knows the result format (list or compactedList) of Object ScanResultValue#events. It uses + * this result format to perform a bunch of type casts on the object to get the timestamp then compares the timestamps. 
+ */ +public class ScanResultValueTimestampComparator implements Comparator +{ + private final ScanQuery scanQuery; + + public ScanResultValueTimestampComparator(ScanQuery scanQuery) + { + this.scanQuery = scanQuery; + } + + @Override + public int compare(ScanResultValue o1, ScanResultValue o2) + { + int comparison; + comparison = Longs.compare( + o1.getFirstEventTimestamp(scanQuery.getResultFormat()), + o2.getFirstEventTimestamp(scanQuery.getResultFormat())); + if (scanQuery.getOrder().equals(ScanQuery.Order.DESCENDING)) { + return comparison; + } + return comparison * -1; + } +} diff --git a/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java b/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java index ed7591117dc2..f784a86521a1 100644 --- a/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java +++ b/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java @@ -93,7 +93,8 @@ public class DoubleStorageTest private static final ScanQueryRunnerFactory SCAN_QUERY_RUNNER_FACTORY = new ScanQueryRunnerFactory( scanQueryQueryToolChest, - new ScanQueryEngine() + new ScanQueryEngine(), + new ScanQueryConfig() ); private Druids.ScanQueryBuilder newTestQuery() diff --git a/processing/src/test/java/org/apache/druid/query/scan/MultiSegmentScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/MultiSegmentScanQueryTest.java index e138d73a1fe8..7b051b0fee52 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/MultiSegmentScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/MultiSegmentScanQueryTest.java @@ -73,7 +73,8 @@ public class MultiSegmentScanQueryTest private static final QueryRunnerFactory factory = new ScanQueryRunnerFactory( toolChest, - new ScanQueryEngine() + new ScanQueryEngine(), + new ScanQueryConfig() ); // time modified version of druid.sample.numeric.tsv diff --git a/processing/src/test/java/org/apache/druid/query/scan/ScanQueryConfigTest.java b/processing/src/test/java/org/apache/druid/query/scan/ScanQueryConfigTest.java new file mode 100644 index 000000000000..7a71c44e28c2 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/scan/ScanQueryConfigTest.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.scan; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +public class ScanQueryConfigTest +{ + private final ObjectMapper MAPPER = new DefaultObjectMapper(); + + private final ImmutableMap CONFIG_MAP = ImmutableMap + .builder() + .put("maxSegmentPartitionsOrderedInMemory", "1") + .put("maxRowsQueuedForOrdering", "1") + .put("legacy", "true") + .build(); + + private final ImmutableMap CONFIG_MAP2 = ImmutableMap + .builder() + .put("legacy", "false") + .put("maxSegmentPartitionsOrderedInMemory", "42") + .build(); + + private final ImmutableMap CONFIG_MAP_EMPTY = ImmutableMap + .builder() + .build(); + + @Test + public void testSerde() + { + final ScanQueryConfig config = MAPPER.convertValue(CONFIG_MAP, ScanQueryConfig.class); + Assert.assertEquals(1, config.getMaxRowsQueuedForOrdering()); + Assert.assertEquals(1, config.getMaxSegmentPartitionsOrderedInMemory()); + Assert.assertTrue(config.isLegacy()); + + final ScanQueryConfig config2 = MAPPER.convertValue(CONFIG_MAP2, ScanQueryConfig.class); + Assert.assertEquals(100000, config2.getMaxRowsQueuedForOrdering()); + Assert.assertEquals(42, config2.getMaxSegmentPartitionsOrderedInMemory()); + Assert.assertFalse(config2.isLegacy()); + + final ScanQueryConfig config3 = MAPPER.convertValue(CONFIG_MAP_EMPTY, ScanQueryConfig.class); + Assert.assertEquals(100000, config3.getMaxRowsQueuedForOrdering()); + Assert.assertEquals(50, config3.getMaxSegmentPartitionsOrderedInMemory()); + Assert.assertFalse(config3.isLegacy()); + } +} diff --git a/processing/src/test/java/org/apache/druid/query/scan/ScanQueryLimitRowIteratorTest.java b/processing/src/test/java/org/apache/druid/query/scan/ScanQueryLimitRowIteratorTest.java new file mode 100644 index 000000000000..f0dafbc4370c --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/scan/ScanQueryLimitRowIteratorTest.java @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.scan; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.java.util.common.guava.Sequences; +import org.apache.druid.query.Druids; +import org.apache.druid.query.QueryPlus; +import org.apache.druid.query.QueryRunnerTestHelper; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ThreadLocalRandom; + +@RunWith(Parameterized.class) +public class ScanQueryLimitRowIteratorTest +{ + private static final int NUM_ELEMENTS = 1000; + private static int batchSize; + private static int limit; + private static List singleEventScanResultValues = new ArrayList<>(); + private static List multiEventScanResultValues = new ArrayList<>(); + private static final ScanQuery.ResultFormat resultFormat = ScanQuery.ResultFormat.RESULT_FORMAT_LIST; + + public ScanQueryLimitRowIteratorTest( + final int batchSize, + final int limit + ) + { + this.batchSize = batchSize; + this.limit = limit; + } + + @Parameterized.Parameters(name = "{0} {1}") + public static Iterable constructorFeeder() + { + List batchSizes = ImmutableList.of(1, 33); + List limits = ImmutableList.of(3, 10000); + return QueryRunnerTestHelper.cartesian( + batchSizes, + limits + ); + } + + @Before + public void setup() + { + singleEventScanResultValues = new ArrayList<>(); + multiEventScanResultValues = new ArrayList<>(); + for (int i = 0; i < NUM_ELEMENTS; i++) { + singleEventScanResultValues.add( + ScanQueryTestHelper.generateScanResultValue( + ThreadLocalRandom.current().nextLong(), + resultFormat, + 1 + )); + } + for (int i = 0; i < NUM_ELEMENTS / batchSize; i++) { + multiEventScanResultValues.add( + ScanQueryTestHelper.generateScanResultValue( + ThreadLocalRandom.current().nextLong(), + resultFormat, + batchSize + )); + } + multiEventScanResultValues.add( + ScanQueryTestHelper.generateScanResultValue( + ThreadLocalRandom.current().nextLong(), + resultFormat, + NUM_ELEMENTS % batchSize + )); + } + + /** + * Expect no batching to occur and limit to be applied + */ + @Test + public void testNonOrderedScan() + { + ScanQuery query = Druids.newScanQueryBuilder() + .limit(limit) + .order(ScanQuery.Order.NONE) + .dataSource("some datasource") + .batchSize(batchSize) + .intervals(QueryRunnerTestHelper.fullOnIntervalSpec) + .resultFormat(resultFormat) + .context(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false)) + .build(); + QueryPlus queryPlus = QueryPlus.wrap(query); + ScanQueryLimitRowIterator itr = new ScanQueryLimitRowIterator( + ((queryInput, responseContext) -> Sequences.simple(multiEventScanResultValues)), + queryPlus, + ImmutableMap.of() + ); + + int count = 0; + int expectedNumRows = Math.min(limit, NUM_ELEMENTS); + + while (itr.hasNext()) { + ScanResultValue curr = itr.next(); + List> events = ScanQueryTestHelper.getEventsListResultFormat(curr); + if (events.size() != batchSize) { + if (expectedNumRows - count > batchSize) { + Assert.fail("Batch size is incorrect"); + } else { + Assert.assertEquals(expectedNumRows - count, events.size()); + } + } + count += events.size(); + } + Assert.assertEquals(expectedNumRows, count); + } + + /** + * Expect batching to occur and limit to be applied on the Broker. Input from Historical + * is a sequence of single-event ScanResultValues. 
+   */
+  @Test
+  public void testBrokerOrderedScan()
+  {
+    ScanQuery query = Druids.newScanQueryBuilder()
+                            .limit(limit)
+                            .order(ScanQuery.Order.DESCENDING)
+                            .dataSource("some datasource")
+                            .batchSize(batchSize)
+                            .intervals(QueryRunnerTestHelper.fullOnIntervalSpec)
+                            .resultFormat(resultFormat)
+                            .build();
+    QueryPlus<ScanResultValue> queryPlus = QueryPlus.wrap(query);
+    ScanQueryLimitRowIterator itr = new ScanQueryLimitRowIterator(
+        ((queryInput, responseContext) -> Sequences.simple(singleEventScanResultValues)),
+        queryPlus,
+        ImmutableMap.of()
+    );
+
+    int count = 0;
+    int expectedNumRows = Math.min(limit, NUM_ELEMENTS);
+    while (itr.hasNext()) {
+      ScanResultValue curr = itr.next();
+      List<Map<String, Object>> events = ScanQueryTestHelper.getEventsListResultFormat(curr);
+      if (events.size() != batchSize) {
+        if (expectedNumRows - count >= batchSize) {
+          Assert.fail("Batch size is incorrect");
+        } else {
+          Assert.assertEquals(expectedNumRows - count, events.size());
+        }
+      }
+      count += events.size();
+    }
+    Assert.assertEquals(expectedNumRows, count);
+  }
+
+  /**
+   * Expect no batching to occur and the limit to be applied. Input is a sequence of sorted single-event
+   * ScanResultValues (unbatching and sorting occur in ScanQueryRunnerFactory#mergeRunners()).
+   */
+  @Test
+  public void testHistoricalOrderedScan()
+  {
+    ScanQuery query = Druids.newScanQueryBuilder()
+                            .limit(limit)
+                            .order(ScanQuery.Order.DESCENDING)
+                            .dataSource("some datasource")
+                            .batchSize(batchSize)
+                            .intervals(QueryRunnerTestHelper.fullOnIntervalSpec)
+                            .resultFormat(resultFormat)
+                            .context(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false))
+                            .build();
+
+    QueryPlus<ScanResultValue> queryPlus = QueryPlus.wrap(query);
+    ScanQueryLimitRowIterator itr = new ScanQueryLimitRowIterator(
+        ((queryInput, responseContext) -> Sequences.simple(singleEventScanResultValues)),
+        queryPlus,
+        ImmutableMap.of()
+    );
+
+    int count = 0;
+    int expectedNumRows = Math.min(limit, NUM_ELEMENTS);
+    while (itr.hasNext()) {
+      ScanResultValue curr = itr.next();
+      List<Map<String, Object>> events = ScanQueryTestHelper.getEventsListResultFormat(curr);
+      Assert.assertEquals(1, events.size());
+      count += events.size();
+    }
+    Assert.assertEquals(expectedNumRows, count);
+  }
+}
diff --git a/processing/src/test/java/org/apache/druid/query/scan/ScanQueryRunnerFactoryTest.java b/processing/src/test/java/org/apache/druid/query/scan/ScanQueryRunnerFactoryTest.java
new file mode 100644
index 000000000000..a7cf3c60e3fd
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/query/scan/ScanQueryRunnerFactoryTest.java
@@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.scan;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.UOE;
+import org.apache.druid.java.util.common.guava.Sequence;
+import org.apache.druid.java.util.common.guava.Sequences;
+import org.apache.druid.query.DefaultGenericQueryMetricsFactory;
+import org.apache.druid.query.Druids;
+import org.apache.druid.query.QueryPlus;
+import org.apache.druid.query.QueryRunner;
+import org.apache.druid.query.QueryRunnerTestHelper;
+import org.apache.druid.query.SegmentDescriptor;
+import org.joda.time.Interval;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+
+@RunWith(Parameterized.class)
+public class ScanQueryRunnerFactoryTest
+{
+  private int numElements;
+  private ScanQuery query;
+  private ScanQuery.ResultFormat resultFormat;
+
+  private static final ScanQueryRunnerFactory factory = new ScanQueryRunnerFactory(
+      new ScanQueryQueryToolChest(
+          new ScanQueryConfig(),
+          DefaultGenericQueryMetricsFactory.instance()
+      ),
+      new ScanQueryEngine(),
+      new ScanQueryConfig()
+  );
+
+  public ScanQueryRunnerFactoryTest(
+      final int numElements,
+      final int batchSize,
+      final long limit,
+      final ScanQuery.ResultFormat resultFormat,
+      final ScanQuery.Order order
+  )
+  {
+    this.numElements = numElements;
+    this.query = Druids.newScanQueryBuilder()
+                       .batchSize(batchSize)
+                       .limit(limit)
+                       .order(order)
+                       .intervals(QueryRunnerTestHelper.fullOnIntervalSpec)
+                       .dataSource("some datasource")
+                       .resultFormat(resultFormat)
+                       .build();
+    this.resultFormat = resultFormat;
+  }
+
+  @Parameterized.Parameters(name = "{0} {1} {2} {3} {4}")
+  public static Iterable<Object[]> constructorFeeder()
+  {
+    List<Integer> numsElements = ImmutableList.of(0, 10, 100);
+    List<Integer> batchSizes = ImmutableList.of(1, 100);
+    List<Long> limits = ImmutableList.of(3L, 1000L, Long.MAX_VALUE);
+    List<ScanQuery.ResultFormat> resultFormats = ImmutableList.of(
+        ScanQuery.ResultFormat.RESULT_FORMAT_LIST,
+        ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST
+    );
+    List<ScanQuery.Order> order = ImmutableList.of(
+        ScanQuery.Order.ASCENDING,
+        ScanQuery.Order.DESCENDING
+    );
+
+    return QueryRunnerTestHelper.cartesian(
+        numsElements,
+        batchSizes,
+        limits,
+        resultFormats,
+        order
+    );
+  }
+
+  @Test
+  public void testSortAndLimitScanResultValues()
+  {
+    List<ScanResultValue> srvs = new ArrayList<>(numElements);
+    List<Long> expectedEventTimestamps = new ArrayList<>();
+    for (int i = 0; i < numElements; i++) {
+      long timestamp = DateTimes.of("2015-01-01").plusHours(i).getMillis();
+      expectedEventTimestamps.add(timestamp);
+      srvs.add(ScanQueryTestHelper.generateScanResultValue(timestamp, resultFormat, 1));
+    }
+    expectedEventTimestamps.sort((o1, o2) -> {
+      int retVal = 0;
+      if (o1 > o2) {
+        retVal = 1;
+      } else if (o1 < o2) {
+        retVal = -1;
+      }
+      if (query.getOrder().equals(ScanQuery.Order.DESCENDING)) {
+        return retVal * -1;
+      }
+      return retVal;
+    });
+    Sequence<ScanResultValue> inputSequence = Sequences.simple(srvs);
+    try {
+      List<ScanResultValue> output = factory.priorityQueueSortAndLimit(
+          inputSequence,
+          query,
+          ImmutableList.of(new SegmentDescriptor(new Interval(
+              DateTimes.of("2010-01-01"),
+              DateTimes.of("2019-01-01").plusHours(1)
+          ), "1", 0))
+      ).toList();
+      if (query.getLimit() > Integer.MAX_VALUE) {
+        Assert.fail("Unsupported exception should have been thrown due to high limit");
+      }
+      validateSortedOutput(output, expectedEventTimestamps);
+    }
+    catch (UOE e) {
+      if (query.getLimit() <= Integer.MAX_VALUE) {
+        Assert.fail("Unsupported operation exception should not have been thrown here");
+      }
+    }
+  }
+
+  @Test
+  public void testNWayMerge()
+  {
+    List<Long> expectedEventTimestamps = new ArrayList<>(numElements * 3);
+
+    List<ScanResultValue> scanResultValues1 = new ArrayList<>(numElements);
+    for (int i = 0; i < numElements; i++) {
+      long timestamp = DateTimes.of("2015-01-01").plusMinutes(i * 2).getMillis();
+      expectedEventTimestamps.add(timestamp);
+      scanResultValues1.add(ScanQueryTestHelper.generateScanResultValue(timestamp, resultFormat, 1));
+    }
+
+    List<ScanResultValue> scanResultValues2 = new ArrayList<>(numElements);
+    for (int i = 0; i < numElements; i++) {
+      long timestamp = DateTimes.of("2015-01-01").plusMinutes(i * 2 + 1).getMillis();
+      expectedEventTimestamps.add(timestamp);
+      scanResultValues2.add(ScanQueryTestHelper.generateScanResultValue(timestamp, resultFormat, 1));
+    }
+
+    List<ScanResultValue> scanResultValues3 = new ArrayList<>(numElements);
+    for (int i = 0; i < numElements; i++) {
+      long timestamp = DateTimes.of("2015-01-02").plusMinutes(i).getMillis();
+      expectedEventTimestamps.add(timestamp);
+      scanResultValues3.add(ScanQueryTestHelper.generateScanResultValue(timestamp, resultFormat, 1));
+    }
+
+    if (query.getOrder() == ScanQuery.Order.DESCENDING) {
+      Collections.reverse(scanResultValues1);
+      Collections.reverse(scanResultValues2);
+      Collections.reverse(scanResultValues3);
+    }
+
+    QueryRunner<ScanResultValue> runnerSegment1Partition1 =
+        (queryPlus, responseContext) -> Sequences.simple(scanResultValues1);
+
+    QueryRunner<ScanResultValue> runnerSegment1Partition2 =
+        (queryPlus, responseContext) -> Sequences.simple(scanResultValues2);
+
+    QueryRunner<ScanResultValue> runnerSegment2Partition1 =
+        (queryPlus, responseContext) -> Sequences.simple(scanResultValues3);
+
+    QueryRunner<ScanResultValue> runnerSegment2Partition2 =
+        (queryPlus, responseContext) -> Sequences.empty();
+
+    List<List<QueryRunner<ScanResultValue>>> groupedRunners = new ArrayList<>(2);
+
+    if (query.getOrder() == ScanQuery.Order.DESCENDING) {
+      groupedRunners.add(Arrays.asList(runnerSegment2Partition1, runnerSegment2Partition2));
+      groupedRunners.add(Arrays.asList(runnerSegment1Partition1, runnerSegment1Partition2));
+    } else {
+      groupedRunners.add(Arrays.asList(runnerSegment1Partition1, runnerSegment1Partition2));
+      groupedRunners.add(Arrays.asList(runnerSegment2Partition1, runnerSegment2Partition2));
+    }
+
+    expectedEventTimestamps.sort((o1, o2) -> {
+      int retVal = 0;
+      if (o1 > o2) {
+        retVal = 1;
+      } else if (o1 < o2) {
+        retVal = -1;
+      }
+      if (query.getOrder().equals(ScanQuery.Order.DESCENDING)) {
+        return retVal * -1;
+      }
+      return retVal;
+    });
+
+    List<ScanResultValue> output =
+        factory.nWayMergeAndLimit(
+            groupedRunners,
+            QueryPlus.wrap(query),
+            ImmutableMap.of()
+        ).toList();
+
+    validateSortedOutput(output, expectedEventTimestamps);
+  }
+
+  private void validateSortedOutput(List<ScanResultValue> output, List<Long> expectedEventTimestamps)
+  {
+    // check each scan result value has one event
+    for (ScanResultValue srv : output) {
+      if (resultFormat.equals(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)) {
+        Assert.assertEquals(1, ScanQueryTestHelper.getEventsCompactedListResultFormat(srv).size());
+      } else if (resultFormat.equals(ScanQuery.ResultFormat.RESULT_FORMAT_LIST)) {
+        Assert.assertEquals(1, ScanQueryTestHelper.getEventsListResultFormat(srv).size());
+      }
+    }
+
+    // check total # of rows <= limit
+    Assert.assertTrue(output.size() <= query.getLimit());
+
+    // check ordering is correct
+    for (int i = 1; i < output.size(); i++) {
+      if (query.getOrder().equals(ScanQuery.Order.DESCENDING)) {
+        Assert.assertTrue(output.get(i).getFirstEventTimestamp(resultFormat) <
+                          output.get(i - 1).getFirstEventTimestamp(resultFormat));
+      } else {
+        Assert.assertTrue(output.get(i).getFirstEventTimestamp(resultFormat) >
+                          output.get(i - 1).getFirstEventTimestamp(resultFormat));
+      }
+    }
+
+    // check the values are correct
+    for (int i = 0; i < query.getLimit() && i < output.size(); i++) {
+      Assert.assertEquals((long) expectedEventTimestamps.get(i), output.get(i).getFirstEventTimestamp(resultFormat));
+    }
+  }
+}
diff --git a/processing/src/test/java/org/apache/druid/query/scan/ScanQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/scan/ScanQueryRunnerTest.java
index ec18bdbcdc68..b3a0d0069ac1 100644
--- a/processing/src/test/java/org/apache/druid/query/scan/ScanQueryRunnerTest.java
+++ b/processing/src/test/java/org/apache/druid/query/scan/ScanQueryRunnerTest.java
@@ -21,11 +21,13 @@
 import com.google.common.base.Function;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.ObjectArrays;
 import com.google.common.collect.Sets;
 import com.google.common.hash.Hashing;
+import org.apache.commons.lang.ArrayUtils;
 import org.apache.druid.hll.HyperLogLogCollector;
 import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.ISE;
@@ -121,7 +123,8 @@ public static Iterable<Object[]> constructorFeeder()
         QueryRunnerTestHelper.makeQueryRunners(
             new ScanQueryRunnerFactory(
                 toolChest,
-                new ScanQueryEngine()
+                new ScanQueryEngine(),
+                new ScanQueryConfig()
             )
         ),
         ImmutableList.of(false, true)
@@ -215,7 +218,7 @@ public void testFullOnSelectAsCompactedList()
     ScanQuery query = newTestQuery()
         .intervals(I_0112_0114)
         .virtualColumns(EXPR_COLUMN)
-        .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST)
+        .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
         .build();
 
     HashMap<String, Object> context = new HashMap<String, Object>();
@@ -235,7 +238,11 @@ public void testSelectWithUnderscoreUnderscoreTime()
   {
     ScanQuery query = newTestQuery()
         .intervals(I_0112_0114)
-        .columns(ColumnHolder.TIME_COLUMN_NAME, QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.indexMetric)
+        .columns(
+            ColumnHolder.TIME_COLUMN_NAME,
+            QueryRunnerTestHelper.marketDimension,
+            QueryRunnerTestHelper.indexMetric
+        )
         .build();
 
     HashMap<String, Object> context = new HashMap<String, Object>();
@@ -317,7 +324,7 @@ public void testSelectWithDimsAndMetsAsCompactedList()
     ScanQuery query = newTestQuery()
         .intervals(I_0112_0114)
         .columns(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.indexMetric)
-        .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST)
+        .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
        .build();
 
     HashMap<String, Object> context = new HashMap<String, Object>();
@@ -509,6 +516,251 @@ public void testFullSelectNoDimensionAndMetric()
     verify(expectedResults, results);
   }
 
+  @Test
+  public void testFullOnSelectWithFilterLimitAndAscendingTimeOrderingListFormat()
+  {
+    // limits shouldn't matter -> all rows should be returned if time-ordering on the broker is occurring
+    for (int limit : new int[]{3, 1, 5, 7, 0}) {
+      ScanQuery query = newTestQuery()
+          .intervals(I_0112_0114)
+          .filters(new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot", null))
+          .columns(QueryRunnerTestHelper.qualityDimension, QueryRunnerTestHelper.indexMetric)
+          .limit(limit)
+          .order(ScanQuery.Order.ASCENDING)
+          .context(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false))
+          .build();
+
+      HashMap<String, Object> context = new HashMap<>();
+      Iterable<ScanResultValue> results = runner.run(QueryPlus.wrap(query), context).toList();
+      String[] seg1Results = new String[]{
+          "2011-01-12T00:00:00.000Z\tspot\tautomotive\tpreferred\tapreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\tbusiness\tpreferred\tbpreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\tentertainment\tpreferred\tepreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\thealth\tpreferred\thpreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\tmezzanine\tpreferred\tmpreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\tnews\tpreferred\tnpreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\tpremium\tpreferred\tppreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\ttechnology\tpreferred\ttpreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\ttravel\tpreferred\ttpreferred\t100.000000"
+      };
+      String[] seg2Results = new String[]{
+          "2011-01-13T00:00:00.000Z\tspot\tautomotive\tpreferred\tapreferred\t94.874713",
+          "2011-01-13T00:00:00.000Z\tspot\tbusiness\tpreferred\tbpreferred\t103.629399",
+          "2011-01-13T00:00:00.000Z\tspot\tentertainment\tpreferred\tepreferred\t110.087299",
+          "2011-01-13T00:00:00.000Z\tspot\thealth\tpreferred\thpreferred\t114.947403",
+          "2011-01-13T00:00:00.000Z\tspot\tmezzanine\tpreferred\tmpreferred\t104.465767",
+          "2011-01-13T00:00:00.000Z\tspot\tnews\tpreferred\tnpreferred\t102.851683",
+          "2011-01-13T00:00:00.000Z\tspot\tpremium\tpreferred\tppreferred\t108.863011",
+          "2011-01-13T00:00:00.000Z\tspot\ttechnology\tpreferred\ttpreferred\t111.356672",
+          "2011-01-13T00:00:00.000Z\tspot\ttravel\tpreferred\ttpreferred\t106.236928"
+      };
+      final List<List<Map<String, Object>>> ascendingEvents = toEvents(
+          new String[]{
+              legacy ? getTimestampName() + ":TIME" : null,
+              null,
+              QueryRunnerTestHelper.qualityDimension + ":STRING",
+              null,
+              null,
+              QueryRunnerTestHelper.indexMetric + ":DOUBLE"
+          },
+          (String[]) ArrayUtils.addAll(seg1Results, seg2Results)
+      );
+      List<ScanResultValue> ascendingExpectedResults = toExpected(
+          ascendingEvents,
+          legacy ?
+          Lists.newArrayList(getTimestampName(), "quality", "index") :
+          Lists.newArrayList("quality", "index"),
+          0,
+          limit
+      );
+      verify(ascendingExpectedResults, results);
+    }
+  }
+
+  @Test
+  public void testFullOnSelectWithFilterLimitAndDescendingTimeOrderingListFormat()
+  {
+    // limits shouldn't matter -> all rows should be returned if time-ordering on the broker is occurring
+    for (int limit : new int[]{3, 1, 5, 7, 0}) {
+      ScanQuery query = newTestQuery()
+          .intervals(I_0112_0114)
+          .filters(new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot", null))
+          .columns(QueryRunnerTestHelper.qualityDimension, QueryRunnerTestHelper.indexMetric)
+          .limit(limit)
+          .order(ScanQuery.Order.DESCENDING)
+          .build();
+
+      HashMap<String, Object> context = new HashMap<>();
+      Iterable<ScanResultValue> results = runner.run(QueryPlus.wrap(query), context).toList();
+      String[] seg1Results = new String[]{
+          "2011-01-12T00:00:00.000Z\tspot\tautomotive\tpreferred\tapreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\tbusiness\tpreferred\tbpreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\tentertainment\tpreferred\tepreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\thealth\tpreferred\thpreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\tmezzanine\tpreferred\tmpreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\tnews\tpreferred\tnpreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\tpremium\tpreferred\tppreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\ttechnology\tpreferred\ttpreferred\t100.000000",
+          "2011-01-12T00:00:00.000Z\tspot\ttravel\tpreferred\ttpreferred\t100.000000"
+      };
+      String[] seg2Results = new String[]{
+          "2011-01-13T00:00:00.000Z\tspot\tautomotive\tpreferred\tapreferred\t94.874713",
+          "2011-01-13T00:00:00.000Z\tspot\tbusiness\tpreferred\tbpreferred\t103.629399",
+          "2011-01-13T00:00:00.000Z\tspot\tentertainment\tpreferred\tepreferred\t110.087299",
+          "2011-01-13T00:00:00.000Z\tspot\thealth\tpreferred\thpreferred\t114.947403",
+          "2011-01-13T00:00:00.000Z\tspot\tmezzanine\tpreferred\tmpreferred\t104.465767",
+          "2011-01-13T00:00:00.000Z\tspot\tnews\tpreferred\tnpreferred\t102.851683",
+          "2011-01-13T00:00:00.000Z\tspot\tpremium\tpreferred\tppreferred\t108.863011",
+          "2011-01-13T00:00:00.000Z\tspot\ttechnology\tpreferred\ttpreferred\t111.356672",
+          "2011-01-13T00:00:00.000Z\tspot\ttravel\tpreferred\ttpreferred\t106.236928"
+      };
+      String[] expectedRet = (String[]) ArrayUtils.addAll(seg1Results, seg2Results);
+      ArrayUtils.reverse(expectedRet);
+      final List<List<Map<String, Object>>> descendingEvents = toEvents(
+          new String[]{
+              legacy ? getTimestampName() + ":TIME" : null,
+              null,
+              QueryRunnerTestHelper.qualityDimension + ":STRING",
+              null,
+              null,
+              QueryRunnerTestHelper.indexMetric + ":DOUBLE"
+          },
+          expectedRet
+      );
+      List<ScanResultValue> descendingExpectedResults = toExpected(
+          descendingEvents,
+          legacy ?
+          Lists.newArrayList(getTimestampName(), "quality", "index") :
+          Lists.newArrayList("quality", "index"),
+          0,
+          limit
+      );
+      verify(descendingExpectedResults, results);
+    }
+  }
+
+  @Test
+  public void testFullOnSelectWithFilterLimitAndAscendingTimeOrderingCompactedListFormat()
+  {
+    String[] seg1Results = new String[]{
+        "2011-01-12T00:00:00.000Z\tspot\tautomotive\tpreferred\tapreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\tbusiness\tpreferred\tbpreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\tentertainment\tpreferred\tepreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\thealth\tpreferred\thpreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\tmezzanine\tpreferred\tmpreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\tnews\tpreferred\tnpreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\tpremium\tpreferred\tppreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\ttechnology\tpreferred\ttpreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\ttravel\tpreferred\ttpreferred\t100.000000"
+    };
+    String[] seg2Results = new String[]{
+        "2011-01-13T00:00:00.000Z\tspot\tautomotive\tpreferred\tapreferred\t94.874713",
+        "2011-01-13T00:00:00.000Z\tspot\tbusiness\tpreferred\tbpreferred\t103.629399",
+        "2011-01-13T00:00:00.000Z\tspot\tentertainment\tpreferred\tepreferred\t110.087299",
+        "2011-01-13T00:00:00.000Z\tspot\thealth\tpreferred\thpreferred\t114.947403",
+        "2011-01-13T00:00:00.000Z\tspot\tmezzanine\tpreferred\tmpreferred\t104.465767",
+        "2011-01-13T00:00:00.000Z\tspot\tnews\tpreferred\tnpreferred\t102.851683",
+        "2011-01-13T00:00:00.000Z\tspot\tpremium\tpreferred\tppreferred\t108.863011",
+        "2011-01-13T00:00:00.000Z\tspot\ttechnology\tpreferred\ttpreferred\t111.356672",
+        "2011-01-13T00:00:00.000Z\tspot\ttravel\tpreferred\ttpreferred\t106.236928"
+    };
+    // limits shouldn't matter -> all rows should be returned if time-ordering on the broker is occurring
+    for (int limit : new int[]{3, 0}) {
+      /* Ascending */
+      ScanQuery query = newTestQuery()
+          .intervals(I_0112_0114)
+          .filters(new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot", null))
+          .columns(QueryRunnerTestHelper.qualityDimension, QueryRunnerTestHelper.indexMetric)
+          .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+          .order(ScanQuery.Order.ASCENDING)
+          .limit(limit)
+          .build();
+
+      HashMap<String, Object> context = new HashMap<>();
+      Iterable<ScanResultValue> results = runner.run(QueryPlus.wrap(query), context).toList();
+      final List<List<Map<String, Object>>> ascendingEvents = toEvents(
+          new String[]{
+              legacy ? getTimestampName() + ":TIME" : null,
+              null,
+              QueryRunnerTestHelper.qualityDimension + ":STRING",
+              null,
+              null,
+              QueryRunnerTestHelper.indexMetric + ":DOUBLE"
+          },
+          (String[]) ArrayUtils.addAll(seg1Results, seg2Results)
+      );
+      List<ScanResultValue> ascendingExpectedResults = toExpected(
+          ascendingEvents,
+          legacy ?
+          Lists.newArrayList(getTimestampName(), "quality", "index") :
+          Lists.newArrayList("quality", "index"),
+          0,
+          limit
+      );
+      results = compactedListToRow(results);
+      verify(ascendingExpectedResults, results);
+    }
+  }
+
+  @Test
+  public void testFullOnSelectWithFilterLimitAndDescendingTimeOrderingCompactedListFormat()
+  {
+    String[] seg1Results = new String[]{
+        "2011-01-12T00:00:00.000Z\tspot\tautomotive\tpreferred\tapreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\tbusiness\tpreferred\tbpreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\tentertainment\tpreferred\tepreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\thealth\tpreferred\thpreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\tmezzanine\tpreferred\tmpreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\tnews\tpreferred\tnpreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\tpremium\tpreferred\tppreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\ttechnology\tpreferred\ttpreferred\t100.000000",
+        "2011-01-12T00:00:00.000Z\tspot\ttravel\tpreferred\ttpreferred\t100.000000"
+    };
+    String[] seg2Results = new String[]{
+        "2011-01-13T00:00:00.000Z\tspot\tautomotive\tpreferred\tapreferred\t94.874713",
+        "2011-01-13T00:00:00.000Z\tspot\tbusiness\tpreferred\tbpreferred\t103.629399",
+        "2011-01-13T00:00:00.000Z\tspot\tentertainment\tpreferred\tepreferred\t110.087299",
+        "2011-01-13T00:00:00.000Z\tspot\thealth\tpreferred\thpreferred\t114.947403",
+        "2011-01-13T00:00:00.000Z\tspot\tmezzanine\tpreferred\tmpreferred\t104.465767",
+        "2011-01-13T00:00:00.000Z\tspot\tnews\tpreferred\tnpreferred\t102.851683",
+        "2011-01-13T00:00:00.000Z\tspot\tpremium\tpreferred\tppreferred\t108.863011",
+        "2011-01-13T00:00:00.000Z\tspot\ttechnology\tpreferred\ttpreferred\t111.356672",
+        "2011-01-13T00:00:00.000Z\tspot\ttravel\tpreferred\ttpreferred\t106.236928"
+    };
+    // limits shouldn't matter -> all rows should be returned if time-ordering on the broker is occurring
+    for (int limit : new int[]{3, 1}) {
+      /* Descending */
+      ScanQuery query = newTestQuery()
+          .intervals(I_0112_0114)
+          .filters(new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot", null))
+          .columns(QueryRunnerTestHelper.qualityDimension, QueryRunnerTestHelper.indexMetric)
+          .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+          .order(ScanQuery.Order.DESCENDING)
+          .context(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false))
+          .limit(limit)
+          .build();
+
+      HashMap<String, Object> context = new HashMap<>();
+      Iterable<ScanResultValue> results = runner.run(QueryPlus.wrap(query), context).toList();
+      String[] expectedRet = (String[]) ArrayUtils.addAll(seg1Results, seg2Results);
+      ArrayUtils.reverse(expectedRet);
+      final List<List<Map<String, Object>>> descendingEvents = toEvents(
+          new String[]{
+              legacy ? getTimestampName() + ":TIME" : null,
+              null,
+              QueryRunnerTestHelper.qualityDimension + ":STRING",
+              null,
+              null,
+              QueryRunnerTestHelper.indexMetric + ":DOUBLE"
+          },
+          expectedRet // segments in reverse order from above
+      );
+      List<ScanResultValue> descendingExpectedResults = toExpected(
+          descendingEvents,
+          legacy ? Lists.newArrayList(getTimestampName(), "quality", "index") : Lists.newArrayList("quality", "index"),
+          0,
+          limit
+      );
+      results = compactedListToRow(results);
+      verify(descendingExpectedResults, results);
+    }
+  }
+
   private List<List<Map<String, Object>>> toFullEvents(
       final String[]... valueSet)
   {
     return toEvents(
@@ -687,7 +939,12 @@ public static void verify(
         Object exValue = ex.getValue();
         if (exValue instanceof Double || exValue instanceof Float) {
           final double expectedDoubleValue = ((Number) exValue).doubleValue();
-          Assert.assertEquals("invalid value for " + ex.getKey(), expectedDoubleValue, ((Number) actVal).doubleValue(), expectedDoubleValue * 1e-6);
+          Assert.assertEquals(
+              "invalid value for " + ex.getKey(),
+              expectedDoubleValue,
+              ((Number) actVal).doubleValue(),
+              expectedDoubleValue * 1e-6
+          );
         } else {
           Assert.assertEquals("invalid value for " + ex.getKey(), ex.getValue(), actVal);
         }
diff --git a/processing/src/test/java/org/apache/druid/query/scan/ScanQuerySpecTest.java b/processing/src/test/java/org/apache/druid/query/scan/ScanQuerySpecTest.java
index ec5069546a4d..b49a1033f2e8 100644
--- a/processing/src/test/java/org/apache/druid/query/scan/ScanQuerySpecTest.java
+++ b/processing/src/test/java/org/apache/druid/query/scan/ScanQuerySpecTest.java
@@ -53,6 +53,7 @@ public void testSerializationLegacyString() throws Exception
         + "\"resultFormat\":\"list\","
         + "\"batchSize\":20480,"
         + "\"limit\":3,"
+        + "\"order\":\"none\","
         + "\"filter\":null,"
         + "\"columns\":[\"market\",\"quality\",\"index\"],"
         + "\"legacy\":null,"
@@ -64,9 +65,10 @@ public void testSerializationLegacyString() throws Exception
         new TableDataSource(QueryRunnerTestHelper.dataSource),
         new LegacySegmentSpec(Intervals.of("2011-01-12/2011-01-14")),
         VirtualColumns.EMPTY,
-        null,
+        ScanQuery.ResultFormat.RESULT_FORMAT_LIST,
         0,
         3,
+        ScanQuery.Order.NONE,
         null,
         Arrays.asList("market", "quality", "index"),
         null,
diff --git a/processing/src/test/java/org/apache/druid/query/scan/ScanQueryTestHelper.java b/processing/src/test/java/org/apache/druid/query/scan/ScanQueryTestHelper.java
new file mode 100644
index 000000000000..90c3bda79edf
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/query/scan/ScanQueryTestHelper.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.scan;
+
+import org.apache.druid.java.util.common.UOE;
+import org.apache.druid.segment.column.ColumnHolder;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class ScanQueryTestHelper
+{
+  public static ScanResultValue generateScanResultValue(
+      long timestamp,
+      ScanQuery.ResultFormat resultFormat,
+      int batchSize
+  )
+  {
+    String segmentId = "some_segment_id";
+    List<String> columns = new ArrayList<>(Arrays.asList(ColumnHolder.TIME_COLUMN_NAME, "name", "count"));
+    List<Object> events = new ArrayList<>();
+    for (int i = 0; i < batchSize; i++) {
+      Object event;
+      if (resultFormat.equals(ScanQuery.ResultFormat.RESULT_FORMAT_LIST)) {
+        Map<String, Object> eventMap = new HashMap<>();
+        eventMap.put(ColumnHolder.TIME_COLUMN_NAME, timestamp);
+        eventMap.put("name", "Feridun");
+        eventMap.put("count", i);
+        event = eventMap;
+      } else if (resultFormat.equals(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)) {
+        event = new ArrayList<>(Arrays.asList(
+            timestamp,
+            "Feridun",
+            i
+        ));
+      } else {
+        throw new UOE("Result format [%s] not supported yet", resultFormat.toString());
+      }
+      events.add(event);
+    }
+    return new ScanResultValue(segmentId, columns, events);
+  }
+
+  public static List<Map<String, Object>> getEventsListResultFormat(ScanResultValue scanResultValue)
+  {
+    return (List<Map<String, Object>>) scanResultValue.getEvents();
+  }
+
+  public static List<List<Object>> getEventsCompactedListResultFormat(ScanResultValue scanResultValue)
+  {
+    return (List<List<Object>>) scanResultValue.getEvents();
+  }
+}
diff --git a/processing/src/test/java/org/apache/druid/query/scan/ScanResultValueTest.java b/processing/src/test/java/org/apache/druid/query/scan/ScanResultValueTest.java
new file mode 100644
index 000000000000..47f82ad8ce59
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/query/scan/ScanResultValueTest.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.scan;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.druid.jackson.DefaultObjectMapper;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class ScanResultValueTest
+{
+  private static final ObjectMapper JSON_MAPPER = new DefaultObjectMapper();
+  private static final long TIME_1 = 1234567890000L;
+  private static final long TIME_2 = 9876543210000L;
+
+  private static ScanResultValue compactedListSRV;
+  private static ScanResultValue listSRV;
+
+  @BeforeClass
+  public static void setup()
+  {
+    String segmentId = "some_segment_id";
+    List<String> columns = new ArrayList<>(Arrays.asList(ColumnHolder.TIME_COLUMN_NAME, "name", "count"));
+    List<Object> event = new ArrayList<>(Arrays.asList(
+        TIME_1,
+        "Feridun",
+        4
+    ));
+    List<Object> event2 = new ArrayList<>(Arrays.asList(
+        TIME_2,
+        "Justin",
+        6
+    ));
+
+    List<List<Object>> events = Arrays.asList(event, event2);
+    compactedListSRV = new ScanResultValue(segmentId, columns, events);
+
+    Map<String, Object> eventMap1 = new HashMap<>();
+    eventMap1.put(ColumnHolder.TIME_COLUMN_NAME, TIME_1);
+    eventMap1.put("name", "Feridun");
+    eventMap1.put("count", 4);
+    Map<String, Object> eventMap2 = new HashMap<>();
+    eventMap2.put(ColumnHolder.TIME_COLUMN_NAME, TIME_2);
+    eventMap2.put("name", "Justin");
+    eventMap2.put("count", 6);
+    List<Map<String, Object>> eventMaps = Arrays.asList(eventMap1, eventMap2);
+    listSRV = new ScanResultValue(segmentId, columns, eventMaps);
+  }
+
+  @Test
+  public void testSerdeScanResultValueCompactedList() throws IOException
+  {
+    String serialized = JSON_MAPPER.writeValueAsString(compactedListSRV);
+    ScanResultValue deserialized = JSON_MAPPER.readValue(serialized, ScanResultValue.class);
+    Assert.assertEquals(compactedListSRV, deserialized);
+  }
+
+  @Test
+  public void testSerdeScanResultValueNonCompactedList() throws IOException
+  {
+    String serialized = JSON_MAPPER.writeValueAsString(listSRV);
+    ScanResultValue deserialized = JSON_MAPPER.readValue(serialized, ScanResultValue.class);
+    Assert.assertEquals(listSRV, deserialized);
+  }
+
+  @Test
+  public void testGetFirstEventTimestampCompactedList()
+  {
+    long timestamp = compactedListSRV.getFirstEventTimestamp(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST);
+    Assert.assertEquals(TIME_1, timestamp);
+  }
+
+  @Test
+  public void testGetFirstEventTimestampNonCompactedList()
+  {
+    long timestamp = listSRV.getFirstEventTimestamp(ScanQuery.ResultFormat.RESULT_FORMAT_LIST);
+    Assert.assertEquals(TIME_1, timestamp);
+  }
+
+  @Test
+  public void testToSingleEventScanResultValues()
+  {
+    List<ScanResultValue> compactedListScanResultValues = compactedListSRV.toSingleEventScanResultValues();
+    for (ScanResultValue srv : compactedListScanResultValues) {
+      List<Object> events = (List<Object>) srv.getEvents();
+      Assert.assertEquals(1, events.size());
+    }
+    List<ScanResultValue> listScanResultValues = listSRV.toSingleEventScanResultValues();
+    // iterate the list-format values here, not the compacted-list values (was a copy-paste bug)
+    for (ScanResultValue srv : listScanResultValues) {
+      List<Object> events = (List<Object>) srv.getEvents();
+      Assert.assertEquals(1, events.size());
+    }
+  }
+}
diff --git a/processing/src/test/java/org/apache/druid/query/scan/ScanResultValueTimestampComparatorTest.java b/processing/src/test/java/org/apache/druid/query/scan/ScanResultValueTimestampComparatorTest.java
new file mode 100644
index 000000000000..70f2e080b9f2
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/query/scan/ScanResultValueTimestampComparatorTest.java
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.scan;
+
+import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.query.Druids;
+import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
+import org.apache.druid.query.spec.QuerySegmentSpec;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.joda.time.Interval;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+
+public class ScanResultValueTimestampComparatorTest
+{
+  private static QuerySegmentSpec intervalSpec;
+
+  @BeforeClass
+  public static void setup()
+  {
+    intervalSpec = new MultipleIntervalSegmentSpec(
+        Collections.singletonList(
+            new Interval(DateTimes.of("2012-01-01"), DateTimes.of("2012-01-01").plusHours(1))
+        )
+    );
+  }
+
+  @Test
+  public void comparisonDescendingListTest()
+  {
+    ScanQuery query = Druids.newScanQueryBuilder()
+                            .order(ScanQuery.Order.DESCENDING)
+                            .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_LIST)
+                            .dataSource("some src")
+                            .intervals(intervalSpec)
+                            .build();
+
+    ScanResultValueTimestampComparator comparator = new ScanResultValueTimestampComparator(query);
+
+    ArrayList<HashMap<String, Object>> events1 = new ArrayList<>();
+    HashMap<String, Object> event1 = new HashMap<>();
+    event1.put(ColumnHolder.TIME_COLUMN_NAME, 42L);
+    events1.add(event1);
+
+    ScanResultValue s1 = new ScanResultValue(
+        "segmentId",
+        Collections.singletonList(ColumnHolder.TIME_COLUMN_NAME),
+        events1
+    );
+
+    ArrayList<HashMap<String, Object>> events2 = new ArrayList<>();
+    HashMap<String, Object> event2 = new HashMap<>();
+    event2.put(ColumnHolder.TIME_COLUMN_NAME, 43L);
+    events2.add(event2);
+
+    ScanResultValue s2 = new ScanResultValue(
+        "segmentId",
+        Collections.singletonList(ColumnHolder.TIME_COLUMN_NAME),
+        events2
+    );
+
+    Assert.assertEquals(-1, comparator.compare(s1, s2));
+  }
+
+  @Test
+  public void comparisonAscendingListTest()
+  {
+    ScanQuery query = Druids.newScanQueryBuilder()
+                            .order(ScanQuery.Order.ASCENDING)
+                            .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_LIST)
+                            .dataSource("some src")
+                            .intervals(intervalSpec)
+                            .build();
+
+    ScanResultValueTimestampComparator comparator = new ScanResultValueTimestampComparator(query);
+
+    ArrayList<HashMap<String, Object>> events1 = new ArrayList<>();
+    HashMap<String, Object> event1 = new HashMap<>();
+    event1.put(ColumnHolder.TIME_COLUMN_NAME, 42L);
+    events1.add(event1);
+
+    ScanResultValue s1 = new ScanResultValue(
+        "segmentId",
+        Collections.singletonList(ColumnHolder.TIME_COLUMN_NAME),
+        events1
+    );
+
+    ArrayList<HashMap<String, Object>> events2 = new ArrayList<>();
+    HashMap<String, Object> event2 = new HashMap<>();
+    event2.put(ColumnHolder.TIME_COLUMN_NAME, 43L);
+    events2.add(event2);
+
+    ScanResultValue s2 = new ScanResultValue(
+        "segmentId",
+        Collections.singletonList(ColumnHolder.TIME_COLUMN_NAME),
+        events2
+    );
+
+    Assert.assertEquals(1, comparator.compare(s1, s2));
+  }
+
+  @Test
+  public void comparisonDescendingCompactedListTest()
+  {
+    ScanQuery query = Druids.newScanQueryBuilder()
+                            .order(ScanQuery.Order.DESCENDING)
+                            .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                            .dataSource("some src")
+                            .intervals(intervalSpec)
+                            .build();
+
+    ScanResultValueTimestampComparator comparator = new ScanResultValueTimestampComparator(query);
+
+    List<List<Object>> events1 = new ArrayList<>();
+    List<Object> event1 = Collections.singletonList(42L);
+    events1.add(event1);
+
+    ScanResultValue s1 = new ScanResultValue(
+        "segmentId",
+        Collections.singletonList(ColumnHolder.TIME_COLUMN_NAME),
+        events1
+    );
+
+    List<List<Object>> events2 = new ArrayList<>();
+    List<Object> event2 = Collections.singletonList(43L);
+    events2.add(event2);
+
+    ScanResultValue s2 = new ScanResultValue(
+        "segmentId",
+        Collections.singletonList(ColumnHolder.TIME_COLUMN_NAME),
+        events2
+    );
+
+    Assert.assertEquals(-1, comparator.compare(s1, s2));
+  }
+
+  @Test
+  public void comparisonAscendingCompactedListTest()
+  {
+    ScanQuery query = Druids.newScanQueryBuilder()
+                            .order(ScanQuery.Order.ASCENDING)
+                            .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                            .dataSource("some src")
+                            .intervals(intervalSpec)
+                            .build();
+
+    ScanResultValueTimestampComparator comparator = new ScanResultValueTimestampComparator(query);
+
+    List<List<Object>> events1 = new ArrayList<>();
+    List<Object> event1 = Collections.singletonList(42L);
+    events1.add(event1);
+
+    ScanResultValue s1 = new ScanResultValue(
+        "segmentId",
+        Collections.singletonList(ColumnHolder.TIME_COLUMN_NAME),
+        events1
+    );
+
+    List<List<Object>> events2 = new ArrayList<>();
+    List<Object> event2 = Collections.singletonList(43L);
+    events2.add(event2);
+
+    ScanResultValue s2 = new ScanResultValue(
+        "segmentId",
+        Collections.singletonList(ColumnHolder.TIME_COLUMN_NAME),
+        events2
+    );
+
+    Assert.assertEquals(1, comparator.compare(s1, s2));
+  }
+}
diff --git a/server/src/main/java/org/apache/druid/client/CachingClusteredClient.java b/server/src/main/java/org/apache/druid/client/CachingClusteredClient.java
index 253aa0f68062..eaa08d656c5a 100644
--- a/server/src/main/java/org/apache/druid/client/CachingClusteredClient.java
+++ b/server/src/main/java/org/apache/druid/client/CachingClusteredClient.java
@@ -199,7 +199,7 @@ public <T> Sequence<T> run(final QueryPlus<T> queryPlus, final Map<String, Object> responseContext)
   }
 
   /**
-   * This class essentially incapsulates the major part of the logic of {@link CachingClusteredClient}. It's state and
+   * This class essentially encapsulates the major part of the logic of {@link CachingClusteredClient}. Its state and
    * methods couldn't belong to {@link CachingClusteredClient} itself, because they depend on the specific query object
    * being run, but {@link QuerySegmentWalker} API is designed so that implementations should be able to accept
    * arbitrary queries.
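The comparator tests above pin down the contract that the time-ordering machinery is built on: single-event ScanResultValues compare by the timestamp of their first event, with the sign of the comparison flipped between ascending and descending queries. The following is a minimal sketch of how a priority queue can apply that contract to produce a limited, ordered result; it is illustrative only, not the production logic (which lives in ScanQueryRunnerFactory#priorityQueueSortAndLimit). It assumes ScanResultValueTimestampComparator implements java.util.Comparator<ScanResultValue>, as the tests suggest, and the name sortAndLimitSketch is invented for this example.

package org.apache.druid.query.scan;

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class ScanOrderingSketch
{
  // Illustrative only: buffer single-event ScanResultValues in a priority queue keyed
  // by the comparator exercised in the tests above, then emit at most `limit` of them.
  static List<ScanResultValue> sortAndLimitSketch(
      Iterable<ScanResultValue> unsorted,
      ScanQuery query,
      int limit
  )
  {
    // PriorityQueue#poll() always returns the least element under the supplied
    // comparator; the sign conventions asserted in the tests above determine whether
    // that is the earliest or the latest timestamp for a given query order.
    PriorityQueue<ScanResultValue> queue =
        new PriorityQueue<>(new ScanResultValueTimestampComparator(query));
    for (ScanResultValue value : unsorted) {
      queue.offer(value);
    }
    List<ScanResultValue> sorted = new ArrayList<>();
    while (!queue.isEmpty() && sorted.size() < limit) {
      sorted.add(queue.poll());
    }
    return sorted;
  }
}

Because this approach buffers every incoming value before the limit is applied, it is memory-bound, which is why ScanQueryConfig exposes maxRowsQueuedForOrdering as a guardrail, as exercised in ScanQueryConfigTest above.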
diff --git a/server/src/main/java/org/apache/druid/server/coordination/ServerManager.java b/server/src/main/java/org/apache/druid/server/coordination/ServerManager.java index d628ef253d64..98e6f3e814a3 100644 --- a/server/src/main/java/org/apache/druid/server/coordination/ServerManager.java +++ b/server/src/main/java/org/apache/druid/server/coordination/ServerManager.java @@ -197,7 +197,7 @@ public QueryRunner apply(PartitionChunk input) ); return CPUTimeMetricQueryRunner.safeBuild( - new FinalizeResultsQueryRunner( + new FinalizeResultsQueryRunner<>( toolChest.mergeResults(factory.mergeRunners(exec, queryRunners)), toolChest ), diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index 832c202864b9..98e4ff340dd8 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -988,9 +988,10 @@ public ScanQuery toScanQuery() dataSource, filtration.getQuerySegmentSpec(), selectProjection != null ? VirtualColumns.create(selectProjection.getVirtualColumns()) : VirtualColumns.EMPTY, - ScanQuery.RESULT_FORMAT_COMPACTED_LIST, + ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST, 0, scanLimit, + null, // Will default to "none" filtration.getDimFilter(), Ordering.natural().sortedCopy(ImmutableSet.copyOf(outputRowSignature.getRowOrder())), false, diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index 29247b540042..c4db78346f8a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -370,7 +370,7 @@ public static ExpressionPostAggregator expressionPostAgg(final String name, fina public static Druids.ScanQueryBuilder newScanQueryBuilder() { - return new Druids.ScanQueryBuilder().resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + return new Druids.ScanQueryBuilder().resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index f18c276266e3..687887cc5b62 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -111,7 +111,7 @@ public void testSelectConstantExpressionFromTable() throws Exception .intervals(querySegmentSpec(Filtration.eternity())) .virtualColumns(expressionVirtualColumn("v0", "2", ValueType.LONG)) .columns("dim1", "v0") - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(1) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -431,7 +431,7 @@ public void testSelectStar() throws Exception .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .columns("__time", "cnt", "dim1", "dim2", "dim3", "m1", "m2", "unique_dim1") - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -465,7 +465,7 @@ public void testSelectStarOnForbiddenTable() throws Exception .dataSource(CalciteTests.FORBIDDEN_DATASOURCE) .intervals(querySegmentSpec(Filtration.eternity())) .columns("__time", "cnt", "dim1", 
"dim2", "m1", "m2", "unique_dim1") - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -511,7 +511,7 @@ public void testExplainSelectStar() throws Exception ImmutableList.of(), ImmutableList.of( new Object[]{ - "DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"filter\":null,\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, cnt:LONG, dim1:STRING, dim2:STRING, dim3:STRING, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX}])\n" + "DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"order\":\"none\",\"filter\":null,\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, cnt:LONG, dim1:STRING, dim2:STRING, dim3:STRING, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX}])\n" } ) ); @@ -531,7 +531,7 @@ public void testSelectStarWithLimit() throws Exception .intervals(querySegmentSpec(Filtration.eternity())) .columns("__time", "cnt", "dim1", "dim2", "dim3", "m1", "m2", "unique_dim1") .limit(2) - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -556,7 +556,7 @@ public void testSelectWithProjection() throws Exception ) .columns("v0") .limit(2) - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -652,7 +652,7 @@ public void testSelectSingleColumnTwice() throws Exception .intervals(querySegmentSpec(Filtration.eternity())) .columns("dim2") .limit(2) - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -744,14 +744,14 @@ public void testSelfJoinWithFallback() throws Exception .intervals(querySegmentSpec(Filtration.eternity())) .columns("dim1") .filters(not(selector("dim1", "", null))) - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build(), newScanQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .columns("dim1", "dim2") - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) 
.context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -767,10 +767,10 @@ public void testExplainSelfJoinWithFallback() throws Exception String emptyStringEq = NullHandling.replaceWithDefault() ? null : "\"\""; final String explanation = "BindableJoin(condition=[=($0, $2)], joinType=[inner])\n" - + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":" + + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"order\":\"none\",\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":" + emptyStringEq + ",\"extractionFn\":null}},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n" - + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"filter\":null,\"columns\":[\"dim1\",\"dim2\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING, dim2:STRING}])\n"; + + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"order\":\"none\",\"filter\":null,\"columns\":[\"dim1\",\"dim2\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING, dim2:STRING}])\n"; testQuery( PLANNER_CONFIG_FALLBACK, @@ -1879,7 +1879,7 @@ public void testSelectStarWithDimFilter() throws Exception ) ) .columns("__time", "cnt", "dim1", "dim2", "dim3", "m1", "m2", "unique_dim1") - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -6669,7 +6669,7 @@ public void testUsingSubqueryAsPartOfOrFilter() throws Exception + " BindableFilter(condition=[OR(=($0, 'xxx'), CAST(AND(IS NOT NULL($4), <>($2, 0), IS NOT NULL($1))):BOOLEAN)])\n" + " BindableJoin(condition=[=($1, $3)], joinType=[left])\n" + " BindableJoin(condition=[true], joinType=[inner])\n" - + " 
DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"filter\":null,\"columns\":[\"dim1\",\"dim2\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING, dim2:STRING}])\n" + + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"order\":\"none\",\"filter\":null,\"columns\":[\"dim1\",\"dim2\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING, dim2:STRING}])\n" + " DruidQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":{\"type\":\"like\",\"dimension\":\"dim1\",\"pattern\":\"%bc\",\"escape\":null,\"extractionFn\":null},\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"}}], signature=[{a0:LONG}])\n" + " DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[{\"type\":\"expression\",\"name\":\"v0\",\"expression\":\"1\",\"outputType\":\"LONG\"}],\"filter\":{\"type\":\"like\",\"dimension\":\"dim1\",\"pattern\":\"%bc\",\"escape\":null,\"extractionFn\":null},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\"},{\"type\":\"default\",\"dimension\":\"v0\",\"outputName\":\"v0\",\"outputType\":\"LONG\"}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false}], signature=[{d0:STRING, v0:LONG}])\n"; @@ -6739,7 +6739,7 @@ public void testUsingSubqueryAsFilterOnTwoColumns() throws Exception ) )) .columns("__time", "cnt", "dim1", "dim2") - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -7294,7 +7294,7 @@ public void testConcat() throws Exception ValueType.STRING )) .columns("v0") - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + 
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -7320,7 +7320,7 @@ public void testConcat() throws Exception ValueType.STRING )) .columns("v0") - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -7346,7 +7346,7 @@ public void testTextcat() throws Exception .intervals(querySegmentSpec(Filtration.eternity())) .virtualColumns(expressionVirtualColumn("v0", "concat(\"dim1\",\"dim1\")", ValueType.STRING)) .columns("v0") - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -7372,7 +7372,7 @@ public void testTextcat() throws Exception ValueType.STRING )) .columns("v0") - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -7568,7 +7568,7 @@ public void testFilterFloatDimension() throws Exception .intervals(querySegmentSpec(Filtration.eternity())) .columns("dim1") .filters(selector("f1", "0.1", null)) - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(1) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -7590,7 +7590,7 @@ public void testFilterDoubleDimension() throws Exception .intervals(querySegmentSpec(Filtration.eternity())) .columns("dim1") .filters(selector("d1", "1.7", null)) - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(1) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -7612,7 +7612,7 @@ public void testFilterLongDimension() throws Exception .intervals(querySegmentSpec(Filtration.eternity())) .columns("dim1") .filters(selector("l1", "7", null)) - .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(1) .context(QUERY_CONTEXT_DEFAULT) .build() diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java index ace64ad7d756..d692d1b9aadd 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java @@ -508,7 +508,8 @@ public int getNumMergeBuffers() new ScanQueryConfig(), new DefaultGenericQueryMetricsFactory(TestHelper.makeJsonMapper()) ), - new ScanQueryEngine() + new ScanQueryEngine(), + new ScanQueryConfig() ) ) .put(