diff --git a/distribution/pom.xml b/distribution/pom.xml
index 4dca3b03f514..8845d18a31c5 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -232,8 +232,6 @@
                        <argument>-c</argument>
                        <argument>io.druid.extensions.contrib:druid-redis-cache</argument>
                        <argument>-c</argument>
-                        <argument>io.druid.extensions.contrib:scan-query</argument>
-                        <argument>-c</argument>
                        <argument>io.druid.extensions.contrib:sqlserver-metadata-storage</argument>
                        <argument>-c</argument>
                        <argument>io.druid.extensions.contrib:statsd-emitter</argument>
diff --git a/docs/content/development/extensions.md b/docs/content/development/extensions.md
index 3f67839c62bc..bc7283060425 100644
--- a/docs/content/development/extensions.md
+++ b/docs/content/development/extensions.md
@@ -70,7 +70,6 @@ All of these community extensions can be downloaded using *pull-deps* with the c
|statsd-emitter|StatsD metrics emitter|[link](../development/extensions-contrib/statsd.html)|
|kafka-emitter|Kafka metrics emitter|[link](../development/extensions-contrib/kafka-emitter.html)|
|druid-thrift-extensions|Support thrift ingestion |[link](../development/extensions-contrib/thrift.html)|
-|scan-query|Scan query|[link](../development/extensions-contrib/scan-query.html)|
## Promoting Community Extension to Core Extension
diff --git a/docs/content/development/extensions-contrib/scan-query.md b/docs/content/querying/scan-query.md
similarity index 82%
rename from docs/content/development/extensions-contrib/scan-query.md
rename to docs/content/querying/scan-query.md
index 3eef162b254e..7869d564d248 100644
--- a/docs/content/development/extensions-contrib/scan-query.md
+++ b/docs/content/querying/scan-query.md
@@ -31,8 +31,11 @@ There are several main parts to a scan query:
|columns|A String array of dimensions and metrics to scan. If left empty, all dimensions and metrics are returned.|no|
|batchSize|How many rows are buffered before being returned to the client. Default is `20480`.|no|
|limit|How many rows to return. If not specified, all rows will be returned.|no|
+|legacy|Return results consistent with the legacy "scan-query" contrib extension. Defaults to the value set by `druid.query.scan.legacy`, which in turn defaults to false. See [Legacy mode](#legacy-mode) for details.|no|
|context|An additional JSON Object which can be used to specify certain flags.|no|
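+
+As an illustration, a minimal scan query might look like the following sketch (the datasource, interval, and
+column names here are hypothetical):
+
+```json
+{
+  "queryType": "scan",
+  "dataSource": "wikipedia",
+  "intervals": ["2016-01-01/2016-01-02"],
+  "columns": ["__time", "page", "user"],
+  "limit": 3
+}
+```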
+## Example results
+
The format of the result when resultFormat equals `list`:
```json
@@ -154,4 +157,19 @@ The format of the result when resultFormat equals `compactedList`:
The biggest difference between the Select query and the Scan query is that the Scan query does not retain all the returned rows in memory before they are passed to the client.
The Select query can cause memory pressure if too many rows are requested.
The Scan query does not have this issue.
-Scan query can return all rows without issuing another pagination query, which is extremely useful when query against historical or realtime node directly.
\ No newline at end of file
+The Scan query can return all rows without issuing another pagination query, which is extremely useful when querying against a historical or realtime node directly.
+
+## Legacy mode
+
+The Scan query supports a legacy mode designed for protocol compatibility with the former scan-query contrib extension.
+In legacy mode you can expect the following behavior changes:
+
+- The __time column is returned as "timestamp" rather than "__time". This will take precedence over any other column
+you may have that is named "timestamp".
+- The __time column is included in the list of columns even if you do not specifically ask for it.
+- Timestamps are returned as ISO8601 time strings rather than integers (milliseconds since 1970-01-01 00:00:00 UTC).
+
+Legacy mode can be triggered either by passing `"legacy" : true` in your query JSON, or by setting
+`druid.query.scan.legacy = true` on your Druid nodes. If you were previously using the scan-query contrib extension,
+the best way to migrate is to activate legacy mode during a rolling upgrade, then switch it off after the upgrade
+is complete.
diff --git a/docs/content/querying/select-query.md b/docs/content/querying/select-query.md
index 41960d30a97e..f3d302bbf2ca 100644
--- a/docs/content/querying/select-query.md
+++ b/docs/content/querying/select-query.md
@@ -2,6 +2,7 @@
layout: doc_page
---
# Select Queries
+
Select queries return raw Druid rows and support pagination.
```json
@@ -19,6 +20,13 @@ Select queries return raw Druid rows and support pagination.
}
```
+
+Consider using the [Scan query](scan-query.html) instead of the Select query if you don't need pagination, and you
+don't need the strict time-ascending or time-descending ordering offered by the Select query. The Scan query returns
+results without pagination, and offers "looser" ordering than Select, but is significantly more efficient in terms of
+both processing time and memory requirements. It is also capable of returning a virtually unlimited number of results.
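+
+For illustration, the same query expressed as a Scan query would simply drop the pagination spec, along the lines
+of this sketch (values hypothetical):
+
+```json
+{
+  "queryType": "scan",
+  "dataSource": "wikipedia",
+  "intervals": ["2013-01-01/2013-01-02"],
+  "limit": 5
+}
+```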
+
+
There are several main parts to a select query:
|property|description|required?|
diff --git a/docs/content/querying/sql.md b/docs/content/querying/sql.md
index e2b0e3e21a9a..911e2c86c16d 100644
--- a/docs/content/querying/sql.md
+++ b/docs/content/querying/sql.md
@@ -256,7 +256,9 @@ converted to zeroes).
## Query execution
-Queries without aggregations will use Druid's [Select](select-query.html) native query type.
+Queries without aggregations will use Druid's [Scan](scan-query.html) or [Select](select-query.html) native query types.
+Scan is used whenever possible, as it is generally more performant and more efficient than Select. However, Select
+is used in one case: when the query includes an `ORDER BY __time` clause, since Scan does not support sorting.
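+For example, a hypothetical `SELECT page FROM tbl ORDER BY __time` would be planned as a Select query, while the
+same query without the `ORDER BY` would be planned as a Scan query.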
Aggregation queries (using GROUP BY, DISTINCT, or any aggregation functions) will use one of Druid's three native
aggregation query types. Two (Timeseries and TopN) are specialized for specific types of aggregations, whereas the other
diff --git a/docs/content/toc.md b/docs/content/toc.md
index 6eca667d36c9..d18de0e7c734 100644
--- a/docs/content/toc.md
+++ b/docs/content/toc.md
@@ -34,6 +34,7 @@ layout: toc
* [DataSource Metadata](/docs/VERSION/querying/datasourcemetadataquery.html)
* [Search](/docs/VERSION/querying/searchquery.html)
* [Select](/docs/VERSION/querying/select-query.html)
+ * [Scan](/docs/VERSION/querying/scan-query.html)
* Components
* [Datasources](/docs/VERSION/querying/datasource.html)
* [Filters](/docs/VERSION/querying/filters.html)
diff --git a/extensions-contrib/scan-query/pom.xml b/extensions-contrib/scan-query/pom.xml
deleted file mode 100644
index 328be800e5d7..000000000000
--- a/extensions-contrib/scan-query/pom.xml
+++ /dev/null
@@ -1,63 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  ~ Licensed to Metamarkets Group Inc. (Metamarkets) under one
-  ~ or more contributor license agreements. See the NOTICE file
-  ~ distributed with this work for additional information
-  ~ regarding copyright ownership. Metamarkets licenses this file
-  ~ to you under the Apache License, Version 2.0 (the
-  ~ "License"); you may not use this file except in compliance
-  ~ with the License. You may obtain a copy of the License at
-  ~
-  ~     http://www.apache.org/licenses/LICENSE-2.0
-  ~
-  ~ Unless required by applicable law or agreed to in writing,
-  ~ software distributed under the License is distributed on an
-  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  ~ KIND, either express or implied. See the License for the
-  ~ specific language governing permissions and limitations
-  ~ under the License.
-  -->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <parent>
-        <groupId>io.druid</groupId>
-        <artifactId>druid</artifactId>
-        <version>0.11.0-SNAPSHOT</version>
-        <relativePath>../../pom.xml</relativePath>
-    </parent>
-    <modelVersion>4.0.0</modelVersion>
-
-    <groupId>io.druid.extensions.contrib</groupId>
-    <artifactId>scan-query</artifactId>
-    <name>scan-query</name>
-    <description>streaming version of select query</description>
-
-    <dependencies>
-        <dependency>
-            <groupId>io.druid</groupId>
-            <artifactId>druid-server</artifactId>
-            <version>${project.parent.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.easymock</groupId>
-            <artifactId>easymock</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>io.druid</groupId>
-            <artifactId>druid-processing</artifactId>
-            <version>${project.parent.version}</version>
-            <classifier>tests</classifier>
-            <scope>test</scope>
-        </dependency>
-    </dependencies>
-</project>
diff --git a/extensions-contrib/scan-query/src/main/java/io/druid/query/scan/ScanQueryDruidModule.java b/extensions-contrib/scan-query/src/main/java/io/druid/query/scan/ScanQueryDruidModule.java
deleted file mode 100644
index e8696d1a0a86..000000000000
--- a/extensions-contrib/scan-query/src/main/java/io/druid/query/scan/ScanQueryDruidModule.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to Metamarkets Group Inc. (Metamarkets) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. Metamarkets licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package io.druid.query.scan;
-
-import com.fasterxml.jackson.databind.Module;
-import com.fasterxml.jackson.databind.jsontype.NamedType;
-import com.fasterxml.jackson.databind.module.SimpleModule;
-import com.google.inject.Binder;
-import io.druid.guice.DruidBinders;
-import io.druid.guice.LazySingleton;
-import io.druid.initialization.DruidModule;
-
-import java.util.Arrays;
-import java.util.List;
-
-public class ScanQueryDruidModule implements DruidModule
-{
- @Override
- public void configure(Binder binder)
- {
- DruidBinders.queryToolChestBinder(binder)
- .addBinding(ScanQuery.class)
- .to(ScanQueryQueryToolChest.class)
- .in(LazySingleton.class);
-
- DruidBinders.queryRunnerFactoryBinder(binder)
- .addBinding(ScanQuery.class)
- .to(ScanQueryRunnerFactory.class)
- .in(LazySingleton.class);
- }
-
- @Override
- public List<? extends Module> getJacksonModules()
- {
- return Arrays.asList(
- new SimpleModule("ScanQueryDruidModule")
- .registerSubtypes(
- new NamedType(ScanQuery.class, ScanQuery.SCAN)
- )
- );
- }
-}
diff --git a/extensions-contrib/scan-query/src/main/resources/META-INF/services/io.druid.initialization.DruidModule b/extensions-contrib/scan-query/src/main/resources/META-INF/services/io.druid.initialization.DruidModule
deleted file mode 100644
index 1459501bf4a8..000000000000
--- a/extensions-contrib/scan-query/src/main/resources/META-INF/services/io.druid.initialization.DruidModule
+++ /dev/null
@@ -1 +0,0 @@
-io.druid.query.scan.ScanQueryDruidModule
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 57d3146ab9b5..d6ebf89433ff 100644
--- a/pom.xml
+++ b/pom.xml
@@ -134,7 +134,6 @@
        <module>extensions-contrib/virtual-columns</module>
        <module>extensions-contrib/thrift-extensions</module>
        <module>extensions-contrib/ambari-metrics-emitter</module>
-        <module>extensions-contrib/scan-query</module>
        <module>extensions-contrib/sqlserver-metadata-storage</module>
        <module>extensions-contrib/kafka-emitter</module>
        <module>extensions-contrib/redis-cache</module>
diff --git a/processing/src/main/java/io/druid/query/Query.java b/processing/src/main/java/io/druid/query/Query.java
index 9e0d8a0f658d..7e537c2a4ad0 100644
--- a/processing/src/main/java/io/druid/query/Query.java
+++ b/processing/src/main/java/io/druid/query/Query.java
@@ -27,6 +27,7 @@
import io.druid.query.filter.DimFilter;
import io.druid.query.groupby.GroupByQuery;
import io.druid.query.metadata.metadata.SegmentMetadataQuery;
+import io.druid.query.scan.ScanQuery;
import io.druid.query.search.search.SearchQuery;
import io.druid.query.select.SelectQuery;
import io.druid.query.spec.QuerySegmentSpec;
@@ -46,6 +47,7 @@
@JsonSubTypes.Type(name = Query.SEARCH, value = SearchQuery.class),
@JsonSubTypes.Type(name = Query.TIME_BOUNDARY, value = TimeBoundaryQuery.class),
@JsonSubTypes.Type(name = Query.GROUP_BY, value = GroupByQuery.class),
+ @JsonSubTypes.Type(name = Query.SCAN, value = ScanQuery.class),
@JsonSubTypes.Type(name = Query.SEGMENT_METADATA, value = SegmentMetadataQuery.class),
@JsonSubTypes.Type(name = Query.SELECT, value = SelectQuery.class),
@JsonSubTypes.Type(name = Query.TOPN, value = TopNQuery.class),
@@ -58,6 +60,7 @@ public interface Query<T>
String SEARCH = "search";
String TIME_BOUNDARY = "timeBoundary";
String GROUP_BY = "groupBy";
+ String SCAN = "scan";
String SEGMENT_METADATA = "segmentMetadata";
String SELECT = "select";
String TOPN = "topN";
diff --git a/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java
index b3e7cb39098f..df6c86c4867f 100644
--- a/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java
+++ b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java
@@ -22,6 +22,7 @@
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import io.druid.guice.annotations.ExtensionPoint;
+import io.druid.java.util.common.Cacheable;
import io.druid.query.lookup.LookupExtractionFn;
import io.druid.query.lookup.RegisteredLookupExtractionFn;
@@ -57,16 +58,8 @@
* regular expression with a capture group. When the regular expression matches the value of a dimension,
* the value captured by the group is used for grouping operations instead of the dimension value.
*/
-public interface ExtractionFn
+public interface ExtractionFn extends Cacheable
{
- /**
- * Returns a byte[] unique to all concrete implementations of DimExtractionFn. This byte[] is used to
- * generate a cache key for the specific query.
- *
- * @return a byte[] unit to all concrete implements of DimExtractionFn
- */
- public byte[] getCacheKey();
-
/**
* The "extraction" function. This should map an Object into some String value.
*
diff --git a/extensions-contrib/scan-query/src/main/java/io/druid/query/scan/ScanQuery.java b/processing/src/main/java/io/druid/query/scan/ScanQuery.java
similarity index 75%
rename from extensions-contrib/scan-query/src/main/java/io/druid/query/scan/ScanQuery.java
rename to processing/src/main/java/io/druid/query/scan/ScanQuery.java
index 1606ded6156e..e635191153e2 100644
--- a/extensions-contrib/scan-query/src/main/java/io/druid/query/scan/ScanQuery.java
+++ b/processing/src/main/java/io/druid/query/scan/ScanQuery.java
@@ -20,7 +20,6 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
-import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import io.druid.query.BaseQuery;
@@ -32,39 +31,45 @@
import io.druid.query.filter.SelectorDimFilter;
import io.druid.query.spec.LegacySegmentSpec;
import io.druid.query.spec.QuerySegmentSpec;
+import io.druid.segment.VirtualColumn;
+import io.druid.segment.VirtualColumns;
import org.joda.time.Interval;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
-@JsonTypeName("scan")
public class ScanQuery extends BaseQuery<ScanResultValue>
{
- public static final String SCAN = "scan";
public static final String RESULT_FORMAT_LIST = "list";
public static final String RESULT_FORMAT_COMPACTED_LIST = "compactedList";
public static final String RESULT_FORMAT_VALUE_VECTOR = "valueVector";
+ private final VirtualColumns virtualColumns;
private final String resultFormat;
private final int batchSize;
private final long limit;
private final DimFilter dimFilter;
private final List<String> columns;
+ private final Boolean legacy;
@JsonCreator
public ScanQuery(
@JsonProperty("dataSource") DataSource dataSource,
@JsonProperty("intervals") QuerySegmentSpec querySegmentSpec,
+ @JsonProperty("virtualColumns") VirtualColumns virtualColumns,
@JsonProperty("resultFormat") String resultFormat,
@JsonProperty("batchSize") int batchSize,
@JsonProperty("limit") long limit,
@JsonProperty("filter") DimFilter dimFilter,
@JsonProperty("columns") List columns,
+ @JsonProperty("legacy") Boolean legacy,
@JsonProperty("context") Map context
)
{
super(dataSource, querySegmentSpec, false, context);
+ this.virtualColumns = VirtualColumns.nullToEmpty(virtualColumns);
this.resultFormat = resultFormat == null ? RESULT_FORMAT_LIST : resultFormat;
this.batchSize = (batchSize == 0) ? 4096 * 5 : batchSize;
this.limit = (limit == 0) ? Long.MAX_VALUE : limit;
@@ -72,6 +77,13 @@ public ScanQuery(
Preconditions.checkArgument(this.limit > 0, "limit must be greater than 0");
this.dimFilter = dimFilter;
this.columns = columns;
+ this.legacy = legacy;
+ }
+
+ @JsonProperty
+ public VirtualColumns getVirtualColumns()
+ {
+ return virtualColumns;
}
@JsonProperty
@@ -99,6 +111,7 @@ public boolean hasFilters()
}
@Override
+ @JsonProperty
public DimFilter getFilter()
{
return dimFilter;
@@ -110,16 +123,24 @@ public String getType()
return SCAN;
}
- @JsonProperty("filter")
- public DimFilter getDimensionsFilter()
+ @JsonProperty
+ public List<String> getColumns()
{
- return dimFilter;
+ return columns;
}
+ /**
+ * Compatibility mode with the legacy scan-query extension.
+ */
@JsonProperty
- public List<String> getColumns()
+ public Boolean isLegacy()
{
- return columns;
+ return legacy;
+ }
+
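+ /**
+ * Returns a copy of this query with a non-null "legacy" flag: a null flag is replaced by the server-wide
+ * default from {@link ScanQueryConfig}. The engine assumes the flag has been resolved by the time it runs.
+ */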
+ public ScanQuery withNonNullLegacy(final ScanQueryConfig scanQueryConfig)
+ {
+ return ScanQueryBuilder.copy(this).legacy(legacy != null ? legacy : scanQueryConfig.isLegacy()).build();
}
@Override
@@ -146,7 +167,7 @@ public ScanQuery withDimFilter(DimFilter dimFilter)
}
@Override
- public boolean equals(Object o)
+ public boolean equals(final Object o)
{
if (this == o) {
return true;
@@ -157,49 +178,36 @@ public boolean equals(Object o)
if (!super.equals(o)) {
return false;
}
-
- ScanQuery that = (ScanQuery) o;
-
- if (batchSize != that.batchSize) {
- return false;
- }
- if (limit != that.limit) {
- return false;
- }
- if (resultFormat != null ? !resultFormat.equals(that.resultFormat) : that.resultFormat != null) {
- return false;
- }
- if (dimFilter != null ? !dimFilter.equals(that.dimFilter) : that.dimFilter != null) {
- return false;
- }
- return columns != null ? columns.equals(that.columns) : that.columns == null;
+ final ScanQuery scanQuery = (ScanQuery) o;
+ return batchSize == scanQuery.batchSize &&
+ limit == scanQuery.limit &&
+ Objects.equals(legacy, scanQuery.legacy) &&
+ Objects.equals(virtualColumns, scanQuery.virtualColumns) &&
+ Objects.equals(resultFormat, scanQuery.resultFormat) &&
+ Objects.equals(dimFilter, scanQuery.dimFilter) &&
+ Objects.equals(columns, scanQuery.columns);
}
@Override
public int hashCode()
{
- int result = super.hashCode();
- result = 31 * result + (resultFormat != null ? resultFormat.hashCode() : 0);
- result = 31 * result + batchSize;
- result = 31 * result + (int) (limit ^ (limit >>> 32));
- result = 31 * result + (dimFilter != null ? dimFilter.hashCode() : 0);
- result = 31 * result + (columns != null ? columns.hashCode() : 0);
- return result;
+ return Objects.hash(super.hashCode(), virtualColumns, resultFormat, batchSize, limit, dimFilter, columns, legacy);
}
@Override
public String toString()
{
return "ScanQuery{" +
- "dataSource='" + getDataSource() + '\'' +
- ", querySegmentSpec=" + getQuerySegmentSpec() +
- ", descending=" + isDescending() +
- ", resultFormat='" + resultFormat + '\'' +
- ", batchSize=" + batchSize +
- ", limit=" + limit +
- ", dimFilter=" + dimFilter +
- ", columns=" + columns +
- '}';
+ "dataSource='" + getDataSource() + '\'' +
+ ", querySegmentSpec=" + getQuerySegmentSpec() +
+ ", virtualColumns=" + getVirtualColumns() +
+ ", resultFormat='" + resultFormat + '\'' +
+ ", batchSize=" + batchSize +
+ ", limit=" + limit +
+ ", dimFilter=" + dimFilter +
+ ", columns=" + columns +
+ ", legacy=" + legacy +
+ '}';
}
/**
@@ -221,23 +229,27 @@ public static class ScanQueryBuilder
{
private DataSource dataSource;
private QuerySegmentSpec querySegmentSpec;
+ private VirtualColumns virtualColumns;
private Map<String, Object> context;
private String resultFormat;
private int batchSize;
private long limit;
private DimFilter dimFilter;
private List<String> columns;
+ private Boolean legacy;
public ScanQueryBuilder()
{
dataSource = null;
querySegmentSpec = null;
+ virtualColumns = null;
context = null;
resultFormat = null;
batchSize = 0;
limit = 0;
dimFilter = null;
columns = Lists.newArrayList();
+ legacy = null;
}
public ScanQuery build()
@@ -245,11 +257,13 @@ public ScanQuery build()
return new ScanQuery(
dataSource,
querySegmentSpec,
+ virtualColumns,
resultFormat,
batchSize,
limit,
dimFilter,
columns,
+ legacy,
context
);
}
@@ -259,11 +273,13 @@ public static ScanQueryBuilder copy(ScanQuery query)
return new ScanQueryBuilder()
.dataSource(query.getDataSource())
.intervals(query.getQuerySegmentSpec())
+ .virtualColumns(query.getVirtualColumns())
.resultFormat(query.getResultFormat())
.batchSize(query.getBatchSize())
.limit(query.getLimit())
.filters(query.getFilter())
.columns(query.getColumns())
+ .legacy(query.isLegacy())
.context(query.getContext());
}
@@ -297,6 +313,22 @@ public ScanQueryBuilder intervals(List<Interval> l)
return this;
}
+ public ScanQueryBuilder virtualColumns(VirtualColumns virtualColumns)
+ {
+ this.virtualColumns = virtualColumns;
+ return this;
+ }
+
+ public ScanQueryBuilder virtualColumns(List<VirtualColumn> virtualColumns)
+ {
+ return virtualColumns(VirtualColumns.create(virtualColumns));
+ }
+
+ public ScanQueryBuilder virtualColumns(VirtualColumn... virtualColumns)
+ {
+ return virtualColumns(VirtualColumns.create(Arrays.asList(virtualColumns)));
+ }
+
public ScanQueryBuilder context(Map<String, Object> c)
{
context = c;
@@ -350,6 +382,12 @@ public ScanQueryBuilder columns(String... c)
columns = Arrays.asList(c);
return this;
}
+
+ public ScanQueryBuilder legacy(Boolean legacy)
+ {
+ this.legacy = legacy;
+ return this;
+ }
}
public static ScanQueryBuilder newScanQueryBuilder()
diff --git a/processing/src/main/java/io/druid/query/scan/ScanQueryConfig.java b/processing/src/main/java/io/druid/query/scan/ScanQueryConfig.java
new file mode 100644
index 000000000000..da1e9b4d0c1b
--- /dev/null
+++ b/processing/src/main/java/io/druid/query/scan/ScanQueryConfig.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.query.scan;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.Objects;
+
+public class ScanQueryConfig
+{
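+ /**
+ * Server-wide default for scan-query legacy mode, settable via the "druid.query.scan.legacy" runtime property.
+ * Individual queries may override it with their own "legacy" parameter.
+ */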
+ @JsonProperty
+ private boolean legacy = false;
+
+ public boolean isLegacy()
+ {
+ return legacy;
+ }
+
+ public ScanQueryConfig setLegacy(final boolean legacy)
+ {
+ this.legacy = legacy;
+ return this;
+ }
+
+ @Override
+ public boolean equals(final Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ final ScanQueryConfig that = (ScanQueryConfig) o;
+ return legacy == that.legacy;
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(legacy);
+ }
+
+ @Override
+ public String toString()
+ {
+ return "ScanQueryConfig{" +
+ "legacy=" + legacy +
+ '}';
+ }
+}
diff --git a/extensions-contrib/scan-query/src/main/java/io/druid/query/scan/ScanQueryEngine.java b/processing/src/main/java/io/druid/query/scan/ScanQueryEngine.java
similarity index 59%
rename from extensions-contrib/scan-query/src/main/java/io/druid/query/scan/ScanQueryEngine.java
rename to processing/src/main/java/io/druid/query/scan/ScanQueryEngine.java
index ebaf70906d39..b8e39fb5f8e6 100644
--- a/extensions-contrib/scan-query/src/main/java/io/druid/query/scan/ScanQueryEngine.java
+++ b/processing/src/main/java/io/druid/query/scan/ScanQueryEngine.java
@@ -20,46 +20,51 @@
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import io.druid.java.util.common.DateTimes;
import io.druid.java.util.common.ISE;
+import io.druid.java.util.common.UOE;
import io.druid.java.util.common.granularity.Granularities;
import io.druid.java.util.common.guava.BaseSequence;
import io.druid.java.util.common.guava.Sequence;
import io.druid.java.util.common.guava.Sequences;
-import io.druid.query.ColumnSelectorPlus;
import io.druid.query.QueryContexts;
import io.druid.query.QueryInterruptedException;
-import io.druid.query.dimension.DefaultDimensionSpec;
-import io.druid.query.dimension.DimensionSpec;
import io.druid.query.filter.Filter;
-import io.druid.query.select.SelectQueryEngine;
import io.druid.segment.Cursor;
-import io.druid.segment.DimensionHandlerUtils;
-import io.druid.segment.LongColumnSelector;
import io.druid.segment.ObjectColumnSelector;
import io.druid.segment.Segment;
import io.druid.segment.StorageAdapter;
-import io.druid.segment.VirtualColumns;
+import io.druid.segment.VirtualColumn;
import io.druid.segment.column.Column;
import io.druid.segment.filter.Filters;
import org.joda.time.Interval;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.TimeoutException;
public class ScanQueryEngine
{
- private static final SelectQueryEngine.SelectStrategyFactory STRATEGY_FACTORY = new SelectQueryEngine.SelectStrategyFactory();
+ private static final String LEGACY_TIMESTAMP_KEY = "timestamp";
+
public Sequence<ScanResultValue> process(
final ScanQuery query,
final Segment segment,
final Map<String, Object> responseContext
)
{
+ // "legacy" should be non-null due to toolChest.mergeResults
+ final boolean legacy = Preconditions.checkNotNull(query.isLegacy(), "WTF?! Expected non-null legacy");
+
if (responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) != null) {
long count = (long) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT);
if (count >= query.getLimit()) {
@@ -77,32 +82,42 @@ public Sequence process(
);
}
- List<String> allDims = Lists.newLinkedList(adapter.getAvailableDimensions());
- List<String> allMetrics = Lists.newLinkedList(adapter.getAvailableMetrics());
- final List<String> allColumns = Lists.newLinkedList();
+ final List<String> allColumns = new ArrayList<>();
+
if (query.getColumns() != null && !query.getColumns().isEmpty()) {
- if (!query.getColumns().contains(ScanResultValue.timestampKey)) {
- allColumns.add(ScanResultValue.timestampKey);
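+ // In legacy mode, the timestamp column is always included under the name "timestamp", even when not requested.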
+ if (legacy && !query.getColumns().contains(LEGACY_TIMESTAMP_KEY)) {
+ allColumns.add(LEGACY_TIMESTAMP_KEY);
}
+
+ // Unless we're in legacy mode, allColumns equals query.getColumns() exactly. This is nice since it makes
+ // the compactedList form easier to use.
allColumns.addAll(query.getColumns());
- allDims.retainAll(query.getColumns());
- allMetrics.retainAll(query.getColumns());
} else {
- if (!allDims.contains(ScanResultValue.timestampKey)) {
- allColumns.add(ScanResultValue.timestampKey);
+ final Set<String> availableColumns = Sets.newLinkedHashSet(
+ Iterables.concat(
+ Collections.singleton(legacy ? LEGACY_TIMESTAMP_KEY : Column.TIME_COLUMN_NAME),
+ Iterables.transform(
+ Arrays.asList(query.getVirtualColumns().getVirtualColumns()),
+ VirtualColumn::getOutputName
+ ),
+ adapter.getAvailableDimensions(),
+ adapter.getAvailableMetrics()
+ )
+ );
+
+ allColumns.addAll(availableColumns);
+
+ if (legacy) {
+ allColumns.remove(Column.TIME_COLUMN_NAME);
}
- allColumns.addAll(allDims);
- allColumns.addAll(allMetrics);
}
- final List<DimensionSpec> dims = DefaultDimensionSpec.toSpec(allDims);
- final List<String> metrics = allMetrics;
final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
final String segmentId = segment.getIdentifier();
- final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
+ final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter()));
if (responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) == null) {
responseContext.put(ScanQueryRunnerFactory.CTX_COUNT, 0L);
@@ -113,7 +128,7 @@ public Sequence process(
adapter.makeCursors(
filter,
intervals.get(0),
- VirtualColumns.EMPTY,
+ query.getVirtualColumns(),
Granularities.ALL,
query.isDescending(),
null
@@ -129,23 +144,21 @@ public Sequence<ScanResultValue> apply(final Cursor cursor)
@Override
public Iterator<ScanResultValue> make()
{
- final LongColumnSelector timestampColumnSelector =
- cursor.getColumnSelectorFactory().makeLongColumnSelector(Column.TIME_COLUMN_NAME);
-
- final List<ColumnSelectorPlus<SelectQueryEngine.SelectColumnSelectorStrategy>> selectorPlusList = Arrays.asList(
- DimensionHandlerUtils.createColumnSelectorPluses(
- STRATEGY_FACTORY,
- Lists.newArrayList(dims),
- cursor.getColumnSelectorFactory()
- )
- );
-
- final Map<String, ObjectColumnSelector> metSelectors = Maps.newHashMap();
- for (String metric : metrics) {
- final ObjectColumnSelector metricSelector =
- cursor.getColumnSelectorFactory().makeObjectColumnSelector(metric);
- metSelectors.put(metric, metricSelector);
+ final List<ObjectColumnSelector> columnSelectors = new ArrayList<>(allColumns.size());
+
+ for (String column : allColumns) {
+ final ObjectColumnSelector selector;
+
+ if (legacy && column.equals(LEGACY_TIMESTAMP_KEY)) {
+ selector = cursor.getColumnSelectorFactory()
+ .makeObjectColumnSelector(Column.TIME_COLUMN_NAME);
+ } else {
+ selector = cursor.getColumnSelectorFactory().makeObjectColumnSelector(column);
+ }
+
+ columnSelectors.add(selector);
}
+
final int batchSize = query.getBatchSize();
return new Iterator<ScanResultValue>()
{
@@ -163,15 +176,15 @@ public ScanResultValue next()
if (hasTimeout && System.currentTimeMillis() >= timeoutAt) {
throw new QueryInterruptedException(new TimeoutException());
}
- long lastOffset = offset;
- Object events = null;
- String resultFormat = query.getResultFormat();
- if (ScanQuery.RESULT_FORMAT_VALUE_VECTOR.equals(resultFormat)) {
- throw new UnsupportedOperationException("valueVector is not supported now");
- } else if (ScanQuery.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
+ final long lastOffset = offset;
+ final Object events;
+ final String resultFormat = query.getResultFormat();
+ if (ScanQuery.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
events = rowsToCompactedList();
- } else {
+ } else if (ScanQuery.RESULT_FORMAT_LIST.equals(resultFormat)) {
events = rowsToList();
+ } else {
+ throw new UOE("resultFormat[%s] is not supported", resultFormat);
}
responseContext.put(
ScanQueryRunnerFactory.CTX_COUNT,
@@ -192,46 +205,48 @@ public void remove()
throw new UnsupportedOperationException();
}
- private Object rowsToCompactedList()
+ private List