From 4b98828007ba1387b018bfa9919e228fbaacad24 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Tue, 11 Aug 2020 19:47:06 -0700
Subject: [PATCH 01/24] DruidInputSource: Fix issues in column projection,
timestamp handling.
DruidInputSource, DruidSegmentReader changes:
1) Remove "dimensions" and "metrics". They are not necessary, because we
can compute which columns we need to read based on what is going to
be used by the timestamp, transform, dimensions, and metrics.
2) Start using ColumnsFilter (see below) to decide which columns we need
to read.
3) Actually respect the "timestampSpec". Previously, it was ignored, and
the timestamp of the returned InputRows was set to the `__time` column
of the input datasource.
(1) and (2) together fix a bug in which the DruidInputSource would not
properly read columns that are used as inputs to a transformSpec.
(3) fixes a bug where the timestampSpec would be ignored if you attempted
to set the column to something other than `__time`.
(1) and (3) are breaking changes.
Web console changes:
1) Remove "Dimensions" and "Metrics" from the Druid input source.
2) Set timestampSpec to `{"column": "__time", "format": "millis"}` for
compatibility with the new behavior.
Other changes:
1) Add ColumnsFilter, a new class that allows input readers to determine
which columns they need to read. Currently, it's only used by the
DruidInputSource, but it could be used by other columnar input sources
in the future.
2) Add a ColumnsFilter to InputRowSchema.
3) Remove the metric names from InputRowSchema (they were unused).
4) Add InputRowSchemas.fromDataSchema method that computes the proper
ColumnsFilter for a given timestamp, dimensions, transform, and metrics
(a sketch of this computation follows this list).
5) Add "getRequiredColumns" method to TransformSpec to support the above.
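For illustration, a minimal sketch of the column computation behind (4) and (5).
The class name is hypothetical and the exact signatures are assumptions (the real
implementation lives in indexing-service and may need extra handling, e.g. for
schemaless dimensions); it assumes TransformSpec#getRequiredColumns returns the
set of input column names used by the transforms:

```java
import java.util.HashSet;
import java.util.Set;

import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.indexing.DataSchema;

public class InputRowSchemasSketch
{
  public static InputRowSchema fromDataSchema(final DataSchema dataSchema)
  {
    final Set<String> required = new HashSet<>();

    // Column read by the timestampSpec.
    required.add(dataSchema.getTimestampSpec().getTimestampColumn());

    // Inputs to the transformSpec (via the new getRequiredColumns method).
    required.addAll(dataSchema.getTransformSpec().getRequiredColumns());

    // Declared dimensions, plus the input fields of each metric aggregator.
    required.addAll(dataSchema.getDimensionsSpec().getDimensionNames());
    for (AggregatorFactory aggregator : dataSchema.getAggregators()) {
      required.addAll(aggregator.requiredFields());
    }

    return new InputRowSchema(
        dataSchema.getTimestampSpec(),
        dataSchema.getDimensionsSpec(),
        ColumnsFilter.inclusionBased(required)
    );
  }
}
```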
---
.../druid/data/input/ColumnsFilter.java | 152 +++++
.../druid/data/input/InputRowSchema.java | 25 +-
.../apache/druid/data/input/InputSource.java | 6 +-
.../druid/data/input/MapBasedInputRow.java | 24 +
...rehoseFactoryToInputSourceAdaptorTest.java | 3 +-
.../data/input/impl/ColumnsFilterTest.java | 74 +++
.../druid/data/input/impl/CsvReaderTest.java | 6 +-
.../data/input/impl/DelimitedReaderTest.java | 4 +-
.../impl/InputEntityIteratingReaderTest.java | 4 +-
.../druid/data/input/impl/JsonReaderTest.java | 11 +-
docs/ingestion/native-batch.md | 96 +--
.../data/input/aliyun/OssInputSourceTest.java | 8 +-
.../data/input/avro/AvroOCFReaderTest.java | 5 +-
.../GoogleCloudStorageInputSourceTest.java | 5 +-
.../inputsource/hdfs/HdfsInputSourceTest.java | 3 +-
.../druid/data/input/orc/OrcReaderTest.java | 3 +-
.../parquet/CompatParquetReaderTest.java | 14 +-
.../parquet/DecimalParquetReaderTest.java | 7 +-
.../parquet/FlattenSpecParquetReaderTest.java | 18 +-
.../ParquetReaderResourceLeakTest.java | 4 +-
.../parquet/TimestampsParquetReaderTest.java | 10 +-
.../input/parquet/WikiParquetReaderTest.java | 4 +-
.../data/input/s3/S3InputSourceTest.java | 5 +-
.../common/ReingestionTimelineUtils.java | 3 +
.../indexing/common/task/CompactionTask.java | 6 +-
.../druid/indexing/common/task/IndexTask.java | 14 +-
.../common/task/InputSourceProcessor.java | 19 +-
.../PartialDimensionDistributionTask.java | 20 +-
.../batch/parallel/SinglePhaseSubTask.java | 14 +-
.../IngestSegmentFirehoseFactory.java | 3 +
.../indexing/input/DruidInputSource.java | 47 +-
.../input/DruidSegmentInputFormat.java | 16 +-
.../indexing/input/DruidSegmentReader.java | 209 ++++---
.../druid/indexing/input/InputRowSchemas.java | 130 ++++
.../overlord/sampler/InputSourceSampler.java | 10 +-
.../SeekableStreamIndexTaskRunner.java | 11 +-
.../common/task/CompactionTaskTest.java | 11 +-
.../input/DruidSegmentReaderTest.java | 567 +++++++++++++++++-
.../indexing/input/InputRowSchemasTest.java | 105 ++++
.../RecordSupplierInputSourceTest.java | 3 +-
.../seekablestream/StreamChunkParserTest.java | 5 +-
.../transform/ExpressionTransform.java | 16 +-
.../druid/segment/transform/Transform.java | 7 +
.../segment/transform/TransformSpec.java | 15 +
.../metadata/input/SqlInputSourceTest.java | 4 +-
web-console/src/utils/ingestion-spec.tsx | 24 -
.../views/load-data-view/load-data-view.tsx | 2 +-
47 files changed, 1422 insertions(+), 330 deletions(-)
create mode 100644 core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
create mode 100644 core/src/test/java/org/apache/druid/data/input/impl/ColumnsFilterTest.java
create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/input/InputRowSchemas.java
create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/input/InputRowSchemasTest.java
diff --git a/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java b/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
new file mode 100644
index 000000000000..f391e7e41c40
--- /dev/null
+++ b/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.data.input;
+
+import java.util.Collections;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * Used by some {@link InputSourceReader} implementations in order to know what columns will need to be read out
+ * of the {@link InputRow} objects they create.
+ *
+ * This is meant to be useful as an optimization: if we're reading from a columnar data format, then when a column
+ * isn't going to be needed, we shouldn't read it.
+ *
+ * @see InputSource#reader accepts objects of this class
+ */
+public abstract class ColumnsFilter
+{
+ /**
+ * Accepts all columns.
+ */
+ public static ColumnsFilter all()
+ {
+ return new ExclusionBased(Collections.emptySet());
+ }
+
+ /**
+ * Accepts a specific list of columns.
+ */
+ public static ColumnsFilter inclusionBased(final Set<String> inclusions)
+ {
+ return new InclusionBased(inclusions);
+ }
+
+
+ /**
+ * Accepts all columns, except those on a specific list.
+ */
+ public static ColumnsFilter exclusionBased(final Set<String> exclusions)
+ {
+ return new ExclusionBased(exclusions);
+ }
+
+ /**
+ * Check if a column should be included or not.
+ */
+ public abstract boolean apply(final String column);
+
+ public static class InclusionBased extends ColumnsFilter
+ {
+ private final Set<String> inclusions;
+
+ private InclusionBased(Set<String> inclusions)
+ {
+ this.inclusions = inclusions;
+ }
+
+ @Override
+ public boolean apply(String column)
+ {
+ return inclusions.contains(column);
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ InclusionBased that = (InclusionBased) o;
+ return Objects.equals(inclusions, that.inclusions);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(inclusions);
+ }
+
+ @Override
+ public String toString()
+ {
+ return "ColumnsFilter.InclusionBased{" +
+ "inclusions=" + inclusions +
+ '}';
+ }
+ }
+
+ public static class ExclusionBased extends ColumnsFilter
+ {
+ private final Set<String> exclusions;
+
+ public ExclusionBased(Set<String> exclusions)
+ {
+ this.exclusions = exclusions;
+ }
+
+ @Override
+ public boolean apply(String column)
+ {
+ return !exclusions.contains(column);
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ ExclusionBased that = (ExclusionBased) o;
+ return Objects.equals(exclusions, that.exclusions);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(exclusions);
+ }
+
+ @Override
+ public String toString()
+ {
+ return "ColumnsFilter.ExclusionBased{" +
+ "exclusions=" + exclusions +
+ '}';
+ }
+ }
+}
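A quick usage sketch of the class above (the column names are illustrative):

```java
import com.google.common.collect.ImmutableSet;
import org.apache.druid.data.input.ColumnsFilter;

public class ColumnsFilterExample
{
  public static void main(String[] args)
  {
    // all() is exclusion-based with an empty exclusion set, so everything passes.
    System.out.println(ColumnsFilter.all().apply("page"));    // true

    // inclusionBased(): only the listed columns pass.
    final ColumnsFilter include = ColumnsFilter.inclusionBased(ImmutableSet.of("page", "user"));
    System.out.println(include.apply("page"));                // true
    System.out.println(include.apply("added"));               // false

    // exclusionBased(): everything except the listed columns passes.
    final ColumnsFilter exclude = ColumnsFilter.exclusionBased(ImmutableSet.of("added"));
    System.out.println(exclude.apply("added"));               // false
  }
}
```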
diff --git a/core/src/main/java/org/apache/druid/data/input/InputRowSchema.java b/core/src/main/java/org/apache/druid/data/input/InputRowSchema.java
index c908187962e7..227bd3a6d198 100644
--- a/core/src/main/java/org/apache/druid/data/input/InputRowSchema.java
+++ b/core/src/main/java/org/apache/druid/data/input/InputRowSchema.java
@@ -22,8 +22,6 @@
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.TimestampSpec;
-import java.util.List;
-
/**
* Schema of {@link InputRow}.
*/
@@ -31,13 +29,17 @@ public class InputRowSchema
{
private final TimestampSpec timestampSpec;
private final DimensionsSpec dimensionsSpec;
- private final List<String> metricNames;
+ private final ColumnsFilter columnsFilter;
- public InputRowSchema(TimestampSpec timestampSpec, DimensionsSpec dimensionsSpec, List<String> metricNames)
+ public InputRowSchema(
+ final TimestampSpec timestampSpec,
+ final DimensionsSpec dimensionsSpec,
+ final ColumnsFilter columnsFilter
+ )
{
this.timestampSpec = timestampSpec;
this.dimensionsSpec = dimensionsSpec;
- this.metricNames = metricNames;
+ this.columnsFilter = columnsFilter;
}
public TimestampSpec getTimestampSpec()
@@ -50,8 +52,17 @@ public DimensionsSpec getDimensionsSpec()
return dimensionsSpec;
}
- public List<String> getMetricNames()
+ /**
+ * A {@link ColumnsFilter} that can filter down the list of columns that must be read after flattening.
+ *
+ * Logically, Druid applies ingestion spec components in a particular order: first flattenSpec (if any), then
+ * timestampSpec, then transformSpec, and finally dimensionsSpec and metricsSpec.
+ *
+ * If a flattenSpec is provided, this method returns a filter that should be applied after flattening. So, it will
+ * be based on what needs to pass between the flattenSpec and everything beyond it.
+ */
+ public ColumnsFilter getColumnsFilter()
{
- return metricNames;
+ return columnsFilter;
}
}
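Putting the two pieces together, a sketch of constructing the new-style schema
(the specs and column names are borrowed from the doc example later in this patch):

```java
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.TimestampSpec;

public class InputRowSchemaExample
{
  public static void main(String[] args)
  {
    // Read __time as a millis timestamp, declare two dimensions, and read only
    // the columns those specs actually need.
    final InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("__time", "millis", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("countryName", "page"))),
        ColumnsFilter.inclusionBased(ImmutableSet.of("__time", "countryName", "page"))
    );

    System.out.println(schema.getColumnsFilter().apply("countryName")); // true
    System.out.println(schema.getColumnsFilter().apply("added"));       // false
  }
}
```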
diff --git a/core/src/main/java/org/apache/druid/data/input/InputSource.java b/core/src/main/java/org/apache/druid/data/input/InputSource.java
index b0144c51eef5..1dce5f04deac 100644
--- a/core/src/main/java/org/apache/druid/data/input/InputSource.java
+++ b/core/src/main/java/org/apache/druid/data/input/InputSource.java
@@ -76,5 +76,9 @@ public interface InputSource
* @param inputFormat to parse data. It can be null if {@link #needsFormat()} = true
* @param temporaryDirectory to store temp data. It will be cleaned up automatically once the task is finished.
*/
- InputSourceReader reader(InputRowSchema inputRowSchema, @Nullable InputFormat inputFormat, File temporaryDirectory);
+ InputSourceReader reader(
+ InputRowSchema inputRowSchema,
+ @Nullable InputFormat inputFormat,
+ File temporaryDirectory
+ );
}
diff --git a/core/src/main/java/org/apache/druid/data/input/MapBasedInputRow.java b/core/src/main/java/org/apache/druid/data/input/MapBasedInputRow.java
index 59ab8a55710c..e9117f26911a 100644
--- a/core/src/main/java/org/apache/druid/data/input/MapBasedInputRow.java
+++ b/core/src/main/java/org/apache/druid/data/input/MapBasedInputRow.java
@@ -25,8 +25,10 @@
import java.util.List;
import java.util.Map;
+import java.util.Objects;
/**
+ *
*/
@PublicApi
public class MapBasedInputRow extends MapBasedRow implements InputRow
@@ -59,6 +61,28 @@ public List getDimensions()
return dimensions;
}
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ if (!super.equals(o)) {
+ return false;
+ }
+ MapBasedInputRow that = (MapBasedInputRow) o;
+ return Objects.equals(dimensions, that.dimensions);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(super.hashCode(), dimensions);
+ }
+
@Override
public String toString()
{
diff --git a/core/src/test/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptorTest.java b/core/src/test/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptorTest.java
index 088bed58cfcb..7a1634721c52 100644
--- a/core/src/test/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptorTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptorTest.java
@@ -36,7 +36,6 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Stream;
@@ -70,7 +69,7 @@ public void testUnimplementedInputFormat() throws IOException
new InputRowSchema(
inputRowParser.getParseSpec().getTimestampSpec(),
inputRowParser.getParseSpec().getDimensionsSpec(),
- Collections.emptyList()
+ ColumnsFilter.all()
),
null,
null
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/ColumnsFilterTest.java b/core/src/test/java/org/apache/druid/data/input/impl/ColumnsFilterTest.java
new file mode 100644
index 000000000000..d85e9278de66
--- /dev/null
+++ b/core/src/test/java/org/apache/druid/data/input/impl/ColumnsFilterTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.data.input.impl;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import nl.jqno.equalsverifier.EqualsVerifier;
+import org.apache.druid.data.input.ColumnsFilter;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class ColumnsFilterTest
+{
+ private static final List<String> COLUMNS = ImmutableList.of("a", "b", "c");
+
+ @Test
+ public void testAll()
+ {
+ Assert.assertEquals(
+ ImmutableList.of("a", "b", "c"),
+ apply(ColumnsFilter.all(), COLUMNS)
+ );
+ }
+
+ @Test
+ public void testInclusionBased()
+ {
+ Assert.assertEquals(
+ ImmutableList.of("b"),
+ apply(ColumnsFilter.inclusionBased(ImmutableSet.of("b")), COLUMNS)
+ );
+ }
+
+ @Test
+ public void testExclusionBased()
+ {
+ Assert.assertEquals(
+ ImmutableList.of("a", "c"),
+ apply(ColumnsFilter.exclusionBased(ImmutableSet.of("b")), COLUMNS)
+ );
+ }
+
+ @Test
+ public void testEquals()
+ {
+ EqualsVerifier.forClass(ColumnsFilter.InclusionBased.class).usingGetClass().verify();
+ EqualsVerifier.forClass(ColumnsFilter.ExclusionBased.class).usingGetClass().verify();
+ }
+
+ private List<String> apply(ColumnsFilter columnsFilter, List<String> columns)
+ {
+ return columns.stream().filter(columnsFilter::apply).collect(Collectors.toList());
+ }
+}
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java b/core/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java
index ec942379f3b2..c1faa274845c 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java
@@ -23,6 +23,7 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
@@ -37,7 +38,6 @@
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
-import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
@@ -47,7 +47,7 @@ public class CsvReaderTest
private static final InputRowSchema INPUT_ROW_SCHEMA = new InputRowSchema(
new TimestampSpec("ts", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "name"))),
- Collections.emptyList()
+ ColumnsFilter.all()
);
@BeforeClass
@@ -229,7 +229,7 @@ public void testQuotes() throws IOException
new InputRowSchema(
new TimestampSpec("Timestamp", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("Timestamp"))),
- Collections.emptyList()
+ ColumnsFilter.all()
),
source,
null
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java b/core/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java
index e590ed566a93..c98d8fff6a85 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java
@@ -22,6 +22,7 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
@@ -35,7 +36,6 @@
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
-import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
@@ -44,7 +44,7 @@ public class DelimitedReaderTest
private static final InputRowSchema INPUT_ROW_SCHEMA = new InputRowSchema(
new TimestampSpec("ts", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "name"))),
- Collections.emptyList()
+ ColumnsFilter.all()
);
@BeforeClass
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java b/core/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java
index e202d152047a..37b35f149829 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java
@@ -21,6 +21,7 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.java.util.common.DateTimes;
@@ -37,7 +38,6 @@
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
public class InputEntityIteratingReaderTest
@@ -64,7 +64,7 @@ public void test() throws IOException
new DimensionsSpec(
DimensionsSpec.getDefaultSchemas(ImmutableList.of("time", "name", "score"))
),
- Collections.emptyList()
+ ColumnsFilter.all()
),
new CsvInputFormat(
ImmutableList.of("time", "name", "score"),
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/JsonReaderTest.java b/core/src/test/java/org/apache/druid/data/input/impl/JsonReaderTest.java
index a0c19557845f..ea40415678a8 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/JsonReaderTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/JsonReaderTest.java
@@ -21,6 +21,7 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
@@ -66,7 +67,7 @@ public void testParseRow() throws IOException
new InputRowSchema(
new TimestampSpec("timestamp", "iso", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))),
- Collections.emptyList()
+ ColumnsFilter.all()
),
source,
null
@@ -116,7 +117,7 @@ public void testParseRowWithConditional() throws IOException
new InputRowSchema(
new TimestampSpec("timestamp", "iso", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo"))),
- Collections.emptyList()
+ ColumnsFilter.all()
),
source,
null
@@ -158,7 +159,7 @@ public void testParseRowKeepNullColumns() throws IOException
new InputRowSchema(
new TimestampSpec("timestamp", "iso", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.emptyList())),
- Collections.emptyList()
+ ColumnsFilter.all()
),
source,
null
@@ -200,7 +201,7 @@ public void testKeepNullColumnsWithNoNullValues() throws IOException
new InputRowSchema(
new TimestampSpec("timestamp", "iso", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.emptyList())),
- Collections.emptyList()
+ ColumnsFilter.all()
),
source,
null
@@ -242,7 +243,7 @@ public void testFalseKeepNullColumns() throws IOException
new InputRowSchema(
new TimestampSpec("timestamp", "iso", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.emptyList())),
- Collections.emptyList()
+ ColumnsFilter.all()
),
source,
null
diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md
index 8538145844ce..2dfe96428174 100644
--- a/docs/ingestion/native-batch.md
+++ b/docs/ingestion/native-batch.md
@@ -1256,61 +1256,77 @@ no `inputFormat` field needs to be specified in the ingestion spec when using th
|type|This should be "druid".|yes|
|dataSource|A String defining the Druid datasource to fetch rows from|yes|
|interval|A String representing an ISO-8601 interval, which defines the time range to fetch the data over.|yes|
-|dimensions|A list of Strings containing the names of dimension columns to select from the Druid datasource. If the list is empty, no dimensions are returned. If null, all dimensions are returned. |no|
-|metrics|The list of Strings containing the names of metric columns to select. If the list is empty, no metrics are returned. If null, all metrics are returned.|no|
|filter| See [Filters](../querying/filters.md). Only rows that match the filter, if specified, will be returned.|no|
-A minimal example DruidInputSource spec is shown below:
+The Druid input source can be used for a variety of purposes, including:
-```json
-...
- "ioConfig": {
- "type": "index_parallel",
- "inputSource": {
- "type": "druid",
- "dataSource": "wikipedia",
- "interval": "2013-01-01/2013-01-02"
- }
- ...
- },
-...
-```
+- Creating new datasources that are rolled-up copies of existing datasources.
+- Changing the [partitioning or sorting](index.md#partitioning) of a datasource to improve performance.
+- Updating or removing rows using a [`transformSpec`](index.md#transformspec).
-The spec above will read all existing dimension and metric columns from
-the `wikipedia` datasource, including all rows with a timestamp (the `__time` column)
-within the interval `2013-01-01/2013-01-02`.
+When using the Druid input source, the timestamp column shows up as a numeric field named `__time` set to the number
+of milliseconds since the epoch (January 1, 1970 00:00:00 UTC). If you want the output timestamp to be equivalent to
+the input timestamp, reference it in the timestampSpec: set the timestamp column to `__time` and the format to `auto`
+or `millis`.
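+For example, this timestampSpec carries the input timestamp through unchanged:
+
+```json
+"timestampSpec": {
+  "column": "__time",
+  "format": "millis"
+}
+```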
-A spec that applies a filter and reads a subset of the original datasource's columns is shown below.
+It is OK for the input and output datasources to be the same. In this case, the reindexed data will overwrite the
+previous data. Generally, if you are going to do this, it is a good idea to test out your reindexing by writing to
+a separate datasource before overwriting your main one.
+
+An example task spec is shown below. It reads from a hypothetical raw datasource `wikipedia_raw` and creates a new
+rolled-up datasource `wikipedia_rollup` by grouping on hour, "countryName", and "page".
```json
-...
+{
+ "type": "index_parallel",
+ "spec": {
+ "dataSchema": {
+ "dataSource": "wikipedia_rollup",
+ "timestampSpec": {
+ "column": "__time",
+ "format": "millis"
+ },
+ "dimensionsSpec": {
+ "dimensions": [
+ "countryName",
+ "page"
+ ]
+ },
+ "metricsSpec": [
+ {
+ "type": "count",
+ "name": "cnt"
+ }
+ ],
+ "granularitySpec": {
+ "type": "uniform",
+ "queryGranularity": "HOUR",
+ "segmentGranularity": "DAY",
+ "intervals": ["2016-06-27/P1D"],
+ "rollup": true
+ }
+ },
"ioConfig": {
"type": "index_parallel",
"inputSource": {
"type": "druid",
- "dataSource": "wikipedia",
- "interval": "2013-01-01/2013-01-02",
- "dimensions": [
- "page",
- "user"
- ],
- "metrics": [
- "added"
- ],
- "filter": {
- "type": "selector",
- "dimension": "page",
- "value": "Druid"
- }
+ "dataSource": "wikipedia_raw",
+ "interval": "2016-06-27/P1D"
}
- ...
},
-...
+ "tuningConfig": {
+ "type": "index_parallel",
+ "partitionsSpec": {
+ "type": "hashed",
+ "numShards": 1
+ },
+ "forceGuaranteedRollup": true,
+ "maxNumConcurrentSubTasks": 1
+ }
+ }
+}
```
-This spec above will only return the `page`, `user` dimensions and `added` metric.
-Only rows where `page` = `Druid` will be returned.
-
### SQL Input Source
The SQL input source is used to read data directly from RDBMS.
diff --git a/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java b/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java
index 2bd9d5816acc..5b44b9d826c9 100644
--- a/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java
+++ b/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java
@@ -39,6 +39,7 @@
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Provides;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSourceReader;
@@ -110,7 +111,8 @@ public class OssInputSourceTest extends InitializedNullHandlingTest
private static final OssClientConfig CLOUD_CONFIG_PROPERTIES = new OssClientConfig(
"test.oss-cn.aliyun.com",
new DefaultPasswordProvider("myKey"),
- new DefaultPasswordProvider("mySecret"));
+ new DefaultPasswordProvider("mySecret")
+ );
private static final List<CloudObjectLocation> EXPECTED_LOCATION =
ImmutableList.of(new CloudObjectLocation("foo", "bar/file.csv"));
@@ -453,7 +455,7 @@ public void testReader() throws IOException
InputRowSchema someSchema = new InputRowSchema(
new TimestampSpec("time", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
- ImmutableList.of("count")
+ ColumnsFilter.all()
);
InputSourceReader reader = inputSource.reader(
@@ -496,7 +498,7 @@ public void testCompressedReader() throws IOException
InputRowSchema someSchema = new InputRowSchema(
new TimestampSpec("time", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
- ImmutableList.of("count")
+ ColumnsFilter.all()
);
InputSourceReader reader = inputSource.reader(
diff --git a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroOCFReaderTest.java b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroOCFReaderTest.java
index fe6070be6681..238dfef0651e 100644
--- a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroOCFReaderTest.java
+++ b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroOCFReaderTest.java
@@ -27,6 +27,7 @@
import org.apache.avro.generic.GenericRecord;
import org.apache.druid.data.input.AvroHadoopInputRowParserTest;
import org.apache.druid.data.input.AvroStreamInputRowParserTest;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
@@ -43,7 +44,6 @@
import java.io.File;
import java.io.IOException;
-import java.util.List;
import java.util.Map;
public class AvroOCFReaderTest
@@ -150,10 +150,9 @@ private InputEntityReader createReader(
final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of(
"eventType")));
- final List<String> metricNames = ImmutableList.of("someLong");
final AvroOCFInputFormat inputFormat = new AvroOCFInputFormat(mapper, null, readerSchema, null);
- final InputRowSchema schema = new InputRowSchema(timestampSpec, dimensionsSpec, metricNames);
+ final InputRowSchema schema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());
final FileEntity entity = new FileEntity(someAvroFile);
return inputFormat.createReader(schema, entity, temporaryFolder.newFolder());
}
diff --git a/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java b/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java
index 3888398fa9ea..1f4bea42e40d 100644
--- a/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java
+++ b/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java
@@ -31,6 +31,7 @@
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Provides;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSourceReader;
@@ -225,7 +226,7 @@ public void testReader() throws IOException
InputRowSchema someSchema = new InputRowSchema(
new TimestampSpec("time", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
- ImmutableList.of("count")
+ ColumnsFilter.all()
);
InputSourceReader reader = inputSource.reader(
@@ -268,7 +269,7 @@ public void testCompressedReader() throws IOException
InputRowSchema someSchema = new InputRowSchema(
new TimestampSpec("time", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
- ImmutableList.of("count")
+ ColumnsFilter.all()
);
InputSourceReader reader = inputSource.reader(
diff --git a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java
index 044930f838bf..cf4ee594fd1b 100644
--- a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java
+++ b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java
@@ -23,6 +23,7 @@
import com.fasterxml.jackson.databind.InjectableValues;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Iterables;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputFormat;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
@@ -74,7 +75,7 @@ public class HdfsInputSourceTest extends InitializedNullHandlingTest
private static final InputRowSchema INPUT_ROW_SCHEMA = new InputRowSchema(
new TimestampSpec(null, null, null),
DimensionsSpec.EMPTY,
- Collections.emptyList()
+ ColumnsFilter.all()
);
private static final InputFormat INPUT_FORMAT = new CsvInputFormat(
Arrays.asList(TimestampSpec.DEFAULT_COLUMN, COLUMN),
diff --git a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java
index bef9b64728f4..9726c0e14671 100644
--- a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java
+++ b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java
@@ -21,6 +21,7 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputFormat;
import org.apache.druid.data.input.InputRow;
@@ -259,7 +260,7 @@ private InputEntityReader createReader(
String dataFile
) throws IOException
{
- final InputRowSchema schema = new InputRowSchema(timestampSpec, dimensionsSpec, Collections.emptyList());
+ final InputRowSchema schema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());
final FileEntity entity = new FileEntity(new File(dataFile));
return inputFormat.createReader(schema, entity, temporaryFolder.newFolder());
}
diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java
index f8b586bec67d..60173212b53c 100644
--- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java
+++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java
@@ -20,6 +20,7 @@
package org.apache.druid.data.input.parquet;
import com.google.common.collect.ImmutableList;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues;
@@ -34,7 +35,6 @@
import org.junit.Test;
import java.io.IOException;
-import java.util.Collections;
import java.util.List;
/**
@@ -49,7 +49,7 @@ public void testBinaryAsString() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("ts", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("field"))),
- ImmutableList.of()
+ ColumnsFilter.all()
);
InputEntityReader reader = createReader(
file,
@@ -114,7 +114,7 @@ public void testParquet1217() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
- ImmutableList.of("metric1")
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.ROOT, "col", "col"),
@@ -200,7 +200,7 @@ required group nestedIntsColumn (LIST) {
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
- Collections.emptyList()
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractByLogicalMap", "$.intToStringColumn.1"),
@@ -315,7 +315,7 @@ public void testOldRepeatedInt() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("repeatedInt"))),
- Collections.emptyList()
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.ROOT, "repeatedInt", "repeatedInt")
@@ -353,7 +353,7 @@ public void testReadNestedArrayStruct() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("i32_dec", "extracted1", "extracted2"))),
- Collections.emptyList()
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.PATH, "extracted1", "$.myComplex[0].id"),
@@ -395,7 +395,7 @@ public void testProtoStructWithArray() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
- Collections.emptyList()
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractedOptional", "$.optionalMessage.someId"),
diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetReaderTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetReaderTest.java
index 50b9fe2df27d..faa80e6d73f3 100644
--- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetReaderTest.java
+++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetReaderTest.java
@@ -20,6 +20,7 @@
package org.apache.druid.data.input.parquet;
import com.google.common.collect.ImmutableList;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues;
@@ -49,7 +50,7 @@ public void testReadParquetDecimalFixedLen() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("fixed_len_dec"))),
- ImmutableList.of("metric1")
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.ROOT, "fixed_len_dec", "fixed_len_dec"),
@@ -86,7 +87,7 @@ public void testReadParquetDecimali32() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("i32_dec"))),
- ImmutableList.of("metric1")
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.ROOT, "i32_dec", "i32_dec"),
@@ -123,7 +124,7 @@ public void testReadParquetDecimali64() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("i64_dec"))),
- ImmutableList.of("metric1")
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.ROOT, "i32_dec", "i64_dec"),
diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetReaderTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetReaderTest.java
index 5be38dda494d..7ff430667fa5 100644
--- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetReaderTest.java
+++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetReaderTest.java
@@ -20,6 +20,7 @@
package org.apache.druid.data.input.parquet;
import com.google.common.collect.ImmutableList;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues;
@@ -33,7 +34,6 @@
import org.junit.Test;
import java.io.IOException;
-import java.util.Collections;
import java.util.List;
/**
@@ -69,7 +69,7 @@ public void testFlat1NoFlattenSpec() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3", "listDim"))),
- ImmutableList.of("metric1", "metric2")
+ ColumnsFilter.all()
);
JSONPathSpec flattenSpec = new JSONPathSpec(false, ImmutableList.of());
InputEntityReader reader = createReader(
@@ -103,7 +103,7 @@ public void testFlat1Autodiscover() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
- ImmutableList.of("metric1", "metric2")
+ ColumnsFilter.all()
);
InputEntityReader reader = createReader(
file,
@@ -136,7 +136,7 @@ public void testFlat1Flatten() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3", "list"))),
- ImmutableList.of("metric1", "metric2")
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null),
@@ -177,7 +177,7 @@ public void testFlat1FlattenSelectListItem() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "listExtracted"))),
- ImmutableList.of("metric1", "metric2")
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null),
@@ -217,7 +217,7 @@ public void testNested1NoFlattenSpec() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1"))),
- ImmutableList.of("metric1")
+ ColumnsFilter.all()
);
JSONPathSpec flattenSpec = new JSONPathSpec(false, ImmutableList.of());
InputEntityReader reader = createReader(
@@ -253,7 +253,7 @@ public void testNested1Autodiscover() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
- ImmutableList.of("metric1", "metric2")
+ ColumnsFilter.all()
);
InputEntityReader reader = createReader(
file,
@@ -286,7 +286,7 @@ public void testNested1Flatten() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
- ImmutableList.of("metric1", "metric2")
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null),
@@ -329,7 +329,7 @@ public void testNested1FlattenSelectListItem() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
- Collections.emptyList()
+ ColumnsFilter.all()
);
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null),
diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/ParquetReaderResourceLeakTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/ParquetReaderResourceLeakTest.java
index 251fa344bb73..f8e56b3f2deb 100644
--- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/ParquetReaderResourceLeakTest.java
+++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/ParquetReaderResourceLeakTest.java
@@ -20,6 +20,7 @@
package org.apache.druid.data.input.parquet;
import com.google.common.collect.ImmutableList;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
@@ -39,7 +40,6 @@
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
-import java.util.Collections;
import java.util.Objects;
public class ParquetReaderResourceLeakTest extends BaseParquetReaderTest
@@ -55,7 +55,7 @@ public void testFetchOnReadCleanupAfterExhaustingIterator() throws IOException
new DimensionsSpec(
DimensionsSpec.getDefaultSchemas(ImmutableList.of("page", "language", "user", "unpatrolled"))
),
- Collections.emptyList()
+ ColumnsFilter.all()
);
FetchingFileEntity entity = new FetchingFileEntity(new File("example/wiki/wiki.parquet"));
ParquetInputFormat parquet = new ParquetInputFormat(JSONPathSpec.DEFAULT, false, new Configuration());
diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/TimestampsParquetReaderTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/TimestampsParquetReaderTest.java
index 19f1544dcff0..c0189fe8bc19 100644
--- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/TimestampsParquetReaderTest.java
+++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/TimestampsParquetReaderTest.java
@@ -20,6 +20,7 @@
package org.apache.druid.data.input.parquet;
import com.google.common.collect.ImmutableList;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues;
@@ -31,7 +32,6 @@
import org.junit.Test;
import java.io.IOException;
-import java.util.Collections;
import java.util.List;
/**
@@ -46,12 +46,12 @@ public void testDateHandling() throws IOException
InputRowSchema schemaAsString = new InputRowSchema(
new TimestampSpec("date_as_string", "Y-M-d", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
- Collections.emptyList()
+ ColumnsFilter.all()
);
InputRowSchema schemaAsDate = new InputRowSchema(
new TimestampSpec("date_as_date", null, null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
- Collections.emptyList()
+ ColumnsFilter.all()
);
InputEntityReader readerAsString = createReader(
file,
@@ -104,7 +104,7 @@ public void testParseInt96Timestamp() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("ts", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
- Collections.emptyList()
+ ColumnsFilter.all()
);
InputEntityReader reader = createReader(file, schema, JSONPathSpec.DEFAULT);
@@ -130,7 +130,7 @@ public void testTimeMillisInInt64() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("time", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
- Collections.emptyList()
+ ColumnsFilter.all()
);
InputEntityReader reader = createReader(
file,
diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java
index 75e5e916ec78..4bc7bac27b2e 100644
--- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java
+++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java
@@ -20,6 +20,7 @@
package org.apache.druid.data.input.parquet;
import com.google.common.collect.ImmutableList;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues;
@@ -31,7 +32,6 @@
import org.junit.Test;
import java.io.IOException;
-import java.util.Collections;
import java.util.List;
/**
@@ -45,7 +45,7 @@ public void testWiki() throws IOException
InputRowSchema schema = new InputRowSchema(
new TimestampSpec("timestamp", "iso", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("page", "language", "user", "unpatrolled"))),
- Collections.emptyList()
+ ColumnsFilter.all()
);
InputEntityReader reader = createReader("example/wiki/wiki.parquet", schema, JSONPathSpec.DEFAULT);
diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java
index 8dd82a97ec9d..0bc23f6d3a5c 100644
--- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java
+++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java
@@ -40,6 +40,7 @@
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Provides;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSourceReader;
@@ -508,7 +509,7 @@ public void testReader() throws IOException
InputRowSchema someSchema = new InputRowSchema(
new TimestampSpec("time", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
- ImmutableList.of("count")
+ ColumnsFilter.all()
);
InputSourceReader reader = inputSource.reader(
@@ -552,7 +553,7 @@ public void testCompressedReader() throws IOException
InputRowSchema someSchema = new InputRowSchema(
new TimestampSpec("time", "auto", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
- ImmutableList.of("count")
+ ColumnsFilter.all()
);
InputSourceReader reader = inputSource.reader(
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java
index bd9d21457eb6..b1a2fb5ddd94 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java
@@ -34,6 +34,9 @@
import java.util.stream.Collectors;
import java.util.stream.IntStream;
+/**
+ * @deprecated only used by {@link org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory}
+ */
public class ReingestionTimelineUtils
{
/**
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
index 5cfaf32c92aa..10b7e1599ec5 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
@@ -620,8 +620,8 @@ private static ParallelIndexIOConfig createIoConfig(
interval,
null,
null,
- dataSchema.getDimensionsSpec().getDimensionNames(),
- Arrays.stream(dataSchema.getAggregators()).map(AggregatorFactory::getName).collect(Collectors.toList()),
+ null,
+ null,
toolbox.getIndexIO(),
coordinatorClient,
segmentLoaderFactory,
@@ -692,7 +692,7 @@ private static DataSchema createDataSchema(
return new DataSchema(
dataSource,
- new TimestampSpec(null, null, null),
+ new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, "millis", null),
finalDimensionsSpec,
finalMetricsSpec,
granularitySpec,
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java
index 8367ead3fa85..b636e83c8a5f 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java
@@ -39,7 +39,6 @@
import org.apache.druid.data.input.FirehoseFactoryToInputSourceAdaptor;
import org.apache.druid.data.input.InputFormat;
import org.apache.druid.data.input.InputRow;
-import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSource;
import org.apache.druid.data.input.InputSourceReader;
import org.apache.druid.data.input.Rows;
@@ -69,6 +68,7 @@
import org.apache.druid.indexing.common.task.batch.partition.HashPartitionAnalysis;
import org.apache.druid.indexing.common.task.batch.partition.LinearPartitionAnalysis;
import org.apache.druid.indexing.common.task.batch.partition.PartitionAnalysis;
+import org.apache.druid.indexing.input.InputRowSchemas;
import org.apache.druid.indexing.overlord.sampler.InputSourceSampler;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
@@ -81,7 +81,6 @@
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.java.util.common.parsers.ParseException;
-import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.indexing.BatchIOConfig;
import org.apache.druid.segment.indexing.DataSchema;
@@ -124,7 +123,6 @@
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@@ -136,7 +134,6 @@
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
-import java.util.stream.Collectors;
public class IndexTask extends AbstractBatchIndexTask implements ChatHandler
{
@@ -740,16 +737,9 @@ private Map<Interval, Optional<HyperLogLogCollector>> collectIntervalsAndShardSp
Comparators.intervalsByStartThenEnd()
);
final Granularity queryGranularity = granularitySpec.getQueryGranularity();
- final List<String> metricsNames = Arrays.stream(ingestionSchema.getDataSchema().getAggregators())
- .map(AggregatorFactory::getName)
- .collect(Collectors.toList());
final InputSourceReader inputSourceReader = ingestionSchema.getDataSchema().getTransformSpec().decorate(
inputSource.reader(
- new InputRowSchema(
- ingestionSchema.getDataSchema().getTimestampSpec(),
- ingestionSchema.getDataSchema().getDimensionsSpec(),
- metricsNames
- ),
+ InputRowSchemas.fromDataSchema(ingestionSchema.getDataSchema()),
inputSource.needsFormat() ? getInputFormat(ingestionSchema) : null,
tmpDir
)
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java
index e88dab25f522..76ac510d6041 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java
@@ -23,18 +23,17 @@
import org.apache.druid.data.input.HandlingInputRowIterator;
import org.apache.druid.data.input.InputFormat;
import org.apache.druid.data.input.InputRow;
-import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSource;
import org.apache.druid.data.input.InputSourceReader;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;
import org.apache.druid.indexing.common.stats.RowIngestionMeters;
import org.apache.druid.indexing.common.task.batch.parallel.iterator.IndexTaskInputRowIteratorBuilder;
+import org.apache.druid.indexing.input.InputRowSchemas;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.java.util.common.parsers.ParseException;
-import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.indexing.granularity.GranularitySpec;
import org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult;
@@ -46,11 +45,8 @@
import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
-import java.util.stream.Collectors;
public class InputSourceProcessor
{
@@ -104,16 +100,9 @@ public SegmentsAndCommitMetadata process(
: null;
final GranularitySpec granularitySpec = dataSchema.getGranularitySpec();
- final List<String> metricsNames = Arrays.stream(dataSchema.getAggregators())
- .map(AggregatorFactory::getName)
- .collect(Collectors.toList());
final InputSourceReader inputSourceReader = dataSchema.getTransformSpec().decorate(
inputSource.reader(
- new InputRowSchema(
- dataSchema.getTimestampSpec(),
- dataSchema.getDimensionsSpec(),
- metricsNames
- ),
+ InputRowSchemas.fromDataSchema(dataSchema),
inputFormat,
tmpDir
)
@@ -188,9 +177,9 @@ private void handleParseException(ParseException e)
buildSegmentsMeters.incrementUnparseable();
}
- if (logParseExceptions) {
+// if (logParseExceptions) {
LOG.error(e, "Encountered parse exception");
- }
+// }
if (buildSegmentsSavedParseExceptions != null) {
buildSegmentsSavedParseExceptions.add(e);
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java
index 55dbfb4bf689..46e875f0894f 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java
@@ -31,7 +31,6 @@
import org.apache.druid.data.input.HandlingInputRowIterator;
import org.apache.druid.data.input.InputFormat;
import org.apache.druid.data.input.InputRow;
-import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSource;
import org.apache.druid.data.input.InputSourceReader;
import org.apache.druid.data.input.Rows;
@@ -46,23 +45,21 @@
import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketch;
import org.apache.druid.indexing.common.task.batch.parallel.iterator.IndexTaskInputRowIteratorBuilder;
import org.apache.druid.indexing.common.task.batch.parallel.iterator.RangePartitionIndexTaskInputRowIteratorBuilder;
+import org.apache.druid.indexing.input.InputRowSchemas;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.java.util.common.parsers.ParseException;
-import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.indexing.granularity.GranularitySpec;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import javax.annotation.Nullable;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
-import java.util.stream.Collectors;
/**
* The worker task of {@link PartialDimensionDistributionParallelIndexTaskRunner}. This task
@@ -116,7 +113,8 @@ public class PartialDimensionDistributionTask extends PerfectRollupWorkerTask
);
}
- @VisibleForTesting // Only for testing
+ @VisibleForTesting
+ // Only for testing
PartialDimensionDistributionTask(
@Nullable String id,
final String groupId,
@@ -202,19 +200,12 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception
InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource(
ingestionSchema.getDataSchema().getParser()
);
- List<String> metricsNames = Arrays.stream(dataSchema.getAggregators())
- .map(AggregatorFactory::getName)
- .collect(Collectors.toList());
InputFormat inputFormat = inputSource.needsFormat()
? ParallelIndexSupervisorTask.getInputFormat(ingestionSchema)
: null;
InputSourceReader inputSourceReader = dataSchema.getTransformSpec().decorate(
inputSource.reader(
- new InputRowSchema(
- dataSchema.getTimestampSpec(),
- dataSchema.getDimensionsSpec(),
- metricsNames
- ),
+ InputRowSchemas.fromDataSchema(dataSchema),
inputFormat,
toolbox.getIndexingTmpDir()
)
@@ -357,7 +348,8 @@ static class DedupInputRowFilter implements InputRowFilter
this(queryGranularity, BLOOM_FILTER_EXPECTED_INSERTIONS, BLOOM_FILTER_EXPECTED_FALSE_POSITIVE_PROBABILTY);
}
- @VisibleForTesting // to allow controlling false positive rate of bloom filter
+ @VisibleForTesting
+ // to allow controlling false positive rate of bloom filter
DedupInputRowFilter(
Granularity queryGranularity,
int bloomFilterExpectedInsertions,
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java
index 8f6131e92288..97a561f806d4 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java
@@ -26,7 +26,6 @@
import com.google.common.collect.FluentIterable;
import org.apache.druid.client.indexing.IndexingServiceClient;
import org.apache.druid.data.input.InputRow;
-import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSource;
import org.apache.druid.data.input.InputSourceReader;
import org.apache.druid.indexer.TaskStatus;
@@ -41,6 +40,7 @@
import org.apache.druid.indexing.common.task.SegmentAllocators;
import org.apache.druid.indexing.common.task.TaskResource;
import org.apache.druid.indexing.common.task.Tasks;
+import org.apache.druid.indexing.input.InputRowSchemas;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.StringUtils;
@@ -49,7 +49,6 @@
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.java.util.common.parsers.ParseException;
import org.apache.druid.query.DruidMetrics;
-import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.indexing.RealtimeIOConfig;
import org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec;
@@ -73,7 +72,6 @@
import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
-import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
@@ -81,7 +79,6 @@
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
-import java.util.stream.Collectors;
/**
* The worker task of {@link SinglePhaseParallelIndexTaskRunner}. Similar to {@link IndexTask}, but this task
@@ -326,16 +323,9 @@ private Set<DataSegment> generateAndPushSegments(
tuningConfig,
getContextValue(Tasks.STORE_COMPACTION_STATE_KEY, Tasks.DEFAULT_STORE_COMPACTION_STATE)
);
- final List<String> metricsNames = Arrays.stream(ingestionSchema.getDataSchema().getAggregators())
- .map(AggregatorFactory::getName)
- .collect(Collectors.toList());
final InputSourceReader inputSourceReader = dataSchema.getTransformSpec().decorate(
inputSource.reader(
- new InputRowSchema(
- ingestionSchema.getDataSchema().getTimestampSpec(),
- ingestionSchema.getDataSchema().getDimensionsSpec(),
- metricsNames
- ),
+ InputRowSchemas.fromDataSchema(ingestionSchema.getDataSchema()),
inputSource.needsFormat() ? ParallelIndexSupervisorTask.getInputFormat(ingestionSchema) : null,
tmpDir
)
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
index 6248828d32b4..1defe67cf14c 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
@@ -61,6 +61,9 @@
import java.util.Map;
import java.util.stream.Stream;
+/**
+ * @deprecated use {@link DruidInputSource} instead
+ */
public class IngestSegmentFirehoseFactory implements FiniteFirehoseFactory<InputRowParser, List<WindowedSegmentId>>
{
private static final EmittingLogger log = new EmittingLogger(IngestSegmentFirehoseFactory.class);
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
index b9cc5759cd3b..6ea25dca8633 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
@@ -39,7 +39,6 @@
import org.apache.druid.data.input.SplitHintSpec;
import org.apache.druid.data.input.impl.InputEntityIteratingReader;
import org.apache.druid.data.input.impl.SplittableInputSource;
-import org.apache.druid.indexing.common.ReingestionTimelineUtils;
import org.apache.druid.indexing.common.RetryPolicy;
import org.apache.druid.indexing.common.RetryPolicyFactory;
import org.apache.druid.indexing.common.SegmentLoaderFactory;
@@ -74,6 +73,11 @@
import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.Stream;
+/**
+ * An {@link org.apache.druid.data.input.InputSource} that allows reading from Druid segments.
+ *
+ * Used internally by {@link org.apache.druid.indexing.common.task.CompactionTask}, and can also be used directly.
+ */
public class DruidInputSource extends AbstractInputSource implements SplittableInputSource<List<WindowedSegmentId>>
{
private static final Logger LOG = new Logger(DruidInputSource.class);
@@ -87,13 +91,21 @@ public class DruidInputSource extends AbstractInputSource implements SplittableI
@Nullable
private final List<WindowedSegmentId> segmentIds;
private final DimFilter dimFilter;
- private final List<String> dimensions;
- private final List<String> metrics;
private final IndexIO indexIO;
private final CoordinatorClient coordinatorClient;
private final SegmentLoaderFactory segmentLoaderFactory;
private final RetryPolicyFactory retryPolicyFactory;
+ /**
+ * Included for serde backwards-compatibility only. Not used.
+ */
+ private final List<String> dimensions;
+
+ /**
+ * Included for serde backwards-compatibility only. Not used.
+ */
+ private final List<String> metrics;
+
@JsonCreator
public DruidInputSource(
@JsonProperty("dataSource") final String dataSource,
@@ -134,6 +146,7 @@ public String getDataSource()
@Nullable
@JsonProperty
+ @JsonInclude(Include.NON_NULL)
public Interval getInterval()
{
return interval;
@@ -148,18 +161,27 @@ public List<WindowedSegmentId> getSegmentIds()
}
@JsonProperty("filter")
+ @JsonInclude(Include.NON_NULL)
public DimFilter getDimFilter()
{
return dimFilter;
}
+ /**
+ * Included for serde backwards-compatibility only. Not used.
+ */
@JsonProperty
+ @JsonInclude(Include.NON_NULL)
public List<String> getDimensions()
{
return dimensions;
}
+ /**
+ * Included for serde backwards-compatibility only. Not used.
+ */
@JsonProperty
+ @JsonInclude(Include.NON_NULL)
public List<String> getMetrics()
{
return metrics;
@@ -181,25 +203,8 @@ protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nu
.from(partitionHolder)
.transform(chunk -> new DruidSegmentInputEntity(segmentLoader, chunk.getObject(), holder.getInterval()));
}).iterator();
- final List<String> effectiveDimensions = ReingestionTimelineUtils.getDimensionsToReingest(
- dimensions,
- inputRowSchema.getDimensionsSpec(),
- timeline
- );
-
- List<String> effectiveMetrics;
- if (metrics == null) {
- effectiveMetrics = ReingestionTimelineUtils.getUniqueMetrics(timeline);
- } else {
- effectiveMetrics = metrics;
- }
- final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(
- indexIO,
- dimFilter,
- effectiveDimensions,
- effectiveMetrics
- );
+ final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);
return new InputEntityIteratingReader(
inputRowSchema,
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentInputFormat.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentInputFormat.java
index 80f87721357c..4d028596ff08 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentInputFormat.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentInputFormat.java
@@ -27,26 +27,19 @@
import org.apache.druid.segment.IndexIO;
import java.io.File;
-import java.util.List;
public class DruidSegmentInputFormat implements InputFormat
{
private final IndexIO indexIO;
private final DimFilter dimFilter;
- private List<String> dimensions;
- private List<String> metrics;
- DruidSegmentInputFormat(
+ public DruidSegmentInputFormat(
IndexIO indexIO,
- DimFilter dimFilter,
- List dimensions,
- List metrics
+ DimFilter dimFilter
)
{
this.indexIO = indexIO;
this.dimFilter = dimFilter;
- this.dimensions = dimensions;
- this.metrics = metrics;
}
@Override
@@ -65,8 +58,9 @@ public InputEntityReader createReader(
return new DruidSegmentReader(
source,
indexIO,
- dimensions,
- metrics,
+ inputRowSchema.getTimestampSpec(),
+ inputRowSchema.getDimensionsSpec(),
+ inputRowSchema.getColumnsFilter(),
dimFilter,
temporaryDirectory
);
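Editor's note: `createReader` now threads the timestampSpec, dimensionsSpec, and ColumnsFilter from the caller-supplied `InputRowSchema` instead of carrying its own dimension and metric lists. As a hedged illustration of what a columnar reader can do with that filter (not code from this patch; `availableColumns` is a hypothetical stand-in for the segment's physical column list):

```java
// Illustration only: select the physical columns to read by consulting the
// ColumnsFilter carried on the InputRowSchema. Assumes java.util.HashSet and
// java.util.Set are imported.
static Set<String> columnsToRead(final InputRowSchema schema, final Iterable<String> availableColumns)
{
  final Set<String> retVal = new HashSet<>();
  for (final String column : availableColumns) {
    if (schema.getColumnsFilter().apply(column)) {
      retVal.add(column);
    }
  }
  return retVal;
}
```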
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java
index 6460ae43d55d..3eb57b30597d 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java
@@ -21,12 +21,17 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
+import com.google.common.base.Supplier;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Sets;
+import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntity;
import org.apache.druid.data.input.InputEntity.CleanableFile;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.IntermediateRowParsingReader;
-import org.apache.druid.data.input.MapBasedInputRow;
-import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.data.input.impl.DimensionsSpec;
+import org.apache.druid.data.input.impl.MapInputRowParser;
+import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
@@ -35,56 +40,62 @@
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.java.util.common.parsers.ParseException;
-import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.filter.DimFilter;
+import org.apache.druid.segment.BaseDoubleColumnValueSelector;
+import org.apache.druid.segment.BaseFloatColumnValueSelector;
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
+import org.apache.druid.segment.ColumnProcessorFactory;
+import org.apache.druid.segment.ColumnProcessors;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.IndexIO;
import org.apache.druid.segment.QueryableIndexStorageAdapter;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter;
import org.apache.druid.utils.CollectionUtils;
-import org.joda.time.DateTime;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
+import java.util.Set;
public class DruidSegmentReader extends IntermediateRowParsingReader<Map<String, Object>>
),
},
- {
- name: 'inputSource.dimensions',
- label: 'Dimensions',
- type: 'string-array',
- placeholder: '(optional)',
- info: (
- <p>
- The list of dimensions to select. If left empty, no dimensions are returned. If left
- null or not defined, all dimensions are returned.
- </p>
- ),
- },
- {
- name: 'inputSource.metrics',
- label: 'Metrics',
- type: 'string-array',
- placeholder: '(optional)',
- info: (
- <p>
- The list of metrics to select. If left empty, no metrics are returned. If left null or
- not defined, all metrics are selected.
- </p>
- ),
- },
{
name: 'inputSource.filter',
label: 'Filter',
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index be13c22dfbfd..21ae13d43d07 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -1127,7 +1127,7 @@ export class LoadDataView extends React.PureComponent
From: Gian Merlino
Date: Wed, 12 Aug 2020 00:30:56 -0700
Subject: [PATCH 02/24] Various fixups.
---
.../druid/data/input/ColumnsFilter.java | 2 +-
docs/ingestion/native-batch.md | 6 ++--
.../common/ReingestionTimelineUtils.java | 1 +
.../IngestSegmentFirehoseFactory.java | 1 +
...arallel_druid_input_source_index_task.json | 3 +-
...pedia_reindex_druid_input_source_task.json | 2 +-
...uid_input_source_task_with_transforms.json | 2 +-
.../segment/indexing/TransformSpecTest.java | 31 ++++++++++++++++++-
website/.spelling | 1 +
9 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java b/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
index f391e7e41c40..8506ffb735f9 100644
--- a/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
+++ b/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
@@ -62,7 +62,7 @@ public static ColumnsFilter exclusionBased(final Set<String> exclusions)
/**
* Check if a column should be included or not.
*/
- public abstract boolean apply(final String column);
+ public abstract boolean apply(String column);
public static class InclusionBased extends ColumnsFilter
{
diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md
index 2dfe96428174..5badf841ab37 100644
--- a/docs/ingestion/native-batch.md
+++ b/docs/ingestion/native-batch.md
@@ -1269,9 +1269,9 @@ of milliseconds since the epoch (January 1, 1970 00:00:00 UTC). It is common to
want the output timestamp to be equivalent to the input timestamp. In this case, set the timestamp column to `__time`
and the format to `auto` or `millis`.
-It is OK for the input and output datasources to be the same. In this case, the reindexed data will overwrite the
-previous data. Generally, if you are going to do this, it is a good idea to test out your reindexing by writing to
-a separate datasource before overwriting your main one.
+It is OK for the input and output datasources to be the same. In this case, newly generated data will overwrite the
+previous data for the intervals specified in the `granularitySpec`. Generally, if you are going to do this, it is a
+good idea to test out your reindexing by writing to a separate datasource before overwriting your main one.
An example task spec is shown below. It reads from a hypothetical raw datasource `wikipedia_raw` and creates a new
rolled-up datasource `wikipedia_rollup` by grouping on hour, "countryName", and "page".
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java
index b1a2fb5ddd94..8714fa6933ac 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java
@@ -37,6 +37,7 @@
/**
* @deprecated only used by {@link org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory}
*/
+@Deprecated
public class ReingestionTimelineUtils
{
/**
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
index 1defe67cf14c..7039cb32645a 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
@@ -64,6 +64,7 @@
/**
* @deprecated use {@link DruidInputSource} instead
*/
+@Deprecated
public class IngestSegmentFirehoseFactory implements FiniteFirehoseFactory<InputRowParser, List<WindowedSegmentId>>
{
private static final EmittingLogger log = new EmittingLogger(IngestSegmentFirehoseFactory.class);
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json
index 91702a413574..5a6402e15967 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json
@@ -10,7 +10,8 @@
]
},
"timestampSpec": {
- "column": "timestamp"
+ "column": "__time",
+ "format": "millis"
},
"metricsSpec": [
{
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task.json
index 3a5934cf4d37..cf2415c2b45c 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task.json
@@ -24,7 +24,7 @@
},
"timestampSpec": {
"column": "__time",
- "format": "iso"
+ "format": "millis"
},
"dimensionsSpec": {
"dimensionExclusions" : ["robot", "continent"]
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json
index 3e8a44c5c592..2c2b0372a56c 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json
@@ -24,7 +24,7 @@
},
"timestampSpec": {
"column": "__time",
- "format": "iso"
+ "format": "millis"
},
"dimensionsSpec": {
"dimensions": [
diff --git a/server/src/test/java/org/apache/druid/segment/indexing/TransformSpecTest.java b/server/src/test/java/org/apache/druid/segment/indexing/TransformSpecTest.java
index 8102a719e8e6..8532ff03909f 100644
--- a/server/src/test/java/org/apache/druid/segment/indexing/TransformSpecTest.java
+++ b/server/src/test/java/org/apache/druid/segment/indexing/TransformSpecTest.java
@@ -22,6 +22,8 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import org.apache.druid.common.config.NullHandlingTest;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.InputRowParser;
@@ -40,7 +42,7 @@
import java.util.Map;
-public class TransformSpecTest
+public class TransformSpecTest extends NullHandlingTest
{
private static final MapInputRowParser PARSER = new MapInputRowParser(
new TimeAndDimsParseSpec(
@@ -79,6 +81,11 @@ public void testTransforms()
)
);
+ Assert.assertEquals(
+ ImmutableSet.of("x", "y", "a", "b", "f", "g"),
+ transformSpec.getRequiredColumns()
+ );
+
final InputRowParser<Map<String, Object>> parser = transformSpec.decorate(PARSER);
final InputRow row = parser.parseBatch(ROW1).get(0);
@@ -107,6 +114,11 @@ public void testTransformOverwriteField()
)
);
+ Assert.assertEquals(
+ ImmutableSet.of("x", "y"),
+ transformSpec.getRequiredColumns()
+ );
+
final InputRowParser<Map<String, Object>> parser = transformSpec.decorate(PARSER);
final InputRow row = parser.parseBatch(ROW1).get(0);
@@ -138,6 +150,12 @@ public void testFilterOnTransforms()
)
);
+ Assert.assertEquals(
+ ImmutableSet.of("x", "f", "g", "y", "a", "b"),
+ transformSpec.getRequiredColumns()
+ );
+
+
final InputRowParser<Map<String, Object>> parser = transformSpec.decorate(PARSER);
Assert.assertNotNull(parser.parseBatch(ROW1).get(0));
Assert.assertNull(parser.parseBatch(ROW2).get(0));
@@ -153,6 +171,12 @@ public void testTransformTimeFromOtherFields()
)
);
+ Assert.assertEquals(
+ ImmutableSet.of("a", "b"),
+ transformSpec.getRequiredColumns()
+ );
+
+
final InputRowParser<Map<String, Object>> parser = transformSpec.decorate(PARSER);
final InputRow row = parser.parseBatch(ROW1).get(0);
@@ -171,6 +195,11 @@ public void testTransformTimeFromTime()
)
);
+ Assert.assertEquals(
+ ImmutableSet.of("__time"),
+ transformSpec.getRequiredColumns()
+ );
+
final InputRowParser<Map<String, Object>> parser = transformSpec.decorate(PARSER);
final InputRow row = parser.parseBatch(ROW1).get(0);
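Editor's note: the assertions above pin down the contract of the new `TransformSpec.getRequiredColumns()`: it reports every input column referenced by the transforms and by the filter. A minimal sketch of that contract, assuming TransformSpec's `transforms` list and `filter` field, and assuming `Transform` and `DimFilter` each expose a `getRequiredColumns()` of their own (the real method bodies are not part of this excerpt):

```java
// Sketch of the getRequiredColumns contract exercised by the tests above:
// union the inputs of every transform with the columns referenced by the
// filter, if any.
public Set<String> getRequiredColumns()
{
  final Set<String> requiredColumns = new HashSet<>();
  for (final Transform transform : transforms) {
    requiredColumns.addAll(transform.getRequiredColumns());
  }
  if (filter != null) {
    requiredColumns.addAll(filter.getRequiredColumns());
  }
  return requiredColumns;
}
```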
diff --git a/website/.spelling b/website/.spelling
index 50e27f00c38b..02bd48f3e50b 100644
--- a/website/.spelling
+++ b/website/.spelling
@@ -995,6 +995,7 @@ baseDir
chatHandlerNumRetries
chatHandlerTimeout
connectorConfig
+countryName
dataSchema's
foldCase
forceGuaranteedRollup
From 94046615f31796f14a88171e42352189449c8748 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Wed, 12 Aug 2020 02:29:06 -0700
Subject: [PATCH 03/24] Uncomment incorrectly commented lines.
---
.../druid/indexing/common/task/InputSourceProcessor.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java
index 76ac510d6041..3a62a9039c75 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java
@@ -177,9 +177,9 @@ private void handleParseException(ParseException e)
buildSegmentsMeters.incrementUnparseable();
}
-// if (logParseExceptions) {
+ if (logParseExceptions) {
LOG.error(e, "Encountered parse exception");
-// }
+ }
if (buildSegmentsSavedParseExceptions != null) {
buildSegmentsSavedParseExceptions.add(e);
From 6a4a97eb8b803545fcec896d592bfb21d537db86 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Wed, 12 Aug 2020 02:30:55 -0700
Subject: [PATCH 04/24] Move TransformSpecTest to the proper module.
---
.../apache/druid/segment/transform}/TransformSpecTest.java | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
rename {server/src/test/java/org/apache/druid/segment/indexing => processing/src/test/java/org/apache/druid/segment/transform}/TransformSpecTest.java (98%)
diff --git a/server/src/test/java/org/apache/druid/segment/indexing/TransformSpecTest.java b/processing/src/test/java/org/apache/druid/segment/transform/TransformSpecTest.java
similarity index 98%
rename from server/src/test/java/org/apache/druid/segment/indexing/TransformSpecTest.java
rename to processing/src/test/java/org/apache/druid/segment/transform/TransformSpecTest.java
index 8532ff03909f..aadf39f1900d 100644
--- a/server/src/test/java/org/apache/druid/segment/indexing/TransformSpecTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/transform/TransformSpecTest.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.druid.segment.indexing;
+package org.apache.druid.segment.transform;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
@@ -35,8 +35,6 @@
import org.apache.druid.query.filter.AndDimFilter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.segment.TestHelper;
-import org.apache.druid.segment.transform.ExpressionTransform;
-import org.apache.druid.segment.transform.TransformSpec;
import org.junit.Assert;
import org.junit.Test;
From df7342752157ad3581d70d0721ea1c265fd7c835 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Tue, 25 Aug 2020 00:34:07 -0700
Subject: [PATCH 05/24] Add
druid.indexer.task.ignoreTimestampSpecForDruidInputSource setting.
---
.../druid/data/input/ColumnsFilter.java | 31 ++++++++++++
.../data/input/impl/ColumnsFilterTest.java | 18 +++++++
docs/configuration/index.md | 2 +
docs/ingestion/native-batch.md | 5 ++
.../indexing/common/config/TaskConfig.java | 13 ++++-
.../indexing/common/task/CompactionTask.java | 3 +-
.../indexing/input/DruidInputSource.java | 47 +++++++++++++++++--
.../docker/environment-configs/middlemanager | 4 ++
...arallel_druid_input_source_index_task.json | 4 +-
9 files changed, 120 insertions(+), 7 deletions(-)
diff --git a/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java b/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
index 8506ffb735f9..554f3ccf1b55 100644
--- a/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
+++ b/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
@@ -20,6 +20,7 @@
package org.apache.druid.data.input;
import java.util.Collections;
+import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
@@ -64,6 +65,12 @@ public static ColumnsFilter exclusionBased(final Set<String> exclusions)
*/
public abstract boolean apply(String column);
+ /**
+ * Returns a new filter with a particular column added. The returned filter will return true from {@link #apply}
+ * on this column.
+ */
+ public abstract ColumnsFilter plus(final String column);
+
public static class InclusionBased extends ColumnsFilter
{
private final Set<String> inclusions;
@@ -79,6 +86,18 @@ public boolean apply(String column)
return inclusions.contains(column);
}
+ @Override
+ public ColumnsFilter plus(String column)
+ {
+ if (inclusions.contains(column)) {
+ return this;
+ } else {
+ final Set<String> copy = new HashSet<>(inclusions);
+ copy.add(column);
+ return new InclusionBased(copy);
+ }
+ }
+
@Override
public boolean equals(Object o)
{
@@ -122,6 +141,18 @@ public boolean apply(String column)
return !exclusions.contains(column);
}
+ @Override
+ public ColumnsFilter plus(String column)
+ {
+ if (!exclusions.contains(column)) {
+ return this;
+ } else {
+ final Set<String> copy = new HashSet<>(exclusions);
+ copy.remove(column);
+ return new ExclusionBased(copy);
+ }
+ }
+
@Override
public boolean equals(Object o)
{
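Editor's note: `plus` has deliberately asymmetric mechanics, as the two implementations above show: an inclusion-based filter grows its inclusion set, while an exclusion-based filter shrinks its exclusion set, and both return `this` when `apply` would already pass the column. A quick usage sketch, mirroring the tests added below (assumes Guava's `ImmutableSet`, which those tests also use):

```java
// Both resulting filters return true from apply("__time").
ColumnsFilter inclusion = ColumnsFilter.inclusionBased(ImmutableSet.of("page")).plus("__time");
ColumnsFilter exclusion = ColumnsFilter.exclusionBased(ImmutableSet.of("__time")).plus("__time");
```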
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/ColumnsFilterTest.java b/core/src/test/java/org/apache/druid/data/input/impl/ColumnsFilterTest.java
index d85e9278de66..00faf4ea5324 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/ColumnsFilterTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/ColumnsFilterTest.java
@@ -51,6 +51,15 @@ public void testInclusionBased()
);
}
+ @Test
+ public void testInclusionBasedPlus()
+ {
+ Assert.assertEquals(
+ ColumnsFilter.inclusionBased(ImmutableSet.of("a", "b", "c")),
+ ColumnsFilter.inclusionBased(ImmutableSet.of("b", "c")).plus("a").plus("c")
+ );
+ }
+
@Test
public void testExclusionBased()
{
@@ -60,6 +69,15 @@ public void testExclusionBased()
);
}
+ @Test
+ public void testExclusionBasedPlus()
+ {
+ Assert.assertEquals(
+ ColumnsFilter.exclusionBased(ImmutableSet.of("b")),
+ ColumnsFilter.exclusionBased(ImmutableSet.of("b", "c")).plus("a").plus("c")
+ );
+ }
+
@Test
public void testEquals()
{
diff --git a/docs/configuration/index.md b/docs/configuration/index.md
index b02c467141a1..ee782de731c8 100644
--- a/docs/configuration/index.md
+++ b/docs/configuration/index.md
@@ -1228,6 +1228,7 @@ Additional peon configs include:
|`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on middleManager restart for restorable tasks to gracefully exit.|PT5M|
|`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|`/tmp/druid-indexing`|
|`druid.indexer.task.restoreTasksOnRestart`|If true, MiddleManagers will attempt to stop tasks gracefully on shutdown and restore them on restart.|false|
+|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks using the [Druid input source](../ingestion/native-batch.md#druid-input-source) will ignore the provided timestampSpec, and will use the `__time` column of the input datasource. This option is provided for compatibility with ingestion specs written before Druid 0.20.0.|false|
|`druid.indexer.server.maxChatRequests`|Maximum number of concurrent requests served by a task's chat handler. Set to 0 to disable limiting.|0|
If the peon is running in remote mode, there must be an Overlord up and running. Peons in remote mode can set the following configurations:
@@ -1292,6 +1293,7 @@ then the value from the configuration below is used:
|`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on Indexer restart for restorable tasks to gracefully exit.|PT5M|
|`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|`/tmp/druid-indexing`|
|`druid.indexer.task.restoreTasksOnRestart`|If true, the Indexer will attempt to stop tasks gracefully on shutdown and restore them on restart.|false|
+|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks using the [Druid input source](../ingestion/native-batch.md#druid-input-source) will ignore the provided timestampSpec, and will use the `__time` column of the input datasource. This option is provided for compatibility with ingestion specs written before Druid 0.20.0.|false|
|`druid.peon.taskActionClient.retry.minWait`|The minimum retry time to communicate with Overlord.|PT5S|
|`druid.peon.taskActionClient.retry.maxWait`|The maximum retry time to communicate with Overlord.|PT1M|
|`druid.peon.taskActionClient.retry.maxRetryCount`|The maximum number of retries to communicate with Overlord.|60|
diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md
index de5e03f4a63c..c833c15e0eb1 100644
--- a/docs/ingestion/native-batch.md
+++ b/docs/ingestion/native-batch.md
@@ -1329,6 +1329,11 @@ rolled-up datasource `wikipedia_rollup` by grouping on hour, "countryName", and
}
```
+> Note: Older versions (0.19 and earlier) did not respect the timestampSpec when using the Druid input source. If you
+> have ingestion specs that rely on this and cannot rewrite them, set
+> [`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`](../configuration/index.md#indexer-general-configuration)
+> to `true` to enable a compatibility mode where the timestampSpec is ignored.
+
### SQL Input Source
The SQL input source is used to read data directly from RDBMS.
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java
index 7c22dad5b62b..bf887e500e6e 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java
@@ -67,6 +67,9 @@ public class TaskConfig
@JsonProperty
private final List<StorageLocationConfig> shuffleDataLocations;
+ @JsonProperty
+ private final boolean ignoreTimestampSpecForDruidInputSource;
+
@JsonCreator
public TaskConfig(
@JsonProperty("baseDir") String baseDir,
@@ -77,7 +80,8 @@ public TaskConfig(
@JsonProperty("restoreTasksOnRestart") boolean restoreTasksOnRestart,
@JsonProperty("gracefulShutdownTimeout") Period gracefulShutdownTimeout,
@JsonProperty("directoryLockTimeout") Period directoryLockTimeout,
- @JsonProperty("shuffleDataLocations") List shuffleDataLocations
+ @JsonProperty("shuffleDataLocations") List shuffleDataLocations,
+ @JsonProperty("ignoreTimestampSpecForDruidInputSource") boolean ignoreTimestampSpecForDruidInputSource
)
{
this.baseDir = baseDir == null ? System.getProperty("java.io.tmpdir") : baseDir;
@@ -102,6 +106,7 @@ public TaskConfig(
} else {
this.shuffleDataLocations = shuffleDataLocations;
}
+ this.ignoreTimestampSpecForDruidInputSource = ignoreTimestampSpecForDruidInputSource;
}
@JsonProperty
@@ -178,6 +183,12 @@ public List<StorageLocationConfig> getShuffleDataLocations()
return shuffleDataLocations;
}
+ @JsonProperty
+ public boolean isIgnoreTimestampSpecForDruidInputSource()
+ {
+ return ignoreTimestampSpecForDruidInputSource;
+ }
+
private String defaultDir(@Nullable String configParameter, final String defaultVal)
{
if (configParameter == null) {
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
index d7883b94d627..73cb12b2e2f4 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
@@ -625,7 +625,8 @@ private static ParallelIndexIOConfig createIoConfig(
toolbox.getIndexIO(),
coordinatorClient,
segmentLoaderFactory,
- retryPolicyFactory
+ retryPolicyFactory,
+ toolbox.getConfig()
),
null,
false
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
index 6119b37d2e77..377d224fd5e1 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
@@ -26,6 +26,7 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.collect.FluentIterable;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.druid.client.coordinator.CoordinatorClient;
import org.apache.druid.data.input.AbstractInputSource;
@@ -39,9 +40,11 @@
import org.apache.druid.data.input.SplitHintSpec;
import org.apache.druid.data.input.impl.InputEntityIteratingReader;
import org.apache.druid.data.input.impl.SplittableInputSource;
+import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.indexing.common.RetryPolicy;
import org.apache.druid.indexing.common.RetryPolicyFactory;
import org.apache.druid.indexing.common.SegmentLoaderFactory;
+import org.apache.druid.indexing.common.config.TaskConfig;
import org.apache.druid.indexing.firehose.WindowedSegmentId;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
@@ -49,6 +52,7 @@
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.segment.IndexIO;
+import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.loading.SegmentLoader;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.TimelineObjectHolder;
@@ -68,6 +72,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ThreadLocalRandom;
@@ -82,6 +87,11 @@ public class DruidInputSource extends AbstractInputSource implements SplittableI
{
private static final Logger LOG = new Logger(DruidInputSource.class);
+ /**
+ * Timestamp formats that the standard __time column can be parsed with.
+ */
+ private static final Set<String> STANDARD_TIME_COLUMN_FORMATS = ImmutableSet.of("millis", "__time");
+
private final String dataSource;
// Exactly one of interval and segmentIds should be non-null. Typically 'interval' is specified directly
// by the user creating this firehose and 'segmentIds' is used for sub-tasks if it is split for parallel
@@ -95,6 +105,7 @@ public class DruidInputSource extends AbstractInputSource implements SplittableI
private final CoordinatorClient coordinatorClient;
private final SegmentLoaderFactory segmentLoaderFactory;
private final RetryPolicyFactory retryPolicyFactory;
+ private final TaskConfig taskConfig;
/**
* Included for serde backwards-compatibility only. Not used.
@@ -119,7 +130,8 @@ public DruidInputSource(
@JacksonInject IndexIO indexIO,
@JacksonInject CoordinatorClient coordinatorClient,
@JacksonInject SegmentLoaderFactory segmentLoaderFactory,
- @JacksonInject RetryPolicyFactory retryPolicyFactory
+ @JacksonInject RetryPolicyFactory retryPolicyFactory,
+ @JacksonInject TaskConfig taskConfig
)
{
Preconditions.checkNotNull(dataSource, "dataSource");
@@ -136,6 +148,7 @@ public DruidInputSource(
this.coordinatorClient = Preconditions.checkNotNull(coordinatorClient, "null CoordinatorClient");
this.segmentLoaderFactory = Preconditions.checkNotNull(segmentLoaderFactory, "null SegmentLoaderFactory");
this.retryPolicyFactory = Preconditions.checkNotNull(retryPolicyFactory, "null RetryPolicyFactory");
+ this.taskConfig = Preconditions.checkNotNull(taskConfig, "null taskConfig");
}
@JsonProperty
@@ -206,8 +219,35 @@ protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nu
final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);
+ final InputRowSchema inputRowSchemaToUse;
+
+ if (taskConfig.isIgnoreTimestampSpecForDruidInputSource()) {
+ // Legacy compatibility mode; see https://github.com/apache/druid/pull/10267.
+ LOG.warn("Ignoring the provided timestampSpec and reading the __time column instead. To use timestampSpecs with "
+ + "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false.");
+
+ inputRowSchemaToUse = new InputRowSchema(
+ new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, "millis", null),
+ inputRowSchema.getDimensionsSpec(),
+ inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME)
+ );
+ } else {
+ inputRowSchemaToUse = inputRowSchema;
+ }
+
+ if (ColumnHolder.TIME_COLUMN_NAME.equals(inputRowSchemaToUse.getTimestampSpec().getTimestampColumn())
+ && !STANDARD_TIME_COLUMN_FORMATS.contains(inputRowSchemaToUse.getTimestampSpec().getTimestampFormat())) {
+ // Slight chance the user did this intentionally, but not likely. Log a warning.
+ LOG.warn(
+ "The provided timestampSpec refers to the %s column without using format %s. If you wanted to read the "
+ + "column as-is, switch formats.",
+ inputRowSchemaToUse.getTimestampSpec().getTimestampColumn(),
+ STANDARD_TIME_COLUMN_FORMATS
+ );
+ }
+
return new InputEntityIteratingReader(
- inputRowSchema,
+ inputRowSchemaToUse,
inputFormat,
entityIterator,
temporaryDirectory
@@ -279,7 +319,8 @@ public SplittableInputSource<List<WindowedSegmentId>> withSplit(InputSplit
From: Gian Merlino
Date: Tue, 25 Aug 2020 00:39:44 -0700
Subject: [PATCH 06/24] Fix.
---
.../org/apache/druid/indexing/input/DruidInputSource.java | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
index 377d224fd5e1..ff78d79683f6 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
@@ -26,7 +26,7 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.collect.FluentIterable;
-import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Iterators;
import org.apache.druid.client.coordinator.CoordinatorClient;
import org.apache.druid.data.input.AbstractInputSource;
@@ -90,7 +90,7 @@ public class DruidInputSource extends AbstractInputSource implements SplittableI
/**
* Timestamp formats that the standard __time column can be parsed with.
*/
- private static final Set<String> STANDARD_TIME_COLUMN_FORMATS = ImmutableSet.of("millis", "__time");
+ private static final Set<String> STANDARD_TIME_COLUMN_FORMATS = ImmutableSortedSet.of("auto", "millis");
private final String dataSource;
// Exactly one of interval and segmentIds should be non-null. Typically 'interval' is specified directly
@@ -227,7 +227,7 @@ protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nu
+ "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false.");
inputRowSchemaToUse = new InputRowSchema(
- new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, "millis", null),
+ new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, STANDARD_TIME_COLUMN_FORMATS.iterator().next(), null),
inputRowSchema.getDimensionsSpec(),
inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME)
);
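Editor's note: with the switch to `ImmutableSortedSet`, the compatibility-mode TimestampSpec picks up whichever format sorts first, so it now uses `auto` instead of the hard-coded `millis` from the previous commit. A tiny sketch of why:

```java
// Natural string ordering puts "auto" before "millis", so iterator().next()
// on the sorted set yields "auto".
Set<String> formats = ImmutableSortedSet.of("auto", "millis");
assert "auto".equals(formats.iterator().next());
```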
From cf68ace26924bb910038b57e4dd4d1d0b064e7de Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Tue, 25 Aug 2020 01:24:24 -0700
Subject: [PATCH 07/24] Fix build.
---
.../druid/indexing/kafka/KafkaIndexTaskTest.java | 3 ++-
.../indexing/kinesis/KinesisIndexTaskTest.java | 3 ++-
.../druid/indexing/common/TaskToolboxTest.java | 2 +-
.../AppenderatorDriverRealtimeIndexTaskTest.java | 13 ++++++++++++-
.../druid/indexing/common/task/HadoopTaskTest.java | 3 ++-
.../indexing/common/task/RealtimeIndexTaskTest.java | 13 ++++++++++++-
.../AbstractParallelIndexSupervisorTaskTest.java | 3 ++-
.../overlord/SingleTaskBackgroundRunnerTest.java | 3 ++-
.../druid/indexing/overlord/TaskLifecycleTest.java | 2 +-
.../IntermediaryDataManagerAutoCleanupTest.java | 3 ++-
...termediaryDataManagerManualAddAndDeleteTest.java | 3 ++-
.../worker/ShuffleDataSegmentPusherTest.java | 3 ++-
.../indexing/worker/WorkerTaskManagerTest.java | 3 ++-
.../indexing/worker/WorkerTaskMonitorTest.java | 3 ++-
14 files changed, 46 insertions(+), 14 deletions(-)
diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java
index c257df296790..e158801b4a99 100644
--- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java
+++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java
@@ -2600,7 +2600,8 @@ private void makeToolboxFactory() throws IOException
true,
null,
null,
- null
+ null,
+ false
);
final TestDerbyConnector derbyConnector = derby.getConnector();
derbyConnector.createDataSourceTable();
diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskTest.java
index c893c9c738ee..33bfb769427f 100644
--- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskTest.java
+++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskTest.java
@@ -2859,7 +2859,8 @@ private void makeToolboxFactory() throws IOException
true,
null,
null,
- null
+ null,
+ false
);
final TestDerbyConnector derbyConnector = derby.getConnector();
derbyConnector.createDataSourceTable();
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java
index e99e4287441f..d84eb46d6777 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java
@@ -97,7 +97,7 @@ public void setUp() throws IOException
EasyMock.replay(task, mockHandoffNotifierFactory);
taskToolbox = new TaskToolboxFactory(
- new TaskConfig(temporaryFolder.newFile().toString(), null, null, 50000, null, false, null, null, null),
+ new TaskConfig(temporaryFolder.newFile().toString(), null, null, 50000, null, false, null, null, null, false),
new DruidNode("druid/middlemanager", "localhost", false, 8091, null, true, false),
mockTaskActionClientFactory,
mockEmitter,
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java
index 576235ee4f38..93884de6cdbc 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java
@@ -1514,7 +1514,18 @@ public SegmentPublishResult announceHistoricalSegments(
};
taskLockbox = new TaskLockbox(taskStorage, mdc);
- final TaskConfig taskConfig = new TaskConfig(directory.getPath(), null, null, 50000, null, true, null, null, null);
+ final TaskConfig taskConfig = new TaskConfig(
+ directory.getPath(),
+ null,
+ null,
+ 50000,
+ null,
+ true,
+ null,
+ null,
+ null,
+ false
+ );
final TaskActionToolbox taskActionToolbox = new TaskActionToolbox(
taskLockbox,
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java
index 990888e5a96c..caaeea253c73 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java
@@ -116,7 +116,8 @@ public TaskStatus runTask(TaskToolbox toolbox)
false,
null,
null,
- null
+ null,
+ false
)).once();
EasyMock.replay(toolbox);
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RealtimeIndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RealtimeIndexTaskTest.java
index 432457674635..6b18ce0a145a 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RealtimeIndexTaskTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RealtimeIndexTaskTest.java
@@ -885,7 +885,18 @@ private TaskToolbox makeToolbox(
final File directory
)
{
- final TaskConfig taskConfig = new TaskConfig(directory.getPath(), null, null, 50000, null, true, null, null, null);
+ final TaskConfig taskConfig = new TaskConfig(
+ directory.getPath(),
+ null,
+ null,
+ 50000,
+ null,
+ true,
+ null,
+ null,
+ null,
+ false
+ );
final TaskLockbox taskLockbox = new TaskLockbox(taskStorage, mdc);
try {
taskStorage.insert(task, TaskStatus.running(task.getId()));
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
index 3ba9441b2f3f..d7f111f430ea 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
@@ -203,7 +203,8 @@ public void setUpAbstractParallelIndexSupervisorTaskTest() throws IOException
false,
null,
null,
- ImmutableList.of(new StorageLocationConfig(temporaryFolder.newFolder(), null, null))
+ ImmutableList.of(new StorageLocationConfig(temporaryFolder.newFolder(), null, null)),
+ false
),
null
);
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/SingleTaskBackgroundRunnerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/SingleTaskBackgroundRunnerTest.java
index 4cfa87d8a360..826c09e606e7 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/SingleTaskBackgroundRunnerTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/SingleTaskBackgroundRunnerTest.java
@@ -78,7 +78,8 @@ public void setup() throws IOException
true,
null,
null,
- null
+ null,
+ false
);
final ServiceEmitter emitter = new NoopServiceEmitter();
final TaskToolboxFactory toolboxFactory = new TaskToolboxFactory(
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java
index 2bdc0b7e6107..020f0063b84c 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java
@@ -594,7 +594,7 @@ private TaskToolboxFactory setUpTaskToolboxFactory(
new TaskAuditLogConfig(true)
);
File tmpDir = temporaryFolder.newFolder();
- taskConfig = new TaskConfig(tmpDir.toString(), null, null, 50000, null, false, null, null, null);
+ taskConfig = new TaskConfig(tmpDir.toString(), null, null, 50000, null, false, null, null, null, false);
SegmentLoaderConfig segmentLoaderConfig = new SegmentLoaderConfig()
{
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/IntermediaryDataManagerAutoCleanupTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/IntermediaryDataManagerAutoCleanupTest.java
index 7d0233b6b16d..3c59e943d464 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/IntermediaryDataManagerAutoCleanupTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/IntermediaryDataManagerAutoCleanupTest.java
@@ -87,7 +87,8 @@ public Period getIntermediaryPartitionTimeout()
false,
null,
null,
- ImmutableList.of(new StorageLocationConfig(tempDir.newFolder(), null, null))
+ ImmutableList.of(new StorageLocationConfig(tempDir.newFolder(), null, null)),
+ false
);
final IndexingServiceClient indexingServiceClient = new NoopIndexingServiceClient()
{
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/IntermediaryDataManagerManualAddAndDeleteTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/IntermediaryDataManagerManualAddAndDeleteTest.java
index 15aad92b6a3c..fe6d615714fd 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/IntermediaryDataManagerManualAddAndDeleteTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/IntermediaryDataManagerManualAddAndDeleteTest.java
@@ -70,7 +70,8 @@ public void setup() throws IOException
false,
null,
null,
- ImmutableList.of(new StorageLocationConfig(intermediarySegmentsLocation, 600L, null))
+ ImmutableList.of(new StorageLocationConfig(intermediarySegmentsLocation, 600L, null)),
+ false
);
final IndexingServiceClient indexingServiceClient = new NoopIndexingServiceClient();
intermediaryDataManager = new IntermediaryDataManager(workerConfig, taskConfig, indexingServiceClient);
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/ShuffleDataSegmentPusherTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/ShuffleDataSegmentPusherTest.java
index 153192633967..509fa39ab06b 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/ShuffleDataSegmentPusherTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/ShuffleDataSegmentPusherTest.java
@@ -69,7 +69,8 @@ public void setup() throws IOException
false,
null,
null,
- ImmutableList.of(new StorageLocationConfig(temporaryFolder.newFolder(), null, null))
+ ImmutableList.of(new StorageLocationConfig(temporaryFolder.newFolder(), null, null)),
+ false
);
final IndexingServiceClient indexingServiceClient = new NoopIndexingServiceClient();
intermediaryDataManager = new IntermediaryDataManager(workerConfig, taskConfig, indexingServiceClient);
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java
index 3ae7d96e5da3..2da185bef93c 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java
@@ -87,7 +87,8 @@ private WorkerTaskManager createWorkerTaskManager()
false,
null,
null,
- null
+ null,
+ false
);
TaskActionClientFactory taskActionClientFactory = EasyMock.createNiceMock(TaskActionClientFactory.class);
TaskActionClient taskActionClient = EasyMock.createNiceMock(TaskActionClient.class);
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java
index 2fdca5f2c335..099755147ef7 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java
@@ -157,7 +157,8 @@ private WorkerTaskMonitor createTaskMonitor()
false,
null,
null,
- null
+ null,
+ false
);
TaskActionClientFactory taskActionClientFactory = EasyMock.createNiceMock(TaskActionClientFactory.class);
TaskActionClient taskActionClient = EasyMock.createNiceMock(TaskActionClient.class);
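For context on the mechanical fixups above: each test call site gains one more trailing argument to the TaskConfig constructor. A minimal sketch of the resulting call shape, assuming (based on the rest of this series, not a confirmed signature) that the new final boolean is the ignoreTimestampSpecForDruidInputSource compatibility flag, disabled by default:

    import org.apache.druid.indexing.common.config.TaskConfig;

    public class TaskConfigFixupSketch
    {
      // Sketch only: mirrors the ten-argument calls used in the tests above.
      // The trailing 'false' is assumed to be ignoreTimestampSpecForDruidInputSource.
      public static TaskConfig defaultTestTaskConfig()
      {
        return new TaskConfig(null, null, null, null, null, false, null, null, null, false);
      }
    }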
From 6369cc0aa5e756c661ce9767262aa0fa255d9148 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Tue, 25 Aug 2020 11:18:44 -0700
Subject: [PATCH 08/24] Checkstyle.
---
.../main/java/org/apache/druid/data/input/ColumnsFilter.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java b/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
index 554f3ccf1b55..b01001f8eec1 100644
--- a/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
+++ b/core/src/main/java/org/apache/druid/data/input/ColumnsFilter.java
@@ -69,7 +69,7 @@ public static ColumnsFilter exclusionBased(final Set<String> exclusions)
* Returns a new filter with a particular column added. The returned filter will return true from {@link #apply}
* on this column.
*/
- public abstract ColumnsFilter plus(final String column);
+ public abstract ColumnsFilter plus(String column);
public static class InclusionBased extends ColumnsFilter
{
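For context on the API this one-line checkstyle fix touches: ColumnsFilter decides, per column, whether a reader needs to read it at all. A minimal usage sketch, assuming only the methods evidenced in this file (exclusionBased above, plus the apply and plus methods named in the javadoc):

    import com.google.common.collect.ImmutableSet;
    import org.apache.druid.data.input.ColumnsFilter;

    public class ColumnsFilterSketch
    {
      public static void main(String[] args)
      {
        // Exclusion-based: read everything except "metric1".
        final ColumnsFilter filter = ColumnsFilter.exclusionBased(ImmutableSet.of("metric1"));
        System.out.println(filter.apply("channel"));  // true
        System.out.println(filter.apply("metric1"));  // false

        // plus() re-admits a column, e.g. one required by a timestampSpec.
        System.out.println(filter.plus("metric1").apply("metric1"));  // true
      }
    }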
From 14efe00d0546ab06b026c21f7ce118d9b72f35e7 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Tue, 25 Aug 2020 14:54:28 -0700
Subject: [PATCH 09/24] Misc fixes.
---
.../druid/indexing/common/task/CompactionTaskRunTest.java | 3 ++-
.../apache/druid/indexing/common/task/CompactionTaskTest.java | 2 +-
.../apache/druid/indexing/common/task/IngestionTestBase.java | 3 ++-
.../parallel/AbstractParallelIndexSupervisorTaskTest.java | 2 +-
website/.spelling | 1 +
5 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java
index e08361450fb9..6301c15de212 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java
@@ -41,6 +41,7 @@
import org.apache.druid.indexing.common.SegmentLoaderFactory;
import org.apache.druid.indexing.common.TaskToolbox;
import org.apache.druid.indexing.common.TestUtils;
+import org.apache.druid.indexing.common.config.TaskConfig;
import org.apache.druid.indexing.common.stats.RowIngestionMetersFactory;
import org.apache.druid.indexing.common.task.CompactionTask.Builder;
import org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory;
@@ -873,7 +874,7 @@ public List<StorageLocationConfig> getLocations()
);
return new TaskToolbox(
- null,
+ new TaskConfig(null, null, null, null, null, false, null, null, null, false),
null,
createActionClient(task),
null,
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java
index 88850031ca99..984c464f80f4 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java
@@ -1284,7 +1284,7 @@ private static class TestTaskToolbox extends TaskToolbox
)
{
super(
- null,
+ new TaskConfig(null, null, null, null, null, false, null, null, null, false),
null,
taskActionClient,
null,
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java
index 881c44d5f1c4..86c304476fcf 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java
@@ -32,6 +32,7 @@
import org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction;
import org.apache.druid.indexing.common.actions.TaskAction;
import org.apache.druid.indexing.common.actions.TaskActionToolbox;
+import org.apache.druid.indexing.common.config.TaskConfig;
import org.apache.druid.indexing.common.config.TaskStorageConfig;
import org.apache.druid.indexing.common.stats.RowIngestionMetersFactory;
import org.apache.druid.indexing.overlord.HeapMemoryTaskStorage;
@@ -292,7 +293,7 @@ public ListenableFuture<TaskStatus> run(Task task)
);
final TaskToolbox box = new TaskToolbox(
- null,
+ new TaskConfig(null, null, null, null, null, false, null, null, null, false),
new DruidNode("druid/middlemanager", "localhost", false, 8091, null, true, false),
taskActionClient,
null,
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
index d7f111f430ea..ac287660fe1f 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
@@ -570,7 +570,7 @@ public static void prepareObjectMapper(
protected TaskToolbox createTaskToolbox(Task task, TaskActionClient actionClient) throws IOException
{
return new TaskToolbox(
- null,
+ new TaskConfig(null, null, null, null, null, false, null, null, null, false),
new DruidNode("druid/middlemanager", "localhost", false, 8091, null, true, false),
actionClient,
null,
diff --git a/website/.spelling b/website/.spelling
index dd0ad828d5db..888d26c9332f 100644
--- a/website/.spelling
+++ b/website/.spelling
@@ -1724,6 +1724,7 @@ successfulSending
taskBlackListCleanupPeriod
tasklogs
timeBoundary
+timestampSpec
tmp
tmpfs
truststore
From 7c6cf83327feef6deea1369f630b519a26f47d7f Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Thu, 27 Aug 2020 15:32:56 -0700
Subject: [PATCH 10/24] Fix test.
---
.../parallel/AbstractParallelIndexSupervisorTaskTest.java | 3 +++
1 file changed, 3 insertions(+)
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
index 2140ee954592..b232fee9c314 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
@@ -513,6 +513,8 @@ public Set<DataSegment> getPublishedSegments(Task task)
public void prepareObjectMapper(ObjectMapper objectMapper, IndexIO indexIO)
{
+ final TaskConfig taskConfig = new TaskConfig(null, null, null, null, null, false, null, null, null, false);
+
objectMapper.setInjectableValues(
new InjectableValues.Std()
.addValue(ExprMacroTable.class, LookupEnabledTestExprMacroTable.INSTANCE)
@@ -529,6 +531,7 @@ public void prepareObjectMapper(ObjectMapper objectMapper, IndexIO indexIO)
.addValue(CoordinatorClient.class, coordinatorClient)
.addValue(SegmentLoaderFactory.class, new SegmentLoaderFactory(indexIO, objectMapper))
.addValue(RetryPolicyFactory.class, new RetryPolicyFactory(new RetryPolicyConfig()))
+ .addValue(TaskConfig.class, taskConfig)
);
objectMapper.registerSubtypes(
new NamedType(ParallelIndexSupervisorTask.class, ParallelIndexSupervisorTask.TYPE),
From 530eb3280a04fa9db720e8d07e69126cd1583d22 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Sat, 29 Aug 2020 14:21:59 -0700
Subject: [PATCH 11/24] Move config.
---
integration-tests/docker/environment-configs/common | 6 +++++-
integration-tests/docker/environment-configs/middlemanager | 4 ----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/integration-tests/docker/environment-configs/common b/integration-tests/docker/environment-configs/common
index aba937b9f541..26b36ab45259 100644
--- a/integration-tests/docker/environment-configs/common
+++ b/integration-tests/docker/environment-configs/common
@@ -66,4 +66,8 @@ druid_zk_service_host=druid-zookeeper-kafka
druid_auth_basic_common_maxSyncRetries=20
druid_indexer_logs_directory=/shared/tasklogs
druid_sql_enable=true
-druid_extensions_hadoopDependenciesDir=/shared/hadoop-dependencies
\ No newline at end of file
+druid_extensions_hadoopDependenciesDir=/shared/hadoop-dependencies
+
+# Testing the legacy config from https://github.com/apache/druid/pull/10267
+# Can remove this when the flag is no longer needed
+druid_indexer_task_ignoreTimestampSpecForDruidInputSource=true
diff --git a/integration-tests/docker/environment-configs/middlemanager b/integration-tests/docker/environment-configs/middlemanager
index 1888d7a17751..c92cfd783caa 100644
--- a/integration-tests/docker/environment-configs/middlemanager
+++ b/integration-tests/docker/environment-configs/middlemanager
@@ -37,7 +37,3 @@ druid_auth_basic_common_cacheDirectory=/tmp/authCache/middleManager
druid_startup_logging_logProperties=true
druid_server_https_crlPath=/tls/revocations.crl
druid_worker_capacity=20
-
-# Testing the legacy config from https://github.com/apache/druid/pull/10267
-# Can remove this when the flag is no longer needed
-druid_indexer_task_ignoreTimestampSpecForDruidInputSource=true
From 94f293017b51c2ff2fa03bcdb1f794df199f3078 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Tue, 22 Sep 2020 21:27:22 -0700
Subject: [PATCH 12/24] Fix imports.
---
.../apache/druid/indexing/common/task/InputSourceProcessor.java | 1 -
1 file changed, 1 deletion(-)
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java
index 1cc3388fefd9..63ebba8daf7c 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java
@@ -28,7 +28,6 @@
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;
import org.apache.druid.indexing.common.task.batch.parallel.iterator.IndexTaskInputRowIteratorBuilder;
-import org.apache.druid.indexing.input.InputRowSchemas;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
From 34d47920168624071c5e8bf17a5bab3ce3ec4b45 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Tue, 22 Sep 2020 22:20:28 -0700
Subject: [PATCH 13/24] Fixup.
---
.../druid/indexing/input/DruidSegmentReader.java | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java
index 3eb57b30597d..6e91b3a1a8fa 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java
@@ -28,6 +28,7 @@
import org.apache.druid.data.input.InputEntity;
import org.apache.druid.data.input.InputEntity.CleanableFile;
import org.apache.druid.data.input.InputRow;
+import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.IntermediateRowParsingReader;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.MapInputRowParser;
@@ -146,7 +147,16 @@ protected CloseableIterator<Map<String, Object>> intermediateRowIterator() throw
@Override
protected List<InputRow> parseInputRows(Map<String, Object> intermediateRow) throws ParseException
{
- return Collections.singletonList(MapInputRowParser.parse(timestampSpec, dimensionsSpec, intermediateRow));
+ return Collections.singletonList(
+ MapInputRowParser.parse(
+ new InputRowSchema(
+ timestampSpec,
+ dimensionsSpec,
+ columnsFilter
+ ),
+ intermediateRow
+ )
+ );
}
@Override
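A sketch of the parse path this fixup wires up, assuming the three-argument InputRowSchema constructor used above and the MapInputRowParser.parse(InputRowSchema, Map) overload also shown in this series; the row values are illustrative, taken from the wikipedia sample data used elsewhere in this patch set:

    import com.google.common.collect.ImmutableList;
    import com.google.common.collect.ImmutableMap;
    import org.apache.druid.data.input.ColumnsFilter;
    import org.apache.druid.data.input.InputRow;
    import org.apache.druid.data.input.InputRowSchema;
    import org.apache.druid.data.input.impl.DimensionsSpec;
    import org.apache.druid.data.input.impl.MapInputRowParser;
    import org.apache.druid.data.input.impl.TimestampSpec;

    public class DruidSegmentParseSketch
    {
      public static void main(String[] args)
      {
        // Reindexing setup: the timestamp comes from the __time column, in millis.
        final InputRowSchema schema = new InputRowSchema(
            new TimestampSpec("__time", "millis", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("channel"))),
            ColumnsFilter.all()
        );

        final InputRow row = MapInputRowParser.parse(
            schema,
            ImmutableMap.of("__time", 1442018818771L, "channel", "#en.wikipedia")
        );

        System.out.println(row.getTimestamp());          // 2015-09-12T00:46:58.771Z
        System.out.println(row.getDimension("channel")); // [#en.wikipedia]
      }
    }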
From 96747f4cb0fede4f850a4517d97644788a2dd689 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Mon, 19 Oct 2020 16:31:09 -0700
Subject: [PATCH 14/24] Fix ShuffleResourceTest.
---
.../druid/indexing/worker/shuffle/ShuffleResourceTest.java | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/shuffle/ShuffleResourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/shuffle/ShuffleResourceTest.java
index bd1b2117042f..741956a53f7b 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/shuffle/ShuffleResourceTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/shuffle/ShuffleResourceTest.java
@@ -95,7 +95,8 @@ public Period getIntermediaryPartitionTimeout()
false,
null,
null,
- ImmutableList.of(new StorageLocationConfig(tempDir.newFolder(), null, null))
+ ImmutableList.of(new StorageLocationConfig(tempDir.newFolder(), null, null)),
+ false
);
final IndexingServiceClient indexingServiceClient = new NoopIndexingServiceClient()
{
From 2e753d12e5dd3028a1c0ca3c21ec975b1f34b05f Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Mon, 19 Oct 2020 17:00:03 -0700
Subject: [PATCH 15/24] Add import.
---
.../java/org/apache/druid/data/input/avro/AvroOCFReaderTest.java | 1 +
1 file changed, 1 insertion(+)
diff --git a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroOCFReaderTest.java b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroOCFReaderTest.java
index ed039072dfcc..4841483c4e1a 100644
--- a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroOCFReaderTest.java
+++ b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroOCFReaderTest.java
@@ -45,6 +45,7 @@
import java.io.File;
import java.io.IOException;
+import java.util.List;
import java.util.Map;
public class AvroOCFReaderTest
From be8a38950524036e25013e52088405fd3eaeb58e Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Fri, 4 Dec 2020 00:29:24 -0800
Subject: [PATCH 16/24] Smarter exclusions.
---
.../overlord/sampler/InputSourceSampler.java | 1 -
.../druid/segment/indexing/DataSchema.java | 67 +++++++++++--------
.../segment/indexing/DataSchemaTest.java | 6 +-
3 files changed, 42 insertions(+), 32 deletions(-)
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
index b306b3b1195c..94a3e52e82b2 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
@@ -50,7 +50,6 @@
import javax.annotation.Nullable;
import java.io.File;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
diff --git a/server/src/main/java/org/apache/druid/segment/indexing/DataSchema.java b/server/src/main/java/org/apache/druid/segment/indexing/DataSchema.java
index 3fc0fa26b272..42b19157577f 100644
--- a/server/src/main/java/org/apache/druid/segment/indexing/DataSchema.java
+++ b/server/src/main/java/org/apache/druid/segment/indexing/DataSchema.java
@@ -27,8 +27,8 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Sets;
import org.apache.druid.common.utils.IdUtils;
+import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.InputRowParser;
import org.apache.druid.data.input.impl.ParseSpec;
@@ -36,6 +36,7 @@
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.indexing.granularity.GranularitySpec;
import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;
import org.apache.druid.segment.transform.TransformSpec;
@@ -45,8 +46,7 @@
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;
+import java.util.stream.Stream;
/**
@@ -55,7 +55,6 @@
public class DataSchema
{
private static final Logger log = new Logger(DataSchema.class);
- private static final Pattern INVALIDCHARS = Pattern.compile("(?s).*[^\\S ].*");
private final String dataSource;
private final AggregatorFactory[] aggregators;
private final GranularitySpec granularitySpec;
@@ -150,35 +149,47 @@ private static void validateDatasourceName(String dataSource)
IdUtils.validateId("dataSource", dataSource);
}
+ /**
+ * Computes the {@link DimensionsSpec} that we will actually use. It is derived from, but not necessarily identical
+ * to, the one that we were given.
+ */
private static DimensionsSpec computeDimensionsSpec(
- TimestampSpec timestampSpec,
- DimensionsSpec dimensionsSpec,
- AggregatorFactory[] aggregators
+ final TimestampSpec timestampSpec,
+ final DimensionsSpec dimensionsSpec,
+ final AggregatorFactory[] aggregators
)
{
- final Set<String> dimensionExclusions = new HashSet<>();
-
- final String timestampColumn = timestampSpec.getTimestampColumn();
- if (!(dimensionsSpec.hasCustomDimensions() && dimensionsSpec.getDimensionNames().contains(timestampColumn))) {
- dimensionExclusions.add(timestampColumn);
- }
-
- for (AggregatorFactory aggregator : aggregators) {
- dimensionExclusions.addAll(aggregator.requiredFields());
- dimensionExclusions.add(aggregator.getName());
- }
+ final Set<String> inputFieldNames = new HashSet<>();
+ final Set<String> outputFieldNames = new HashSet<>();
+
+ // Populate inputFieldNames.
+ inputFieldNames.add(timestampSpec.getTimestampColumn());
+ inputFieldNames.addAll(dimensionsSpec.getDimensionNames());
+ Arrays.stream(aggregators)
+ .flatMap(aggregator -> aggregator.requiredFields().stream())
+ .forEach(inputFieldNames::add);
+
+ // Populate outputFieldNames, validating along the way for lack of duplicates.
+ outputFieldNames.add(ColumnHolder.TIME_COLUMN_NAME);
+
+ Stream.concat(
+ dimensionsSpec.getDimensions().stream().map(DimensionSchema::getName),
+ Arrays.stream(aggregators).map(AggregatorFactory::getName)
+ ).forEach(
+ field -> {
+ if (!outputFieldNames.add(field)) {
+ throw new IAE("Cannot specify field [%s] more than once", field);
+ }
+ }
+ );
- final Set<String> metSet = Arrays.stream(aggregators).map(AggregatorFactory::getName).collect(Collectors.toSet());
- final Set<String> dimSet = new HashSet<>(dimensionsSpec.getDimensionNames());
- final Set<String> overlap = Sets.intersection(metSet, dimSet);
- if (!overlap.isEmpty()) {
- throw new IAE(
- "Cannot have overlapping dimensions and metrics of the same name. Please change the name of the metric. Overlap: %s",
- overlap
- );
- }
+ // Set up additional exclusions: all inputs and outputs, minus defined dimensions.
+ final Set<String> additionalDimensionExclusions = new HashSet<>();
+ additionalDimensionExclusions.addAll(inputFieldNames);
+ additionalDimensionExclusions.addAll(outputFieldNames);
+ additionalDimensionExclusions.removeAll(dimensionsSpec.getDimensionNames());
- return dimensionsSpec.withDimensionExclusions(Sets.difference(dimensionExclusions, dimSet));
+ return dimensionsSpec.withDimensionExclusions(additionalDimensionExclusions);
}
@JsonProperty
diff --git a/server/src/test/java/org/apache/druid/segment/indexing/DataSchemaTest.java b/server/src/test/java/org/apache/druid/segment/indexing/DataSchemaTest.java
index 13bf27d2c178..67bed5084b5f 100644
--- a/server/src/test/java/org/apache/druid/segment/indexing/DataSchemaTest.java
+++ b/server/src/test/java/org/apache/druid/segment/indexing/DataSchemaTest.java
@@ -97,7 +97,7 @@ public void testDefaultExclusions()
);
Assert.assertEquals(
- ImmutableSet.of("time", "col1", "col2", "metric1", "metric2"),
+ ImmutableSet.of("__time", "time", "col1", "col2", "metric1", "metric2"),
schema.getDimensionsSpec().getDimensionExclusions()
);
}
@@ -135,7 +135,7 @@ public void testExplicitInclude()
);
Assert.assertEquals(
- ImmutableSet.of("dimC", "col1", "metric1", "metric2"),
+ ImmutableSet.of("__time", "dimC", "col1", "metric1", "metric2"),
schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions()
);
}
@@ -405,7 +405,7 @@ public void testSerde() throws Exception
actual.getParser().getParseSpec(),
new JSONParseSpec(
new TimestampSpec("xXx", null, null),
- new DimensionsSpec(null, Arrays.asList("metric1", "xXx", "col1"), null),
+ new DimensionsSpec(null, Arrays.asList("__time", "metric1", "xXx", "col1"), null),
null,
null,
null
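Restated as a standalone sketch (simplified, not the actual method), the "smarter exclusions" logic above excludes every input field and every output field from schemaless dimension discovery, except the dimensions that were declared explicitly:

    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class ExclusionSketch
    {
      static Set<String> additionalExclusions(
          String timestampColumn,
          List<String> declaredDimensions,
          List<String> aggregatorNames,
          List<String> aggregatorInputs
      )
      {
        // Inputs: timestamp column and aggregator inputs.
        // Outputs: __time and aggregator names.
        final Set<String> exclusions = new HashSet<>();
        exclusions.add(timestampColumn);
        exclusions.add("__time");
        exclusions.addAll(aggregatorInputs);
        exclusions.addAll(aggregatorNames);
        // Declared dimensions are never excluded.
        exclusions.removeAll(declaredDimensions);
        return exclusions;
      }
    }

This is why the test expectations above now include __time: it is always an output field, so it is excluded from discovery unless explicitly declared. The real method additionally rejects duplicate output names across dimensions and aggregators.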
From 76ccfd3f65ef65d0f14d694581d7231c7246fa1b Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Sat, 5 Dec 2020 16:26:09 -0800
Subject: [PATCH 17/24] Fixes based on tests.
Also, add TIME_COLUMN constant in the web console.
---
.../indexing/input/DruidInputSource.java | 5 ++---
.../duty/ITAutoCompactionTest.java | 4 ++--
.../src/druid-models/timestamp-spec.tsx | 11 +++++++++--
web-console/src/utils/sampler.ts | 18 +++++++++++-------
.../views/load-data-view/load-data-view.tsx | 19 +++++++++++--------
5 files changed, 35 insertions(+), 22 deletions(-)
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
index ff78d79683f6..f01f46dde965 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
@@ -26,7 +26,7 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.collect.FluentIterable;
-import com.google.common.collect.ImmutableSortedSet;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;
import org.apache.druid.client.coordinator.CoordinatorClient;
import org.apache.druid.data.input.AbstractInputSource;
@@ -72,7 +72,6 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ThreadLocalRandom;
@@ -90,7 +89,7 @@ public class DruidInputSource extends AbstractInputSource implements SplittableI
/**
* Timestamp formats that the standard __time column can be parsed with.
*/
- private static final Set<String> STANDARD_TIME_COLUMN_FORMATS = ImmutableSortedSet.of("auto", "millis");
+ private static final List<String> STANDARD_TIME_COLUMN_FORMATS = ImmutableList.of("millis", "auto");
private final String dataSource;
// Exactly one of interval and segmentIds should be non-null. Typically 'interval' is specified directly
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java
index 5d1d55ba7b63..5fbe36cef2ff 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java
@@ -121,7 +121,7 @@ public void testAutoCompactionDutySubmitAndVerifyCompaction() throws Exception
fullDatasourceName,
AutoCompactionSnapshot.AutoCompactionScheduleStatus.RUNNING,
0,
- 22489,
+ 22488,
0,
0,
3,
@@ -267,7 +267,7 @@ public void testAutoCompactionDutyCanUpdateTaskSlots() throws Exception
fullDatasourceName,
AutoCompactionSnapshot.AutoCompactionScheduleStatus.RUNNING,
0,
- 22489,
+ 22488,
0,
0,
3,
diff --git a/web-console/src/druid-models/timestamp-spec.tsx b/web-console/src/druid-models/timestamp-spec.tsx
index b6c595b17dab..f6a8263998fa 100644
--- a/web-console/src/druid-models/timestamp-spec.tsx
+++ b/web-console/src/druid-models/timestamp-spec.tsx
@@ -32,11 +32,18 @@ import { Transform } from './transform-spec';
const NO_SUCH_COLUMN = '!!!_no_such_column_!!!';
+export const TIME_COLUMN = '__time';
+
export const PLACEHOLDER_TIMESTAMP_SPEC: TimestampSpec = {
column: NO_SUCH_COLUMN,
missingValue: '1970-01-01T00:00:00Z',
};
+export const REINDEX_TIMESTAMP_SPEC: TimestampSpec = {
+ column: TIME_COLUMN,
+ format: 'millis',
+};
+
export const CONSTANT_TIMESTAMP_SPEC: TimestampSpec = {
column: NO_SUCH_COLUMN,
missingValue: '2010-01-01T00:00:00Z',
@@ -48,7 +55,7 @@ export function getTimestampSchema(spec: IngestionSpec): TimestampSchema {
const transforms: Transform[] =
deepGet(spec, 'spec.dataSchema.transformSpec.transforms') || EMPTY_ARRAY;
- const timeTransform = transforms.find(transform => transform.name === '__time');
+ const timeTransform = transforms.find(transform => transform.name === TIME_COLUMN);
if (timeTransform) return 'expression';
const timestampSpec = deepGet(spec, 'spec.dataSchema.timestampSpec') || EMPTY_OBJECT;
@@ -74,7 +81,7 @@ export function getTimestampSpecExpressionFromSpec(spec: IngestionSpec): string
const transforms: Transform[] =
deepGet(spec, 'spec.dataSchema.transformSpec.transforms') || EMPTY_ARRAY;
- const timeTransform = transforms.find(transform => transform.name === '__time');
+ const timeTransform = transforms.find(transform => transform.name === TIME_COLUMN);
if (!timeTransform) return;
return timeTransform.expression;
}
diff --git a/web-console/src/utils/sampler.ts b/web-console/src/utils/sampler.ts
index 5f8b97eb34e5..43fe5666df56 100644
--- a/web-console/src/utils/sampler.ts
+++ b/web-console/src/utils/sampler.ts
@@ -27,6 +27,8 @@ import {
isDruidSource,
MetricSpec,
PLACEHOLDER_TIMESTAMP_SPEC,
+ REINDEX_TIMESTAMP_SPEC,
+ TIME_COLUMN,
TimestampSpec,
Transform,
TransformSpec,
@@ -150,13 +152,13 @@ export function headerFromSampleResponse(options: HeaderFromSampleResponseOption
let columns = sortWithPrefixSuffix(
dedupe(sampleResponse.data.flatMap(s => (s.parsed ? Object.keys(s.parsed) : []))).sort(),
- columnOrder || ['__time'],
+ columnOrder || [TIME_COLUMN],
suffixColumnOrder || [],
alphanumericCompare,
);
if (ignoreTimeColumn) {
- columns = columns.filter(c => c !== '__time');
+ columns = columns.filter(c => c !== TIME_COLUMN);
}
return columns;
@@ -287,7 +289,7 @@ export async function sampleForConnect(
ioConfig,
dataSchema: {
dataSource: 'sample',
- timestampSpec: PLACEHOLDER_TIMESTAMP_SPEC,
+ timestampSpec: reingestMode ? REINDEX_TIMESTAMP_SPEC : PLACEHOLDER_TIMESTAMP_SPEC,
dimensionsSpec: {},
},
} as any,
@@ -335,13 +337,15 @@ export async function sampleForParser(
sampleStrategy,
);
+ const reingestMode = isDruidSource(spec);
+
const sampleSpec: SampleSpec = {
type: samplerType,
spec: {
ioConfig,
dataSchema: {
dataSource: 'sample',
- timestampSpec: PLACEHOLDER_TIMESTAMP_SPEC,
+ timestampSpec: reingestMode ? REINDEX_TIMESTAMP_SPEC : PLACEHOLDER_TIMESTAMP_SPEC,
dimensionsSpec: {},
},
},
@@ -395,7 +399,7 @@ export async function sampleForTimestamp(
dimensionsSpec: {},
timestampSpec,
transformSpec: {
- transforms: transforms.filter(transform => transform.name === '__time'),
+ transforms: transforms.filter(transform => transform.name === TIME_COLUMN),
},
},
},
@@ -456,7 +460,7 @@ export async function sampleForTransform(
headerFromSampleResponse({
sampleResponse: sampleResponseHack,
ignoreTimeColumn: true,
- columnOrder: ['__time'].concat(inputFormatColumns),
+ columnOrder: [TIME_COLUMN].concat(inputFormatColumns),
}).concat(transforms.map(t => t.name)),
);
}
@@ -515,7 +519,7 @@ export async function sampleForFilter(
headerFromSampleResponse({
sampleResponse: sampleResponseHack,
ignoreTimeColumn: true,
- columnOrder: ['__time'].concat(inputFormatColumns),
+ columnOrder: [TIME_COLUMN].concat(inputFormatColumns),
}).concat(transforms.map(t => t.name)),
);
}
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index a9d6a6274c3c..0cfa0bc881e2 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -54,6 +54,7 @@ import {
} from '../../components';
import { FormGroupWithInfo } from '../../components/form-group-with-info/form-group-with-info';
import { AsyncActionDialog } from '../../dialogs';
+import { TIME_COLUMN } from '../../druid-models';
import {
addTimestampTransform,
CONSTANT_TIMESTAMP_SPEC,
@@ -1221,7 +1222,7 @@ export class LoadDataView extends React.PureComponent
- .filter(k => k !== '__time' && !aggregators[k])
+ .filter(k => k !== TIME_COLUMN && !aggregators[k])
.map(k => ({
name: k,
type: String(inputData.columns![k].type || 'string').toLowerCase(),
@@ -1453,7 +1454,7 @@ export class LoadDataView extends React.PureComponent
From: Gian Merlino
Date: Sun, 6 Dec 2020 19:13:12 -0800
Subject: [PATCH 18/24] Adjustments for tests.
---
.../overlord/sampler/InputSourceSampler.java | 6 +--
web-console/e2e-tests/reindexing.spec.ts | 52 +++++++++----------
2 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
index 94a3e52e82b2..05e31b9a18ee 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
@@ -50,7 +50,7 @@
import javax.annotation.Nullable;
import java.io.File;
import java.util.ArrayList;
-import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -162,10 +162,10 @@ public SamplerResponse sample(
columnNames.remove(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
for (Row row : index) {
- Map<String, Object> parsed = new HashMap<>();
+ Map<String, Object> parsed = new LinkedHashMap<>();
- columnNames.forEach(k -> parsed.put(k, row.getRaw(k)));
parsed.put(ColumnHolder.TIME_COLUMN_NAME, row.getTimestampFromEpoch());
+ columnNames.forEach(k -> parsed.put(k, row.getRaw(k)));
Number sortKey = row.getMetric(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
if (sortKey != null) {
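The HashMap-to-LinkedHashMap switch, together with putting __time first, is what gives sampler responses a stable column order. A tiny illustration of the property being relied on:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class OrderSketch
    {
      public static void main(String[] args)
      {
        final Map<String, Object> parsed = new LinkedHashMap<>();
        parsed.put("__time", 1442018818771L);   // inserted first, iterated first
        parsed.put("channel", "#en.wikipedia");
        parsed.put("added", "36");

        // LinkedHashMap iterates in insertion order; HashMap guarantees no order at all.
        System.out.println(parsed); // {__time=1442018818771, channel=#en.wikipedia, added=36}
      }
    }

The reordered e2e expectations below follow directly from this change.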
diff --git a/web-console/e2e-tests/reindexing.spec.ts b/web-console/e2e-tests/reindexing.spec.ts
index ae45b735965f..23be54cf74e6 100644
--- a/web-console/e2e-tests/reindexing.spec.ts
+++ b/web-console/e2e-tests/reindexing.spec.ts
@@ -115,50 +115,50 @@ function validateConnectLocalData(preview: string) {
expect(firstLine).toBe(
'Druid row: {' +
'"__time":1442018818771' +
- ',"isRobot":"false"' +
- ',"countryIsoCode":null' +
- ',"added":"36"' +
- ',"regionName":null' +
',"channel":"#en.wikipedia"' +
- ',"delta":"36"' +
- ',"isUnpatrolled":"false"' +
- ',"isNew":"false"' +
- ',"isMinor":"false"' +
- ',"isAnonymous":"false"' +
- ',"deleted":"0"' +
',"cityName":null' +
- ',"metroCode":null' +
- ',"namespace":"Talk"' +
',"comment":"added project"' +
+ ',"countryIsoCode":null' +
',"countryName":null' +
+ ',"isAnonymous":"false"' +
+ ',"isMinor":"false"' +
+ ',"isNew":"false"' +
+ ',"isRobot":"false"' +
+ ',"isUnpatrolled":"false"' +
+ ',"metroCode":null' +
+ ',"namespace":"Talk"' +
',"page":"Talk:Oswald Tilghman"' +
- ',"user":"GELongstreet"' +
',"regionIsoCode":null' +
+ ',"regionName":null' +
+ ',"user":"GELongstreet"' +
+ ',"added":"36"' +
+ ',"deleted":"0"' +
+ ',"delta":"36"' +
'}',
);
const lastLine = lines[lines.length - 1];
expect(lastLine).toBe(
'Druid row: {' +
'"__time":1442020314823' +
- ',"isRobot":"false"' +
- ',"countryIsoCode":null' +
- ',"added":"1"' +
- ',"regionName":null' +
',"channel":"#en.wikipedia"' +
- ',"delta":"1"' +
- ',"isUnpatrolled":"false"' +
- ',"isNew":"false"' +
- ',"isMinor":"true"' +
- ',"isAnonymous":"false"' +
- ',"deleted":"0"' +
',"cityName":null' +
- ',"metroCode":null' +
- ',"namespace":"Main"' +
',"comment":"/* History */[[WP:AWB/T|Typo fixing]], [[WP:AWB/T|typo(s) fixed]]: nothern → northern using [[Project:AWB|AWB]]"' +
+ ',"countryIsoCode":null' +
',"countryName":null' +
+ ',"isAnonymous":"false"' +
+ ',"isMinor":"true"' +
+ ',"isNew":"false"' +
+ ',"isRobot":"false"' +
+ ',"isUnpatrolled":"false"' +
+ ',"metroCode":null' +
+ ',"namespace":"Main"' +
',"page":"Hapoel Katamon Jerusalem F.C."' +
- ',"user":"The Quixotic Potato"' +
',"regionIsoCode":null' +
+ ',"regionName":null' +
+ ',"user":"The Quixotic Potato"' +
+ ',"added":"1"' +
+ ',"deleted":"0"' +
+ ',"delta":"1"' +
'}',
);
}
From 7695d8a5e67ad7a8a8414805b7ff389ad93724cd Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Sun, 6 Dec 2020 21:15:47 -0800
Subject: [PATCH 19/24] Reorder test data.
---
web-console/e2e-tests/reindexing.spec.ts | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/web-console/e2e-tests/reindexing.spec.ts b/web-console/e2e-tests/reindexing.spec.ts
index 23be54cf74e6..6c6f68f6f5cc 100644
--- a/web-console/e2e-tests/reindexing.spec.ts
+++ b/web-console/e2e-tests/reindexing.spec.ts
@@ -116,24 +116,24 @@ function validateConnectLocalData(preview: string) {
'Druid row: {' +
'"__time":1442018818771' +
',"channel":"#en.wikipedia"' +
- ',"cityName":null' +
',"comment":"added project"' +
- ',"countryIsoCode":null' +
- ',"countryName":null' +
',"isAnonymous":"false"' +
',"isMinor":"false"' +
',"isNew":"false"' +
',"isRobot":"false"' +
',"isUnpatrolled":"false"' +
- ',"metroCode":null' +
',"namespace":"Talk"' +
',"page":"Talk:Oswald Tilghman"' +
- ',"regionIsoCode":null' +
- ',"regionName":null' +
',"user":"GELongstreet"' +
',"added":"36"' +
',"deleted":"0"' +
',"delta":"36"' +
+ ',"cityName":null' +
+ ',"countryIsoCode":null' +
+ ',"countryName":null' +
+ ',"regionIsoCode":null' +
+ ',"regionName":null' +
+ ',"metroCode":null' +
'}',
);
const lastLine = lines[lines.length - 1];
@@ -141,24 +141,24 @@ function validateConnectLocalData(preview: string) {
'Druid row: {' +
'"__time":1442020314823' +
',"channel":"#en.wikipedia"' +
- ',"cityName":null' +
',"comment":"/* History */[[WP:AWB/T|Typo fixing]], [[WP:AWB/T|typo(s) fixed]]: nothern → northern using [[Project:AWB|AWB]]"' +
- ',"countryIsoCode":null' +
- ',"countryName":null' +
',"isAnonymous":"false"' +
',"isMinor":"true"' +
',"isNew":"false"' +
',"isRobot":"false"' +
',"isUnpatrolled":"false"' +
- ',"metroCode":null' +
',"namespace":"Main"' +
',"page":"Hapoel Katamon Jerusalem F.C."' +
- ',"regionIsoCode":null' +
- ',"regionName":null' +
',"user":"The Quixotic Potato"' +
',"added":"1"' +
',"deleted":"0"' +
',"delta":"1"' +
+ ',"cityName":null' +
+ ',"countryIsoCode":null' +
+ ',"countryName":null' +
+ ',"regionIsoCode":null' +
+ ',"regionName":null' +
+ ',"metroCode":null' +
'}',
);
}
From 02dfb64633adee53d3fef0e7b915ccbf795b51e1 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Mon, 7 Dec 2020 18:28:35 -0800
Subject: [PATCH 20/24] Update docs.
---
docs/configuration/index.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/configuration/index.md b/docs/configuration/index.md
index 7b1c4e3db2b6..2812ec73414a 100644
--- a/docs/configuration/index.md
+++ b/docs/configuration/index.md
@@ -1249,7 +1249,7 @@ Additional peon configs include:
|`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on middleManager restart for restorable tasks to gracefully exit.|PT5M|
|`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|`/tmp/druid-indexing`|
|`druid.indexer.task.restoreTasksOnRestart`|If true, MiddleManagers will attempt to stop tasks gracefully on shutdown and restore them on restart.|false|
-|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks using the [Druid input source](../ingestion/native-batch.md#druid-input-source) will ignore the provided timestampSpec, and will use the `__time` column of the input datasource. This option is provided for compatibility with ingestion specs written before Druid 0.20.0.|false|
+|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks using the [Druid input source](../ingestion/native-batch.md#druid-input-source) will ignore the provided timestampSpec, and will use the `__time` column of the input datasource. This option is provided for compatibility with ingestion specs written before Druid 0.21.0.|false|
|`druid.indexer.server.maxChatRequests`|Maximum number of concurrent requests served by a task's chat handler. Set to 0 to disable limiting.|0|
If the peon is running in remote mode, there must be an Overlord up and running. Peons in remote mode can set the following configurations:
@@ -1314,7 +1314,7 @@ then the value from the configuration below is used:
|`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on Indexer restart for restorable tasks to gracefully exit.|PT5M|
|`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|`/tmp/druid-indexing`|
|`druid.indexer.task.restoreTasksOnRestart`|If true, the Indexer will attempt to stop tasks gracefully on shutdown and restore them on restart.|false|
-|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks using the [Druid input source](../ingestion/native-batch.md#druid-input-source) will ignore the provided timestampSpec, and will use the `__time` column of the input datasource. This option is provided for compatibility with ingestion specs written before Druid 0.20.0.|false|
+|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks using the [Druid input source](../ingestion/native-batch.md#druid-input-source) will ignore the provided timestampSpec, and will use the `__time` column of the input datasource. This option is provided for compatibility with ingestion specs written before Druid 0.21.0.|false|
|`druid.peon.taskActionClient.retry.minWait`|The minimum retry time to communicate with Overlord.|PT5S|
|`druid.peon.taskActionClient.retry.maxWait`|The maximum retry time to communicate with Overlord.|PT1M|
|`druid.peon.taskActionClient.retry.maxRetryCount`|The maximum number of retries to communicate with Overlord.|60|
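For operators, enabling the compatibility flag described in these rows is a single line in the service's runtime.properties (shown for illustration; the integration-test environment above sets the equivalent environment variable instead):

    druid.indexer.task.ignoreTimestampSpecForDruidInputSource=true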
From 8ce44bd2c43eea9dcfdde9b498bb1416ee32f112 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Fri, 29 Jan 2021 10:30:40 -0800
Subject: [PATCH 21/24] Update docs to say Druid 0.22.0 instead of 0.21.0.
---
docs/configuration/index.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/configuration/index.md b/docs/configuration/index.md
index 95159a11c0a9..a0a33b07e2b6 100644
--- a/docs/configuration/index.md
+++ b/docs/configuration/index.md
@@ -1251,7 +1251,7 @@ Additional peon configs include:
|`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on middleManager restart for restorable tasks to gracefully exit.|PT5M|
|`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|`/tmp/druid-indexing`|
|`druid.indexer.task.restoreTasksOnRestart`|If true, MiddleManagers will attempt to stop tasks gracefully on shutdown and restore them on restart.|false|
-|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks using the [Druid input source](../ingestion/native-batch.md#druid-input-source) will ignore the provided timestampSpec, and will use the `__time` column of the input datasource. This option is provided for compatibility with ingestion specs written before Druid 0.21.0.|false|
+|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks using the [Druid input source](../ingestion/native-batch.md#druid-input-source) will ignore the provided timestampSpec, and will use the `__time` column of the input datasource. This option is provided for compatibility with ingestion specs written before Druid 0.22.0.|false|
|`druid.indexer.server.maxChatRequests`|Maximum number of concurrent requests served by a task's chat handler. Set to 0 to disable limiting.|0|
If the peon is running in remote mode, there must be an Overlord up and running. Peons in remote mode can set the following configurations:
@@ -1316,7 +1316,7 @@ then the value from the configuration below is used:
|`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on Indexer restart for restorable tasks to gracefully exit.|PT5M|
|`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|`/tmp/druid-indexing`|
|`druid.indexer.task.restoreTasksOnRestart`|If true, the Indexer will attempt to stop tasks gracefully on shutdown and restore them on restart.|false|
-|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks using the [Druid input source](../ingestion/native-batch.md#druid-input-source) will ignore the provided timestampSpec, and will use the `__time` column of the input datasource. This option is provided for compatibility with ingestion specs written before Druid 0.21.0.|false|
+|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks using the [Druid input source](../ingestion/native-batch.md#druid-input-source) will ignore the provided timestampSpec, and will use the `__time` column of the input datasource. This option is provided for compatibility with ingestion specs written before Druid 0.22.0.|false|
|`druid.peon.taskActionClient.retry.minWait`|The minimum retry time to communicate with Overlord.|PT5S|
|`druid.peon.taskActionClient.retry.maxWait`|The maximum retry time to communicate with Overlord.|PT1M|
|`druid.peon.taskActionClient.retry.maxRetryCount`|The maximum number of retries to communicate with Overlord.|60|
From ade207e73bbc5410f3c7fa577e1fabce4d82000a Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Thu, 25 Feb 2021 08:54:55 -0800
Subject: [PATCH 22/24] Fix test.
---
.../apache/druid/indexing/input/DruidSegmentReaderTest.java | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java
index 3a01ec939fc3..9270f5f8a573 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java
@@ -47,6 +47,7 @@
import org.apache.druid.segment.IndexIO;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.Segment;
+import org.apache.druid.segment.SegmentLazyLoadFailCallback;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
@@ -597,7 +598,7 @@ public boolean isSegmentLoaded(DataSegment segment)
}
@Override
- public Segment getSegment(DataSegment segment, boolean lazy)
+ public Segment getSegment(DataSegment segment, boolean lazy, SegmentLazyLoadFailCallback loadFailed)
{
throw new UnsupportedOperationException("unused");
}
From 8fb44d7cd1e7f7f19ada2357baf18ae773a2d454 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Thu, 25 Feb 2021 13:05:20 -0800
Subject: [PATCH 23/24] Fix ITAutoCompactionTest.
---
.../druid/tests/coordinator/duty/ITAutoCompactionTest.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java
index a68e09cd7796..cdf7c396c70d 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java
@@ -129,7 +129,7 @@ public void testAutoCompactionDutySubmitAndVerifyCompaction() throws Exception
fullDatasourceName,
AutoCompactionSnapshot.AutoCompactionScheduleStatus.RUNNING,
0,
- 22488,
+ 22481,
0,
0,
3,
@@ -275,7 +275,7 @@ public void testAutoCompactionDutyCanUpdateTaskSlots() throws Exception
fullDatasourceName,
AutoCompactionSnapshot.AutoCompactionScheduleStatus.RUNNING,
0,
- 22488,
+ 22481,
0,
0,
3,
From 9bc0481e42ebcdc31f466871fea408b114da6a33 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Wed, 24 Mar 2021 18:30:10 -0700
Subject: [PATCH 24/24] Changes from review & from merging.
---
.../druid/data/input/impl/JsonReaderTest.java | 2 +-
docs/ingestion/native-batch.md | 5 +-
.../indexing/input/DruidInputSource.java | 45 +++-
.../indexing/input/DruidSegmentReader.java | 21 +-
.../indexing/input/DruidInputSourceTest.java | 224 ++++++++++++++++++
.../seekablestream/StreamChunkParserTest.java | 4 +-
6 files changed, 276 insertions(+), 25 deletions(-)
create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/input/DruidInputSourceTest.java
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/JsonReaderTest.java b/core/src/test/java/org/apache/druid/data/input/impl/JsonReaderTest.java
index 7e5e71b672a1..7ab52a095d51 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/JsonReaderTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/JsonReaderTest.java
@@ -379,7 +379,7 @@ public void testEmptyJSONText() throws IOException
new InputRowSchema(
new TimestampSpec("timestamp", "iso", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))),
- Collections.emptyList()
+ ColumnsFilter.all()
),
source,
null
diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md
index b4c7ff302e65..dece5bf260c4 100644
--- a/docs/ingestion/native-batch.md
+++ b/docs/ingestion/native-batch.md
@@ -1308,6 +1308,8 @@ and the format to `auto` or `millis`.
It is OK for the input and output datasources to be the same. In this case, newly generated data will overwrite the
previous data for the intervals specified in the `granularitySpec`. Generally, if you are going to do this, it is a
good idea to test out your reindexing by writing to a separate datasource before overwriting your main one.
+Alternatively, if your goals can be satisfied by [compaction](compaction.md), consider that instead as a simpler
+approach.
An example task spec is shown below. It reads from a hypothetical raw datasource `wikipedia_raw` and creates a new
rolled-up datasource `wikipedia_rollup` by grouping on hour, "countryName", and "page".
@@ -1353,8 +1355,7 @@ rolled-up datasource `wikipedia_rollup` by grouping on hour, "countryName", and
"tuningConfig": {
"type": "index_parallel",
"partitionsSpec": {
- "type": "hashed",
- "numShards": 1
+ "type": "hashed"
},
"forceGuaranteedRollup": true,
"maxNumConcurrentSubTasks": 1
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
index bf08c42f785d..c9d0f4e464b2 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
@@ -22,7 +22,6 @@
import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
-import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.collect.FluentIterable;
@@ -73,6 +72,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ThreadLocalRandom;
@@ -83,6 +83,7 @@
*
* Used internally by {@link org.apache.druid.indexing.common.task.CompactionTask}, and can also be used directly.
*/
+@JsonInclude(JsonInclude.Include.NON_NULL)
public class DruidInputSource extends AbstractInputSource implements SplittableInputSource<List<WindowedSegmentId>>
{
private static final Logger LOG = new Logger(DruidInputSource.class);
@@ -184,7 +185,6 @@ public String getDataSource()
@Nullable
@JsonProperty
- @JsonInclude(Include.NON_NULL)
public Interval getInterval()
{
return interval;
@@ -192,14 +192,12 @@ public Interval getInterval()
@Nullable
@JsonProperty("segments")
- @JsonInclude(Include.NON_NULL)
public List<WindowedSegmentId> getSegmentIds()
{
return segmentIds;
}
@JsonProperty("filter")
- @JsonInclude(Include.NON_NULL)
public DimFilter getDimFilter()
{
return dimFilter;
@@ -209,7 +207,6 @@ public DimFilter getDimFilter()
* Included for serde backwards-compatibility only. Not used.
*/
@JsonProperty
- @JsonInclude(Include.NON_NULL)
public List<String> getDimensions()
{
return dimensions;
@@ -219,7 +216,6 @@ public List getDimensions()
* Included for serde backwards-compatibility only. Not used.
*/
@JsonProperty
- @JsonInclude(Include.NON_NULL)
public List<String> getMetrics()
{
return metrics;
@@ -355,6 +351,43 @@ public boolean needsFormat()
return false;
}
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ DruidInputSource that = (DruidInputSource) o;
+ return Objects.equals(dataSource, that.dataSource)
+ && Objects.equals(interval, that.interval)
+ && Objects.equals(segmentIds, that.segmentIds)
+ && Objects.equals(dimFilter, that.dimFilter)
+ && Objects.equals(dimensions, that.dimensions)
+ && Objects.equals(metrics, that.metrics);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(dataSource, interval, segmentIds, dimFilter, dimensions, metrics);
+ }
+
+ @Override
+ public String toString()
+ {
+ return "DruidInputSource{" +
+ "dataSource='" + dataSource + '\'' +
+ ", interval=" + interval +
+ ", segmentIds=" + segmentIds +
+ ", dimFilter=" + dimFilter +
+ (dimensions != null ? ", dimensions=" + dimensions : "") +
+ (metrics != null ? ", metrics=" + metrics : "") +
+ '}';
+ }
+
public static Iterator<InputSplit<List<WindowedSegmentId>>> createSplits(
CoordinatorClient coordinatorClient,
RetryPolicyFactory retryPolicyFactory,
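Moving @JsonInclude(Include.NON_NULL) from individual getters to the class, as above, makes Jackson omit every null property, including the legacy dimensions and metrics, which is what lets the serde tests below round-trip the compact JSON exactly. A minimal sketch of that Jackson behavior, using a hypothetical POJO:

    import com.fasterxml.jackson.annotation.JsonInclude;
    import com.fasterxml.jackson.annotation.JsonProperty;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import java.util.List;

    public class NonNullSerdeSketch
    {
      @JsonInclude(JsonInclude.Include.NON_NULL)
      static class Example
      {
        @JsonProperty
        public String dataSource = "foo";

        @JsonProperty
        public List<String> dimensions = null; // omitted when null
      }

      public static void main(String[] args) throws Exception
      {
        // Prints {"dataSource":"foo"}; the null property is dropped.
        System.out.println(new ObjectMapper().writeValueAsString(new Example()));
      }
    }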
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java
index c1ddc26eff77..8e3bfe7108a2 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java
@@ -75,9 +75,8 @@ public class DruidSegmentReader extends IntermediateRowParsingReader<Map<String, Object>>
@@ ... @@ protected CloseableIterator<Map<String, Object>> intermediateRowIterator() throw
@Override
protected List<InputRow> parseInputRows(Map<String, Object> intermediateRow) throws ParseException
{
- return Collections.singletonList(
- MapInputRowParser.parse(
- new InputRowSchema(
- timestampSpec,
- dimensionsSpec,
- columnsFilter
- ),
- intermediateRow
- )
- );
+ return Collections.singletonList(MapInputRowParser.parse(inputRowSchema, intermediateRow));
}
@Override
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidInputSourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidInputSourceTest.java
new file mode 100644
index 000000000000..dcdc537e9cd8
--- /dev/null
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidInputSourceTest.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.indexing.input;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.InjectableValues;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableList;
+import org.apache.druid.client.coordinator.CoordinatorClient;
+import org.apache.druid.data.input.InputSource;
+import org.apache.druid.guice.IndexingServiceInputSourceModule;
+import org.apache.druid.indexing.common.RetryPolicyFactory;
+import org.apache.druid.indexing.common.SegmentLoaderFactory;
+import org.apache.druid.indexing.common.config.TaskConfig;
+import org.apache.druid.indexing.firehose.WindowedSegmentId;
+import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.segment.IndexIO;
+import org.apache.druid.segment.TestHelper;
+import org.easymock.EasyMock;
+import org.hamcrest.CoreMatchers;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+public class DruidInputSourceTest
+{
+ private final IndexIO indexIO = EasyMock.createMock(IndexIO.class);
+ private final CoordinatorClient coordinatorClient = EasyMock.createMock(CoordinatorClient.class);
+ private final SegmentLoaderFactory segmentLoaderFactory = EasyMock.createMock(SegmentLoaderFactory.class);
+ private final RetryPolicyFactory retryPolicyFactory = EasyMock.createMock(RetryPolicyFactory.class);
+ private final TaskConfig taskConfig = EasyMock.createMock(TaskConfig.class);
+
+ private ObjectMapper mapper = null;
+
+ @Rule
+ public ExpectedException expectedException = ExpectedException.none();
+
+ @Before
+ public void setUp()
+ {
+ mapper = TestHelper.makeJsonMapper();
+ mapper.registerModules(new IndexingServiceInputSourceModule().getJacksonModules());
+
+ final InjectableValues.Std injectableValues = (InjectableValues.Std) mapper.getInjectableValues();
+ injectableValues.addValue(IndexIO.class, indexIO);
+ injectableValues.addValue(CoordinatorClient.class, coordinatorClient);
+ injectableValues.addValue(SegmentLoaderFactory.class, segmentLoaderFactory);
+ injectableValues.addValue(RetryPolicyFactory.class, retryPolicyFactory);
+ injectableValues.addValue(TaskConfig.class, taskConfig);
+ }
+
+ @Test
+ public void testSerdeUsingIntervals() throws Exception
+ {
+ final String json = "{"
+ + "\"type\":\"druid\","
+ + "\"dataSource\":\"foo\","
+ + "\"interval\":\"2000-01-01T00:00:00.000Z/2001-01-01T00:00:00.000Z\""
+ + "}";
+
+ final InputSource inputSource = mapper.readValue(json, InputSource.class);
+
+ Assert.assertThat(inputSource, CoreMatchers.instanceOf(DruidInputSource.class));
+ Assert.assertEquals(
+ new DruidInputSource(
+ "foo",
+ Intervals.of("2000/2001"),
+ null,
+ null,
+ null,
+ null,
+ indexIO,
+ coordinatorClient,
+ segmentLoaderFactory,
+ retryPolicyFactory,
+ taskConfig
+ ),
+ inputSource
+ );
+
+ Assert.assertEquals(json, mapper.writeValueAsString(inputSource));
+ }
+
+ @Test
+ public void testSerdeUsingIntervalsAndLegacyDimensionsMetrics() throws Exception
+ {
+ final String json = "{"
+ + "\"type\":\"druid\","
+ + "\"dataSource\":\"foo\","
+ + "\"interval\":\"2000-01-01T00:00:00.000Z/2001-01-01T00:00:00.000Z\","
+ + "\"dimensions\":[\"a\"],"
+ + "\"metrics\":[\"b\"]"
+ + "}";
+
+ final InputSource inputSource = mapper.readValue(json, InputSource.class);
+
+ Assert.assertThat(inputSource, CoreMatchers.instanceOf(DruidInputSource.class));
+ Assert.assertEquals(
+ new DruidInputSource(
+ "foo",
+ Intervals.of("2000/2001"),
+ null,
+ null,
+ ImmutableList.of("a"),
+ ImmutableList.of("b"),
+ indexIO,
+ coordinatorClient,
+ segmentLoaderFactory,
+ retryPolicyFactory,
+ taskConfig
+ ),
+ inputSource
+ );
+
+ Assert.assertEquals(json, mapper.writeValueAsString(inputSource));
+ }
+
+ @Test
+ public void testSerdeUsingSegments() throws Exception
+ {
+ final String json = "{"
+ + "\"type\":\"druid\","
+ + "\"dataSource\":\"foo\","
+ + "\"segments\":["
+ + "{\"segmentId\":\"foo_2000-01-01T00:00:00.000Z_2000-01-01T01:00:00.000Z_abc123\","
+ + "\"intervals\":[\"2000-01-01T00:00:00.000Z/2000-01-01T12:00:00.000Z\"]}"
+ + "]"
+ + "}";
+
+ final InputSource inputSource = mapper.readValue(json, InputSource.class);
+
+ Assert.assertThat(inputSource, CoreMatchers.instanceOf(DruidInputSource.class));
+ Assert.assertEquals(
+ new DruidInputSource(
+ "foo",
+ null,
+ ImmutableList.of(
+ new WindowedSegmentId(
+ "foo_2000-01-01T00:00:00.000Z_2000-01-01T01:00:00.000Z_abc123",
+ ImmutableList.of(Intervals.of("2000-01-01T00/2000-01-01T12"))
+ )
+ ),
+ null,
+ null,
+ null,
+ indexIO,
+ coordinatorClient,
+ segmentLoaderFactory,
+ retryPolicyFactory,
+ taskConfig
+ ),
+ inputSource
+ );
+
+ Assert.assertEquals(json, mapper.writeValueAsString(inputSource));
+ }
+
+ @Test
+ public void testSerdeUsingBothIntervalsAndSegments() throws Exception
+ {
+ final String json = "{"
+ + "\"type\":\"druid\","
+ + "\"dataSource\":\"foo\","
+ + "\"interval\":\"2000-01-01T00:00:00.000Z/2001-01-01T00:00:00.000Z\","
+ + "\"segments\":["
+ + " {\"segmentId\":\"foo_2000-01-01T00:00:00.000Z_2000-01-01T01:00:00.000Z_abc123\","
+ + " \"intervals\":[\"2000-01-01T00:00:00.000Z/2000-01-01T12:00:00.000Z\"]}"
+ + "]"
+ + "}";
+
+
+ expectedException.expect(JsonProcessingException.class);
+ expectedException.expectMessage("Specify exactly one of 'interval' and 'segments'");
+
+ mapper.readValue(json, InputSource.class);
+ }
+
+ @Test
+ public void testSerdeUsingNeitherIntervalsNorSegments() throws Exception
+ {
+ final String json = "{"
+ + "\"type\":\"druid\","
+ + "\"dataSource\":\"foo\""
+ + "}";
+
+ expectedException.expect(JsonProcessingException.class);
+ expectedException.expectMessage("Specify exactly one of 'interval' and 'segments'");
+
+ mapper.readValue(json, InputSource.class);
+ }
+
+ @Test
+ public void testSerdeUsingNoDataSource() throws Exception
+ {
+ final String json = "{"
+ + "\"type\":\"druid\","
+ + "\"interval\":\"2000-01-01T00:00:00.000Z/2001-01-01T00:00:00.000Z\""
+ + "}";
+
+ expectedException.expect(JsonProcessingException.class);
+ expectedException.expectMessage("dataSource");
+
+ mapper.readValue(json, InputSource.class);
+ }
+}
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/StreamChunkParserTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/StreamChunkParserTest.java
index 92af29e17880..1cab704a2eff 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/StreamChunkParserTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/StreamChunkParserTest.java
@@ -180,7 +180,7 @@ public void parseEmptyNotEndOfShard() throws IOException
final StreamChunkParser<ByteEntity> chunkParser = new StreamChunkParser<>(
null,
inputFormat,
- new InputRowSchema(TIMESTAMP_SPEC, DimensionsSpec.EMPTY, Collections.emptyList()),
+ new InputRowSchema(TIMESTAMP_SPEC, DimensionsSpec.EMPTY, ColumnsFilter.all()),
TransformSpec.NONE,
temporaryFolder.newFolder(),
row -> true,
@@ -203,7 +203,7 @@ public void parseEmptyEndOfShard() throws IOException
final StreamChunkParser<ByteEntity> chunkParser = new StreamChunkParser<>(
null,
inputFormat,
- new InputRowSchema(TIMESTAMP_SPEC, DimensionsSpec.EMPTY, Collections.emptyList()),
+ new InputRowSchema(TIMESTAMP_SPEC, DimensionsSpec.EMPTY, ColumnsFilter.all()),
TransformSpec.NONE,
temporaryFolder.newFolder(),
row -> true,