From e1188e3a50455fca1f315b1912cbe7c2bd712985 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Tue, 2 Aug 2022 16:08:41 +0530 Subject: [PATCH 01/33] removes FiniteFirehoseFactory and its implementations, deprecates FirehoseFactory, EventReceiverFirehoseFactory, Firehose --- .../data/input/FiniteFirehoseFactory.java | 72 -- .../org/apache/druid/data/input/Firehose.java | 1 + .../druid/data/input/FirehoseFactory.java | 7 +- .../FirehoseFactoryToInputSourceAdaptor.java | 104 --- .../druid/data/input/SplitHintSpec.java | 2 - .../AbstractTextFilesFirehoseFactory.java | 144 ---- .../data/input/impl/RetryingInputStream.java | 3 +- .../input/impl/prefetch/CacheManager.java | 97 --- .../data/input/impl/prefetch/FetchConfig.java | 96 --- .../data/input/impl/prefetch/FetchedFile.java | 68 -- .../data/input/impl/prefetch/Fetcher.java | 292 -------- .../data/input/impl/prefetch/FileFetcher.java | 111 --- .../data/input/impl/prefetch/OpenObject.java | 71 -- .../PrefetchableTextFilesFirehoseFactory.java | 318 --------- ...rehoseFactoryToInputSourceAdaptorTest.java | 158 ----- .../data/input/impl/NoopFirehoseFactory.java | 54 -- ...fetchableTextFilesFirehoseFactoryTest.java | 640 ------------------ .../aliyun/OssFirehoseDruidModule.java | 46 -- .../aliyun/StaticOssFirehoseFactory.java | 243 ------- ...rg.apache.druid.initialization.DruidModule | 1 - .../firehose/cloudfiles/CloudFilesBlob.java | 101 --- .../CloudFilesFirehoseDruidModule.java | 48 -- .../StaticCloudFilesFirehoseFactory.java | 162 ----- ...rg.apache.druid.initialization.DruidModule | 1 - .../StaticCloudFilesFirehoseFactoryTest.java | 105 --- .../druid/firehose/azure/AzureBlob.java | 93 --- .../StaticAzureBlobStoreFirehoseFactory.java | 167 ----- .../azure/AzureStorageDruidModule.java | 2 - ...aticAzureBlobStoreFirehoseFactoryTest.java | 104 --- .../druid/firehose/google/GoogleBlob.java | 81 --- .../StaticGoogleBlobStoreFirehoseFactory.java | 155 ----- .../google/GoogleStorageDruidModule.java | 2 - ...ticGoogleBlobStoreFirehoseFactoryTest.java | 63 -- .../firehose/hdfs/HdfsFirehoseFactory.java | 127 ---- .../storage/hdfs/HdfsStorageDruidModule.java | 2 - .../hdfs/HdfsFirehoseFactoryTest.java | 218 ------ .../firehose/s3/S3FirehoseDruidModule.java | 48 -- .../firehose/s3/StaticS3FirehoseFactory.java | 236 ------- ...rg.apache.druid.initialization.DruidModule | 1 - .../s3/StaticS3FirehoseFactoryTest.java | 115 ---- .../guice/IndexingServiceFirehoseModule.java | 4 +- .../common/ReingestionTimelineUtils.java | 140 ---- .../common/task/AbstractBatchIndexTask.java | 57 +- .../druid/indexing/common/task/IndexTask.java | 21 +- ...putSourceSplitParallelIndexTaskRunner.java | 14 +- .../parallel/ParallelIndexIngestionSpec.java | 5 +- .../parallel/ParallelIndexSupervisorTask.java | 14 +- .../PartialDimensionCardinalityTask.java | 4 +- .../PartialDimensionDistributionTask.java | 4 +- .../parallel/PartialSegmentGenerateTask.java | 4 +- .../SinglePhaseParallelIndexTaskRunner.java | 15 +- .../batch/parallel/SinglePhaseSubTask.java | 7 +- .../IngestSegmentFirehoseFactory.java | 330 --------- .../sampler/IndexTaskSamplerSpec.java | 35 +- .../SeekableStreamSamplerSpec.java | 161 +---- ...stRealtimeTask.java => TestIndexTask.java} | 54 +- .../common/task/CompactionTaskRunTest.java | 93 --- .../common/task/IndexIngestionSpecTest.java | 55 -- .../indexing/common/task/IndexTaskTest.java | 198 +++--- .../common/task/IngestionTestBase.java | 57 ++ .../indexing/common/task/TaskSerdeTest.java | 18 +- ...bstractMultiPhaseParallelIndexingTest.java | 18 +-
...stractParallelIndexSupervisorTaskTest.java | 6 +- .../parallel/HashPartitionTaskKillTest.java | 8 +- .../ParallelIndexSupervisorTaskSerdeTest.java | 5 - .../SinglePhaseParallelIndexingTest.java | 29 +- .../IngestSegmentFirehoseFactoryTest.java | 615 ----------------- ...estSegmentFirehoseFactoryTimelineTest.java | 453 ------------- .../overlord/RemoteTaskRunnerTest.java | 38 +- .../sampler/InputSourceSamplerTest.java | 39 +- .../indexing/worker/TaskAnnouncementTest.java | 10 +- .../worker/WorkerTaskMonitorTest.java | 4 +- .../apache/druid/guice/FirehoseModule.java | 12 +- .../firehose/CombiningFirehoseFactory.java | 160 ----- .../EventReceiverFirehoseFactory.java | 1 + .../firehose/HttpFirehoseFactory.java | 165 ----- .../firehose/InlineFirehoseFactory.java | 105 --- .../firehose/LocalFirehoseFactory.java | 120 ---- .../firehose/PrefetchSqlFirehoseFactory.java | 280 -------- .../segment/realtime/firehose/SqlFetcher.java | 101 --- .../realtime/firehose/SqlFirehoseFactory.java | 115 ---- .../CombiningFirehoseFactoryTest.java | 174 ----- .../firehose/HttpFirehoseFactoryTest.java | 145 ---- .../firehose/InlineFirehoseFactoryTest.java | 129 ---- .../firehose/LocalFirehoseFactoryTest.java | 105 --- .../firehose/SqlFirehoseFactoryTest.java | 233 ------- .../cli/validate/DruidJsonValidatorTest.java | 52 +- 87 files changed, 339 insertions(+), 8432 deletions(-) delete mode 100644 core/src/main/java/org/apache/druid/data/input/FiniteFirehoseFactory.java delete mode 100644 core/src/main/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptor.java delete mode 100644 core/src/main/java/org/apache/druid/data/input/impl/AbstractTextFilesFirehoseFactory.java delete mode 100644 core/src/main/java/org/apache/druid/data/input/impl/prefetch/CacheManager.java delete mode 100644 core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchConfig.java delete mode 100644 core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchedFile.java delete mode 100644 core/src/main/java/org/apache/druid/data/input/impl/prefetch/Fetcher.java delete mode 100644 core/src/main/java/org/apache/druid/data/input/impl/prefetch/FileFetcher.java delete mode 100644 core/src/main/java/org/apache/druid/data/input/impl/prefetch/OpenObject.java delete mode 100644 core/src/main/java/org/apache/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactory.java delete mode 100644 core/src/test/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptorTest.java delete mode 100644 core/src/test/java/org/apache/druid/data/input/impl/NoopFirehoseFactory.java delete mode 100644 core/src/test/java/org/apache/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactoryTest.java delete mode 100644 extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/firehose/aliyun/OssFirehoseDruidModule.java delete mode 100644 extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/firehose/aliyun/StaticOssFirehoseFactory.java delete mode 100644 extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/CloudFilesBlob.java delete mode 100644 extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/CloudFilesFirehoseDruidModule.java delete mode 100644 extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactory.java delete mode 100644 
extensions-contrib/cloudfiles-extensions/src/test/java/org/apache/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactoryTest.java delete mode 100644 extensions-core/azure-extensions/src/main/java/org/apache/druid/firehose/azure/AzureBlob.java delete mode 100644 extensions-core/azure-extensions/src/main/java/org/apache/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactory.java delete mode 100644 extensions-core/azure-extensions/src/test/java/org/apache/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactoryTest.java delete mode 100644 extensions-core/google-extensions/src/main/java/org/apache/druid/firehose/google/GoogleBlob.java delete mode 100644 extensions-core/google-extensions/src/main/java/org/apache/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactory.java delete mode 100644 extensions-core/google-extensions/src/test/java/org/apache/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactoryTest.java delete mode 100644 extensions-core/hdfs-storage/src/main/java/org/apache/druid/firehose/hdfs/HdfsFirehoseFactory.java delete mode 100644 extensions-core/hdfs-storage/src/test/java/org/apache/druid/firehose/hdfs/HdfsFirehoseFactoryTest.java delete mode 100644 extensions-core/s3-extensions/src/main/java/org/apache/druid/firehose/s3/S3FirehoseDruidModule.java delete mode 100644 extensions-core/s3-extensions/src/main/java/org/apache/druid/firehose/s3/StaticS3FirehoseFactory.java delete mode 100644 extensions-core/s3-extensions/src/test/java/org/apache/druid/firehose/s3/StaticS3FirehoseFactoryTest.java delete mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java delete mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java rename indexing-service/src/test/java/org/apache/druid/indexing/common/{TestRealtimeTask.java => TestIndexTask.java} (62%) delete mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java delete mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java delete mode 100644 server/src/main/java/org/apache/druid/segment/realtime/firehose/CombiningFirehoseFactory.java delete mode 100644 server/src/main/java/org/apache/druid/segment/realtime/firehose/HttpFirehoseFactory.java delete mode 100644 server/src/main/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseFactory.java delete mode 100644 server/src/main/java/org/apache/druid/segment/realtime/firehose/LocalFirehoseFactory.java delete mode 100644 server/src/main/java/org/apache/druid/segment/realtime/firehose/PrefetchSqlFirehoseFactory.java delete mode 100644 server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFetcher.java delete mode 100644 server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseFactory.java delete mode 100644 server/src/test/java/org/apache/druid/segment/realtime/firehose/CombiningFirehoseFactoryTest.java delete mode 100644 server/src/test/java/org/apache/druid/segment/realtime/firehose/HttpFirehoseFactoryTest.java delete mode 100644 server/src/test/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseFactoryTest.java delete mode 100644 server/src/test/java/org/apache/druid/segment/realtime/firehose/LocalFirehoseFactoryTest.java delete mode 100644 server/src/test/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseFactoryTest.java diff --git 
a/core/src/main/java/org/apache/druid/data/input/FiniteFirehoseFactory.java b/core/src/main/java/org/apache/druid/data/input/FiniteFirehoseFactory.java deleted file mode 100644 index d63ac3b69631..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/FiniteFirehoseFactory.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input; - -import com.fasterxml.jackson.annotation.JsonIgnore; -import org.apache.druid.data.input.impl.InputRowParser; - -import javax.annotation.Nullable; -import java.io.IOException; -import java.util.stream.Stream; - -/** - * {@link FiniteFirehoseFactory} designed for batch processing. Its implementations assume that the number of inputs is - * limited. - * - * This class is deprecated in favor of {@link InputSource}. - * - * @param <T> parser type - * @param <S> input split type - */ -@Deprecated -public interface FiniteFirehoseFactory<T extends InputRowParser, S> extends FirehoseFactory<T> -{ - /** - * Returns true if this {@link FiniteFirehoseFactory} supports parallel batch indexing. - */ - @JsonIgnore - @Override - default boolean isSplittable() - { - return true; - } - - /** - * Returns a {@link Stream} of {@link InputSplit}s. In parallel batch indexing, each {@link InputSplit} is processed - * by a sub task. - * - * Listing splits may cause high overhead in some implementations. In this case, {@link InputSplit}s should be listed - * lazily so that the listing overhead can be amortized. - */ - @JsonIgnore - Stream<InputSplit<S>> getSplits(@Nullable SplitHintSpec splitHintSpec) throws IOException; - - /** - * Returns the number of splits returned by {@link #getSplits}. - */ - @JsonIgnore - int getNumSplits(@Nullable SplitHintSpec splitHintSpec) throws IOException; - - /** - * Returns the same {@link FiniteFirehoseFactory} but with the given {@link InputSplit}. The returned - * {@link FiniteFirehoseFactory} is used by sub tasks in parallel batch indexing. - */ - FiniteFirehoseFactory<T, S> withSplit(InputSplit<S> split); -} diff --git a/core/src/main/java/org/apache/druid/data/input/Firehose.java b/core/src/main/java/org/apache/druid/data/input/Firehose.java index 66995bda184a..a9ff6f33159c 100644 --- a/core/src/main/java/org/apache/druid/data/input/Firehose.java +++ b/core/src/main/java/org/apache/druid/data/input/Firehose.java @@ -43,6 +43,7 @@ * and {@link #nextRow()}. *
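The split contract removed above is easiest to see in use. As a minimal sketch, not part of this patch, this is how a parallel batch runner could have consumed the API; every name other than the interface methods is invented for illustration:

// Hypothetical consumer of the deleted split API (assumes imports for
// FiniteFirehoseFactory, InputRowParser, InputSplit, and IOException).
static <T extends InputRowParser, S> void submitSubTasks(FiniteFirehoseFactory<T, S> factory) throws IOException
{
  int estimatedSubTasks = factory.getNumSplits(null); // null: no split hint
  factory.getSplits(null).forEach(split -> {
    // Each sub task gets the same factory re-bound to exactly one split.
    FiniteFirehoseFactory<T, S> perTaskFactory = factory.withSplit(split);
    // ... wrap perTaskFactory in a sub task spec and submit it ...
  });
}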

*/ +@Deprecated @ExtensionPoint public interface Firehose extends Closeable { diff --git a/core/src/main/java/org/apache/druid/data/input/FirehoseFactory.java b/core/src/main/java/org/apache/druid/data/input/FirehoseFactory.java index 986d1d94a5e4..a5dcc4891685 100644 --- a/core/src/main/java/org/apache/druid/data/input/FirehoseFactory.java +++ b/core/src/main/java/org/apache/druid/data/input/FirehoseFactory.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonTypeInfo; import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory; import org.apache.druid.guice.annotations.ExtensionPoint; import org.apache.druid.java.util.common.parsers.ParseException; @@ -34,7 +33,10 @@ * FirehoseFactory creates a {@link Firehose} which is an interface holding onto the stream of incoming data. * It currently provides two methods for creating a {@link Firehose} and their default implementations call each other * for backward compatibility. Implementations of this interface must implement one of these methods. + * + * This class is deprecated in favor of {@link InputSource}. + */ +@Deprecated @ExtensionPoint @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") public interface FirehoseFactory @@ -62,9 +64,6 @@ default Firehose connect(T parser) throws IOException, ParseException * If this method returns null, then any attempt to call hasMore(), nextRow() and close() on the return * value will throw a surprising NPE. Throwing IOException on connection failure or runtime exception on * invalid configuration is preferred over returning null. -
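Since the new javadoc points implementers at {@link InputSource}, a short sketch of the replacement read path may be useful; inputRowSchema, inputFormat, and temporaryDirectory are assumed to be defined elsewhere, and the inline data is illustrative:

// InputSource-based equivalent of FirehoseFactory.connect() plus hasMore()/nextRow()
// iteration (sketch; InlineInputSource, InputSourceReader, and CloseableIterator are
// existing Druid types, while the schema/format/directory variables are assumptions).
InputSource source = new InlineInputSource("20190101,name_0,100\n");
InputSourceReader reader = source.reader(inputRowSchema, inputFormat, temporaryDirectory);
try (CloseableIterator<InputRow> rows = reader.read()) {
  while (rows.hasNext()) {
    InputRow row = rows.next(); // replaces Firehose.hasMore() / Firehose.nextRow()
  }
}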

- * Some fire hoses like {@link PrefetchableTextFilesFirehoseFactory} may use a temporary - * directory to cache data in it. * * @param parser an input row parser * @param temporaryDirectory a directory where temporary files are stored diff --git a/core/src/main/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptor.java b/core/src/main/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptor.java deleted file mode 100644 index 35322c2a0fdf..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptor.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input; - -import com.google.common.base.Preconditions; -import org.apache.druid.data.input.impl.FirehoseToInputSourceReaderAdaptor; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.SplittableInputSource; - -import javax.annotation.Nullable; -import java.io.File; -import java.io.IOException; -import java.util.stream.Stream; - -public class FirehoseFactoryToInputSourceAdaptor extends AbstractInputSource implements SplittableInputSource -{ - private final FiniteFirehoseFactory firehoseFactory; - private final InputRowParser inputRowParser; - - public FirehoseFactoryToInputSourceAdaptor(FiniteFirehoseFactory firehoseFactory, InputRowParser inputRowParser) - { - this.firehoseFactory = firehoseFactory; - this.inputRowParser = Preconditions.checkNotNull(inputRowParser, "inputRowParser"); - } - - public FiniteFirehoseFactory getFirehoseFactory() - { - return firehoseFactory; - } - - public InputRowParser getInputRowParser() - { - return inputRowParser; - } - - @Override - public boolean isSplittable() - { - return firehoseFactory.isSplittable(); - } - - @Override - public Stream createSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec) - throws IOException - { - if (firehoseFactory.isSplittable()) { - return firehoseFactory.getSplits(splitHintSpec); - } else { - throw new UnsupportedOperationException(); - } - } - - @Override - public int estimateNumSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec) throws IOException - { - if (firehoseFactory.isSplittable()) { - return firehoseFactory.getNumSplits(splitHintSpec); - } else { - throw new UnsupportedOperationException(); - } - } - - @Override - public SplittableInputSource withSplit(InputSplit split) - { - if (firehoseFactory.isSplittable()) { - return new FirehoseFactoryToInputSourceAdaptor( - firehoseFactory.withSplit(split), - inputRowParser - ); - } else { - throw new UnsupportedOperationException(); - } - } - - @Override - public boolean needsFormat() - { - return false; - } - - @Override - protected InputSourceReader 
fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory) - { - return new FirehoseToInputSourceReaderAdaptor(firehoseFactory, inputRowParser, temporaryDirectory); - } -} diff --git a/core/src/main/java/org/apache/druid/data/input/SplitHintSpec.java b/core/src/main/java/org/apache/druid/data/input/SplitHintSpec.java index 3c40ff840a77..100dfc8443e3 100644 --- a/core/src/main/java/org/apache/druid/data/input/SplitHintSpec.java +++ b/core/src/main/java/org/apache/druid/data/input/SplitHintSpec.java @@ -34,8 +34,6 @@ * want to give some hints to control the amount of data each sub task will read. SplitHintSpec can be used for this * purpose. Implementations can ignore the given hint. * - * @see FiniteFirehoseFactory#getSplits(SplitHintSpec) - * @see FiniteFirehoseFactory#getNumSplits(SplitHintSpec) * @see SplittableInputSource#createSplits * @see SplittableInputSource#estimateNumSplits */ diff --git a/core/src/main/java/org/apache/druid/data/input/impl/AbstractTextFilesFirehoseFactory.java b/core/src/main/java/org/apache/druid/data/input/impl/AbstractTextFilesFirehoseFactory.java deleted file mode 100644 index 592d014a6dc0..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/impl/AbstractTextFilesFirehoseFactory.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl; - -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import org.apache.commons.io.IOUtils; -import org.apache.commons.io.LineIterator; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.SplitHintSpec; -import org.apache.druid.java.util.common.logger.Logger; - -import javax.annotation.Nullable; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.stream.Stream; - -/** - * This is an abstract firehose factory for making firehoses that read text files. - * It provides a unified {@link #connect(StringInputRowParser, File)} implementation for its subclasses.
- * - * @param object type representing input data - */ -public abstract class AbstractTextFilesFirehoseFactory - implements FiniteFirehoseFactory -{ - private static final Logger LOG = new Logger(AbstractTextFilesFirehoseFactory.class); - - private List objects; - - @Override - public Firehose connect(StringInputRowParser firehoseParser, File temporaryDirectory) throws IOException - { - initializeObjectsIfNeeded(); - final Iterator iterator = objects.iterator(); - return new FileIteratingFirehose( - new Iterator() - { - @Override - public boolean hasNext() - { - return iterator.hasNext(); - } - - @Override - public LineIterator next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - final T object = iterator.next(); - try { - return IOUtils.lineIterator(wrapObjectStream(object, openObjectStream(object)), StandardCharsets.UTF_8); - } - catch (Exception e) { - LOG.error(e, "Exception reading object[%s]", object); - throw new RuntimeException(e); - } - } - }, - firehoseParser - ); - } - - protected void initializeObjectsIfNeeded() throws IOException - { - if (objects == null) { - objects = ImmutableList.copyOf(Preconditions.checkNotNull(initObjects(), "initObjects")); - } - } - - public List getObjects() - { - return objects; - } - - @Override - public Stream> getSplits(@Nullable SplitHintSpec splitHintSpec) throws IOException - { - initializeObjectsIfNeeded(); - return getObjects().stream().map(InputSplit::new); - } - - @Override - public int getNumSplits(@Nullable SplitHintSpec splitHintSpec) throws IOException - { - initializeObjectsIfNeeded(); - return getObjects().size(); - } - - /** - * Initialize objects to be read by this firehose. Since firehose factories are constructed whenever - * org.apache.druid.indexing.common.task.Task objects are deserialized, actual initialization of objects is deferred - * until {@link #connect(StringInputRowParser, File)} is called. - * - * @return a collection of initialized objects. - */ - protected abstract Collection initObjects() throws IOException; - - /** - * Open an input stream from the given object. If the object is compressed, this method should return a byte stream - * as it is compressed. The object compression should be handled in {@link #wrapObjectStream(Object, InputStream)}. - * - * @param object an object to be read - * - * @return an input stream for the object - */ - protected abstract InputStream openObjectStream(T object) throws IOException; - - /** - * Wrap the given input stream if needed. The decompression logic should be applied to the given stream if the object - * is compressed. 
- * - * @param object an input object - * @param stream a stream for the object - * @return a wrapped input stream - */ - protected abstract InputStream wrapObjectStream(T object, InputStream stream) throws IOException; -} diff --git a/core/src/main/java/org/apache/druid/data/input/impl/RetryingInputStream.java b/core/src/main/java/org/apache/druid/data/input/impl/RetryingInputStream.java index f4693fc81546..d517d5fcb1b7 100644 --- a/core/src/main/java/org/apache/druid/data/input/impl/RetryingInputStream.java +++ b/core/src/main/java/org/apache/druid/data/input/impl/RetryingInputStream.java @@ -24,7 +24,6 @@ import com.google.common.base.Predicate; import com.google.common.base.Throwables; import com.google.common.io.CountingInputStream; -import org.apache.druid.data.input.impl.prefetch.Fetcher; import org.apache.druid.data.input.impl.prefetch.ObjectOpenFunction; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.RetryUtils; @@ -36,7 +35,7 @@ import java.io.InputStream; /** - * This class is used by {@link Fetcher} when prefetch is disabled. It's responsible for re-opening the underlying input + * This class is responsible for re-opening the underlying input * stream for the input object on the given {@link #retryCondition}. * * @param object type diff --git a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/CacheManager.java b/core/src/main/java/org/apache/druid/data/input/impl/prefetch/CacheManager.java deleted file mode 100644 index ba8f7db9918d..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/CacheManager.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl.prefetch; - -import com.google.common.annotations.VisibleForTesting; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.logger.Logger; - -import java.util.ArrayList; -import java.util.List; - -/** - * A class managing cached files used by {@link PrefetchableTextFilesFirehoseFactory}. - */ -public class CacheManager -{ - private static final Logger LOG = new Logger(CacheManager.class); - - // A rough max size of total cached objects, which means the actual cached size can be bigger. The reason is our - // current client implementations for cloud storages like s3 don't support range scan yet, so we must download the - // whole file at once. It's still possible for the size of cached data to not exceed these variables by estimating the - // after-fetch size, but it makes us consider the case when any files cannot be fetched due to their large size, - // which makes the implementation complicated.
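The capacity comment above describes a soft limit. A tiny illustration, not from this patch, of the admission rule that cacheable() and cache() below implement:

// Soft-cap arithmetic in isolation (illustrative sizes): a file is admitted whenever the
// running total is still under the cap, so the cache may overshoot by up to one file.
long maxCacheCapacityBytes = 1024;
long totalCachedBytes = 0;
for (long fileLength : new long[]{600, 600, 600}) {
  if (totalCachedBytes < maxCacheCapacityBytes) { // cacheable()
    totalCachedBytes += fileLength;               // cache()
  }
}
// totalCachedBytes == 1200: the second file was admitted because 600 < 1024, overshooting
// the cap; the third file was rejected.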
- private final long maxCacheCapacityBytes; - - private final List> files = new ArrayList<>(); - - private long totalCachedBytes; - - public CacheManager(long maxCacheCapacityBytes) - { - this.maxCacheCapacityBytes = maxCacheCapacityBytes; - } - - public boolean isEnabled() - { - return maxCacheCapacityBytes > 0; - } - - public boolean cacheable() - { - // maxCacheCapacityBytes is a rough limit, so if totalCachedBytes is larger than it, no more caching is - // allowed. - return totalCachedBytes < maxCacheCapacityBytes; - } - - FetchedFile cache(FetchedFile fetchedFile) - { - if (!cacheable()) { - throw new ISE( - "Cache space is full. totalCachedBytes[%d], maxCacheCapacityBytes[%d]", - totalCachedBytes, - maxCacheCapacityBytes - ); - } - - final FetchedFile cachedFile = fetchedFile.cache(); - files.add(cachedFile); - totalCachedBytes += cachedFile.length(); - - LOG.info("Object[%s] is cached. Current cached bytes is [%d]", cachedFile.getObject(), totalCachedBytes); - return cachedFile; - } - - List> getFiles() - { - return files; - } - - @VisibleForTesting - long getTotalCachedBytes() - { - return totalCachedBytes; - } - - public long getMaxCacheCapacityBytes() - { - return maxCacheCapacityBytes; - } -} diff --git a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchConfig.java b/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchConfig.java deleted file mode 100644 index c645d73bead1..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchConfig.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl.prefetch; - -import javax.annotation.Nullable; -import java.util.concurrent.TimeUnit; - -/** - * Holds configurations required by {@link Fetcher} for fetching objects. - */ -public class FetchConfig -{ - private static final long DEFAULT_MAX_CACHE_CAPACITY_BYTES = 1024 * 1024 * 1024; // 1GiB - private static final long DEFAULT_MAX_FETCH_CAPACITY_BYTES = 1024 * 1024 * 1024; // 1GiB - private static final long DEFAULT_FETCH_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(60); - private static final int DEFAULT_MAX_FETCH_RETRY = 3; - - // A roughly max size of total fetched objects, but the actual fetched size can be bigger. The reason is our current - // client implementations for cloud storages like s3 don't support range scan yet, so we must download the whole file - // at once. It's still possible for the size of cached/fetched data to not exceed these variables by estimating the - // after-fetch size, but it makes us consider the case when any files cannot be fetched due to their large size, which - // makes the implementation complicated. 
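The FetchConfig constructor that follows maps any null setting onto the DEFAULT_* constants above; a brief usage sketch of that defaulting:

// All-null construction falls back to the documented defaults (sketch).
FetchConfig config = new FetchConfig(null, null, null, null, null);
// config.getMaxCacheCapacityBytes() -> 1 GiB
// config.getMaxFetchCapacityBytes() -> 1 GiB
// config.getPrefetchTriggerBytes()  -> 512 MiB (maxFetchCapacityBytes / 2)
// config.getFetchTimeout()          -> 60,000 ms
// config.getMaxFetchRetry()         -> 3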
- private final long maxFetchCapacityBytes; - - private final long maxCacheCapacityBytes; - - private final long prefetchTriggerBytes; - - // timeout for fetching an object from the remote site - private final long fetchTimeout; - - private final int maxFetchRetry; - - public FetchConfig( - @Nullable Long maxCacheCapacityBytes, - @Nullable Long maxFetchCapacityBytes, - @Nullable Long prefetchTriggerBytes, - @Nullable Long fetchTimeout, - @Nullable Integer maxFetchRetry - ) - { - this.maxCacheCapacityBytes = maxCacheCapacityBytes == null - ? DEFAULT_MAX_CACHE_CAPACITY_BYTES - : maxCacheCapacityBytes; - this.maxFetchCapacityBytes = maxFetchCapacityBytes == null - ? DEFAULT_MAX_FETCH_CAPACITY_BYTES - : maxFetchCapacityBytes; - this.prefetchTriggerBytes = prefetchTriggerBytes == null - ? this.maxFetchCapacityBytes / 2 - : prefetchTriggerBytes; - this.fetchTimeout = fetchTimeout == null ? DEFAULT_FETCH_TIMEOUT_MS : fetchTimeout; - this.maxFetchRetry = maxFetchRetry == null ? DEFAULT_MAX_FETCH_RETRY : maxFetchRetry; - } - - public long getMaxCacheCapacityBytes() - { - return maxCacheCapacityBytes; - } - - public long getMaxFetchCapacityBytes() - { - return maxFetchCapacityBytes; - } - - public long getPrefetchTriggerBytes() - { - return prefetchTriggerBytes; - } - - public long getFetchTimeout() - { - return fetchTimeout; - } - - public int getMaxFetchRetry() - { - return maxFetchRetry; - } -} diff --git a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchedFile.java b/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchedFile.java deleted file mode 100644 index 9874820e0720..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchedFile.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl.prefetch; - -import java.io.Closeable; -import java.io.File; - -/** - * A class containing meta information about fetched objects. This class used by {@link Fetcher}. - */ -class FetchedFile -{ - // Original object - private final T object; - // Fetched file stored in local disk - private final File file; - // Closer which is called when the file is not needed anymore. Usually this deletes the file except for cached files. 
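The resourceCloser comment above carries the key invariant: a plain fetched file frees its disk space on close, while a cached file must survive. A compact sketch, not from this patch, of the two closer variants (compare FetchedFile.cache() and Fetcher.getFileCloser() later in this patch):

// Two closer variants (illustrative; assumes java.io and java.util.concurrent.atomic
// imports, and a method context that may throw IOException).
AtomicLong fetchedBytes = new AtomicLong();
File file = File.createTempFile("fetch-", null);
Closeable deleteOnClose = () -> {
  long size = file.length();
  file.delete();                    // reclaim disk space
  fetchedBytes.addAndGet(-size);    // return bytes to the fetch budget
};
Closeable keepForCache = () -> {};  // cache(): the file must outlive the read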
- private final Closeable resourceCloser; - - FetchedFile(T object, File file, Closeable resourceCloser) - { - this.object = object; - this.file = file; - this.resourceCloser = resourceCloser; - } - - long length() - { - return file.length(); - } - - T getObject() - { - return object; - } - - File getFile() - { - return file; - } - - Closeable getResourceCloser() - { - return resourceCloser; - } - - FetchedFile cache() - { - return new FetchedFile<>(object, file, () -> {}); - } -} diff --git a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/Fetcher.java b/core/src/main/java/org/apache/druid/data/input/impl/prefetch/Fetcher.java deleted file mode 100644 index 436f42833439..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/Fetcher.java +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl.prefetch; - -import com.google.common.base.Preconditions; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.logger.Logger; - -import javax.annotation.Nullable; -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.util.ArrayDeque; -import java.util.Deque; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicLong; - -/** - * A file fetcher used by {@link PrefetchableTextFilesFirehoseFactory} and PrefetchSqlFirehoseFactory (in druid-server). - * See the javadoc of {@link PrefetchableTextFilesFirehoseFactory} for more details. - */ -public abstract class Fetcher implements Iterator> -{ - private static final Logger LOG = new Logger(Fetcher.class); - private static final String FETCH_FILE_PREFIX = "fetch-"; - private final CacheManager cacheManager; - private final List objects; - private final ExecutorService fetchExecutor; - - @Nullable - private final File temporaryDirectory; - - private final boolean prefetchEnabled; - - private final LinkedBlockingQueue> fetchedFiles = new LinkedBlockingQueue<>(); - - // Number of bytes of current fetched files. - // This is updated when a file is successfully fetched, a fetched file is deleted, or a fetched file is - // cached. - private final AtomicLong fetchedBytes = new AtomicLong(0); - private final Deque> fetchFutures = new ArrayDeque<>(); - private final FetchConfig fetchConfig; - - // nextFetchIndex indicates which object should be downloaded when fetch is triggered. 
- // This variable is always read by the same thread regardless of whether prefetch is enabled or not. - private int nextFetchIndex; - - private int numRemainingObjects; - - public Fetcher( - CacheManager cacheManager, - List objects, - ExecutorService fetchExecutor, - @Nullable File temporaryDirectory, - FetchConfig fetchConfig - ) - { - this.cacheManager = cacheManager; - this.objects = objects; - this.fetchExecutor = fetchExecutor; - this.temporaryDirectory = temporaryDirectory; - this.fetchConfig = fetchConfig; - this.prefetchEnabled = fetchConfig.getMaxFetchCapacityBytes() > 0; - this.numRemainingObjects = objects.size(); - - // (*) If cache is initialized, put all cached files into the queue. - this.fetchedFiles.addAll(cacheManager.getFiles()); - this.nextFetchIndex = fetchedFiles.size(); - if (cacheManager.isEnabled() || prefetchEnabled) { - Preconditions.checkNotNull(temporaryDirectory, "temporaryDirectory"); - } - if (prefetchEnabled) { - fetchIfNeeded(0L); - } - } - - /** - * Submit a fetch task if remainingBytes is smaller than prefetchTriggerBytes. - */ - private void fetchIfNeeded(long remainingBytes) - { - if ((fetchFutures.isEmpty() || fetchFutures.peekLast().isDone()) - && remainingBytes <= fetchConfig.getPrefetchTriggerBytes()) { - Future fetchFuture = fetchExecutor.submit(() -> { - fetch(); - return null; - }); - fetchFutures.add(fetchFuture); - } - } - - /** - * Fetch objects to a local disk up to {@link FetchConfig#maxFetchCapacityBytes}. - * This method is not thread safe and must be called by a single thread. Note that even if - * {@link FetchConfig#maxFetchCapacityBytes} is 0, at least 1 file is always fetched. - * This is for simplifying the design, and should be improved when our client implementations for cloud storages - * like S3 support range scan. - *

- * This method is called by {@link #fetchExecutor} if prefetch is enabled. Otherwise, it is called by the same - * thread. - */ - private void fetch() throws Exception - { - for (; nextFetchIndex < objects.size() - && fetchedBytes.get() <= fetchConfig.getMaxFetchCapacityBytes(); nextFetchIndex++) { - final T object = objects.get(nextFetchIndex); - LOG.info("Fetching [%d]th object[%s], fetchedBytes[%d]", nextFetchIndex, object, fetchedBytes.get()); - final File outFile = File.createTempFile(FETCH_FILE_PREFIX, null, temporaryDirectory); - fetchedBytes.addAndGet(download(object, outFile)); - fetchedFiles.put(new FetchedFile<>(object, outFile, getFileCloser(outFile, fetchedBytes))); - } - } - - /** - * Downloads an object into a file. The download process could be retried depending on the object source. - * - * @param object an object to be downloaded - * @param outFile a file which the object data is stored - * - * @return number of downloaded bytes - */ - protected abstract long download(T object, File outFile) throws IOException; - - /** - * Generates an instance of {@link OpenObject} for the given object. - */ - protected abstract OpenObject generateOpenObject(T object) throws IOException; - - protected FetchConfig getFetchConfig() - { - return fetchConfig; - } - - @Override - public boolean hasNext() - { - return numRemainingObjects > 0; - } - - @Override - public OpenObject next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - - // If fetch() fails, hasNext() always returns true and next() is always called. The below method checks that - // fetch() threw an exception and propagates it if exists. - checkFetchException(false); - - try { - final OpenObject openObject = prefetchEnabled ? openObjectFromLocal() : openObjectFromRemote(); - numRemainingObjects--; - return openObject; - } - catch (IOException e) { - throw new RuntimeException(e); - } - } - - private void checkFetchException(boolean wait) - { - try { - for (Future fetchFuture; (fetchFuture = fetchFutures.poll()) != null; ) { - if (wait) { - fetchFuture.get(fetchConfig.getFetchTimeout(), TimeUnit.MILLISECONDS); - } else { - if (fetchFuture.isDone()) { - fetchFuture.get(); - } else { - fetchFutures.addFirst(fetchFuture); - break; - } - } - } - } - catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } - catch (TimeoutException e) { - throw new ISE(e, "Failed to fetch, but cannot check the reason in [%d] ms", fetchConfig.getFetchTimeout()); - } - } - - private OpenObject openObjectFromLocal() throws IOException - { - final FetchedFile fetchedFile; - - if (!fetchedFiles.isEmpty()) { - // If there are already fetched files, use them - fetchedFile = fetchedFiles.poll(); - } else { - // Otherwise, wait for fetching - try { - fetchIfNeeded(fetchedBytes.get()); - fetchedFile = fetchedFiles.poll(fetchConfig.getFetchTimeout(), TimeUnit.MILLISECONDS); - if (fetchedFile == null) { - // Check the latest fetch is failed - checkFetchException(true); - // Or throw a timeout exception - throw new RuntimeException(new TimeoutException()); - } - } - catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - final FetchedFile maybeCached = cacheIfPossible(fetchedFile); - // trigger fetch again for subsequent next() calls - fetchIfNeeded(fetchedBytes.get()); - return new OpenObject<>(maybeCached); - } - - private OpenObject openObjectFromRemote() throws IOException - { - if (fetchedFiles.size() > 0) { - // If fetchedFiles is not empty even though prefetching is disabled, they 
should be cached files. - // We use them first. See (*). - return new OpenObject<>(fetchedFiles.poll()); - } else if (cacheManager.cacheable()) { - // If cache is enabled, first download an object to local storage and cache it. - try { - // Since maxFetchCapacityBytes is 0, at most one file is fetched. - fetch(); - FetchedFile fetchedFile = fetchedFiles.poll(); - if (fetchedFile == null) { - throw new ISE("Cannot fetch object[%s]", objects.get(nextFetchIndex - 1)); - } - final FetchedFile cached = cacheIfPossible(fetchedFile); - return new OpenObject<>(cached); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } else { - final T object = objects.get(nextFetchIndex); - LOG.info("Reading [%d]th object[%s]", nextFetchIndex, object); - nextFetchIndex++; - return generateOpenObject(object); - } - } - - private FetchedFile cacheIfPossible(FetchedFile fetchedFile) - { - if (cacheManager.cacheable()) { - final FetchedFile cachedFile = cacheManager.cache(fetchedFile); - // If the fetchedFile is cached, make a room for fetching more data immediately. - // This is because cache space and fetch space are separated. - fetchedBytes.addAndGet(-fetchedFile.length()); - return cachedFile; - } else { - return fetchedFile; - } - } - - private static Closeable getFileCloser( - final File file, - final AtomicLong fetchedBytes - ) - { - return () -> { - final long fileSize = file.length(); - file.delete(); - fetchedBytes.addAndGet(-fileSize); - }; - } -} diff --git a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FileFetcher.java b/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FileFetcher.java deleted file mode 100644 index 7d869dacf719..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FileFetcher.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl.prefetch; - -import com.google.common.base.Predicate; -import org.apache.druid.data.input.impl.RetryingInputStream; -import org.apache.druid.java.util.common.FileUtils; -import org.apache.druid.java.util.common.StringUtils; - -import javax.annotation.Nullable; -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.util.List; -import java.util.concurrent.ExecutorService; - -/** - * A file fetcher used by {@link PrefetchableTextFilesFirehoseFactory}. - * See the javadoc of {@link PrefetchableTextFilesFirehoseFactory} for more details. 
- */ -public class FileFetcher extends Fetcher -{ - private static final int BUFFER_SIZE = 1024 * 4; - private final ObjectOpenFunction openObjectFunction; - private final Predicate retryCondition; - private final byte[] buffer; - - FileFetcher( - CacheManager cacheManager, - List objects, - ExecutorService fetchExecutor, - @Nullable File temporaryDirectory, - FetchConfig fetchConfig, - ObjectOpenFunction openObjectFunction, - Predicate retryCondition - ) - { - - super( - cacheManager, - objects, - fetchExecutor, - temporaryDirectory, - fetchConfig - ); - - this.openObjectFunction = openObjectFunction; - this.retryCondition = retryCondition; - this.buffer = new byte[BUFFER_SIZE]; - } - - /** - * Downloads an object. It retries downloading {@link FetchConfig#maxFetchRetry} - * times and throws an exception. - * - * @param object an object to be downloaded - * @param outFile a file which the object data is stored - * - * @return number of downloaded bytes - */ - @Override - protected long download(T object, File outFile) throws IOException - { - return FileUtils.copyLarge( - object, - openObjectFunction, - outFile, - buffer, - retryCondition, - getFetchConfig().getMaxFetchRetry() + 1, - StringUtils.format("Failed to download object[%s]", object) - ); - } - - /** - * Generates an instance of {@link OpenObject} for which the underlying stream may be re-opened and retried - * based on the exception and retry condition. - */ - @Override - protected OpenObject generateOpenObject(T object) throws IOException - { - return new OpenObject<>( - object, - new RetryingInputStream<>(object, openObjectFunction, retryCondition, getFetchConfig().getMaxFetchRetry()), - getNoopCloser() - ); - } - - private static Closeable getNoopCloser() - { - return () -> { - }; - } -} diff --git a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/OpenObject.java b/core/src/main/java/org/apache/druid/data/input/impl/prefetch/OpenObject.java deleted file mode 100644 index 07b3125099c4..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/OpenObject.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl.prefetch; - -import org.apache.commons.io.FileUtils; - -import java.io.Closeable; -import java.io.IOException; -import java.io.InputStream; - -/** - * A class containing meta information about an opened object. This class is used to put related objects together. It - * contains an original object, an objectStream from the object, and a resourceCloser which knows how to release - * associated resources on closing. 
- * - * {@link PrefetchableTextFilesFirehoseFactory.ResourceCloseableLineIterator} consumes the objectStream and closes - * it with the resourceCloser. - */ -public class OpenObject -{ - // Original object - private final T object; - // Input stream from the object - private final InputStream objectStream; - // Closer which is called when the file is not needed anymore. Usually this deletes the file except for cached files. - private final Closeable resourceCloser; - - public OpenObject(FetchedFile fetchedFile) throws IOException - { - this(fetchedFile.getObject(), FileUtils.openInputStream(fetchedFile.getFile()), fetchedFile.getResourceCloser()); - } - - public OpenObject(T object, InputStream objectStream, Closeable resourceCloser) - { - this.object = object; - this.objectStream = objectStream; - this.resourceCloser = resourceCloser; - } - - public T getObject() - { - return object; - } - - public InputStream getObjectStream() - { - return objectStream; - } - - public Closeable getResourceCloser() - { - return resourceCloser; - } -} diff --git a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactory.java b/core/src/main/java/org/apache/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactory.java deleted file mode 100644 index 50eefae73665..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactory.java +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.data.input.impl.prefetch; - -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import com.google.common.collect.ImmutableList; -import org.apache.commons.io.LineIterator; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.impl.AbstractTextFilesFirehoseFactory; -import org.apache.druid.data.input.impl.FileIteratingFirehose; -import org.apache.druid.data.input.impl.RetryingInputStream; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.java.util.common.logger.Logger; - -import javax.annotation.Nullable; -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.nio.charset.StandardCharsets; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; - -/** - * PrefetchableTextFilesFirehoseFactory is an abstract firehose factory for reading text files. The firehose returned - * by this class provides three key functionalities. - *

- *

- * - Caching: for the first call of {@link #connect(StringInputRowParser, File)}, it caches objects on a local disk - * up to maxCacheCapacityBytes. These caches are NOT deleted until the process terminates, and thus can be used for - * future reads. -
- * - Fetching: when it has read all cached data, it fetches the remaining objects onto a local disk and reads data from - * them. For performance reasons, a prefetch technique is used: when the size of remaining fetched data is - * smaller than {@link FetchConfig#prefetchTriggerBytes}, a background prefetch thread automatically starts fetching the remaining - * objects. -
- * - Retry: if an exception occurs while downloading an object, the download is retried up to {@link FetchConfig#maxFetchRetry} times. -
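The three behaviors above are wired entirely through the constructor arguments. A hypothetical subclass, invented for illustration (the class name, URI source, and retry rule are not from this patch), makes that concrete:

// Assumes the usual imports (java.net.URI, java.io.*, java.util.*, Guava's Predicate).
public class ExampleTextFirehoseFactory extends PrefetchableTextFilesFirehoseFactory<URI>
{
  public ExampleTextFirehoseFactory()
  {
    super(
        1024L * 1024 * 1024, // maxCacheCapacityBytes: enables caching
        1024L * 1024 * 1024, // maxFetchCapacityBytes: enables prefetching
        null,                // prefetchTriggerBytes: defaults to maxFetchCapacityBytes / 2
        null,                // fetchTimeout: defaults to 60 seconds
        3                    // maxFetchRetry
    );
  }

  @Override
  protected Collection<URI> initObjects()
  {
    return Collections.singletonList(URI.create("http://example.com/data.csv"));
  }

  @Override
  protected InputStream openObjectStream(URI object) throws IOException
  {
    return object.toURL().openStream();
  }

  @Override
  protected InputStream openObjectStream(URI object, long start) throws IOException
  {
    InputStream in = openObjectStream(object);
    in.skip(start); // best-effort resume after a partial download
    return in;
  }

  @Override
  protected InputStream wrapObjectStream(URI object, InputStream stream)
  {
    return stream; // plain text; decompression would go here
  }

  @Override
  protected Predicate<Throwable> getRetryCondition()
  {
    return t -> t instanceof IOException;
  }
}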

- *

- * This implementation can be useful when the cost of reading input objects is large, as when reading from AWS S3, because - * batch tasks like IndexTask or HadoopIndexTask can read the whole data twice: once for determining partition specs and - * again for generating segments, if the intervals of the GranularitySpec are not specified. -
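One way to sidestep the double read described above is to specify the intervals up front in the granularity spec. A sketch with illustrative values; UniformGranularitySpec, Granularities, and Intervals are existing Druid classes, though the exact constructor arguments here are an assumption:

// Supplying input intervals lets batch tasks skip the extra pass over the data that is
// otherwise needed to discover them (illustrative values).
GranularitySpec granularitySpec = new UniformGranularitySpec(
    Granularities.DAY,   // segment granularity
    Granularities.NONE,  // query granularity
    Collections.singletonList(Intervals.of("2019-01-01/2019-01-08"))
);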
- * Prefetching can be turned on/off by setting maxFetchCapacityBytes. Depending on whether prefetching is enabled or - * disabled, the firehose behaves as described below. -

- *

- * 1. If prefetch is enabled, this firehose can fetch input objects in the background. -
- * 2. When next() is called, it first checks whether there are already fetched files in local storage. -
- * 2.1 If one exists, it simply chooses a fetched file and returns a {@link LineIterator} reading that file. -
- * 2.2 If there are no fetched files in local storage but some objects still remain to be read, the firehose - * immediately fetches one of the input objects in the background. If an IOException occurs while downloading the object, - * it retries up to the maximum retry count. Finally, the firehose returns a {@link LineIterator} only when the - * download operation has finished successfully. -
- * 3. If prefetch is disabled, the firehose returns a {@link LineIterator} which directly reads the stream opened by - * {@link #openObjectStream}. If there is an IOException, it will throw it and the read will fail. - */ -public abstract class PrefetchableTextFilesFirehoseFactory - extends AbstractTextFilesFirehoseFactory -{ - private static final Logger LOG = new Logger(PrefetchableTextFilesFirehoseFactory.class); - - private static final CacheManager DISABLED_CACHE_MANAGER = new CacheManager(0); - private static final FetchConfig DISABLED_PREFETCH_CONFIG = new FetchConfig(0L, 0L, 0L, 0L, 0); - - private final CacheManager cacheManager; - private final FetchConfig fetchConfig; - - private List objects; - - public PrefetchableTextFilesFirehoseFactory( - @Nullable Long maxCacheCapacityBytes, - @Nullable Long maxFetchCapacityBytes, - @Nullable Long prefetchTriggerBytes, - @Nullable Long fetchTimeout, - @Nullable Integer maxFetchRetry - ) - { - this.fetchConfig = new FetchConfig( - maxCacheCapacityBytes, - maxFetchCapacityBytes, - prefetchTriggerBytes, - fetchTimeout, - maxFetchRetry - ); - this.cacheManager = new CacheManager<>( - fetchConfig.getMaxCacheCapacityBytes() - ); - } - - @JsonProperty - public long getMaxCacheCapacityBytes() - { - return cacheManager.getMaxCacheCapacityBytes(); - } - - @JsonProperty - public long getMaxFetchCapacityBytes() - { - return fetchConfig.getMaxFetchCapacityBytes(); - } - - @JsonProperty - public long getPrefetchTriggerBytes() - { - return fetchConfig.getPrefetchTriggerBytes(); - } - - @JsonProperty - public long getFetchTimeout() - { - return fetchConfig.getFetchTimeout(); - } - - @JsonProperty - public int getMaxFetchRetry() - { - return fetchConfig.getMaxFetchRetry(); - } - - @VisibleForTesting - CacheManager getCacheManager() - { - return cacheManager; - } - - @Override - public Firehose connect(StringInputRowParser firehoseParser, @Nullable File temporaryDirectory) throws IOException - { - return connectInternal(firehoseParser, temporaryDirectory, this.fetchConfig, this.cacheManager); - } - - @Override - public Firehose connectForSampler(StringInputRowParser parser, @Nullable File temporaryDirectory) throws IOException - { - return connectInternal(parser, temporaryDirectory, DISABLED_PREFETCH_CONFIG, DISABLED_CACHE_MANAGER); - } - - private Firehose connectInternal( - StringInputRowParser firehoseParser, - @Nullable File temporaryDirectory, - FetchConfig fetchConfig, - CacheManager cacheManager - ) throws IOException - { - if (objects == null) { - objects = ImmutableList.copyOf(Preconditions.checkNotNull(initObjects(), "objects")); - } - - if (cacheManager.isEnabled() || fetchConfig.getMaxFetchCapacityBytes() > 0) { - Preconditions.checkNotNull(temporaryDirectory, "temporaryDirectory"); - Preconditions.checkArgument( - temporaryDirectory.exists(), - "temporaryDirectory[%s] does not exist", - temporaryDirectory - ); - Preconditions.checkArgument( - temporaryDirectory.isDirectory(), - "temporaryDirectory[%s] is not a directory", - temporaryDirectory - ); - } - - LOG.info("Create a new firehose for [%d] objects", objects.size()); - - // fetchExecutor is responsible for background data fetching - final ExecutorService fetchExecutor = Execs.singleThreaded("firehose_fetch_%d"); - final FileFetcher fetcher = new FileFetcher( - cacheManager, - objects, - fetchExecutor, - temporaryDirectory, - fetchConfig, - new ObjectOpenFunction() - { - @Override - public InputStream open(T object) throws IOException - { - return openObjectStream(object); - } - - 
@Override - public InputStream open(T object, long start) throws IOException - { - return openObjectStream(object, start); - } - }, - getRetryCondition() - ); - - return new FileIteratingFirehose( - new Iterator<LineIterator>() - { - @Override - public boolean hasNext() - { - return fetcher.hasNext(); - } - - @Override - public LineIterator next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - - final OpenObject<T> openObject = fetcher.next(); - try { - return new ResourceCloseableLineIterator( - new InputStreamReader( - wrapObjectStream(openObject.getObject(), openObject.getObjectStream()), - StandardCharsets.UTF_8 - ), - openObject.getResourceCloser() - ); - } - catch (IOException e) { - try { - openObject.getResourceCloser().close(); - } - catch (Throwable t) { - e.addSuppressed(t); - } - throw new RuntimeException(e); - } - } - }, - firehoseParser, - () -> { - fetchExecutor.shutdownNow(); - try { - Preconditions.checkState(fetchExecutor.awaitTermination( - fetchConfig.getFetchTimeout(), - TimeUnit.MILLISECONDS - )); - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new ISE("Failed to shutdown fetch executor during close"); - } - } - ); - } - - /** - * Returns a predicate describing retry conditions. {@link Fetcher} and {@link RetryingInputStream} will retry on - * errors satisfying this condition. - */ - protected abstract Predicate<Throwable> getRetryCondition(); - - /** - * Opens an input stream for the given object. If the object is compressed, this method should return the byte - * stream still compressed; decompression should be handled in {@link #wrapObjectStream(Object, InputStream)}. - * - * @param object an object to be read - * @param start start offset - * - * @return an input stream for the object - */ - protected abstract InputStream openObjectStream(T object, long start) throws IOException; - - /** - * This class calls the {@link Closeable#close()} method of the resourceCloser when it is closed. - */ - static class ResourceCloseableLineIterator extends LineIterator - { - private final Closeable resourceCloser; - - ResourceCloseableLineIterator(Reader reader, Closeable resourceCloser) throws IllegalArgumentException - { - super(reader); - this.resourceCloser = resourceCloser; - } - - @Override - public void close() - { - try (Closeable ignore = this.resourceCloser) { - super.close(); - } - catch (IOException e) { - throw new RuntimeException(e); - } - } - } -} diff --git a/core/src/test/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptorTest.java b/core/src/test/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptorTest.java deleted file mode 100644 index 3def28293272..000000000000 --- a/core/src/test/java/org/apache/druid/data/input/FirehoseFactoryToInputSourceAdaptorTest.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input; - -import com.google.common.collect.Iterables; -import org.apache.druid.data.input.impl.CSVParseSpec; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.parsers.CloseableIterator; -import org.apache.druid.java.util.common.parsers.ParseException; -import org.apache.druid.testing.InitializedNullHandlingTest; -import org.junit.Assert; -import org.junit.Test; - -import javax.annotation.Nullable; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.stream.Stream; - -public class FirehoseFactoryToInputSourceAdaptorTest extends InitializedNullHandlingTest -{ - @Test - public void testUnimplementedInputFormat() throws IOException - { - final List lines = new ArrayList<>(); - for (int i = 0; i < 10; i++) { - lines.add(StringUtils.format("%d,name_%d,%d", 20190101 + i, i, i + 100)); - } - final TestFirehoseFactory firehoseFactory = new TestFirehoseFactory(lines); - final StringInputRowParser inputRowParser = new StringInputRowParser( - new CSVParseSpec( - new TimestampSpec(null, "yyyyMMdd", null), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("timestamp", "name", "score"))), - ",", - Arrays.asList("timestamp", "name", "score"), - false, - 0 - ), - StringUtils.UTF8_STRING - ); - final FirehoseFactoryToInputSourceAdaptor inputSourceAdaptor = new FirehoseFactoryToInputSourceAdaptor( - firehoseFactory, - inputRowParser - ); - final InputSourceReader reader = inputSourceAdaptor.reader( - new InputRowSchema( - inputRowParser.getParseSpec().getTimestampSpec(), - inputRowParser.getParseSpec().getDimensionsSpec(), - ColumnsFilter.all() - ), - null, - null - ); - final List result = new ArrayList<>(); - try (CloseableIterator iterator = reader.read()) { - while (iterator.hasNext()) { - result.add(iterator.next()); - } - } - Assert.assertEquals(10, result.size()); - for (int i = 0; i < 10; i++) { - Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", 1 + i)), result.get(i).getTimestamp()); - Assert.assertEquals( - StringUtils.format("name_%d", i), - Iterables.getOnlyElement(result.get(i).getDimension("name")) - ); - Assert.assertEquals( - StringUtils.format("%d", i + 100), - Iterables.getOnlyElement(result.get(i).getDimension("score")) - ); - } - } - - private static class TestFirehoseFactory implements FiniteFirehoseFactory - { - private final List lines; - - private TestFirehoseFactory(List lines) - { - this.lines = lines; - } - - @Override - public Firehose connect(StringInputRowParser parser, @Nullable File temporaryDirectory) throws ParseException - { - return new Firehose() - { - final Iterator iterator = lines.iterator(); - - @Override - public boolean hasMore() - { - return iterator.hasNext(); - } - - @Override - 
public InputRow nextRow() - { - return parser.parse(iterator.next()); - } - - @Override - public void close() - { - // do nothing - } - }; - } - - @Override - public boolean isSplittable() - { - return false; - } - - @Override - public Stream> getSplits(@Nullable SplitHintSpec splitHintSpec) - { - return null; - } - - @Override - public int getNumSplits(@Nullable SplitHintSpec splitHintSpec) - { - return 0; - } - - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - return null; - } - } -} diff --git a/core/src/test/java/org/apache/druid/data/input/impl/NoopFirehoseFactory.java b/core/src/test/java/org/apache/druid/data/input/impl/NoopFirehoseFactory.java deleted file mode 100644 index 2669a968d045..000000000000 --- a/core/src/test/java/org/apache/druid/data/input/impl/NoopFirehoseFactory.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl; - -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.SplitHintSpec; - -import javax.annotation.Nullable; -import java.util.stream.Stream; - -public class NoopFirehoseFactory implements FiniteFirehoseFactory -{ - @Override - public String toString() - { - return "NoopFirehoseFactory{}"; - } - - @Override - public Stream getSplits(@Nullable SplitHintSpec splitHintSpec) - { - throw new UnsupportedOperationException(); - } - - @Override - public int getNumSplits(@Nullable SplitHintSpec splitHintSpec) - { - throw new UnsupportedOperationException(); - } - - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - throw new UnsupportedOperationException(); - } -} diff --git a/core/src/test/java/org/apache/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactoryTest.java b/core/src/test/java/org/apache/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactoryTest.java deleted file mode 100644 index dd9c384e91a5..000000000000 --- a/core/src/test/java/org/apache/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactoryTest.java +++ /dev/null @@ -1,640 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl.prefetch; - -import com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import com.google.common.io.CountingOutputStream; -import io.netty.util.SuppressForbidden; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.filefilter.TrueFileFilter; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.Row; -import org.apache.druid.data.input.impl.CSVParseSpec; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.RetryUtils; -import org.apache.druid.java.util.common.StringUtils; -import org.hamcrest.CoreMatchers; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.rules.TemporaryFolder; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.net.SocketException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; - -public class PrefetchableTextFilesFirehoseFactoryTest -{ - private static long FILE_SIZE = -1; - - private static final StringInputRowParser PARSER = new StringInputRowParser( - new CSVParseSpec( - new TimestampSpec( - "timestamp", - "auto", - null - ), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("timestamp", "a", "b"))), - ",", - Arrays.asList("timestamp", "a", "b"), - false, - 0 - ), - StandardCharsets.UTF_8.name() - ); - - @ClassRule - public static TemporaryFolder tempDir = new TemporaryFolder(); - private static File TEST_DIR; - - @Rule - public ExpectedException expectedException = ExpectedException.none(); - - @BeforeClass - public static void setup() throws IOException - { - NullHandling.initializeForTests(); - TEST_DIR = tempDir.newFolder(); - for (int i = 0; i < 100; i++) { - try ( - CountingOutputStream cos = new CountingOutputStream( - Files.newOutputStream(new File(TEST_DIR, "test_" + i).toPath()) - ); - Writer writer = new BufferedWriter(new OutputStreamWriter(cos, StandardCharsets.UTF_8)) - ) { - for (int j = 0; j < 100; j++) { - final String a = StringUtils.format("%d,%03d,%03d\n", (20171220 + i), i, j); - writer.write(a); - } - writer.flush(); - // Every file size must be same - if (FILE_SIZE == -1) { - FILE_SIZE = cos.getCount(); - } else { - Assert.assertEquals(FILE_SIZE, cos.getCount()); - } - } - } - } - - private static void 
assertResult(List rows) - { - Assert.assertEquals(10000, rows.size()); - rows.sort((r1, r2) -> { - int c = r1.getTimestamp().compareTo(r2.getTimestamp()); - if (c != 0) { - return c; - } - c = Integer.valueOf(r1.getDimension("a").get(0)).compareTo(Integer.valueOf(r2.getDimension("a").get(0))); - if (c != 0) { - return c; - } - - return Integer.valueOf(r1.getDimension("b").get(0)).compareTo(Integer.valueOf(r2.getDimension("b").get(0))); - }); - - for (int i = 0; i < 100; i++) { - for (int j = 0; j < 100; j++) { - final Row row = rows.get(i * 100 + j); - Assert.assertEquals(DateTimes.utc(20171220 + i), row.getTimestamp()); - Assert.assertEquals(i, Integer.valueOf(row.getDimension("a").get(0)).intValue()); - Assert.assertEquals(j, Integer.valueOf(row.getDimension("b").get(0)).intValue()); - } - } - } - - private static void assertNumRemainingCacheFiles(File firehoseTmpDir, int expectedNumFiles) - { - final String[] files = firehoseTmpDir.list(); - Assert.assertNotNull(files); - Assert.assertEquals(expectedNumFiles, files.length); - } - - @SuppressForbidden(reason = "Files#createTempDirectory") - private static File createFirehoseTmpDir(String dirPrefix) throws IOException - { - return Files.createTempDirectory(tempDir.getRoot().toPath(), dirPrefix).toFile(); - } - - @Test - public void testWithoutCacheAndFetch() throws IOException - { - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.with(TEST_DIR, 0, 0); - - final List rows = new ArrayList<>(); - final File firehoseTmpDir = createFirehoseTmpDir("testWithoutCacheAndFetch"); - try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) { - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - - Assert.assertEquals(0, factory.getCacheManager().getTotalCachedBytes()); - assertResult(rows); - assertNumRemainingCacheFiles(firehoseTmpDir, 0); - } - - @Test - public void testWithoutCacheAndFetchAgainstConnectionReset() throws IOException - { - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.withConnectionResets(TEST_DIR, 0, 0, 2); - - final List rows = new ArrayList<>(); - final File firehoseTmpDir = createFirehoseTmpDir("testWithoutCacheAndFetch"); - try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) { - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - - Assert.assertEquals(0, factory.getCacheManager().getTotalCachedBytes()); - assertResult(rows); - assertNumRemainingCacheFiles(firehoseTmpDir, 0); - } - - @Test - public void testWithoutCache() throws IOException - { - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.with(TEST_DIR, 0, 2048); - - final List rows = new ArrayList<>(); - final File firehoseTmpDir = createFirehoseTmpDir("testWithoutCache"); - try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) { - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - - Assert.assertEquals(0, factory.getCacheManager().getTotalCachedBytes()); - assertResult(rows); - assertNumRemainingCacheFiles(firehoseTmpDir, 0); - } - - @Test - public void testWithZeroFetchCapacity() throws IOException - { - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.with(TEST_DIR, 2048, 0); - - final List rows = new ArrayList<>(); - final File firehoseTmpDir = createFirehoseTmpDir("testWithZeroFetchCapacity"); - try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) { - while 
(firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - - assertResult(rows); - assertNumRemainingCacheFiles(firehoseTmpDir, 2); - } - - @Test - public void testWithCacheAndFetch() throws IOException - { - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.of(TEST_DIR); - - final List rows = new ArrayList<>(); - final File firehoseTmpDir = createFirehoseTmpDir("testWithCacheAndFetch"); - try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) { - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - - assertResult(rows); - assertNumRemainingCacheFiles(firehoseTmpDir, 2); - } - - @Test - public void testWithLargeCacheAndSmallFetch() throws IOException - { - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.with(TEST_DIR, 2048, 1024); - - final List rows = new ArrayList<>(); - final File firehoseTmpDir = createFirehoseTmpDir("testWithLargeCacheAndSmallFetch"); - try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) { - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - - assertResult(rows); - assertNumRemainingCacheFiles(firehoseTmpDir, 2); - } - - @Test - public void testWithSmallCacheAndLargeFetch() throws IOException - { - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.with(TEST_DIR, 1024, 2048); - - final List rows = new ArrayList<>(); - final File firehoseTmpDir = createFirehoseTmpDir("testWithSmallCacheAndLargeFetch"); - try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) { - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - - assertResult(rows); - assertNumRemainingCacheFiles(firehoseTmpDir, 1); - } - - @Test - public void testRetry() throws IOException - { - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.withOpenExceptions(TEST_DIR, 1); - - final List rows = new ArrayList<>(); - final File firehoseTmpDir = createFirehoseTmpDir("testRetry"); - try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) { - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - - assertResult(rows); - assertNumRemainingCacheFiles(firehoseTmpDir, 2); - } - - @Test - public void testMaxRetry() throws IOException - { - expectedException.expect(RuntimeException.class); - expectedException.expectCause(CoreMatchers.instanceOf(ExecutionException.class)); - expectedException.expectMessage("Exception for retry test"); - - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.withOpenExceptions(TEST_DIR, 5); - - try (Firehose firehose = factory.connect(PARSER, createFirehoseTmpDir("testMaxRetry"))) { - while (firehose.hasMore()) { - firehose.nextRow(); - } - } - } - - @Test - public void testTimeout() throws IOException - { - expectedException.expect(RuntimeException.class); - expectedException.expectCause(CoreMatchers.instanceOf(TimeoutException.class)); - - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.withSleepMillis(TEST_DIR, 1000); - - try (Firehose firehose = factory.connect(PARSER, createFirehoseTmpDir("testTimeout"))) { - while (firehose.hasMore()) { - firehose.nextRow(); - } - } - } - - @Test - public void testReconnectWithCacheAndPrefetch() throws IOException - { - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.of(TEST_DIR); - 
final File firehoseTmpDir = createFirehoseTmpDir("testReconnectWithCacheAndPrefetch"); - - for (int i = 0; i < 5; i++) { - final List rows = new ArrayList<>(); - try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) { - if (i > 0) { - Assert.assertEquals(FILE_SIZE * 2, factory.getCacheManager().getTotalCachedBytes()); - } - - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - assertResult(rows); - assertNumRemainingCacheFiles(firehoseTmpDir, 2); - } - } - - @Test - public void testReconnectWithCache() throws IOException - { - final TestPrefetchableTextFilesFirehoseFactory factory = - TestPrefetchableTextFilesFirehoseFactory.with(TEST_DIR, 2048, 0); - final File firehoseTmpDir = createFirehoseTmpDir("testReconnectWithCache"); - - for (int i = 0; i < 5; i++) { - final List rows = new ArrayList<>(); - try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) { - if (i > 0) { - Assert.assertEquals(FILE_SIZE * 2, factory.getCacheManager().getTotalCachedBytes()); - } - - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - assertResult(rows); - assertNumRemainingCacheFiles(firehoseTmpDir, 2); - } - } - - static class TestPrefetchableTextFilesFirehoseFactory extends PrefetchableTextFilesFirehoseFactory - { - private final long sleepMillis; - private final File baseDir; - private int numOpenExceptions; - private int maxConnectionResets; - - static TestPrefetchableTextFilesFirehoseFactory with(File baseDir, long cacheCapacity, long fetchCapacity) - { - return new TestPrefetchableTextFilesFirehoseFactory( - baseDir, - 1024, - cacheCapacity, - fetchCapacity, - 60_000, // fetch timeout - 3, - 0, - 0, - 0 - ); - } - - static TestPrefetchableTextFilesFirehoseFactory of(File baseDir) - { - return new TestPrefetchableTextFilesFirehoseFactory( - baseDir, - 1024, - 2048, - 2048, - 3, - 0, - 0, - 0 - ); - } - - static TestPrefetchableTextFilesFirehoseFactory withOpenExceptions(File baseDir, int count) - { - return new TestPrefetchableTextFilesFirehoseFactory( - baseDir, - 1024, - 2048, - 2048, - 3, - count, - 0, - 0 - ); - } - - static TestPrefetchableTextFilesFirehoseFactory withConnectionResets( - File baseDir, - long cacheCapacity, - long fetchCapacity, - int numConnectionResets - ) - { - return new TestPrefetchableTextFilesFirehoseFactory( - baseDir, - fetchCapacity / 2, - cacheCapacity, - fetchCapacity, - 3, - 0, - numConnectionResets, - 0 - ); - } - - static TestPrefetchableTextFilesFirehoseFactory withSleepMillis(File baseDir, long ms) - { - return new TestPrefetchableTextFilesFirehoseFactory( - baseDir, - 1024, - 2048, - 2048, - 100, - 3, - 0, - 0, - ms - ); - } - - private static long computeTimeout(int maxRetry) - { - // See RetryUtils.nextRetrySleepMillis() - final double maxFuzzyMultiplier = 2.; - return (long) Math.min( - RetryUtils.MAX_SLEEP_MILLIS, - RetryUtils.BASE_SLEEP_MILLIS * Math.pow(2, maxRetry - 1) * maxFuzzyMultiplier - ); - } - - TestPrefetchableTextFilesFirehoseFactory( - File baseDir, - long prefetchTriggerThreshold, - long maxCacheCapacityBytes, - long maxFetchCapacityBytes, - int maxRetry, - int numOpenExceptions, - int numConnectionResets, - long sleepMillis - ) - { - this( - baseDir, - prefetchTriggerThreshold, - maxCacheCapacityBytes, - maxFetchCapacityBytes, - computeTimeout(maxRetry), - maxRetry, - numOpenExceptions, - numConnectionResets, - sleepMillis - ); - } - - TestPrefetchableTextFilesFirehoseFactory( - File baseDir, - long prefetchTriggerThreshold, - long maxCacheCapacityBytes, - long maxFetchCapacityBytes, - 
long fetchTimeout, - int maxRetry, - int numOpenExceptions, - int maxConnectionResets, - long sleepMillis - ) - { - super( - maxCacheCapacityBytes, - maxFetchCapacityBytes, - prefetchTriggerThreshold, - fetchTimeout, - maxRetry - ); - this.numOpenExceptions = numOpenExceptions; - this.maxConnectionResets = maxConnectionResets; - this.sleepMillis = sleepMillis; - this.baseDir = baseDir; - } - - @Override - protected Collection initObjects() - { - return FileUtils.listFiles( - Preconditions.checkNotNull(baseDir).getAbsoluteFile(), - TrueFileFilter.INSTANCE, - TrueFileFilter.INSTANCE - ); - } - - @Override - protected InputStream openObjectStream(File object) throws IOException - { - if (numOpenExceptions > 0) { - numOpenExceptions--; - throw new IOException("Exception for retry test"); - } - if (sleepMillis > 0) { - try { - Thread.sleep(sleepMillis); - } - catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - return maxConnectionResets > 0 ? - new TestInputStream(FileUtils.openInputStream(object), maxConnectionResets) : - FileUtils.openInputStream(object); - } - - @Override - protected InputStream wrapObjectStream(File object, InputStream stream) - { - return stream; - } - - @Override - protected Predicate getRetryCondition() - { - return e -> e instanceof IOException; - } - - @Override - protected InputStream openObjectStream(File object, long start) throws IOException - { - if (numOpenExceptions > 0) { - numOpenExceptions--; - throw new IOException("Exception for retry test"); - } - if (sleepMillis > 0) { - try { - Thread.sleep(sleepMillis); - } - catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - final InputStream in = FileUtils.openInputStream(object); - in.skip(start); - - return maxConnectionResets > 0 ? 
new TestInputStream(in, maxConnectionResets) : in; - } - - private int readCount; - private int numConnectionResets; - - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - throw new UnsupportedOperationException(); - } - - private class TestInputStream extends InputStream - { - private static final int NUM_READ_COUNTS_BEFORE_ERROR = 10; - private final InputStream delegate; - private final int maxConnectionResets; - - TestInputStream( - InputStream delegate, - int maxConnectionResets - ) - { - this.delegate = delegate; - this.maxConnectionResets = maxConnectionResets; - } - - @Override - public int read() throws IOException - { - if (readCount++ % NUM_READ_COUNTS_BEFORE_ERROR == 0) { - if (numConnectionResets++ < maxConnectionResets) { - // Simulate connection reset - throw new SocketException("Test Connection reset"); - } - } - return delegate.read(); - } - - @Override - public int read(byte[] b, int off, int len) throws IOException - { - if (readCount++ % NUM_READ_COUNTS_BEFORE_ERROR == 0) { - if (numConnectionResets++ < maxConnectionResets) { - // Simulate connection reset - throw new SocketException("Test Connection reset"); - } - } - return delegate.read(b, off, len); - } - } - } -} diff --git a/extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/firehose/aliyun/OssFirehoseDruidModule.java b/extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/firehose/aliyun/OssFirehoseDruidModule.java deleted file mode 100644 index 864717657fe8..000000000000 --- a/extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/firehose/aliyun/OssFirehoseDruidModule.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.aliyun; - -import com.fasterxml.jackson.databind.Module; -import com.fasterxml.jackson.databind.jsontype.NamedType; -import com.fasterxml.jackson.databind.module.SimpleModule; -import com.google.common.collect.ImmutableList; -import com.google.inject.Binder; -import org.apache.druid.initialization.DruidModule; - -import java.util.List; - -public class OssFirehoseDruidModule implements DruidModule -{ - @Override - public List getJacksonModules() - { - return ImmutableList.of( - new SimpleModule().registerSubtypes(new NamedType(StaticOssFirehoseFactory.class, "static-aliyun-oss")) - ); - } - - @Override - public void configure(Binder binder) - { - - } -} diff --git a/extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/firehose/aliyun/StaticOssFirehoseFactory.java b/extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/firehose/aliyun/StaticOssFirehoseFactory.java deleted file mode 100644 index d71198826dc7..000000000000 --- a/extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/firehose/aliyun/StaticOssFirehoseFactory.java +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.firehose.aliyun; - -import com.aliyun.oss.OSS; -import com.aliyun.oss.OSSException; -import com.aliyun.oss.model.GetObjectRequest; -import com.aliyun.oss.model.OSSObject; -import com.aliyun.oss.model.OSSObjectSummary; -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory; -import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.storage.aliyun.OssStorageDruidModule; -import org.apache.druid.storage.aliyun.OssUtils; -import org.apache.druid.utils.CompressionUtils; - -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Objects; -import java.util.stream.Collectors; - -/** - * Builds firehoses that read from a predefined list of aliyun OSS objects and then dry up. 
- */ -public class StaticOssFirehoseFactory extends PrefetchableTextFilesFirehoseFactory -{ - private static final Logger log = new Logger(StaticOssFirehoseFactory.class); - - private final OSS client; - private final List uris; - private final List prefixes; - - @JsonCreator - public StaticOssFirehoseFactory( - @JacksonInject OSS client, - @JsonProperty("uris") List uris, - @JsonProperty("prefixes") List prefixes, - @JsonProperty("maxCacheCapacityBytes") Long maxCacheCapacityBytes, - @JsonProperty("maxFetchCapacityBytes") Long maxFetchCapacityBytes, - @JsonProperty("prefetchTriggerBytes") Long prefetchTriggerBytes, - @JsonProperty("fetchTimeout") Long fetchTimeout, - @JsonProperty("maxFetchRetry") Integer maxFetchRetry - ) - { - super(maxCacheCapacityBytes, maxFetchCapacityBytes, prefetchTriggerBytes, fetchTimeout, maxFetchRetry); - this.client = Preconditions.checkNotNull(client, "client"); - this.uris = uris == null ? new ArrayList<>() : uris; - this.prefixes = prefixes == null ? new ArrayList<>() : prefixes; - - if (!this.uris.isEmpty() && !this.prefixes.isEmpty()) { - throw new IAE("uris and prefixes cannot be used together"); - } - - if (this.uris.isEmpty() && this.prefixes.isEmpty()) { - throw new IAE("uris or prefixes must be specified"); - } - - for (final URI inputURI : this.uris) { - Preconditions.checkArgument(OssStorageDruidModule.SCHEME.equals(inputURI.getScheme()), - "input uri scheme == %s (%s)", - OssStorageDruidModule.SCHEME, - inputURI); - } - - for (final URI inputURI : this.prefixes) { - Preconditions.checkArgument(OssStorageDruidModule.SCHEME.equals(inputURI.getScheme()), - "input uri scheme == %s (%s)", - OssStorageDruidModule.SCHEME, - inputURI); - } - } - - @JsonProperty - public List getUris() - { - return uris; - } - - @JsonProperty("prefixes") - public List getPrefixes() - { - return prefixes; - } - - @Override - protected Collection initObjects() - { - if (!uris.isEmpty()) { - return uris; - } else { - final List objects = new ArrayList<>(); - for (final URI prefix : prefixes) { - final Iterator objectSummaryIterator = OssUtils.objectSummaryIterator( - client, - Collections.singletonList(prefix), - OssUtils.MAX_LISTING_LENGTH - ); - - objectSummaryIterator.forEachRemaining(objects::add); - } - return objects.stream().map(OssUtils::summaryToUri).collect(Collectors.toList()); - } - } - - @Override - protected InputStream openObjectStream(URI object) throws IOException - { - try { - // Get data of the given object and open an input stream - final String bucket = object.getAuthority(); - final String key = OssUtils.extractKey(object); - - final OSSObject ossObject = client.getObject(bucket, key); - if (ossObject == null) { - throw new ISE("Failed to get an Aliyun OSS object for bucket[%s] and key[%s]", bucket, key); - } - return ossObject.getObjectContent(); - } - catch (OSSException e) { - throw new IOException(e); - } - } - - @Override - protected InputStream openObjectStream(URI object, long start) throws IOException - { - final String bucket = object.getAuthority(); - final String key = OssUtils.extractKey(object); - - final GetObjectRequest request = new GetObjectRequest(bucket, key); - try { - final OSSObject ossObject = client.getObject(request); - if (ossObject == null) { - throw new ISE( - "Failed to get an Aliyun OSS object for bucket[%s], key[%s], and start[%d]", - bucket, - key, - start - ); - } - InputStream is = ossObject.getObjectContent(); - is.skip(start); - return is; - } - catch (OSSException e) { - throw new IOException(e); - } - } - - @Override 
- protected InputStream wrapObjectStream(URI object, InputStream stream) throws IOException - { - return CompressionUtils.decompress(stream, OssUtils.extractKey(object)); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - StaticOssFirehoseFactory that = (StaticOssFirehoseFactory) o; - - return Objects.equals(uris, that.uris) && - Objects.equals(prefixes, that.prefixes) && - getMaxCacheCapacityBytes() == that.getMaxCacheCapacityBytes() && - getMaxFetchCapacityBytes() == that.getMaxFetchCapacityBytes() && - getPrefetchTriggerBytes() == that.getPrefetchTriggerBytes() && - getFetchTimeout() == that.getFetchTimeout() && - getMaxFetchRetry() == that.getMaxFetchRetry(); - } - - @Override - public int hashCode() - { - return Objects.hash( - uris, - prefixes, - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry() - ); - } - - @Override - protected Predicate getRetryCondition() - { - return OssUtils.RETRYABLE; - } - - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - return new StaticOssFirehoseFactory( - client, - Collections.singletonList(split.get()), - null, - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry() - ); - } -} diff --git a/extensions-contrib/aliyun-oss-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule b/extensions-contrib/aliyun-oss-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule index 3d434e7c9021..8613c8380131 100644 --- a/extensions-contrib/aliyun-oss-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule +++ b/extensions-contrib/aliyun-oss-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule @@ -14,5 +14,4 @@ # limitations under the License. org.apache.druid.storage.aliyun.OssStorageDruidModule -org.apache.druid.firehose.aliyun.OssFirehoseDruidModule org.apache.druid.data.input.aliyun.OssInputSourceDruidModule \ No newline at end of file diff --git a/extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/CloudFilesBlob.java b/extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/CloudFilesBlob.java deleted file mode 100644 index edc0bbb65c06..000000000000 --- a/extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/CloudFilesBlob.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.cloudfiles; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import javax.validation.constraints.NotNull; -import java.util.Objects; - -public class CloudFilesBlob -{ - @JsonProperty - @NotNull - private String container; - - @JsonProperty - @NotNull - private String path; - - @JsonProperty - @NotNull - private String region; - - @JsonCreator - public CloudFilesBlob( - @JsonProperty("container") String container, - @JsonProperty("path") String path, - @JsonProperty("region") String region - ) - { - this.container = container; - this.path = path; - this.region = region; - } - - public String getContainer() - { - return container; - } - - public String getPath() - { - return path; - } - - public String getRegion() - { - return region; - } - - @Override - public String toString() - { - return "CloudFilesBlob{" - + "container=" + container - + ",path=" + path - + ",region=" + region - + "}"; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - - if (o == null || getClass() != o.getClass()) { - return false; - } - - final CloudFilesBlob that = (CloudFilesBlob) o; - return Objects.equals(container, that.container) && - Objects.equals(path, that.path) && - Objects.equals(region, that.region); - } - - @Override - public int hashCode() - { - return Objects.hash(container, path, region); - } -} diff --git a/extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/CloudFilesFirehoseDruidModule.java b/extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/CloudFilesFirehoseDruidModule.java deleted file mode 100644 index 3531e7584e45..000000000000 --- a/extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/CloudFilesFirehoseDruidModule.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.cloudfiles; - -import com.fasterxml.jackson.databind.Module; -import com.fasterxml.jackson.databind.jsontype.NamedType; -import com.fasterxml.jackson.databind.module.SimpleModule; -import com.google.common.collect.ImmutableList; -import com.google.inject.Binder; -import org.apache.druid.initialization.DruidModule; - -import java.util.List; - -public class CloudFilesFirehoseDruidModule implements DruidModule -{ - - @Override - public List getJacksonModules() - { - return ImmutableList.of( - new SimpleModule().registerSubtypes( - new NamedType(StaticCloudFilesFirehoseFactory.class, "staticcloudfiles"))); - } - - @Override - public void configure(Binder arg0) - { - - } - -} diff --git a/extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactory.java b/extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactory.java deleted file mode 100644 index f0de9f7e98de..000000000000 --- a/extensions-contrib/cloudfiles-extensions/src/main/java/org/apache/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactory.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.cloudfiles; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Predicate; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.storage.cloudfiles.CloudFilesByteSource; -import org.apache.druid.storage.cloudfiles.CloudFilesObjectApiProxy; -import org.apache.druid.storage.cloudfiles.CloudFilesUtils; -import org.apache.druid.utils.CompressionUtils; -import org.jclouds.rackspace.cloudfiles.v1.CloudFilesApi; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Objects; - -public class StaticCloudFilesFirehoseFactory extends PrefetchableTextFilesFirehoseFactory -{ - private static final Logger log = new Logger(StaticCloudFilesFirehoseFactory.class); - - private final CloudFilesApi cloudFilesApi; - private final List blobs; - - @JsonCreator - public StaticCloudFilesFirehoseFactory( - @JacksonInject CloudFilesApi cloudFilesApi, - @JsonProperty("blobs") List blobs, - @JsonProperty("maxCacheCapacityBytes") Long maxCacheCapacityBytes, - @JsonProperty("maxFetchCapacityBytes") Long maxFetchCapacityBytes, - @JsonProperty("prefetchTriggerBytes") Long prefetchTriggerBytes, - @JsonProperty("fetchTimeout") Long fetchTimeout, - @JsonProperty("maxFetchRetry") Integer maxFetchRetry - ) - { - super(maxCacheCapacityBytes, maxFetchCapacityBytes, prefetchTriggerBytes, fetchTimeout, maxFetchRetry); - this.cloudFilesApi = cloudFilesApi; - this.blobs = blobs; - } - - @JsonProperty - public List getBlobs() - { - return blobs; - } - - @Override - protected Collection initObjects() - { - return blobs; - } - - @Override - protected InputStream openObjectStream(CloudFilesBlob object) throws IOException - { - return openObjectStream(object, 0); - } - - @Override - protected InputStream openObjectStream(CloudFilesBlob object, long start) throws IOException - { - return createCloudFilesByteSource(object).openStream(start); - } - - private CloudFilesByteSource createCloudFilesByteSource(CloudFilesBlob object) - { - final String region = object.getRegion(); - final String container = object.getContainer(); - final String path = object.getPath(); - - log.info("Retrieving file from region[%s], container[%s] and path [%s]", - region, container, path - ); - CloudFilesObjectApiProxy objectApi = new CloudFilesObjectApiProxy(cloudFilesApi, region, container); - return new CloudFilesByteSource(objectApi, path); - } - - @Override - protected InputStream wrapObjectStream(CloudFilesBlob object, InputStream stream) throws IOException - { - return CompressionUtils.decompress(stream, object.getPath()); - } - - @Override - public boolean equals(Object o) - { - if (o == this) { - return true; - } - - if (o == null || getClass() != o.getClass()) { - return false; - } - - final StaticCloudFilesFirehoseFactory that = (StaticCloudFilesFirehoseFactory) o; - return Objects.equals(blobs, that.blobs) && - getMaxCacheCapacityBytes() == that.getMaxCacheCapacityBytes() && - getMaxFetchCapacityBytes() == that.getMaxFetchCapacityBytes() && - getPrefetchTriggerBytes() == 
that.getPrefetchTriggerBytes() && - getFetchTimeout() == that.getFetchTimeout() && - getMaxFetchRetry() == that.getMaxFetchRetry(); - } - - @Override - public int hashCode() - { - return Objects.hash( - blobs, - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry() - ); - } - - @Override - protected Predicate getRetryCondition() - { - return CloudFilesUtils.CLOUDFILESRETRY; - } - - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - return new StaticCloudFilesFirehoseFactory( - cloudFilesApi, - Collections.singletonList(split.get()), - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry() - ); - } -} diff --git a/extensions-contrib/cloudfiles-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule b/extensions-contrib/cloudfiles-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule index 81a1411f89b6..367f66497ba9 100644 --- a/extensions-contrib/cloudfiles-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule +++ b/extensions-contrib/cloudfiles-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule @@ -16,4 +16,3 @@ # under the License. org.apache.druid.storage.cloudfiles.CloudFilesStorageDruidModule -org.apache.druid.firehose.cloudfiles.CloudFilesFirehoseDruidModule diff --git a/extensions-contrib/cloudfiles-extensions/src/test/java/org/apache/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactoryTest.java b/extensions-contrib/cloudfiles-extensions/src/test/java/org/apache/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactoryTest.java deleted file mode 100644 index 48f3ca694fa3..000000000000 --- a/extensions-contrib/cloudfiles-extensions/src/test/java/org/apache/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactoryTest.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.cloudfiles; - -import com.fasterxml.jackson.databind.Module; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.module.SimpleModule; -import com.fasterxml.jackson.module.guice.ObjectMapperModule; -import com.google.common.collect.ImmutableList; -import com.google.inject.Binder; -import com.google.inject.Guice; -import com.google.inject.Injector; -import com.google.inject.Provides; -import org.apache.druid.initialization.DruidModule; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.easymock.EasyMock; -import org.jclouds.rackspace.cloudfiles.v1.CloudFilesApi; -import org.junit.Assert; -import org.junit.Test; - -import java.io.IOException; -import java.util.List; - -public class StaticCloudFilesFirehoseFactoryTest -{ - private static final CloudFilesApi API = EasyMock.niceMock(CloudFilesApi.class); - - @Test - public void testSerde() throws IOException - { - final ObjectMapper mapper = createObjectMapper(new TestModule()); - - final List blobs = ImmutableList.of( - new CloudFilesBlob("container", "foo", "bar"), - new CloudFilesBlob("container", "foo", "bar2") - ); - - final StaticCloudFilesFirehoseFactory factory = new StaticCloudFilesFirehoseFactory( - API, - blobs, - 2048L, - 1024L, - 512L, - 100L, - 5 - ); - - final StaticCloudFilesFirehoseFactory outputFact = mapper.readValue( - mapper.writeValueAsString(factory), - StaticCloudFilesFirehoseFactory.class - ); - - Assert.assertEquals(factory, outputFact); - } - - private static ObjectMapper createObjectMapper(DruidModule baseModule) - { - final ObjectMapper baseMapper = new DefaultObjectMapper(); - baseModule.getJacksonModules().forEach(baseMapper::registerModule); - - final Injector injector = Guice.createInjector( - new ObjectMapperModule(), - baseModule - ); - return injector.getInstance(ObjectMapper.class); - } - - private static class TestModule implements DruidModule - { - @Override - public List getJacksonModules() - { - return ImmutableList.of(new SimpleModule()); - } - - @Override - public void configure(Binder binder) - { - - } - - @Provides - public CloudFilesApi getRestS3Service() - { - return API; - } - } -} diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/firehose/azure/AzureBlob.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/firehose/azure/AzureBlob.java deleted file mode 100644 index ef8278eff913..000000000000 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/firehose/azure/AzureBlob.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.azure; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import javax.validation.constraints.NotNull; -import java.util.Objects; - -/** - * Represents an Azure based blob. Used with {@link StaticAzureBlobStoreFirehoseFactory}. - * - * @deprecated as of version 0.18.0 because support for firehose has been discontinued. Please use - * {@link org.apache.druid.data.input.azure.AzureEntity} with {@link org.apache.druid.data.input.azure.AzureInputSource} - * instead. - */ -@Deprecated -public class AzureBlob -{ - @JsonProperty - @NotNull - private String container; - - @JsonProperty - @NotNull - private String path; - - @JsonCreator - public AzureBlob(@JsonProperty("container") String container, @JsonProperty("path") String path) - { - this.container = container; - this.path = path; - } - - public String getContainer() - { - return container; - } - - public String getPath() - { - return path; - } - - @Override - public String toString() - { - return "AzureBlob{" - + "container=" + container - + ",path=" + path - + "}"; - } - - @Override - public boolean equals(Object o) - { - if (o == this) { - return true; - } - - if (o == null || getClass() != o.getClass()) { - return false; - } - - final AzureBlob that = (AzureBlob) o; - return Objects.equals(container, that.container) && - Objects.equals(path, that.path); - } - - @Override - public int hashCode() - { - return Objects.hash(container, path); - } -} diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactory.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactory.java deleted file mode 100644 index 449873523f2d..000000000000 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactory.java +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.azure; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.storage.azure.AzureByteSource; -import org.apache.druid.storage.azure.AzureStorage; -import org.apache.druid.storage.azure.AzureUtils; -import org.apache.druid.utils.CompressionUtils; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Objects; - -/** - * This class is heavily inspired by the StaticS3FirehoseFactory class in the org.apache.druid.firehose.s3 package - * - * @deprecated as of version 0.18.0 because support for firehose has been discontinued. Please use - * {@link org.apache.druid.data.input.azure.AzureInputSource} instead. - */ -@Deprecated -public class StaticAzureBlobStoreFirehoseFactory extends PrefetchableTextFilesFirehoseFactory -{ - private final AzureStorage azureStorage; - private final List blobs; - - @JsonCreator - public StaticAzureBlobStoreFirehoseFactory( - @JacksonInject AzureStorage azureStorage, - @JsonProperty("blobs") List blobs, - @JsonProperty("maxCacheCapacityBytes") Long maxCacheCapacityBytes, - @JsonProperty("maxFetchCapacityBytes") Long maxFetchCapacityBytes, - @JsonProperty("prefetchTriggerBytes") Long prefetchTriggerBytes, - @JsonProperty("fetchTimeout") Long fetchTimeout, - @JsonProperty("maxFetchRetry") Integer maxFetchRetry - ) - { - super(maxCacheCapacityBytes, maxFetchCapacityBytes, prefetchTriggerBytes, fetchTimeout, maxFetchRetry); - this.blobs = blobs; - this.azureStorage = azureStorage; - } - - @JsonProperty - public List getBlobs() - { - return blobs; - } - - @Override - protected Collection initObjects() - { - return blobs; - } - - @Override - protected InputStream openObjectStream(AzureBlob object) throws IOException - { - return makeByteSource(azureStorage, object).openStream(); - } - - @Override - protected InputStream openObjectStream(AzureBlob object, long start) throws IOException - { - // BlobInputStream.skip() moves the next read offset instead of skipping first 'start' bytes. 
- final InputStream in = openObjectStream(object); - final long skip = in.skip(start); - Preconditions.checkState(skip == start, "start offset was [%s] but [%s] bytes were skipped", start, skip); - return in; - } - - @Override - protected InputStream wrapObjectStream(AzureBlob object, InputStream stream) throws IOException - { - return CompressionUtils.decompress(stream, object.getPath()); - } - - private static AzureByteSource makeByteSource(AzureStorage azureStorage, AzureBlob object) - { - final String container = object.getContainer(); - final String path = StringUtils.maybeRemoveLeadingSlash(object.getPath()); - - return new AzureByteSource(azureStorage, container, path); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - - if (o == null || getClass() != o.getClass()) { - return false; - } - - final StaticAzureBlobStoreFirehoseFactory that = (StaticAzureBlobStoreFirehoseFactory) o; - - return Objects.equals(blobs, that.blobs) && - getMaxCacheCapacityBytes() == that.getMaxCacheCapacityBytes() && - getMaxFetchCapacityBytes() == that.getMaxFetchCapacityBytes() && - getPrefetchTriggerBytes() == that.getPrefetchTriggerBytes() && - getFetchTimeout() == that.getFetchTimeout() && - getMaxFetchRetry() == that.getMaxFetchRetry(); - } - - @Override - public int hashCode() - { - return Objects.hash( - blobs, - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry() - ); - } - - @Override - protected Predicate getRetryCondition() - { - return AzureUtils.AZURE_RETRY; - } - - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - return new StaticAzureBlobStoreFirehoseFactory( - azureStorage, - Collections.singletonList(split.get()), - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry() - ); - } -} diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java index e870aa08eca5..c7c4655a3d21 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java @@ -33,7 +33,6 @@ import com.microsoft.azure.storage.blob.CloudBlobClient; import org.apache.druid.data.input.azure.AzureEntityFactory; import org.apache.druid.data.input.azure.AzureInputSource; -import org.apache.druid.firehose.azure.StaticAzureBlobStoreFirehoseFactory; import org.apache.druid.guice.Binders; import org.apache.druid.guice.JsonConfigProvider; import org.apache.druid.guice.LazySingleton; @@ -84,7 +83,6 @@ public void setupModule(SetupContext context) } }, new SimpleModule().registerSubtypes( - new NamedType(StaticAzureBlobStoreFirehoseFactory.class, "static-azure-blobstore"), new NamedType(AzureInputSource.class, SCHEME) ) ); diff --git a/extensions-core/azure-extensions/src/test/java/org/apache/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactoryTest.java b/extensions-core/azure-extensions/src/test/java/org/apache/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactoryTest.java deleted file mode 100644 index 52751e211718..000000000000 --- a/extensions-core/azure-extensions/src/test/java/org/apache/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactoryTest.java +++ /dev/null @@ -1,104 
+0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.firehose.azure; - -import com.fasterxml.jackson.databind.Module; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.module.SimpleModule; -import com.fasterxml.jackson.module.guice.ObjectMapperModule; -import com.google.common.collect.ImmutableList; -import com.google.inject.Binder; -import com.google.inject.Guice; -import com.google.inject.Injector; -import com.google.inject.Provides; -import org.apache.druid.initialization.DruidModule; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.storage.azure.AzureStorage; -import org.junit.Assert; -import org.junit.Test; - -import java.io.IOException; -import java.util.List; - -public class StaticAzureBlobStoreFirehoseFactoryTest -{ - private static final AzureStorage STORAGE = new AzureStorage(null); - - @Test - public void testSerde() throws IOException - { - final ObjectMapper mapper = createObjectMapper(new TestModule()); - - final List blobs = ImmutableList.of( - new AzureBlob("foo", "bar"), - new AzureBlob("foo", "bar2") - ); - - final StaticAzureBlobStoreFirehoseFactory factory = new StaticAzureBlobStoreFirehoseFactory( - STORAGE, - blobs, - 2048L, - 1024L, - 512L, - 100L, - 5 - ); - - final StaticAzureBlobStoreFirehoseFactory outputFact = mapper.readValue( - mapper.writeValueAsString(factory), - StaticAzureBlobStoreFirehoseFactory.class - ); - - Assert.assertEquals(factory, outputFact); - } - - private static ObjectMapper createObjectMapper(DruidModule baseModule) - { - final ObjectMapper baseMapper = new DefaultObjectMapper(); - baseModule.getJacksonModules().forEach(baseMapper::registerModule); - - final Injector injector = Guice.createInjector( - new ObjectMapperModule(), - baseModule - ); - return injector.getInstance(ObjectMapper.class); - } - - private static class TestModule implements DruidModule - { - @Override - public List getJacksonModules() - { - return ImmutableList.of(new SimpleModule()); - } - - @Override - public void configure(Binder binder) - { - - } - - @Provides - public AzureStorage getRestS3Service() - { - return STORAGE; - } - } -} diff --git a/extensions-core/google-extensions/src/main/java/org/apache/druid/firehose/google/GoogleBlob.java b/extensions-core/google-extensions/src/main/java/org/apache/druid/firehose/google/GoogleBlob.java deleted file mode 100644 index 0e51b62080cf..000000000000 --- a/extensions-core/google-extensions/src/main/java/org/apache/druid/firehose/google/GoogleBlob.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.firehose.google; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Objects; - -public class GoogleBlob -{ - private final String bucket; - private final String path; - - @JsonCreator - public GoogleBlob(@JsonProperty("bucket") String bucket, @JsonProperty("path") String path) - { - this.bucket = bucket; - this.path = path; - } - - @JsonProperty - public String getBucket() - { - return bucket; - } - - @JsonProperty - public String getPath() - { - return path; - } - - @Override - public String toString() - { - return "GoogleBlob {" - + "bucket=" + bucket - + ",path=" + path - + "}"; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - - if (o == null || getClass() != o.getClass()) { - return false; - } - - final GoogleBlob that = (GoogleBlob) o; - return Objects.equals(bucket, that.bucket) && - Objects.equals(path, that.path); - } - - @Override - public int hashCode() - { - return Objects.hash(bucket, path); - } -} diff --git a/extensions-core/google-extensions/src/main/java/org/apache/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactory.java b/extensions-core/google-extensions/src/main/java/org/apache/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactory.java deleted file mode 100644 index 22437d780e13..000000000000 --- a/extensions-core/google-extensions/src/main/java/org/apache/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactory.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.google; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Predicate; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.storage.google.GoogleByteSource; -import org.apache.druid.storage.google.GoogleStorage; -import org.apache.druid.storage.google.GoogleUtils; -import org.apache.druid.utils.CompressionUtils; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Objects; - -public class StaticGoogleBlobStoreFirehoseFactory extends PrefetchableTextFilesFirehoseFactory -{ - private final GoogleStorage storage; - private final List blobs; - - @JsonCreator - public StaticGoogleBlobStoreFirehoseFactory( - @JacksonInject GoogleStorage storage, - @JsonProperty("blobs") List blobs, - @JsonProperty("maxCacheCapacityBytes") Long maxCacheCapacityBytes, - @JsonProperty("maxFetchCapacityBytes") Long maxFetchCapacityBytes, - @JsonProperty("prefetchTriggerBytes") Long prefetchTriggerBytes, - @JsonProperty("fetchTimeout") Long fetchTimeout, - @JsonProperty("maxFetchRetry") Integer maxFetchRetry - ) - { - super(maxCacheCapacityBytes, maxFetchCapacityBytes, prefetchTriggerBytes, fetchTimeout, maxFetchRetry); - this.storage = storage; - this.blobs = blobs; - } - - @JsonProperty - public List getBlobs() - { - return blobs; - } - - @Override - protected Collection initObjects() - { - return blobs; - } - - @Override - protected InputStream openObjectStream(GoogleBlob object) throws IOException - { - return openObjectStream(object, 0); - } - - @Override - protected InputStream openObjectStream(GoogleBlob object, long start) throws IOException - { - return createGoogleByteSource(object).openStream(start); - } - - private GoogleByteSource createGoogleByteSource(GoogleBlob object) - { - final String bucket = object.getBucket(); - final String path = StringUtils.maybeRemoveLeadingSlash(object.getPath()); - - return new GoogleByteSource(storage, bucket, path); - } - - @Override - protected InputStream wrapObjectStream(GoogleBlob object, InputStream stream) throws IOException - { - return CompressionUtils.decompress(stream, object.getPath()); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - final StaticGoogleBlobStoreFirehoseFactory that = (StaticGoogleBlobStoreFirehoseFactory) o; - - return Objects.equals(blobs, that.blobs) && - getMaxCacheCapacityBytes() == that.getMaxCacheCapacityBytes() && - getMaxFetchCapacityBytes() == that.getMaxFetchCapacityBytes() && - getPrefetchTriggerBytes() == that.getPrefetchTriggerBytes() && - getFetchTimeout() == that.getFetchTimeout() && - getMaxFetchRetry() == that.getMaxFetchRetry(); - } - - @Override - public int hashCode() - { - return Objects.hash( - blobs, - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry() - ); - } - - @Override - protected Predicate getRetryCondition() - { - return GoogleUtils::isRetryable; - 
} - - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - return new StaticGoogleBlobStoreFirehoseFactory( - storage, - Collections.singletonList(split.get()), - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry() - ); - } -} - diff --git a/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/GoogleStorageDruidModule.java b/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/GoogleStorageDruidModule.java index 838b46f2287f..8aa77b0298f5 100644 --- a/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/GoogleStorageDruidModule.java +++ b/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/GoogleStorageDruidModule.java @@ -34,7 +34,6 @@ import com.google.inject.multibindings.MapBinder; import org.apache.druid.data.SearchableVersionedDataFinder; import org.apache.druid.data.input.google.GoogleCloudStorageInputSource; -import org.apache.druid.firehose.google.StaticGoogleBlobStoreFirehoseFactory; import org.apache.druid.guice.Binders; import org.apache.druid.guice.JsonConfigProvider; import org.apache.druid.guice.LazySingleton; @@ -77,7 +76,6 @@ public void setupModule(SetupContext context) } }, new SimpleModule().registerSubtypes( - new NamedType(StaticGoogleBlobStoreFirehoseFactory.class, "static-google-blobstore"), new NamedType(GoogleCloudStorageInputSource.class, SCHEME) ) ); diff --git a/extensions-core/google-extensions/src/test/java/org/apache/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactoryTest.java b/extensions-core/google-extensions/src/test/java/org/apache/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactoryTest.java deleted file mode 100644 index c9996b53ccbc..000000000000 --- a/extensions-core/google-extensions/src/test/java/org/apache/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactoryTest.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.google; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableList; -import org.apache.druid.data.input.google.GoogleCloudStorageInputSourceTest; -import org.apache.druid.storage.google.GoogleStorage; -import org.junit.Assert; -import org.junit.Test; - -import java.io.IOException; -import java.util.List; - -public class StaticGoogleBlobStoreFirehoseFactoryTest -{ - private static final GoogleStorage STORAGE = new GoogleStorage(null); - - @Test - public void testSerde() throws IOException - { - final ObjectMapper mapper = GoogleCloudStorageInputSourceTest.createGoogleObjectMapper(); - - final List blobs = ImmutableList.of( - new GoogleBlob("foo", "bar"), - new GoogleBlob("foo", "bar2") - ); - - final StaticGoogleBlobStoreFirehoseFactory factory = new StaticGoogleBlobStoreFirehoseFactory( - STORAGE, - blobs, - 2048L, - 1024L, - 512L, - 100L, - 5 - ); - - final StaticGoogleBlobStoreFirehoseFactory outputFact = mapper.readValue( - mapper.writeValueAsString(factory), - StaticGoogleBlobStoreFirehoseFactory.class - ); - - Assert.assertEquals(factory, outputFact); - } -} diff --git a/extensions-core/hdfs-storage/src/main/java/org/apache/druid/firehose/hdfs/HdfsFirehoseFactory.java b/extensions-core/hdfs-storage/src/main/java/org/apache/druid/firehose/hdfs/HdfsFirehoseFactory.java deleted file mode 100644 index f7fac9f3b479..000000000000 --- a/extensions-core/hdfs-storage/src/main/java/org/apache/druid/firehose/hdfs/HdfsFirehoseFactory.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.hdfs; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Predicate; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory; -import org.apache.druid.guice.Hdfs; -import org.apache.druid.inputsource.hdfs.HdfsInputSource; -import org.apache.druid.inputsource.hdfs.HdfsInputSourceConfig; -import org.apache.druid.storage.hdfs.HdfsDataSegmentPuller; -import org.apache.druid.utils.CompressionUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.Path; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Collection; -import java.util.List; - -public class HdfsFirehoseFactory extends PrefetchableTextFilesFirehoseFactory -{ - private final List inputPaths; - private final Configuration conf; - private final HdfsInputSourceConfig inputSourceConfig; - - @JsonCreator - public HdfsFirehoseFactory( - @JsonProperty("paths") Object inputPaths, - @JsonProperty("maxCacheCapacityBytes") Long maxCacheCapacityBytes, - @JsonProperty("maxFetchCapacityBytes") Long maxFetchCapacityBytes, - @JsonProperty("prefetchTriggerBytes") Long prefetchTriggerBytes, - @JsonProperty("fetchTimeout") Long fetchTimeout, - @JsonProperty("maxFetchRetry") Integer maxFetchRetry, - @JacksonInject @Hdfs Configuration conf, - @JacksonInject HdfsInputSourceConfig inputSourceConfig - ) - { - super(maxCacheCapacityBytes, maxFetchCapacityBytes, prefetchTriggerBytes, fetchTimeout, maxFetchRetry); - this.inputPaths = HdfsInputSource.coerceInputPathsToList(inputPaths, "paths"); - this.conf = conf; - this.inputSourceConfig = inputSourceConfig; - this.inputPaths.forEach(p -> HdfsInputSource.verifyProtocol(conf, inputSourceConfig, p)); - } - - @JsonProperty("paths") - public List getInputPaths() - { - return inputPaths; - } - - @Override - protected Collection initObjects() throws IOException - { - return HdfsInputSource.getPaths(inputPaths, conf); - } - - @Override - protected InputStream openObjectStream(Path path) throws IOException - { - return path.getFileSystem(conf).open(path); - } - - @Override - protected InputStream openObjectStream(Path path, long start) throws IOException - { - final FSDataInputStream in = path.getFileSystem(conf).open(path); - in.seek(start); - return in; - } - - @Override - protected InputStream wrapObjectStream(Path path, InputStream stream) throws IOException - { - return CompressionUtils.decompress(stream, path.getName()); - } - - @Override - protected Predicate getRetryCondition() - { - return HdfsDataSegmentPuller.RETRY_PREDICATE; - } - - @Override - public boolean isSplittable() - { - return true; - } - - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - return new HdfsFirehoseFactory( - split.get().toString(), - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry(), - conf, - inputSourceConfig - ); - } -} diff --git a/extensions-core/hdfs-storage/src/main/java/org/apache/druid/storage/hdfs/HdfsStorageDruidModule.java b/extensions-core/hdfs-storage/src/main/java/org/apache/druid/storage/hdfs/HdfsStorageDruidModule.java index 
3ca8e23535e1..68c7650960b8 100644 --- a/extensions-core/hdfs-storage/src/main/java/org/apache/druid/storage/hdfs/HdfsStorageDruidModule.java +++ b/extensions-core/hdfs-storage/src/main/java/org/apache/druid/storage/hdfs/HdfsStorageDruidModule.java @@ -26,7 +26,6 @@ import com.google.inject.Inject; import com.google.inject.multibindings.MapBinder; import org.apache.druid.data.SearchableVersionedDataFinder; -import org.apache.druid.firehose.hdfs.HdfsFirehoseFactory; import org.apache.druid.guice.Binders; import org.apache.druid.guice.Hdfs; import org.apache.druid.guice.JsonConfigProvider; @@ -66,7 +65,6 @@ public List getJacksonModules() return Collections.singletonList( new SimpleModule().registerSubtypes( new NamedType(HdfsLoadSpec.class, HdfsStorageDruidModule.SCHEME), - new NamedType(HdfsFirehoseFactory.class, HdfsStorageDruidModule.SCHEME), new NamedType(HdfsInputSource.class, HdfsStorageDruidModule.SCHEME) ) ); diff --git a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/firehose/hdfs/HdfsFirehoseFactoryTest.java b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/firehose/hdfs/HdfsFirehoseFactoryTest.java deleted file mode 100644 index e96a773c0cef..000000000000 --- a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/firehose/hdfs/HdfsFirehoseFactoryTest.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.hdfs; - -import com.fasterxml.jackson.databind.InjectableValues.Std; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableSet; -import org.apache.druid.data.input.FirehoseFactory; -import org.apache.druid.inputsource.hdfs.HdfsInputSourceConfig; -import org.apache.druid.storage.hdfs.HdfsStorageDruidModule; -import org.apache.hadoop.conf.Configuration; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -import java.io.IOException; -import java.util.Collections; - -public class HdfsFirehoseFactoryTest -{ - private static final HdfsInputSourceConfig DEFAULT_INPUT_SOURCE_CONFIG = new HdfsInputSourceConfig(null); - private static final Configuration DEFAULT_CONFIGURATION = new Configuration(); - - @BeforeClass - public static void setup() - { - DEFAULT_CONFIGURATION.set("fs.default.name", "hdfs://localhost:7020"); - } - - @Rule - public ExpectedException expectedException = ExpectedException.none(); - - @Test - public void testArrayPaths() throws IOException - { - final HdfsFirehoseFactory firehoseFactory = new HdfsFirehoseFactory( - Collections.singletonList("/foo/bar"), - null, - null, - null, - null, - null, - DEFAULT_CONFIGURATION, - DEFAULT_INPUT_SOURCE_CONFIG - ); - - final ObjectMapper mapper = createMapper(); - - final HdfsFirehoseFactory firehoseFactory2 = (HdfsFirehoseFactory) - mapper.readValue(mapper.writeValueAsString(firehoseFactory), FirehoseFactory.class); - - Assert.assertEquals( - firehoseFactory.getInputPaths(), - firehoseFactory2.getInputPaths() - ); - } - - @Test - public void testStringPaths() throws IOException - { - final HdfsFirehoseFactory firehoseFactory = new HdfsFirehoseFactory( - "/foo/bar", - null, - null, - null, - null, - null, - DEFAULT_CONFIGURATION, - DEFAULT_INPUT_SOURCE_CONFIG - ); - final ObjectMapper mapper = createMapper(); - - final HdfsFirehoseFactory firehoseFactory2 = (HdfsFirehoseFactory) - mapper.readValue(mapper.writeValueAsString(firehoseFactory), FirehoseFactory.class); - - Assert.assertEquals( - firehoseFactory.getInputPaths(), - firehoseFactory2.getInputPaths() - ); - } - - @Test - public void testConstructorAllowsOnlyDefaultProtocol() - { - new HdfsFirehoseFactory( - "hdfs://localhost:7020/foo/bar", - null, - null, - null, - null, - null, - DEFAULT_CONFIGURATION, - DEFAULT_INPUT_SOURCE_CONFIG - ); - - expectedException.expect(IllegalArgumentException.class); - expectedException.expectMessage("Only [hdfs] protocols are allowed"); - new HdfsFirehoseFactory( - "file:/foo/bar", - null, - null, - null, - null, - null, - DEFAULT_CONFIGURATION, - DEFAULT_INPUT_SOURCE_CONFIG - ); - } - - @Test - public void testConstructorAllowsOnlyCustomProtocol() - { - final Configuration conf = new Configuration(); - conf.set("fs.ftp.impl", "org.apache.hadoop.fs.ftp.FTPFileSystem"); - new HdfsFirehoseFactory( - "ftp://localhost:21/foo/bar", - null, - null, - null, - null, - null, - DEFAULT_CONFIGURATION, - new HdfsInputSourceConfig(ImmutableSet.of("ftp")) - ); - - expectedException.expect(IllegalArgumentException.class); - expectedException.expectMessage("Only [druid] protocols are allowed"); - new HdfsFirehoseFactory( - "hdfs://localhost:7020/foo/bar", - null, - null, - null, - null, - null, - DEFAULT_CONFIGURATION, - new HdfsInputSourceConfig(ImmutableSet.of("druid")) - ); - } - - @Test - public void testConstructorWithDefaultHdfs() - { - new HdfsFirehoseFactory( - "/foo/bar*", - 
null, - null, - null, - null, - null, - DEFAULT_CONFIGURATION, - DEFAULT_INPUT_SOURCE_CONFIG - ); - - new HdfsFirehoseFactory( - "foo/bar*", - null, - null, - null, - null, - null, - DEFAULT_CONFIGURATION, - DEFAULT_INPUT_SOURCE_CONFIG - ); - - new HdfsFirehoseFactory( - "hdfs:///foo/bar*", - null, - null, - null, - null, - null, - DEFAULT_CONFIGURATION, - DEFAULT_INPUT_SOURCE_CONFIG - ); - - new HdfsFirehoseFactory( - "hdfs://localhost:10020/foo/bar*", // different hdfs - null, - null, - null, - null, - null, - DEFAULT_CONFIGURATION, - DEFAULT_INPUT_SOURCE_CONFIG - ); - } - - private static ObjectMapper createMapper() - { - final ObjectMapper mapper = new ObjectMapper(); - new HdfsStorageDruidModule().getJacksonModules().forEach(mapper::registerModule); - mapper.setInjectableValues( - new Std() - .addValue(Configuration.class, DEFAULT_CONFIGURATION) - .addValue(HdfsInputSourceConfig.class, DEFAULT_INPUT_SOURCE_CONFIG) - ); - return mapper; - } -} diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/firehose/s3/S3FirehoseDruidModule.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/firehose/s3/S3FirehoseDruidModule.java deleted file mode 100644 index 6cb5ac5c1d84..000000000000 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/firehose/s3/S3FirehoseDruidModule.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.firehose.s3; - -import com.fasterxml.jackson.databind.Module; -import com.fasterxml.jackson.databind.jsontype.NamedType; -import com.fasterxml.jackson.databind.module.SimpleModule; -import com.google.common.collect.ImmutableList; -import com.google.inject.Binder; -import org.apache.druid.initialization.DruidModule; - -import java.util.List; - -/** - */ -public class S3FirehoseDruidModule implements DruidModule -{ - @Override - public List getJacksonModules() - { - return ImmutableList.of( - new SimpleModule().registerSubtypes(new NamedType(StaticS3FirehoseFactory.class, "static-s3")) - ); - } - - @Override - public void configure(Binder binder) - { - - } -} diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/firehose/s3/StaticS3FirehoseFactory.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/firehose/s3/StaticS3FirehoseFactory.java deleted file mode 100644 index 0bf93313dc6a..000000000000 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/firehose/s3/StaticS3FirehoseFactory.java +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.firehose.s3; - -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory; -import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.storage.s3.S3Utils; -import org.apache.druid.storage.s3.ServerSideEncryptingAmazonS3; -import org.apache.druid.utils.CompressionUtils; - -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Objects; -import java.util.stream.Collectors; - -/** - * Builds firehoses that read from a predefined list of S3 objects and then dry up. - */ -public class StaticS3FirehoseFactory extends PrefetchableTextFilesFirehoseFactory -{ - private static final Logger log = new Logger(StaticS3FirehoseFactory.class); - private static final int MAX_LISTING_LENGTH = 1024; - - private final ServerSideEncryptingAmazonS3 s3Client; - private final List uris; - private final List prefixes; - - @JsonCreator - public StaticS3FirehoseFactory( - @JacksonInject ServerSideEncryptingAmazonS3 s3Client, - @JsonProperty("uris") List uris, - @JsonProperty("prefixes") List prefixes, - @JsonProperty("maxCacheCapacityBytes") Long maxCacheCapacityBytes, - @JsonProperty("maxFetchCapacityBytes") Long maxFetchCapacityBytes, - @JsonProperty("prefetchTriggerBytes") Long prefetchTriggerBytes, - @JsonProperty("fetchTimeout") Long fetchTimeout, - @JsonProperty("maxFetchRetry") Integer maxFetchRetry - ) - { - super(maxCacheCapacityBytes, maxFetchCapacityBytes, prefetchTriggerBytes, fetchTimeout, maxFetchRetry); - this.s3Client = Preconditions.checkNotNull(s3Client, "s3Client"); - this.uris = uris == null ? new ArrayList<>() : uris; - this.prefixes = prefixes == null ? 
new ArrayList<>() : prefixes; - - if (!this.uris.isEmpty() && !this.prefixes.isEmpty()) { - throw new IAE("uris and prefixes cannot be used together"); - } - - if (this.uris.isEmpty() && this.prefixes.isEmpty()) { - throw new IAE("uris or prefixes must be specified"); - } - - for (final URI inputURI : this.uris) { - Preconditions.checkArgument("s3".equals(inputURI.getScheme()), "input uri scheme == s3 (%s)", inputURI); - } - - for (final URI inputURI : this.prefixes) { - Preconditions.checkArgument("s3".equals(inputURI.getScheme()), "input uri scheme == s3 (%s)", inputURI); - } - } - - @JsonProperty - public List getUris() - { - return uris; - } - - @JsonProperty("prefixes") - public List getPrefixes() - { - return prefixes; - } - - @Override - protected Collection initObjects() - { - if (!uris.isEmpty()) { - return uris; - } else { - final List objects = new ArrayList<>(); - for (final URI prefix : prefixes) { - final Iterator objectSummaryIterator = S3Utils.objectSummaryIterator( - s3Client, - Collections.singletonList(prefix), - MAX_LISTING_LENGTH - ); - - objectSummaryIterator.forEachRemaining(objects::add); - } - return objects.stream().map(S3Utils::summaryToUri).collect(Collectors.toList()); - } - } - - @Override - protected InputStream openObjectStream(URI object) throws IOException - { - try { - // Get data of the given object and open an input stream - final String bucket = object.getAuthority(); - final String key = S3Utils.extractS3Key(object); - - final S3Object s3Object = s3Client.getObject(bucket, key); - if (s3Object == null) { - throw new ISE("Failed to get an s3 object for bucket[%s] and key[%s]", bucket, key); - } - return s3Object.getObjectContent(); - } - catch (AmazonS3Exception e) { - throw new IOException(e); - } - } - - @Override - protected InputStream openObjectStream(URI object, long start) throws IOException - { - final String bucket = object.getAuthority(); - final String key = S3Utils.extractS3Key(object); - - final GetObjectRequest request = new GetObjectRequest(bucket, key); - request.setRange(start); - try { - final S3Object s3Object = s3Client.getObject(request); - if (s3Object == null) { - throw new ISE( - "Failed to get an s3 object for bucket[%s], key[%s], and start[%d]", - bucket, - key, - start - ); - } - return s3Object.getObjectContent(); - } - catch (AmazonS3Exception e) { - throw new IOException(e); - } - } - - @Override - protected InputStream wrapObjectStream(URI object, InputStream stream) throws IOException - { - return CompressionUtils.decompress(stream, S3Utils.extractS3Key(object)); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - StaticS3FirehoseFactory that = (StaticS3FirehoseFactory) o; - - return Objects.equals(uris, that.uris) && - Objects.equals(prefixes, that.prefixes) && - getMaxCacheCapacityBytes() == that.getMaxCacheCapacityBytes() && - getMaxFetchCapacityBytes() == that.getMaxFetchCapacityBytes() && - getPrefetchTriggerBytes() == that.getPrefetchTriggerBytes() && - getFetchTimeout() == that.getFetchTimeout() && - getMaxFetchRetry() == that.getMaxFetchRetry(); - } - - @Override - public int hashCode() - { - return Objects.hash( - uris, - prefixes, - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry() - ); - } - - @Override - protected Predicate getRetryCondition() - { - return S3Utils.S3RETRY; - } - - @Override - public 
FiniteFirehoseFactory withSplit(InputSplit split) - { - return new StaticS3FirehoseFactory( - s3Client, - Collections.singletonList(split.get()), - null, - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry() - ); - } -} diff --git a/extensions-core/s3-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule b/extensions-core/s3-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule index 51968bbb97bf..4861e22cb3d5 100644 --- a/extensions-core/s3-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule +++ b/extensions-core/s3-extensions/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule @@ -14,5 +14,4 @@ # limitations under the License. org.apache.druid.storage.s3.S3StorageDruidModule -org.apache.druid.firehose.s3.S3FirehoseDruidModule org.apache.druid.data.input.s3.S3InputSourceDruidModule diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/firehose/s3/StaticS3FirehoseFactoryTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/firehose/s3/StaticS3FirehoseFactoryTest.java deleted file mode 100644 index c809bf37040d..000000000000 --- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/firehose/s3/StaticS3FirehoseFactoryTest.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.firehose.s3; - -import com.amazonaws.services.s3.AmazonS3Client; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.s3.S3InputSourceTest; -import org.apache.druid.storage.s3.NoopServerSideEncryption; -import org.apache.druid.storage.s3.ServerSideEncryptingAmazonS3; -import org.easymock.EasyMock; -import org.junit.Assert; -import org.junit.Test; - -import java.io.IOException; -import java.net.URI; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; -import java.util.stream.Collectors; - -/** - */ -public class StaticS3FirehoseFactoryTest -{ - private static final AmazonS3Client S3_CLIENT = EasyMock.createNiceMock(AmazonS3Client.class); - private static final ServerSideEncryptingAmazonS3 SERVICE = new ServerSideEncryptingAmazonS3( - S3_CLIENT, - new NoopServerSideEncryption() - ); - - @Test - public void testSerde() throws Exception - { - final ObjectMapper mapper = S3InputSourceTest.createS3ObjectMapper(); - - final List uris = Arrays.asList( - new URI("s3://foo/bar/file.gz"), - new URI("s3://bar/foo/file2.gz") - ); - - final StaticS3FirehoseFactory factory = new StaticS3FirehoseFactory( - SERVICE, - uris, - null, - 2048L, - 1024L, - 512L, - 100L, - 5 - ); - - final StaticS3FirehoseFactory outputFact = mapper.readValue( - mapper.writeValueAsString(factory), - StaticS3FirehoseFactory.class - ); - - Assert.assertEquals(factory, outputFact); - } - - @Test - public void testWithSplit() throws IOException - { - final List uris = Arrays.asList( - URI.create("s3://foo/bar/file.gz"), - URI.create("s3://bar/foo/file2.gz") - ); - uris.sort(Comparator.comparing(URI::toString)); - - final StaticS3FirehoseFactory factory = new StaticS3FirehoseFactory( - SERVICE, - uris, - null, - 2048L, - 1024L, - 512L, - 100L, - 5 - ); - final List> subFactories = factory - .getSplits(null) - .map(factory::withSplit) - .sorted(Comparator.comparing(eachFactory -> { - final StaticS3FirehoseFactory staticS3FirehoseFactory = (StaticS3FirehoseFactory) eachFactory; - return staticS3FirehoseFactory.getUris().toString(); - })) - .collect(Collectors.toList()); - - Assert.assertEquals(uris.size(), subFactories.size()); - for (int i = 0; i < uris.size(); i++) { - final StaticS3FirehoseFactory staticS3FirehoseFactory = (StaticS3FirehoseFactory) subFactories.get(i); - final List subFactoryUris = staticS3FirehoseFactory.getUris(); - Assert.assertEquals(1, subFactoryUris.size()); - Assert.assertEquals(uris.get(i), subFactoryUris.get(0)); - } - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceFirehoseModule.java b/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceFirehoseModule.java index 83d9825d14f4..8e4952210997 100644 --- a/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceFirehoseModule.java +++ b/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceFirehoseModule.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; -import org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory; import org.apache.druid.initialization.DruidModule; import org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory; @@ -38,8 +37,7 @@ public List getJacksonModules() return ImmutableList.of( new 
SimpleModule("IndexingServiceFirehoseModule") .registerSubtypes( - new NamedType(EventReceiverFirehoseFactory.class, "receiver"), - new NamedType(IngestSegmentFirehoseFactory.class, "ingestSegment") + new NamedType(EventReceiverFirehoseFactory.class, "receiver") ) ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java deleted file mode 100644 index 8714fa6933ac..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.common; - -import com.google.common.collect.BiMap; -import com.google.common.collect.HashBiMap; -import com.google.common.collect.Lists; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.TimelineObjectHolder; -import org.apache.druid.timeline.partition.PartitionChunk; - -import javax.annotation.Nullable; -import javax.validation.constraints.NotNull; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - -/** - * @deprecated only used by {@link org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory} - */ -@Deprecated -public class ReingestionTimelineUtils -{ - /** - * @param timelineSegments A list of timeline objects, such as that returned by VersionedIntervalTimeline.lookup(). - * @param excludeDimensions Dimensions to be excluded - * @return A list of all the unique dimension column names present in the segments within timelineSegments - */ - public static List getUniqueDimensions( - List> timelineSegments, - @Nullable Set excludeDimensions - ) - { - final BiMap uniqueDims = HashBiMap.create(); - - // Here, we try to retain the order of dimensions as they were specified since the order of dimensions may be - // optimized for performance. - // Dimensions are extracted from the recent segments to olders because recent segments are likely to be queried more - // frequently, and thus the performance should be optimized for recent ones rather than old ones. 
- - // timelineSegments are sorted in order of interval - int index = 0; - for (TimelineObjectHolder<String, DataSegment> timelineHolder : Lists.reverse(timelineSegments)) { - for (PartitionChunk<DataSegment> chunk : timelineHolder.getObject()) { - for (String dimension : chunk.getObject().getDimensions()) { - if (!uniqueDims.containsKey(dimension) && - (excludeDimensions == null || !excludeDimensions.contains(dimension))) { - uniqueDims.put(dimension, index++); - } - } - } - } - - final BiMap<Integer, String> orderedDims = uniqueDims.inverse(); - return IntStream.range(0, orderedDims.size()) - .mapToObj(orderedDims::get) - .collect(Collectors.toList()); - } - - /** - * @param timelineSegments A list of timeline objects, such as that returned by VersionedIntervalTimeline.lookup(). - * @return A list of all the unique metric column names present in the segments within timelineSegments - */ - public static List<String> getUniqueMetrics(List<TimelineObjectHolder<String, DataSegment>> timelineSegments) - { - final BiMap<String, Integer> uniqueMetrics = HashBiMap.create(); - - // Here, we try to retain the order of metrics as they were specified. Metrics are extracted from the most recent - // segments to the oldest. - - // timelineSegments are sorted in order of interval - int[] index = {0}; - for (TimelineObjectHolder<String, DataSegment> timelineHolder : Lists.reverse(timelineSegments)) { - for (PartitionChunk<DataSegment> chunk : timelineHolder.getObject()) { - for (String metric : chunk.getObject().getMetrics()) { - uniqueMetrics.computeIfAbsent(metric, k -> index[0]++); - } - } - } - - final BiMap<Integer, String> orderedMetrics = uniqueMetrics.inverse(); - return IntStream.range(0, orderedMetrics.size()) - .mapToObj(orderedMetrics::get) - .collect(Collectors.toList()); - } - - /** - * Utility function to get the dimensions that should be ingested. The preference order is: - * - explicit dimensions, if they are provided; - * - custom dimensions from the dimensionsSpec, if present; - * - otherwise, dimensions computed from the timeline, minus any dimension exclusions. - * - * @param explicitDimensions sent as part of the re-ingestion InputSource. - * @param dimensionsSpec from the provided ingestion spec. - * @param timeLineSegments for the datasource that is being read.
- * @return the dimensions to ingest - */ - public static List<String> getDimensionsToReingest( - @Nullable List<String> explicitDimensions, - @NotNull DimensionsSpec dimensionsSpec, - @NotNull List<TimelineObjectHolder<String, DataSegment>> timeLineSegments) - { - final List<String> dims; - if (explicitDimensions != null) { - dims = explicitDimensions; - } else if (dimensionsSpec.hasCustomDimensions()) { - dims = dimensionsSpec.getDimensionNames(); - } else { - dims = ReingestionTimelineUtils.getUniqueDimensions( - timeLineSegments, - dimensionsSpec.getDimensionExclusions() - ); - } - return dims; - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index 8fcadc301d22..076298cca07f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -25,7 +25,6 @@ import com.google.common.collect.ImmutableList; import com.google.errorprone.annotations.concurrent.GuardedBy; import org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec; -import org.apache.druid.data.input.FirehoseFactory; import org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.InputSource; @@ -45,8 +44,6 @@ import org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig; import org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException; import org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig; -import org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory; -import org.apache.druid.indexing.firehose.WindowedSegmentId; import org.apache.druid.indexing.input.InputRowSchemas; import org.apache.druid.indexing.overlord.Segments; import org.apache.druid.java.util.common.IAE; @@ -599,13 +596,9 @@ static Granularity findGranularityFromSegments(List<DataSegment> segments) } /** - * If the given firehoseFactory is {@link IngestSegmentFirehoseFactory}, then it finds the segments to lock - * from the firehoseFactory. This is because those segments will be read by this task regardless of what - * intervalsToRead would filter, so they need to be locked. *
<p>
- However, if the firehoseFactory is not an IngestSegmentFirehoseFactory, this task will overwrite some segments - with data read from some input source outside of Druid. As a result, only the segments falling in intervalsToRead - should be locked. + This task will overwrite some segments with data read from some input source outside of Druid. + As a result, only the segments falling in intervalsToRead should be locked. *
<p>
* The order of segments within the returned list is unspecified, but each segment is guaranteed to appear in the list * only once. @@ -613,48 +606,14 @@ static Granularity findGranularityFromSegments(List segments) protected static List findInputSegments( String dataSource, TaskActionClient actionClient, - List intervalsToRead, - FirehoseFactory firehoseFactory + List intervalsToRead ) throws IOException { - if (firehoseFactory instanceof IngestSegmentFirehoseFactory) { - // intervalsToRead is ignored here. - final List inputSegments = ((IngestSegmentFirehoseFactory) firehoseFactory).getSegments(); - if (inputSegments == null) { - final Interval inputInterval = Preconditions.checkNotNull( - ((IngestSegmentFirehoseFactory) firehoseFactory).getInterval(), - "input interval" - ); - - return ImmutableList.copyOf( - actionClient.submit( - new RetrieveUsedSegmentsAction(dataSource, inputInterval, null, Segments.ONLY_VISIBLE) - ) - ); - } else { - final List inputSegmentIds = - inputSegments.stream().map(WindowedSegmentId::getSegmentId).collect(Collectors.toList()); - final Collection dataSegmentsInIntervals = actionClient.submit( - new RetrieveUsedSegmentsAction( - dataSource, - null, - inputSegments.stream() - .flatMap(windowedSegmentId -> windowedSegmentId.getIntervals().stream()) - .collect(Collectors.toSet()), - Segments.ONLY_VISIBLE - ) - ); - return dataSegmentsInIntervals.stream() - .filter(segment -> inputSegmentIds.contains(segment.getId().toString())) - .collect(Collectors.toList()); - } - } else { - return ImmutableList.copyOf( - actionClient.submit( - new RetrieveUsedSegmentsAction(dataSource, null, intervalsToRead, Segments.ONLY_VISIBLE) - ) - ); - } + return ImmutableList.copyOf( + actionClient.submit( + new RetrieveUsedSegmentsAction(dataSource, null, intervalsToRead, Segments.ONLY_VISIBLE) + ) + ); } /** diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 288107d0b944..6af2a394fb13 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -32,14 +32,11 @@ import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; import com.google.common.util.concurrent.ListenableFuture; -import org.apache.druid.data.input.FiniteFirehoseFactory; import org.apache.druid.data.input.FirehoseFactory; -import org.apache.druid.data.input.FirehoseFactoryToInputSourceAdaptor; import org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.InputSource; import org.apache.druid.data.input.Rows; -import org.apache.druid.data.input.impl.InputRowParser; import org.apache.druid.hll.HyperLogLogCollector; import org.apache.druid.indexer.Checks; import org.apache.druid.indexer.IngestionState; @@ -269,8 +266,7 @@ public List findSegmentsToLock(TaskActionClient taskActionClient, L return findInputSegments( getDataSource(), taskActionClient, - intervals, - ingestionSchema.ioConfig.firehoseFactory + intervals ); } @@ -486,9 +482,7 @@ public TaskStatus runTask(final TaskToolbox toolbox) .inputIntervals() .isEmpty(); - final InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource( - ingestionSchema.getDataSchema().getParser() - ); + final InputSource inputSource = ingestionSchema.getIOConfig().getInputSource(); final File tmpDir = 
toolbox.getIndexingTmpDir(); @@ -1196,16 +1190,9 @@ public InputFormat getInputFormat() return inputFormat; } - public InputSource getNonNullInputSource(@Nullable InputRowParser inputRowParser) + public InputSource getNonNullInputSource() { - if (inputSource == null) { - return new FirehoseFactoryToInputSourceAdaptor( - (FiniteFirehoseFactory) firehoseFactory, - inputRowParser - ); - } else { - return inputSource; - } + return Preconditions.checkNotNull(inputSource, "inputSource"); } public InputFormat getNonNullInputFormat() diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/InputSourceSplitParallelIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/InputSourceSplitParallelIndexTaskRunner.java index 3f41dba24c16..bfdad0115562 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/InputSourceSplitParallelIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/InputSourceSplitParallelIndexTaskRunner.java @@ -20,7 +20,6 @@ package org.apache.druid.indexing.common.task.batch.parallel; import org.apache.druid.data.input.FirehoseFactory; -import org.apache.druid.data.input.FirehoseFactoryToInputSourceAdaptor; import org.apache.druid.data.input.InputSource; import org.apache.druid.data.input.InputSplit; import org.apache.druid.data.input.impl.SplittableInputSource; @@ -59,9 +58,7 @@ abstract class InputSourceSplitParallelIndexTaskRunner newTaskSpec(InputSplit split) { final FirehoseFactory firehoseFactory; final InputSource inputSource; - if (baseInputSource instanceof FirehoseFactoryToInputSourceAdaptor) { - firehoseFactory = ((FirehoseFactoryToInputSourceAdaptor) baseInputSource).getFirehoseFactory().withSplit(split); - inputSource = null; - } else { - firehoseFactory = null; - inputSource = baseInputSource.withSplit(split); - } + firehoseFactory = null; + inputSource = baseInputSource.withSplit(split); final ParallelIndexIngestionSpec subTaskIngestionSpec = new ParallelIndexIngestionSpec( ingestionSchema.getDataSchema(), new ParallelIndexIOConfig( diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexIngestionSpec.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexIngestionSpec.java index 5d057cded060..60a164560d55 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexIngestionSpec.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexIngestionSpec.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableList; -import org.apache.druid.data.input.FirehoseFactoryToInputSourceAdaptor; import org.apache.druid.indexer.Checks; import org.apache.druid.indexer.Property; import org.apache.druid.java.util.common.IAE; @@ -45,9 +44,7 @@ public ParallelIndexIngestionSpec( super(dataSchema, ioConfig, tuningConfig); if (dataSchema.getParserMap() != null && ioConfig.getInputSource() != null) { - if (!(ioConfig.getInputSource() instanceof FirehoseFactoryToInputSourceAdaptor)) { - throw new IAE("Cannot use parser and inputSource together. Try using inputFormat instead of parser."); - } + throw new IAE("Cannot use parser and inputSource together. 
Try using inputFormat instead of parser."); } if (ioConfig.getInputSource() != null && ioConfig.getInputSource().needsFormat()) { Checks.checkOneNotNullOrEmpty( diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index f8381ffafe8f..950f6aaf6951 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -30,7 +30,6 @@ import org.apache.datasketches.hll.Union; import org.apache.datasketches.memory.Memory; import org.apache.druid.common.guava.FutureUtils; -import org.apache.druid.data.input.FiniteFirehoseFactory; import org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputSource; import org.apache.druid.indexer.IngestionState; @@ -121,9 +120,9 @@ import java.util.stream.Collectors; /** - * ParallelIndexSupervisorTask is capable of running multiple subTasks for parallel indexing. This is - * applicable if the input {@link FiniteFirehoseFactory} is splittable. While this task is running, it can submit - * multiple child tasks to overlords. This task succeeds only when all its child tasks succeed; otherwise it fails. + * ParallelIndexSupervisorTask is capable of running multiple subTasks for parallel indexing. + * While this task is running, it can submit multiple child tasks to overlords. This task + * succeeds only when all its child tasks succeed; otherwise it fails. * * @see ParallelIndexTaskRunner */ @@ -241,9 +240,7 @@ public ParallelIndexSupervisorTask( checkPartitionsSpecForForceGuaranteedRollup(ingestionSchema.getTuningConfig().getGivenOrDefaultPartitionsSpec()); } - this.baseInputSource = ingestionSchema.getIOConfig().getNonNullInputSource( - ingestionSchema.getDataSchema().getParser() - ); + this.baseInputSource = ingestionSchema.getIOConfig().getNonNullInputSource(); this.missingIntervalsInOverwriteMode = (getIngestionMode() != IngestionMode.APPEND) && ingestionSchema.getDataSchema() @@ -426,8 +423,7 @@ public List findSegmentsToLock(TaskActionClient taskActionClient, L return findInputSegments( getDataSource(), taskActionClient, - intervals, - ingestionSchema.getIOConfig().getFirehoseFactory() + intervals ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionCardinalityTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionCardinalityTask.java index 0b4def915ac0..250df1afc3f2 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionCardinalityTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionCardinalityTask.java @@ -156,9 +156,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception HashedPartitionsSpec partitionsSpec = (HashedPartitionsSpec) tuningConfig.getPartitionsSpec(); Preconditions.checkNotNull(partitionsSpec, "partitionsSpec required in tuningConfig"); - InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource( - ingestionSchema.getDataSchema().getParser() - ); + InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource(); InputFormat inputFormat = 
inputSource.needsFormat() ? ParallelIndexSupervisorTask.getInputFormat(ingestionSchema) : null; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java index e491e0244250..3764ec3884b4 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java @@ -203,9 +203,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception ); boolean isAssumeGrouped = partitionsSpec.isAssumeGrouped(); - InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource( - ingestionSchema.getDataSchema().getParser() - ); + InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource(); InputFormat inputFormat = inputSource.needsFormat() ? ParallelIndexSupervisorTask.getInputFormat(ingestionSchema) : null; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentGenerateTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentGenerateTask.java index 03a66d1fd972..212f0c1ab17a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentGenerateTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentGenerateTask.java @@ -109,9 +109,7 @@ abstract class PartialSegmentGenerateTask e @Override public final TaskStatus runTask(TaskToolbox toolbox) throws Exception { - final InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource( - ingestionSchema.getDataSchema().getParser() - ); + final InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource(); final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientProvider().build( supervisorTaskId, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexTaskRunner.java index 1d51d6fa9cab..7f781e466edd 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexTaskRunner.java @@ -22,7 +22,6 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.mutable.MutableObject; import org.apache.druid.data.input.FirehoseFactory; -import org.apache.druid.data.input.FirehoseFactoryToInputSourceAdaptor; import org.apache.druid.data.input.InputSource; import org.apache.druid.data.input.InputSplit; import org.apache.druid.data.input.impl.SplittableInputSource; @@ -118,9 +117,7 @@ public class SinglePhaseParallelIndexTaskRunner extends ParallelIndexPhaseRunner context ); this.ingestionSchema = ingestionSchema; - this.baseInputSource = (SplittableInputSource) ingestionSchema.getIOConfig().getNonNullInputSource( - ingestionSchema.getDataSchema().getParser() - ); + this.baseInputSource = (SplittableInputSource) ingestionSchema.getIOConfig().getNonNullInputSource(); } 
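With getNonNullInputSource() now failing fast on a null inputSource, every spec reaching these runners must carry an explicit inputSource (plus an inputFormat where the source needs one). A minimal sketch of that wiring, reusing the five-argument ParallelIndexIOConfig constructor that the test changes later in this patch rely on; the directory, filter, and JSON format arguments are hypothetical placeholders, not values from this patch:

    // Sketch only: the inputSource/inputFormat pair that replaces a firehoseFactory.
    // The file path and filter are made up for illustration.
    ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(
        null,                                                 // firehoseFactory: always null now
        new LocalInputSource(new File("/tmp/data"), "*.json"),
        new JsonInputFormat(null, null, null),
        false,                                                // appendToExisting
        null                                                  // dropExisting
    );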
@VisibleForTesting @@ -171,13 +168,9 @@ SubTaskSpec newTaskSpec(InputSplit split) { final FirehoseFactory firehoseFactory; final InputSource inputSource; - if (baseInputSource instanceof FirehoseFactoryToInputSourceAdaptor) { - firehoseFactory = ((FirehoseFactoryToInputSourceAdaptor) baseInputSource).getFirehoseFactory().withSplit(split); - inputSource = null; - } else { - firehoseFactory = null; - inputSource = baseInputSource.withSplit(split); - } + firehoseFactory = null; + inputSource = baseInputSource.withSplit(split); + final Map subtaskContext = new HashMap<>(getContext()); return new SinglePhaseSubTaskSpec( getBaseSubtaskSpecName() + "_" + getAndIncrementNextSpecId(), diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java index db325f8778dc..b1cb7eab86c9 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java @@ -247,9 +247,7 @@ public TaskStatus runTask(final TaskToolbox toolbox) ingestionSchema.getTuningConfig().getMaxSavedParseExceptions() ); - final InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource( - ingestionSchema.getDataSchema().getParser() - ); + final InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource(); final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientProvider().build( supervisorTaskId, @@ -307,8 +305,7 @@ public List findSegmentsToLock(TaskActionClient taskActionClient, L return findInputSegments( getDataSource(), taskActionClient, - intervals, - ingestionSchema.getIOConfig().getFirehoseFactory() + intervals ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java deleted file mode 100644 index f20a0ddd1a1b..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.firehose; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import org.apache.druid.client.coordinator.CoordinatorClient; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.SegmentsSplitHintSpec; -import org.apache.druid.data.input.SplitHintSpec; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.indexing.common.ReingestionTimelineUtils; -import org.apache.druid.indexing.common.RetryPolicyFactory; -import org.apache.druid.indexing.common.SegmentCacheManagerFactory; -import org.apache.druid.indexing.input.DruidInputSource; -import org.apache.druid.java.util.common.HumanReadableBytes; -import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.parsers.ParseException; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.query.filter.DimFilter; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.QueryableIndexStorageAdapter; -import org.apache.druid.segment.loading.SegmentCacheManager; -import org.apache.druid.segment.loading.SegmentLoadingException; -import org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose; -import org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter; -import org.apache.druid.segment.transform.TransformSpec; -import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.TimelineObjectHolder; -import org.apache.druid.timeline.partition.PartitionChunk; -import org.joda.time.Interval; - -import javax.annotation.Nullable; -import java.io.File; -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.stream.Stream; - -/** - * @deprecated use {@link DruidInputSource} instead - */ -@Deprecated -public class IngestSegmentFirehoseFactory implements FiniteFirehoseFactory> -{ - private static final EmittingLogger log = new EmittingLogger(IngestSegmentFirehoseFactory.class); - private final String dataSource; - // Exactly one of interval and segmentIds should be non-null. Typically 'interval' is specified directly - // by the user creating this firehose and 'segmentIds' is used for sub-tasks if it is split for parallel - // batch ingestion. - @Nullable - private final Interval interval; - @Nullable - private final List segmentIds; - private final DimFilter dimFilter; - private final List dimensions; - private final List metrics; - @Nullable - private final Long maxInputSegmentBytesPerTask; - private final IndexIO indexIO; - private final CoordinatorClient coordinatorClient; - private final SegmentCacheManagerFactory segmentCacheManagerFactory; - private final RetryPolicyFactory retryPolicyFactory; - - private List>> splits; - - @JsonCreator - public IngestSegmentFirehoseFactory( - @JsonProperty("dataSource") final String dataSource, - @JsonProperty("interval") @Nullable Interval interval, - // Specifying "segments" is intended only for when this FirehoseFactory has split itself, - // not for direct end user use. 
- @JsonProperty("segments") @Nullable List segmentIds, - @JsonProperty("filter") DimFilter dimFilter, - @JsonProperty("dimensions") List dimensions, - @JsonProperty("metrics") List metrics, - @JsonProperty("maxInputSegmentBytesPerTask") @Deprecated @Nullable Long maxInputSegmentBytesPerTask, - @JacksonInject IndexIO indexIO, - @JacksonInject CoordinatorClient coordinatorClient, - @JacksonInject SegmentCacheManagerFactory segmentCacheManagerFactory, - @JacksonInject RetryPolicyFactory retryPolicyFactory - ) - { - Preconditions.checkNotNull(dataSource, "dataSource"); - if ((interval == null && segmentIds == null) || (interval != null && segmentIds != null)) { - throw new IAE("Specify exactly one of 'interval' and 'segments'"); - } - this.dataSource = dataSource; - this.interval = interval; - this.segmentIds = segmentIds; - this.dimFilter = dimFilter; - this.dimensions = dimensions; - this.metrics = metrics; - this.maxInputSegmentBytesPerTask = maxInputSegmentBytesPerTask; - this.indexIO = Preconditions.checkNotNull(indexIO, "null IndexIO"); - this.coordinatorClient = Preconditions.checkNotNull(coordinatorClient, "null CoordinatorClient"); - this.segmentCacheManagerFactory = Preconditions.checkNotNull(segmentCacheManagerFactory, "null segmentCacheManagerFactory"); - this.retryPolicyFactory = Preconditions.checkNotNull(retryPolicyFactory, "null RetryPolicyFactory"); - } - - @Override - public FiniteFirehoseFactory> withSplit(InputSplit> split) - { - return new IngestSegmentFirehoseFactory( - dataSource, - null, - split.get(), - dimFilter, - dimensions, - metrics, - maxInputSegmentBytesPerTask, - indexIO, - coordinatorClient, - segmentCacheManagerFactory, - retryPolicyFactory - ); - } - - @JsonProperty - public String getDataSource() - { - return dataSource; - } - - @JsonProperty - @Nullable - public Interval getInterval() - { - return interval; - } - - @JsonProperty - @Nullable - public List getSegments() - { - return segmentIds; - } - - @JsonProperty("filter") - public DimFilter getDimensionsFilter() - { - return dimFilter; - } - - @JsonProperty - public List getDimensions() - { - return dimensions; - } - - @JsonProperty - public List getMetrics() - { - return metrics; - } - - @Nullable - @JsonProperty - public Long getMaxInputSegmentBytesPerTask() - { - return maxInputSegmentBytesPerTask; - } - - @Override - public Firehose connect(InputRowParser inputRowParser, File temporaryDirectory) throws ParseException - { - log.debug( - "Connecting firehose: dataSource[%s], interval[%s], segmentIds[%s]", - dataSource, - interval, - segmentIds - ); - - final List> timeLineSegments = getTimeline(); - - // Download all segments locally. - // Note: this requires enough local storage space to fit all of the segments, even though - // IngestSegmentFirehose iterates over the segments in series. We may want to change this - // to download files lazily, perhaps sharing code with PrefetchableTextFilesFirehoseFactory. 
- final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory); - Map segmentFileMap = Maps.newLinkedHashMap(); - for (TimelineObjectHolder holder : timeLineSegments) { - for (PartitionChunk chunk : holder.getObject()) { - final DataSegment segment = chunk.getObject(); - - segmentFileMap.computeIfAbsent(segment, k -> { - try { - return segmentCacheManager.getSegmentFiles(segment); - } - catch (SegmentLoadingException e) { - throw new RuntimeException(e); - } - }); - } - } - - final List dims = ReingestionTimelineUtils.getDimensionsToReingest( - dimensions, - inputRowParser.getParseSpec().getDimensionsSpec(), - timeLineSegments - ); - final List metricsList = metrics == null - ? ReingestionTimelineUtils.getUniqueMetrics(timeLineSegments) - : metrics; - - final List adapters = Lists.newArrayList( - Iterables.concat( - Iterables.transform( - timeLineSegments, - new Function, Iterable>() { - @Override - public Iterable apply(final TimelineObjectHolder holder) - { - return - Iterables.transform( - holder.getObject(), - new Function, WindowedStorageAdapter>() { - @Override - public WindowedStorageAdapter apply(final PartitionChunk input) - { - final DataSegment segment = input.getObject(); - try { - return new WindowedStorageAdapter( - new QueryableIndexStorageAdapter( - indexIO.loadIndex( - Preconditions.checkNotNull( - segmentFileMap.get(segment), - "File for segment %s", segment.getId() - ) - ) - ), - holder.getInterval() - ); - } - catch (IOException e) { - throw new RuntimeException(e); - } - } - } - ); - } - } - ) - ) - ); - - final TransformSpec transformSpec = TransformSpec.fromInputRowParser(inputRowParser); - return new IngestSegmentFirehose(adapters, transformSpec, dims, metricsList, dimFilter); - } - - private List> getTimeline() - { - if (interval == null) { - return DruidInputSource.getTimelineForSegmentIds(coordinatorClient, dataSource, segmentIds); - } else { - return DruidInputSource.getTimelineForInterval(coordinatorClient, retryPolicyFactory, dataSource, interval); - } - } - - private void initializeSplitsIfNeeded(@Nullable SplitHintSpec splitHintSpec) - { - if (splits != null) { - return; - } - - splits = Lists.newArrayList( - DruidInputSource.createSplits( - coordinatorClient, - retryPolicyFactory, - dataSource, - interval, - splitHintSpec == null - ? new SegmentsSplitHintSpec( - maxInputSegmentBytesPerTask == null - ? null - : new HumanReadableBytes(maxInputSegmentBytesPerTask), - null - ) - : splitHintSpec - ) - ); - } - - @Override - public boolean isSplittable() - { - // Specifying 'segments' to this factory instead of 'interval' is intended primarily for internal use by - // parallel batch injection: we don't need to support splitting a list of segments. 
- return interval != null; - } - - @Override - public Stream>> getSplits(@Nullable SplitHintSpec splitHintSpec) - { - initializeSplitsIfNeeded(splitHintSpec); - return splits.stream(); - } - - @Override - public int getNumSplits(@Nullable SplitHintSpec splitHintSpec) - { - initializeSplitsIfNeeded(splitHintSpec); - return splits.size(); - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/IndexTaskSamplerSpec.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/IndexTaskSamplerSpec.java index 4140e014ef7a..a0b84e1e7571 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/IndexTaskSamplerSpec.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/IndexTaskSamplerSpec.java @@ -25,13 +25,9 @@ import com.google.common.base.Preconditions; import org.apache.druid.client.indexing.SamplerResponse; import org.apache.druid.client.indexing.SamplerSpec; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.FirehoseFactory; -import org.apache.druid.data.input.FirehoseFactoryToInputSourceAdaptor; import org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputSource; import org.apache.druid.indexing.common.task.IndexTask; -import org.apache.druid.java.util.common.IAE; import org.apache.druid.segment.indexing.DataSchema; import javax.annotation.Nullable; @@ -61,28 +57,17 @@ public IndexTaskSamplerSpec( Preconditions.checkNotNull(ingestionSpec.getIOConfig(), "[spec.ioConfig] is required"); - if (ingestionSpec.getIOConfig().getInputSource() != null) { - this.inputSource = ingestionSpec.getIOConfig().getInputSource(); - if (ingestionSpec.getIOConfig().getInputSource().needsFormat()) { - this.inputFormat = Preconditions.checkNotNull( - ingestionSpec.getIOConfig().getInputFormat(), - "[spec.ioConfig.inputFormat] is required" - ); - } else { - this.inputFormat = null; - } - } else { - final FirehoseFactory firehoseFactory = Preconditions.checkNotNull( - ingestionSpec.getIOConfig().getFirehoseFactory(), - "[spec.ioConfig.firehose] is required" - ); - if (!(firehoseFactory instanceof FiniteFirehoseFactory)) { - throw new IAE("firehose should be an instance of FiniteFirehoseFactory"); - } - this.inputSource = new FirehoseFactoryToInputSourceAdaptor( - (FiniteFirehoseFactory) firehoseFactory, - ingestionSpec.getDataSchema().getParser() + this.inputSource = Preconditions.checkNotNull( + ingestionSpec.getIOConfig().getInputSource(), + "[spec.ioConfig.inputSource] is required" + ); + + if (inputSource.needsFormat()) { + this.inputFormat = Preconditions.checkNotNull( + ingestionSpec.getIOConfig().getInputFormat(), + "[spec.ioConfig.inputFormat] is required" ); + } else { this.inputFormat = null; } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java index 3be7a4feb191..424511c8e8e0 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java @@ -23,20 +23,9 @@ import com.google.common.base.Throwables; import org.apache.druid.client.indexing.SamplerResponse; import org.apache.druid.client.indexing.SamplerSpec; -import org.apache.druid.data.input.ByteBufferInputRowParser; -import 
org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.FirehoseFactoryToInputSourceAdaptor; -import org.apache.druid.data.input.InputEntity; import org.apache.druid.data.input.InputFormat; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.InputRowListPlusRawValues; import org.apache.druid.data.input.InputSource; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.SplitHintSpec; import org.apache.druid.data.input.impl.ByteEntity; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.indexing.overlord.sampler.InputSourceSampler; import org.apache.druid.indexing.overlord.sampler.SamplerConfig; import org.apache.druid.indexing.overlord.sampler.SamplerException; @@ -44,17 +33,9 @@ import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIOConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorTuningConfig; -import org.apache.druid.java.util.common.parsers.CloseableIterator; -import org.apache.druid.java.util.common.parsers.ParseException; import org.apache.druid.segment.indexing.DataSchema; import javax.annotation.Nullable; -import java.io.File; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.List; -import java.util.Map; -import java.util.stream.Stream; public abstract class SeekableStreamSamplerSpec implements SamplerSpec { @@ -87,139 +68,29 @@ public SamplerResponse sample() { final InputSource inputSource; final InputFormat inputFormat; - if (dataSchema.getParser() != null) { - inputSource = new FirehoseFactoryToInputSourceAdaptor( - new SeekableStreamSamplerFirehoseFactory(), - dataSchema.getParser() - ); - inputFormat = null; - } else { - RecordSupplier recordSupplier; - try { - recordSupplier = createRecordSupplier(); - } - catch (Exception e) { - throw new SamplerException(e, "Unable to create RecordSupplier: %s", Throwables.getRootCause(e).getMessage()); - } + RecordSupplier recordSupplier; - inputSource = new RecordSupplierInputSource<>( - ioConfig.getStream(), - recordSupplier, - ioConfig.isUseEarliestSequenceNumber() - ); - inputFormat = Preconditions.checkNotNull( - ioConfig.getInputFormat(), - "[spec.ioConfig.inputFormat] is required" - ); + try { + recordSupplier = createRecordSupplier(); } - - return inputSourceSampler.sample(inputSource, inputFormat, dataSchema, samplerConfig); - } - - protected abstract RecordSupplier createRecordSupplier(); - - private class SeekableStreamSamplerFirehoseFactory implements FiniteFirehoseFactory - { - @Override - public Firehose connect(ByteBufferInputRowParser parser, @Nullable File temporaryDirectory) - { - throw new UnsupportedOperationException(); - } - - @Override - public Firehose connectForSampler(ByteBufferInputRowParser parser, @Nullable File temporaryDirectory) - { - return new SeekableStreamSamplerFirehose(parser); - } - - @Override - public boolean isSplittable() - { - return false; - } - - @Override - public Stream> getSplits(@Nullable SplitHintSpec splitHintSpec) - { - throw new UnsupportedOperationException(); + catch (Exception e) { + throw new SamplerException(e, "Unable to create RecordSupplier: %s", Throwables.getRootCause(e).getMessage()); } - @Override - public int getNumSplits(@Nullable SplitHintSpec splitHintSpec) - { - 
throw new UnsupportedOperationException(); - } + inputSource = new RecordSupplierInputSource<>( + ioConfig.getStream(), + recordSupplier, + ioConfig.isUseEarliestSequenceNumber() + ); + inputFormat = Preconditions.checkNotNull( + ioConfig.getInputFormat(), + "[spec.ioConfig.inputFormat] is required" + ); - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - throw new UnsupportedOperationException(); - } + return inputSourceSampler.sample(inputSource, inputFormat, dataSchema, samplerConfig); } - private class SeekableStreamSamplerFirehose implements Firehose - { - private final InputRowParser parser; - private final CloseableIterator entityIterator; - - protected SeekableStreamSamplerFirehose(InputRowParser parser) - { - this.parser = parser; - if (parser instanceof StringInputRowParser) { - ((StringInputRowParser) parser).startFileFromBeginning(); - } - - RecordSupplierInputSource inputSource = new RecordSupplierInputSource<>( - ioConfig.getStream(), - createRecordSupplier(), - ioConfig.isUseEarliestSequenceNumber() - ); - this.entityIterator = inputSource.createEntityIterator(); - } - - @Override - public boolean hasMore() - { - return entityIterator.hasNext(); - } - - @Override - public InputRow nextRow() - { - throw new UnsupportedOperationException(); - } - - @Override - public InputRowListPlusRawValues nextRowWithRaw() - { - final ByteBuffer bb = ((ByteEntity) entityIterator.next()).getBuffer(); - - final Map rawColumns; - try { - if (parser instanceof StringInputRowParser) { - rawColumns = ((StringInputRowParser) parser).buildStringKeyMap(bb); - } else { - rawColumns = null; - } - } - catch (ParseException e) { - return InputRowListPlusRawValues.of(null, e); - } - - try { - final List rows = parser.parseBatch(bb); - return InputRowListPlusRawValues.of(rows.isEmpty() ? 
null : rows, rawColumns); - } - catch (ParseException e) { - return InputRowListPlusRawValues.of(rawColumns, e); - } - } + protected abstract RecordSupplier createRecordSupplier(); - @Override - public void close() throws IOException - { - entityIterator.close(); - } - } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/TestRealtimeTask.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/TestIndexTask.java similarity index 62% rename from indexing-service/src/test/java/org/apache/druid/indexing/common/TestRealtimeTask.java rename to indexing-service/src/test/java/org/apache/druid/indexing/common/TestIndexTask.java index e8975e39ff43..72b0a610156f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/TestRealtimeTask.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/TestIndexTask.java @@ -24,26 +24,27 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.data.input.impl.JsonInputFormat; +import org.apache.druid.data.input.impl.LocalInputSource; import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.task.RealtimeIndexTask; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.indexing.common.task.IndexTask; import org.apache.druid.indexing.common.task.TaskResource; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeIOConfig; -import org.apache.druid.segment.realtime.FireDepartment; -import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory; import java.io.File; /** */ -@JsonTypeName("test_realtime") -public class TestRealtimeTask extends RealtimeIndexTask +@JsonTypeName("test_index") +public class TestIndexTask extends IndexTask { private final TaskStatus status; @JsonCreator - public TestRealtimeTask( + public TestIndexTask( @JsonProperty("id") String id, @JsonProperty("resource") TaskResource taskResource, @JsonProperty("dataSource") String dataSource, @@ -54,13 +55,42 @@ public TestRealtimeTask( super( id, taskResource, - new FireDepartment( + new IndexIngestionSpec( new DataSchema(dataSource, null, new AggregatorFactory[]{}, null, null, mapper), - new RealtimeIOConfig( - new LocalFirehoseFactory(new File("lol"), "rofl", null), - (schema, config, metrics) -> null + new IndexTask.IndexIOConfig( + null, + new LocalInputSource(new File("lol"), "rofl"), + new JsonInputFormat(null, null, null), + false, + false ), - null + + new IndexTask.IndexTuningConfig( + null, + null, + null, + 10, + null, + null, + null, + null, + null, + null, + new DynamicPartitionsSpec(10000, null), + new IndexSpec(), + null, + 3, + false, + null, + null, + null, + null, + null, + null, + null, + null, + null + ) ), null ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java index b0ddf61c5602..7ad65b1780c3 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java @@ -32,7 +32,6 @@ import org.apache.druid.data.input.impl.CSVParseSpec; import 
org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.ParseSpec; -import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatus; @@ -47,7 +46,6 @@ import org.apache.druid.indexing.common.config.TaskConfig; import org.apache.druid.indexing.common.task.CompactionTask.Builder; import org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig; -import org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory; import org.apache.druid.indexing.overlord.Segments; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.ISE; @@ -70,7 +68,6 @@ import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndexStorageAdapter; import org.apache.druid.segment.VirtualColumns; -import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.loading.LocalDataSegmentPuller; @@ -1488,96 +1485,6 @@ public void testRunIndexAndCompactForSameSegmentAtTheSameTime2() throws Exceptio Assert.assertEquals(TaskState.FAILED, compactionResult.lhs.getStatusCode()); } - /** - * Run a regular index task that's equivalent to the compaction task in {@link #testRunWithDynamicPartitioning()}, - * using {@link IngestSegmentFirehoseFactory}. - *
- * This is not entirely CompactionTask related, but it's similar conceptually and it requires - * similar setup to what this test suite already has. - *
- * It could be moved to a separate test class if needed. - */ - @Test - public void testRunRegularIndexTaskWithIngestSegmentFirehose() throws Exception - { - runIndexTask(); - - IndexTask indexTask = new IndexTask( - null, - null, - new IndexTask.IndexIngestionSpec( - new DataSchema( - "test", - getObjectMapper().convertValue( - new StringInputRowParser( - DEFAULT_PARSE_SPEC, - null - ), - Map.class - ), - new AggregatorFactory[]{ - new LongSumAggregatorFactory("val", "val") - }, - new UniformGranularitySpec( - Granularities.HOUR, - Granularities.MINUTE, - null - ), - null, - getObjectMapper() - ), - new IndexTask.IndexIOConfig( - new IngestSegmentFirehoseFactory( - DATA_SOURCE, - Intervals.of("2014-01-01/2014-01-02"), - null, - null, - null, - null, - null, - getIndexIO(), - coordinatorClient, - segmentCacheManagerFactory, - RETRY_POLICY_FACTORY - ), - false, - false - ), - IndexTaskTest.createTuningConfig(5000000, null, null, Long.MAX_VALUE, null, false, true) - ), - null - ); - - // This is a regular index so we need to explicitly add this context to store the CompactionState - indexTask.addToContext(Tasks.STORE_COMPACTION_STATE_KEY, true); - - final Pair> resultPair = runTask(indexTask); - - Assert.assertTrue(resultPair.lhs.isSuccess()); - - final List segments = resultPair.rhs; - Assert.assertEquals(3, segments.size()); - - for (int i = 0; i < 3; i++) { - Assert.assertEquals( - Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1), - segments.get(i).getInterval() - ); - Assert.assertEquals( - getDefaultCompactionState(Granularities.HOUR, Granularities.MINUTE, ImmutableList.of()), - segments.get(i).getLastCompactionState() - ); - if (lockGranularity == LockGranularity.SEGMENT) { - Assert.assertEquals( - new NumberedOverwriteShardSpec(32768, 0, 2, (short) 1, (short) 1), - segments.get(i).getShardSpec() - ); - } else { - Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(i).getShardSpec()); - } - } - } - private Pair> runIndexTask() throws Exception { return runIndexTask(null, null, false); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexIngestionSpecTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexIngestionSpecTest.java index 6af2e194c1f3..7555c6b1e378 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexIngestionSpecTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexIngestionSpecTest.java @@ -20,11 +20,8 @@ package org.apache.druid.indexing.common.task; import com.google.common.collect.ImmutableMap; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.NoopFirehoseFactory; import org.apache.druid.data.input.impl.NoopInputFormat; import org.apache.druid.data.input.impl.NoopInputSource; -import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig; import org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec; import org.apache.druid.java.util.common.granularity.Granularities; @@ -91,56 +88,4 @@ public void testParserAndInputSource() null ); } - - @Test - public void testFirehoseAndInputSource() - { - expectedException.expect(IllegalArgumentException.class); - expectedException.expectMessage( - "At most one of [Property{name='firehose', value=NoopFirehoseFactory{}}, Property{name='inputSource'" - ); - final IndexIngestionSpec spec = new IndexIngestionSpec( - new DataSchema( - "dataSource", - new 
TimestampSpec(null, null, null), - DimensionsSpec.EMPTY, - new AggregatorFactory[0], - new ArbitraryGranularitySpec(Granularities.NONE, null), - null - ), - new IndexIOConfig( - new NoopFirehoseFactory(), - new NoopInputSource(), - null, - null, - null - ), - null - ); - } - - @Test - public void testFirehoseAndInputFormat() - { - expectedException.expect(IllegalArgumentException.class); - expectedException.expectMessage("Cannot use firehose and inputFormat together."); - final IndexIngestionSpec spec = new IndexIngestionSpec( - new DataSchema( - "dataSource", - new TimestampSpec(null, null, null), - DimensionsSpec.EMPTY, - new AggregatorFactory[0], - new ArbitraryGranularitySpec(Granularities.NONE, null), - null - ), - new IndexIOConfig( - new NoopFirehoseFactory(), - null, - new NoopInputFormat(), - null, - null - ), - null - ); - } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index babc3ff1fd2c..49bc6399dbd4 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -38,7 +38,6 @@ import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.ParseSpec; import org.apache.druid.data.input.impl.StringDimensionSchema; -import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatus; @@ -87,7 +86,6 @@ import org.apache.druid.segment.loading.SegmentLocalCacheManager; import org.apache.druid.segment.loading.StorageLocationConfig; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; -import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory; import org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter; import org.apache.druid.segment.realtime.plumber.NoopSegmentHandoffNotifierFactory; import org.apache.druid.segment.transform.ExpressionTransform; @@ -958,7 +956,9 @@ public void testCSVFileWithHeaderColumnOverride() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0), + timestampSpec, + DimensionsSpec.EMPTY, + new CsvInputFormat(columns, null, null, true, 0), null, null, tuningConfig, @@ -969,9 +969,7 @@ public void testCSVFileWithHeaderColumnOverride() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - timestampSpec, - DimensionsSpec.EMPTY, - new CsvInputFormat(columns, null, null, true, 0), + new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0), null, null, tuningConfig, @@ -1486,12 +1484,6 @@ public void testReportParseException() throws Exception false, false ); - expectedMessages = ImmutableList.of( - StringUtils.format( - "Timestamp[unparseable] is unparseable! Event: {time=unparseable, d=a, val=1} (Path: %s, Record: 1, Line: 2)", - tmpFile.toURI() - ) - ); } else { indexIngestionSpec = createIngestionSpec( jsonMapper, @@ -1503,11 +1495,14 @@ public void testReportParseException() throws Exception false, false ); - expectedMessages = ImmutableList.of( - "Timestamp[unparseable] is unparseable! 
Event: {time=unparseable, d=a, val=1}" - ); } + expectedMessages = ImmutableList.of( + StringUtils.format( + "Timestamp[unparseable] is unparseable! Event: {time=unparseable, d=a, val=1} (Path: %s, Record: 1, Line: 2)", + tmpFile.toURI() + ) + ); IndexTask indexTask = new IndexTask( null, null, @@ -1661,36 +1656,24 @@ public void testMultipleParseExceptionsSuccess() throws Exception .get(RowIngestionMeters.BUILD_SEGMENTS); List expectedMessages; - if (useInputFormatApi) { - expectedMessages = Arrays.asList( - StringUtils.format("Unable to parse row [this is not JSON] (Path: %s, Record: 6, Line: 9)", tmpFile.toURI()), - StringUtils.format( - "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 6, Line: 8)", - tmpFile.toURI() - ), - StringUtils.format( - "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}] (Path: %s, Record: 5, Line: 6)", - tmpFile.toURI() - ), - "Unable to parse value[notnumber] for field[val]", - "could not convert value [notnumber] to float", - "could not convert value [notnumber] to long", - StringUtils.format( - "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 1)", - tmpFile.toURI() - ) - ); - } else { - expectedMessages = Arrays.asList( - "Unable to parse row [this is not JSON]", - "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", - "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}]", - "Unable to parse value[notnumber] for field[val]", - "could not convert value [notnumber] to float", - "could not convert value [notnumber] to long", - "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" - ); - } + expectedMessages = Arrays.asList( + StringUtils.format("Unable to parse row [this is not JSON] (Path: %s, Record: 6, Line: 9)", tmpFile.toURI()), + StringUtils.format( + "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 6, Line: 8)", + tmpFile.toURI() + ), + StringUtils.format( + "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}] (Path: %s, Record: 5, Line: 6)", + tmpFile.toURI() + ), + "Unable to parse value[notnumber] for field[val]", + "could not convert value [notnumber] to float", + "could not convert value [notnumber] to long", + StringUtils.format( + "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 1)", + tmpFile.toURI() + ) + ); List actualMessages = parseExceptionReports.stream().map((r) -> { return ((List) r.get("details")).get(0); @@ -1715,30 +1698,21 @@ public void testMultipleParseExceptionsSuccess() throws Exception .getUnparseableEvents() .get(RowIngestionMeters.DETERMINE_PARTITIONS); - if (useInputFormatApi) { - expectedMessages = Arrays.asList( - StringUtils.format("Unable to parse row [this is not JSON] (Path: %s, Record: 6, Line: 9)", tmpFile.toURI()), - StringUtils.format( - "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! 
Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 6, Line: 8)", - tmpFile.toURI() - ), - StringUtils.format( - "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}] (Path: %s, Record: 5, Line: 6)", - tmpFile.toURI() - ), - StringUtils.format( - "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 1)", - tmpFile.toURI() - ) - ); - } else { - expectedMessages = Arrays.asList( - "Unable to parse row [this is not JSON]", - "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", - "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}]", - "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" - ); - } + expectedMessages = Arrays.asList( + StringUtils.format("Unable to parse row [this is not JSON] (Path: %s, Record: 6, Line: 9)", tmpFile.toURI()), + StringUtils.format( + "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 6, Line: 8)", + tmpFile.toURI() + ), + StringUtils.format( + "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}] (Path: %s, Record: 5, Line: 6)", + tmpFile.toURI() + ), + StringUtils.format( + "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 1)", + tmpFile.toURI() + ) + ); actualMessages = parseExceptionReports.stream().map((r) -> { return ((List) r.get("details")).get(0); @@ -1827,20 +1801,6 @@ public void testMultipleParseExceptionsFailure() throws Exception false, false ); - expectedMessages = Arrays.asList( - StringUtils.format( - "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 3, Line: 6)", - tmpFile.toURI() - ), - StringUtils.format( - "Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 2, Line: 4)", - tmpFile.toURI() - ), - StringUtils.format( - "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 2)", - tmpFile.toURI() - ) - ); } else { ingestionSpec = createIngestionSpec( jsonMapper, @@ -1852,13 +1812,22 @@ public void testMultipleParseExceptionsFailure() throws Exception false, false ); - expectedMessages = Arrays.asList( - "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", - "Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}", - "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" - ); } + expectedMessages = Arrays.asList( + StringUtils.format( + "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 3, Line: 6)", + tmpFile.toURI() + ), + StringUtils.format( + "Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 2, Line: 4)", + tmpFile.toURI() + ), + StringUtils.format( + "Timestamp[unparseable] is unparseable! 
Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 2)", + tmpFile.toURI() + ) + ); IndexTask indexTask = new IndexTask( null, null, @@ -1885,7 +1854,7 @@ public void testMultipleParseExceptionsFailure() throws Exception RowIngestionMeters.PROCESSED_WITH_ERROR, 0, RowIngestionMeters.PROCESSED, 1, RowIngestionMeters.UNPARSEABLE, 3, - RowIngestionMeters.THROWN_AWAY, useInputFormatApi ? 1 : 2 + RowIngestionMeters.THROWN_AWAY, 1 ) ); @@ -1980,11 +1949,6 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc false, false ); - expectedMessages = Arrays.asList( - StringUtils.format("Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 3, Line: 6)", tmpFile.toURI()), - StringUtils.format("Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 2, Line: 4)", tmpFile.toURI()), - StringUtils.format("Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 2)", tmpFile.toURI()) - ); } else { ingestionSpec = createIngestionSpec( jsonMapper, @@ -1996,13 +1960,13 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc false, false ); - expectedMessages = Arrays.asList( - "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", - "Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}", - "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" - ); } + expectedMessages = Arrays.asList( + StringUtils.format("Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 3, Line: 6)", tmpFile.toURI()), + StringUtils.format("Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 2, Line: 4)", tmpFile.toURI()), + StringUtils.format("Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 2)", tmpFile.toURI()) + ); IndexTask indexTask = new IndexTask( null, null, @@ -2022,7 +1986,7 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc RowIngestionMeters.PROCESSED_WITH_ERROR, 0, RowIngestionMeters.PROCESSED, 1, RowIngestionMeters.UNPARSEABLE, 3, - RowIngestionMeters.THROWN_AWAY, useInputFormatApi ? 1 : 2 + RowIngestionMeters.THROWN_AWAY, 1 ), RowIngestionMeters.BUILD_SEGMENTS, ImmutableMap.of( @@ -2172,12 +2136,6 @@ public void testCsvWithHeaderOfEmptyTimestamp() throws Exception false, false ); - expectedMessages = ImmutableList.of( - StringUtils.format( - "Timestamp[null] is unparseable! Event: {column_1=2014-01-01T00:00:10Z, column_2=a, column_3=1} (Path: %s, Record: 1, Line: 2)", - tmpFile.toURI() - ) - ); } else { ingestionSpec = createIngestionSpec( jsonMapper, @@ -2189,11 +2147,14 @@ public void testCsvWithHeaderOfEmptyTimestamp() throws Exception false, false ); - expectedMessages = ImmutableList.of( - "Timestamp[null] is unparseable! Event: {column_1=2014-01-01T00:00:10Z, column_2=a, column_3=1}" - ); } + expectedMessages = ImmutableList.of( + StringUtils.format( + "Timestamp[null] is unparseable! 
Event: {column_1=2014-01-01T00:00:10Z, column_2=a, column_3=1} (Path: %s, Record: 1, Line: 2)", + tmpFile.toURI() + ) + ); IndexTask indexTask = new IndexTask( null, null, @@ -2954,16 +2915,12 @@ private static IndexIngestionSpec createIngestionSpec( tuningConfig ); } else { + parseSpec = parseSpec != null ? parseSpec : DEFAULT_PARSE_SPEC; return new IndexIngestionSpec( new DataSchema( DATASOURCE, - objectMapper.convertValue( - new StringInputRowParser( - parseSpec != null ? parseSpec : DEFAULT_PARSE_SPEC, - null - ), - Map.class - ), + parseSpec.getTimestampSpec(), + parseSpec.getDimensionsSpec(), new AggregatorFactory[]{ new LongSumAggregatorFactory("val", "val") }, @@ -2973,14 +2930,13 @@ private static IndexIngestionSpec createIngestionSpec( Collections.singletonList(Intervals.of("2014/2015")) ), transformSpec, + null, objectMapper ), new IndexIOConfig( - new LocalFirehoseFactory( - baseDir, - "druid*", - null - ), + null, + new LocalInputSource(baseDir, "druid*"), + createInputFormatFromParseSpec(parseSpec), appendToExisting, dropExisting ), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index c6d10f2b87e3..11196d86876b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -23,6 +23,18 @@ import com.google.common.base.Optional; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; +import org.apache.druid.data.input.FirehoseFactory; +import org.apache.druid.data.input.InputFormat; +import org.apache.druid.data.input.impl.CSVParseSpec; +import org.apache.druid.data.input.impl.CsvInputFormat; +import org.apache.druid.data.input.impl.DelimitedInputFormat; +import org.apache.druid.data.input.impl.DelimitedParseSpec; +import org.apache.druid.data.input.impl.InputRowParser; +import org.apache.druid.data.input.impl.JSONParseSpec; +import org.apache.druid.data.input.impl.JsonInputFormat; +import org.apache.druid.data.input.impl.ParseSpec; +import org.apache.druid.data.input.impl.RegexInputFormat; +import org.apache.druid.data.input.impl.RegexParseSpec; import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexing.common.SegmentCacheManagerFactory; import org.apache.druid.indexing.common.SingleFileTaskReportFileWriter; @@ -46,6 +58,7 @@ import org.apache.druid.indexing.overlord.autoscaling.ScalingStats; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.metadata.EntryExistsException; @@ -220,6 +233,50 @@ public IndexMergerV9Factory getIndexMergerV9Factory() return testUtils.getIndexMergerV9Factory(); } + /** + * Converts ParseSpec to InputFormat for indexing tests. To be used until {@link FirehoseFactory} + * & {@link InputRowParser} is deprecated and removed. 
+ * + * @param parseSpec + * @return + */ + public static InputFormat createInputFormatFromParseSpec(ParseSpec parseSpec) + { + if (parseSpec instanceof JSONParseSpec) { + JSONParseSpec jsonParseSpec = (JSONParseSpec) parseSpec; + return new JsonInputFormat(jsonParseSpec.getFlattenSpec(), jsonParseSpec.getFeatureSpec(), jsonParseSpec.getKeepNullColumns()); + } else if (parseSpec instanceof CSVParseSpec) { + CSVParseSpec csvParseSpec = (CSVParseSpec) parseSpec; + boolean getColumnsFromHeader = csvParseSpec.isHasHeaderRow() && csvParseSpec.getSkipHeaderRows() == 0; + return new CsvInputFormat( + csvParseSpec.getColumns(), + csvParseSpec.getListDelimiter(), + getColumnsFromHeader ? null : true, + getColumnsFromHeader ? true : null, + csvParseSpec.getSkipHeaderRows() + ); + } else if (parseSpec instanceof DelimitedParseSpec) { + DelimitedParseSpec delimitedParseSpec = (DelimitedParseSpec) parseSpec; + boolean getColumnsFromHeader = delimitedParseSpec.isHasHeaderRow() && delimitedParseSpec.getSkipHeaderRows() == 0; + return new DelimitedInputFormat( + delimitedParseSpec.getColumns(), + delimitedParseSpec.getListDelimiter(), + delimitedParseSpec.getDelimiter(), + getColumnsFromHeader ? null : true, + getColumnsFromHeader ? true : null, + delimitedParseSpec.getSkipHeaderRows() + ); + } else if (parseSpec instanceof RegexParseSpec) { + RegexParseSpec regexParseSpec = (RegexParseSpec) parseSpec; + return new RegexInputFormat( + regexParseSpec.getPattern(), + regexParseSpec.getListDelimiter(), + regexParseSpec.getColumns()); + } else { + throw new RE(StringUtils.format("Unsupported ParseSpec format %s", parseSpec.toString())); + } + } + public class TestLocalTaskActionClientFactory implements TaskActionClientFactory { @Override diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java index 499ba22bb19f..7509d55ddbd2 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java @@ -24,7 +24,10 @@ import com.fasterxml.jackson.databind.jsontype.NamedType; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.druid.data.input.Firehose; +import org.apache.druid.data.input.FirehoseFactory; import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.InputRowParser; import org.apache.druid.data.input.impl.LocalInputSource; import org.apache.druid.data.input.impl.NoopInputFormat; import org.apache.druid.data.input.impl.TimestampSpec; @@ -49,7 +52,6 @@ import org.apache.druid.segment.indexing.RealtimeTuningConfig; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.realtime.FireDepartment; -import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory; import org.apache.druid.server.security.AuthTestUtils; import org.apache.druid.timeline.partition.NoneShardSpec; import org.hamcrest.CoreMatchers; @@ -402,7 +404,7 @@ public void testRealtimeIndexTaskSerde() throws Exception jsonMapper ), new RealtimeIOConfig( - new LocalFirehoseFactory(new File("lol"), "rofl", null), + new MockFirehoseFactory(), (schema, config, metrics) -> null ), @@ -432,6 +434,9 @@ public void testRealtimeIndexTaskSerde() throws Exception null ); + jsonMapper.registerSubtypes( + new NamedType(MockFirehoseFactory.class, 
"mockFirehoseFactory") + ); final String json = jsonMapper.writeValueAsString(task); Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change @@ -587,4 +592,13 @@ public void testHadoopIndexTaskSerde() throws Exception Assert.assertEquals("blah", task.getClasspathPrefix()); Assert.assertEquals("blah", task2.getClasspathPrefix()); } + + private static class MockFirehoseFactory implements FirehoseFactory + { + @Override + public Firehose connect(InputRowParser parser, File temporaryDirectory) + { + return null; + } + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java index 38f70408232d..9c6733d2543e 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java @@ -25,7 +25,6 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.LocalInputSource; import org.apache.druid.data.input.impl.ParseSpec; -import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatus; @@ -59,7 +58,6 @@ import org.apache.druid.segment.loading.SegmentLoadingException; import org.apache.druid.segment.loading.SegmentLocalCacheLoader; import org.apache.druid.segment.loading.TombstoneLoadSpec; -import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory; import org.apache.druid.timeline.DataSegment; import org.joda.time.Interval; import org.junit.Assert; @@ -243,22 +241,20 @@ protected ParallelIndexSupervisorTask createTask( } else { Preconditions.checkArgument(inputFormat == null); ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig( - new LocalFirehoseFactory(inputDirectory, filter, null), + null, + new LocalInputSource(inputDirectory, filter), + createInputFormatFromParseSpec(parseSpec), appendToExisting, dropExisting ); - //noinspection unchecked ingestionSpec = new ParallelIndexIngestionSpec( new DataSchema( - "dataSource", - getObjectMapper().convertValue( - new StringInputRowParser(parseSpec, null), - Map.class - ), + DATASOURCE, + parseSpec.getTimestampSpec(), + parseSpec.getDimensionsSpec(), DEFAULT_METRICS_SPEC, granularitySpec, - null, - getObjectMapper() + null ), ioConfig, tuningConfig diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index ecb00917a9b3..94fb3baba092 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -888,8 +888,10 @@ protected void compareTaskReports( Map expectedPayload = (Map) expectedReports.get("payload"); Map actualPayload = (Map) actualReports.get("payload"); Assert.assertEquals(expectedPayload.get("ingestionState"), actualPayload.get("ingestionState")); - 
Assert.assertEquals(expectedPayload.get("rowStats"), actualPayload.get("rowStats"));
-    Assert.assertEquals(expectedPayload.get("ingestionState"), actualPayload.get("ingestionState"));
+
+    Map expectedTotals = (Map) expectedPayload.get("totals");
+    Map actualTotals = (Map) actualPayload.get("totals");
+    Assert.assertEquals(expectedTotals, actualTotals);
 
     List expectedParseExceptionReports = (List) ((Map)
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java
index 536844f226ae..7dc464093ee2 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java
@@ -47,7 +47,6 @@
 import org.apache.druid.segment.indexing.DataSchema;
 import org.apache.druid.segment.indexing.granularity.GranularitySpec;
 import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;
-import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory;
 import org.joda.time.Interval;
 import org.junit.Assert;
 import org.junit.Before;
@@ -267,8 +266,11 @@ private ParallelIndexSupervisorTask createTestTask(
     } else {
       Preconditions.checkArgument(inputFormat == null);
       ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(
-          new LocalFirehoseFactory(inputDir, filter, null),
-          appendToExisting
+          null,
+          new LocalInputSource(inputDir, filter),
+          createInputFormatFromParseSpec(parseSpec),
+          appendToExisting,
+          null
       );
       //noinspection unchecked
       ingestionSpec = new ParallelIndexIngestionSpec(
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java
index 7d8d681c4ab3..6c83c225fa9f 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java
@@ -20,7 +20,6 @@
 package org.apache.druid.indexing.common.task.batch.parallel;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.jsontype.NamedType;
 import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.data.input.impl.CsvInputFormat;
 import org.apache.druid.data.input.impl.DimensionsSpec;
@@ -38,7 +37,6 @@
 import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
 import org.apache.druid.segment.indexing.DataSchema;
 import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;
-import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory;
 import org.hamcrest.CoreMatchers;
 import org.joda.time.Interval;
 import org.junit.Assert;
@@ -67,9 +65,6 @@ private static ObjectMapper createObjectMapper()
   {
     TestUtils testUtils = new TestUtils();
     ObjectMapper objectMapper = testUtils.getTestObjectMapper();
-    objectMapper.registerSubtypes(
-        new NamedType(LocalFirehoseFactory.class, "local")
-    );
     return objectMapper;
   }
 
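[Editor's note — illustration only, not part of this patch. The test migrations in this patch all follow the same pattern: the LocalFirehoseFactory plus parser-map DataSchema pair becomes an explicit LocalInputSource, an InputFormat derived from the old ParseSpec, and the typed DataSchema constructor. A condensed sketch, assuming parseSpec, granularitySpec, inputDir, filter, and tuningConfig are in scope:

    DataSchema dataSchema = new DataSchema(
        "dataSource",
        parseSpec.getTimestampSpec(),
        parseSpec.getDimensionsSpec(),
        new AggregatorFactory[]{new LongSumAggregatorFactory("val", "val")},
        granularitySpec,
        null // transformSpec
    );
    ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(
        null,                                      // firehoseFactory: no longer used
        new LocalInputSource(inputDir, filter),
        createInputFormatFromParseSpec(parseSpec),
        false,                                     // appendToExisting
        null                                       // dropExisting
    );
    ParallelIndexIngestionSpec spec = new ParallelIndexIngestionSpec(dataSchema, ioConfig, tuningConfig);
]
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java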
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java index 031dd68d4cd0..0998258d3a18 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java @@ -27,7 +27,6 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.JsonInputFormat; import org.apache.druid.data.input.impl.LocalInputSource; -import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.indexer.TaskState; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskToolbox; @@ -47,7 +46,6 @@ import org.apache.druid.segment.incremental.RowIngestionMetersTotals; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; -import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.Partitions; import org.apache.druid.timeline.VersionedIntervalTimeline; @@ -946,27 +944,22 @@ private ParallelIndexSupervisorTask newTask( ingestionSpec = new ParallelIndexIngestionSpec( new DataSchema( "dataSource", - getObjectMapper().convertValue( - new StringInputRowParser( - DEFAULT_PARSE_SPEC, - null - ), - Map.class - ), - new AggregatorFactory[]{ - new LongSumAggregatorFactory("val", "val") - }, + DEFAULT_TIMESTAMP_SPEC, + DEFAULT_DIMENSIONS_SPEC, + DEFAULT_METRICS_SPEC, new UniformGranularitySpec( segmentGranularity, Granularities.MINUTE, interval == null ? null : Collections.singletonList(interval) ), - null, - getObjectMapper() + null ), new ParallelIndexIOConfig( - new LocalFirehoseFactory(inputDir, inputSourceFilter, null), - appendToExisting + null, + new LocalInputSource(inputDir, inputSourceFilter), + createInputFormatFromParseSpec(DEFAULT_PARSE_SPEC), + appendToExisting, + null ), tuningConfig ); @@ -984,10 +977,10 @@ private ParallelIndexSupervisorTask newTask( private String getErrorMessageForUnparseableTimestamp() { - return useInputFormatApi ? StringUtils.format( + return StringUtils.format( "Timestamp[2017unparseable] is unparseable! Event: {ts=2017unparseable} (Path: %s, Record: 5, Line: 5)", new File(inputDir, "test_0").toURI() - ) : "Timestamp[2017unparseable] is unparseable! Event: {ts=2017unparseable}"; + ); } private static class SettableSplittableLocalInputSource extends LocalInputSource diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java deleted file mode 100644 index 591a3adb84cd..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java +++ /dev/null @@ -1,615 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.firehose; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair; -import com.fasterxml.jackson.databind.module.SimpleModule; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.inject.Binder; -import com.google.inject.Module; -import org.apache.druid.client.coordinator.CoordinatorClient; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.FirehoseFactory; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.JSONParseSpec; -import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.guice.GuiceAnnotationIntrospector; -import org.apache.druid.guice.GuiceInjectableValues; -import org.apache.druid.guice.GuiceInjectors; -import org.apache.druid.indexing.common.ReingestionTimelineUtils; -import org.apache.druid.indexing.common.RetryPolicyConfig; -import org.apache.druid.indexing.common.RetryPolicyFactory; -import org.apache.druid.indexing.common.SegmentCacheManagerFactory; -import org.apache.druid.indexing.common.TestUtils; -import org.apache.druid.indexing.common.config.TaskStorageConfig; -import org.apache.druid.indexing.common.task.NoopTask; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.overlord.HeapMemoryTaskStorage; -import org.apache.druid.indexing.overlord.Segments; -import org.apache.druid.indexing.overlord.TaskLockbox; -import org.apache.druid.indexing.overlord.TaskStorage; -import org.apache.druid.java.util.common.FileUtils; -import org.apache.druid.java.util.common.Intervals; -import org.apache.druid.java.util.common.JodaUtils; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.math.expr.ExprMacroTable; -import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; -import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; -import org.apache.druid.query.aggregation.LongSumAggregatorFactory; -import org.apache.druid.query.expression.TestExprMacroTable; -import org.apache.druid.query.filter.SelectorDimFilter; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.IndexMergerV9; -import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.TestHelper; -import org.apache.druid.segment.column.ColumnHolder; -import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; -import org.apache.druid.segment.incremental.IncrementalIndex; -import org.apache.druid.segment.incremental.IncrementalIndexSchema; -import 
org.apache.druid.segment.incremental.OnheapIncrementalIndex; -import org.apache.druid.segment.loading.LocalDataSegmentPuller; -import org.apache.druid.segment.loading.LocalLoadSpec; -import org.apache.druid.segment.realtime.firehose.CombiningFirehoseFactory; -import org.apache.druid.segment.transform.ExpressionTransform; -import org.apache.druid.segment.transform.TransformSpec; -import org.apache.druid.server.metrics.NoopServiceEmitter; -import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.TimelineObjectHolder; -import org.apache.druid.timeline.partition.NumberedPartitionChunk; -import org.apache.druid.timeline.partition.NumberedShardSpec; -import org.apache.druid.timeline.partition.PartitionChunk; -import org.apache.druid.timeline.partition.PartitionHolder; -import org.easymock.EasyMock; -import org.joda.time.Interval; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - -/** - * - */ -@RunWith(Parameterized.class) -public class IngestSegmentFirehoseFactoryTest -{ - private static final ObjectMapper MAPPER; - private static final IndexMergerV9 INDEX_MERGER_V9; - private static final IndexIO INDEX_IO; - private static final TaskStorage TASK_STORAGE; - private static final IndexerSQLMetadataStorageCoordinator MDC; - private static final TaskLockbox TASK_LOCKBOX; - private static final Task TASK; - - @Rule - public TemporaryFolder temporaryFolder = new TemporaryFolder(); - - static { - TestUtils testUtils = new TestUtils(); - MAPPER = setupInjectablesInObjectMapper(TestHelper.makeJsonMapper()); - INDEX_MERGER_V9 = testUtils.getTestIndexMergerV9(); - INDEX_IO = testUtils.getTestIndexIO(); - TASK_STORAGE = new HeapMemoryTaskStorage( - new TaskStorageConfig(null) - { - } - ); - MDC = new IndexerSQLMetadataStorageCoordinator(null, null, null) - { - private final Set published = new HashSet<>(); - - @Override - public List retrieveUsedSegmentsForIntervals( - String dataSource, - List interval, - Segments visibility - ) - { - return ImmutableList.copyOf(SEGMENT_SET); - } - - @Override - public List retrieveUnusedSegmentsForInterval(String dataSource, Interval interval) - { - return ImmutableList.of(); - } - - @Override - public Set announceHistoricalSegments(Set segments) - { - Set added = new HashSet<>(); - for (final DataSegment segment : segments) { - if (published.add(segment)) { - added.add(segment); - } - } - - return ImmutableSet.copyOf(added); - } - - @Override - public void deleteSegments(Set segments) - { - // do nothing - } - }; - TASK_LOCKBOX = new TaskLockbox(TASK_STORAGE, MDC); - TASK = NoopTask.create(); - TASK_LOCKBOX.add(TASK); - } - - @Parameterized.Parameters(name = "{0}") - public static Collection constructorFeeder() throws IOException - { - final IndexSpec indexSpec = new IndexSpec(); - - final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder() - .withMinTimestamp(JodaUtils.MIN_INSTANT) - .withDimensionsSpec(ROW_PARSER) - .withMetrics( - new 
LongSumAggregatorFactory(METRIC_LONG_NAME, DIM_LONG_NAME), - new DoubleSumAggregatorFactory(METRIC_FLOAT_NAME, DIM_FLOAT_NAME) - ) - .build(); - final IncrementalIndex index = new OnheapIncrementalIndex.Builder() - .setIndexSchema(schema) - .setMaxRowCount(MAX_ROWS * MAX_SHARD_NUMBER) - .build(); - - for (Integer i = 0; i < MAX_ROWS; ++i) { - index.add(ROW_PARSER.parseBatch(buildRow(i.longValue())).get(0)); - } - - FileUtils.mkdirp(PERSIST_DIR); - INDEX_MERGER_V9.persist(index, PERSIST_DIR, indexSpec, null); - - final CoordinatorClient cc = new CoordinatorClient(null, null) - { - @Override - public Collection fetchUsedSegmentsInDataSourceForIntervals( - String dataSource, - List intervals - ) - { - return ImmutableSet.copyOf(SEGMENT_SET); - } - }; - - SegmentHandoffNotifierFactory notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class); - EasyMock.replay(notifierFactory); - - final SegmentCacheManagerFactory slf = new SegmentCacheManagerFactory(MAPPER); - final RetryPolicyFactory retryPolicyFactory = new RetryPolicyFactory(new RetryPolicyConfig()); - - Collection values = new ArrayList<>(); - for (InputRowParser parser : Arrays.asList( - ROW_PARSER, - new MapInputRowParser( - new JSONParseSpec( - new TimestampSpec(TIME_COLUMN, "auto", null), - DimensionsSpec.builder() - .setDimensionExclusions(ImmutableList.of(DIM_FLOAT_NAME, DIM_LONG_NAME)) - .build(), - null, - null, - null - ) - ) - )) { - for (List dim_names : Arrays.>asList(null, ImmutableList.of(DIM_NAME))) { - for (List metric_names : Arrays.>asList( - null, - ImmutableList.of(METRIC_LONG_NAME, METRIC_FLOAT_NAME) - )) { - for (Boolean wrapInCombining : Arrays.asList(false, true)) { - final IngestSegmentFirehoseFactory isfFactory = new IngestSegmentFirehoseFactory( - TASK.getDataSource(), - Intervals.ETERNITY, - null, - new SelectorDimFilter(DIM_NAME, DIM_VALUE, null), - dim_names, - metric_names, - null, - INDEX_IO, - cc, - slf, - retryPolicyFactory - ); - final FirehoseFactory factory = wrapInCombining - ? new CombiningFirehoseFactory(ImmutableList.of(isfFactory)) - : isfFactory; - values.add( - new Object[]{ - StringUtils.format( - "DimNames[%s]MetricNames[%s]ParserDimNames[%s]WrapInCombining[%s]", - dim_names == null ? "null" : "dims", - metric_names == null ? "null" : "metrics", - parser == ROW_PARSER ? 
"dims" : "null", - wrapInCombining - ), - factory, - parser - } - ); - } - } - } - } - return values; - } - - public static ObjectMapper setupInjectablesInObjectMapper(ObjectMapper objectMapper) - { - objectMapper.registerModule( - new SimpleModule("testModule").registerSubtypes(LocalLoadSpec.class) - ); - - final GuiceAnnotationIntrospector guiceIntrospector = new GuiceAnnotationIntrospector(); - objectMapper.setAnnotationIntrospectors( - new AnnotationIntrospectorPair( - guiceIntrospector, - objectMapper.getSerializationConfig().getAnnotationIntrospector() - ), - new AnnotationIntrospectorPair( - guiceIntrospector, - objectMapper.getDeserializationConfig().getAnnotationIntrospector() - ) - ); - objectMapper.setInjectableValues( - new GuiceInjectableValues( - GuiceInjectors.makeStartupInjectorWithModules( - ImmutableList.of( - new Module() - { - @Override - public void configure(Binder binder) - { - binder.bind(LocalDataSegmentPuller.class); - binder.bind(ExprMacroTable.class).toInstance(TestExprMacroTable.INSTANCE); - } - } - ) - ) - ) - ); - return objectMapper; - } - - public IngestSegmentFirehoseFactoryTest( - String testName, - FirehoseFactory factory, - InputRowParser rowParser - ) - { - this.factory = factory; - - // Must decorate the parser, since IngestSegmentFirehoseFactory will undecorate it. - this.rowParser = TransformSpec.NONE.decorate(rowParser); - } - - private static final Logger log = new Logger(IngestSegmentFirehoseFactoryTest.class); - private static final String DATA_SOURCE_NAME = "testDataSource"; - private static final String DATA_SOURCE_VERSION = "version"; - private static final Integer BINARY_VERSION = -1; - private static final String DIM_NAME = "testDimName"; - private static final String DIM_VALUE = "testDimValue"; - private static final String DIM_LONG_NAME = "testDimLongName"; - private static final String DIM_FLOAT_NAME = "testDimFloatName"; - private static final String METRIC_LONG_NAME = "testLongMetric"; - private static final String METRIC_FLOAT_NAME = "testFloatMetric"; - private static final Long METRIC_LONG_VALUE = 1L; - private static final Float METRIC_FLOAT_VALUE = 1.0f; - private static final String TIME_COLUMN = "ts"; - private static final Integer MAX_SHARD_NUMBER = 10; - private static final Integer MAX_ROWS = 10; - private static final File TMP_DIR = FileUtils.createTempDir(); - private static final File PERSIST_DIR = Paths.get(TMP_DIR.getAbsolutePath(), "indexTestMerger").toFile(); - private static final List SEGMENT_SET = new ArrayList<>(MAX_SHARD_NUMBER); - - private final FirehoseFactory factory; - private final InputRowParser rowParser; - private File tempDir; - - private static final InputRowParser> ROW_PARSER = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec(TIME_COLUMN, "auto", null), - DimensionsSpec.builder() - .setDimensions(DimensionsSpec.getDefaultSchemas(ImmutableList.of(DIM_NAME))) - .setDimensionExclusions(ImmutableList.of(DIM_FLOAT_NAME, DIM_LONG_NAME)) - .build() - ) - ); - - private static Map buildRow(Long ts) - { - return ImmutableMap.of( - TIME_COLUMN, ts, - DIM_NAME, DIM_VALUE, - DIM_FLOAT_NAME, METRIC_FLOAT_VALUE, - DIM_LONG_NAME, METRIC_LONG_VALUE - ); - } - - private static DataSegment buildSegment(Integer shardNumber) - { - Preconditions.checkArgument(shardNumber < MAX_SHARD_NUMBER); - Preconditions.checkArgument(shardNumber >= 0); - return new DataSegment( - DATA_SOURCE_NAME, - Intervals.ETERNITY, - DATA_SOURCE_VERSION, - ImmutableMap.of( - "type", "local", - "path", 
PERSIST_DIR.getAbsolutePath() - ), - ImmutableList.of(DIM_NAME), - ImmutableList.of(METRIC_LONG_NAME, METRIC_FLOAT_NAME), - new NumberedShardSpec( - shardNumber, - MAX_SHARD_NUMBER - ), - BINARY_VERSION, - 0L - ); - } - - @BeforeClass - public static void setUpStatic() - { - for (int i = 0; i < MAX_SHARD_NUMBER; ++i) { - SEGMENT_SET.add(buildSegment(i)); - } - } - - @AfterClass - public static void tearDownStatic() - { - recursivelyDelete(TMP_DIR); - } - - private static void recursivelyDelete(final File dir) - { - if (dir != null) { - if (dir.isDirectory()) { - final File[] files = dir.listFiles(); - if (files != null) { - for (File file : files) { - recursivelyDelete(file); - } - } - } else { - if (!dir.delete()) { - log.warn("Could not delete file at [%s]", dir.getAbsolutePath()); - } - } - } - } - - @Before - public void setup() throws IOException - { - tempDir = temporaryFolder.newFolder(); - } - - @After - public void teardown() - { - tempDir.delete(); - } - - @Test - public void sanityTest() - { - if (factory instanceof CombiningFirehoseFactory) { - // This method tests IngestSegmentFirehoseFactory-specific methods. - return; - } - final IngestSegmentFirehoseFactory isfFactory = (IngestSegmentFirehoseFactory) factory; - Assert.assertEquals(TASK.getDataSource(), isfFactory.getDataSource()); - if (isfFactory.getDimensions() != null) { - Assert.assertArrayEquals(new String[]{DIM_NAME}, isfFactory.getDimensions().toArray()); - } - Assert.assertEquals(Intervals.ETERNITY, isfFactory.getInterval()); - if (isfFactory.getMetrics() != null) { - Assert.assertEquals( - ImmutableSet.of(METRIC_LONG_NAME, METRIC_FLOAT_NAME), - ImmutableSet.copyOf(isfFactory.getMetrics()) - ); - } - } - - @Test - public void simpleFirehoseReadingTest() throws IOException - { - Assert.assertEquals(MAX_SHARD_NUMBER.longValue(), SEGMENT_SET.size()); - Integer rowcount = 0; - try (final Firehose firehose = factory.connect(rowParser, TMP_DIR)) { - while (firehose.hasMore()) { - InputRow row = firehose.nextRow(); - Assert.assertArrayEquals(new String[]{DIM_NAME}, row.getDimensions().toArray()); - Assert.assertArrayEquals(new String[]{DIM_VALUE}, row.getDimension(DIM_NAME).toArray()); - Assert.assertEquals(METRIC_LONG_VALUE.longValue(), row.getMetric(METRIC_LONG_NAME)); - Assert.assertEquals( - METRIC_FLOAT_VALUE, - row.getMetric(METRIC_FLOAT_NAME).floatValue(), - METRIC_FLOAT_VALUE * 0.0001 - ); - ++rowcount; - } - } - Assert.assertEquals((int) MAX_SHARD_NUMBER * MAX_ROWS, (int) rowcount); - } - - @Test - public void testTransformSpec() throws IOException - { - Assert.assertEquals(MAX_SHARD_NUMBER.longValue(), SEGMENT_SET.size()); - Integer rowcount = 0; - final TransformSpec transformSpec = new TransformSpec( - new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "1", null), - ImmutableList.of( - new ExpressionTransform(METRIC_FLOAT_NAME, METRIC_FLOAT_NAME + " * 10", ExprMacroTable.nil()) - ) - ); - int skipped = 0; - try (final Firehose firehose = - factory.connect(transformSpec.decorate(rowParser), TMP_DIR)) { - while (firehose.hasMore()) { - InputRow row = firehose.nextRow(); - if (row == null) { - skipped++; - continue; - } - Assert.assertArrayEquals(new String[]{DIM_NAME}, row.getDimensions().toArray()); - Assert.assertArrayEquals(new String[]{DIM_VALUE}, row.getDimension(DIM_NAME).toArray()); - Assert.assertEquals(METRIC_LONG_VALUE.longValue(), row.getMetric(METRIC_LONG_NAME).longValue()); - Assert.assertEquals( - METRIC_FLOAT_VALUE * 10, - row.getMetric(METRIC_FLOAT_NAME).floatValue(), - METRIC_FLOAT_VALUE * 
0.0001 - ); - ++rowcount; - } - } - Assert.assertEquals(90, skipped); - Assert.assertEquals((int) MAX_ROWS, (int) rowcount); - } - - @Test - public void testGetUniqueDimensionsAndMetrics() - { - final int numSegmentsPerPartitionChunk = 5; - final int numPartitionChunksPerTimelineObject = 10; - final int numSegments = numSegmentsPerPartitionChunk * numPartitionChunksPerTimelineObject; - final Interval interval = Intervals.of("2017-01-01/2017-01-02"); - final String version = "1"; - - final List> timelineSegments = new ArrayList<>(); - for (int i = 0; i < numPartitionChunksPerTimelineObject; i++) { - final List> chunks = new ArrayList<>(); - for (int j = 0; j < numSegmentsPerPartitionChunk; j++) { - final List dims = IntStream.range(i, i + numSegmentsPerPartitionChunk) - .mapToObj(suffix -> "dim" + suffix) - .collect(Collectors.toList()); - final List metrics = IntStream.range(i, i + numSegmentsPerPartitionChunk) - .mapToObj(suffix -> "met" + suffix) - .collect(Collectors.toList()); - final DataSegment segment = new DataSegment( - "ds", - interval, - version, - ImmutableMap.of(), - dims, - metrics, - new NumberedShardSpec(numPartitionChunksPerTimelineObject, i), - 1, - 1 - ); - - final PartitionChunk partitionChunk = new NumberedPartitionChunk<>( - i, - numPartitionChunksPerTimelineObject, - segment - ); - chunks.add(partitionChunk); - } - final TimelineObjectHolder timelineHolder = new TimelineObjectHolder<>( - interval, - version, - new PartitionHolder<>(chunks) - ); - timelineSegments.add(timelineHolder); - } - - final String[] expectedDims = new String[]{ - "dim9", - "dim10", - "dim11", - "dim12", - "dim13", - "dim8", - "dim7", - "dim6", - "dim5", - "dim4", - "dim3", - "dim2", - "dim1", - "dim0" - }; - final String[] expectedMetrics = new String[]{ - "met9", - "met10", - "met11", - "met12", - "met13", - "met8", - "met7", - "met6", - "met5", - "met4", - "met3", - "met2", - "met1", - "met0" - }; - Assert.assertEquals( - Arrays.asList(expectedDims), - ReingestionTimelineUtils.getUniqueDimensions(timelineSegments, null) - ); - Assert.assertEquals( - Arrays.asList(expectedMetrics), - ReingestionTimelineUtils.getUniqueMetrics(timelineSegments) - ); - } - - private static ServiceEmitter newMockEmitter() - { - return new NoopServiceEmitter(); - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java deleted file mode 100644 index 61527089728b..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java +++ /dev/null @@ -1,453 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.firehose; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterables; -import org.apache.druid.client.coordinator.CoordinatorClient; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.MapBasedInputRow; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.JSONParseSpec; -import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.indexing.common.RetryPolicyConfig; -import org.apache.druid.indexing.common.RetryPolicyFactory; -import org.apache.druid.indexing.common.SegmentCacheManagerFactory; -import org.apache.druid.indexing.common.TestUtils; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.FileUtils; -import org.apache.druid.java.util.common.Intervals; -import org.apache.druid.java.util.common.JodaUtils; -import org.apache.druid.query.aggregation.LongSumAggregatorFactory; -import org.apache.druid.query.filter.TrueDimFilter; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.IndexMergerV9; -import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; -import org.apache.druid.segment.incremental.IncrementalIndex; -import org.apache.druid.segment.incremental.IncrementalIndexSchema; -import org.apache.druid.segment.incremental.IndexSizeExceededException; -import org.apache.druid.segment.incremental.OnheapIncrementalIndex; -import org.apache.druid.segment.transform.TransformSpec; -import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.SegmentId; -import org.apache.druid.timeline.partition.LinearShardSpec; -import org.easymock.EasyMock; -import org.joda.time.Interval; -import org.junit.After; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.stream.Collectors; - -@RunWith(Parameterized.class) -public class IngestSegmentFirehoseFactoryTimelineTest -{ - private static final String DATA_SOURCE = "foo"; - private static final String TIME_COLUMN = "t"; - private static final String[] DIMENSIONS = new String[]{"d1"}; - private static final String[] METRICS = new String[]{"m1"}; - - // Must decorate the parser, since IngestSegmentFirehoseFactory will undecorate it. 
- private static final InputRowParser> ROW_PARSER = TransformSpec.NONE.decorate( - new MapInputRowParser( - new JSONParseSpec( - new TimestampSpec(TIME_COLUMN, "auto", null), - new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(Arrays.asList(DIMENSIONS)) - ), - null, - null, - null - ) - ) - ); - - private final IngestSegmentFirehoseFactory factory; - private final File tmpDir; - private final int expectedCount; - private final long expectedSum; - private final int segmentCount; - - private static final ObjectMapper MAPPER; - private static final IndexIO INDEX_IO; - private static final IndexMergerV9 INDEX_MERGER_V9; - - static { - TestUtils testUtils = new TestUtils(); - MAPPER = IngestSegmentFirehoseFactoryTest.setupInjectablesInObjectMapper(testUtils.getTestObjectMapper()); - INDEX_IO = testUtils.getTestIndexIO(); - INDEX_MERGER_V9 = testUtils.getTestIndexMergerV9(); - } - - public IngestSegmentFirehoseFactoryTimelineTest( - String name, - IngestSegmentFirehoseFactory factory, - File tmpDir, - int expectedCount, - long expectedSum, - int segmentCount - ) - { - this.factory = factory; - this.tmpDir = tmpDir; - this.expectedCount = expectedCount; - this.expectedSum = expectedSum; - this.segmentCount = segmentCount; - } - - @Test - public void test() throws Exception - { - // Junit 4.12 doesn't have a good way to run tearDown after multiple tests in a Parameterized - // class run. (Junit 4.13 adds @AfterParam but isn't released yet.) Fake it by just running - // "tests" in series inside one @Test. - testSimple(); - testSplit(); - } - - private void testSimple() throws Exception - { - int count = 0; - long sum = 0; - - try (final Firehose firehose = factory.connect(ROW_PARSER, tmpDir)) { - while (firehose.hasMore()) { - final InputRow row = firehose.nextRow(); - count++; - sum += row.getMetric(METRICS[0]).longValue(); - } - } - - Assert.assertEquals("count", expectedCount, count); - Assert.assertEquals("sum", expectedSum, sum); - } - - private void testSplit() throws Exception - { - Assert.assertTrue(factory.isSplittable()); - final int numSplits = factory.getNumSplits(null); - // We set maxInputSegmentBytesPerSplit to 2 so each segment should become a byte. - Assert.assertEquals(segmentCount, numSplits); - final List>> splits = - factory.getSplits(null).collect(Collectors.toList()); - Assert.assertEquals(numSplits, splits.size()); - - int count = 0; - long sum = 0; - - for (InputSplit> split : splits) { - final FiniteFirehoseFactory> splitFactory = - factory.withSplit(split); - try (final Firehose firehose = splitFactory.connect(ROW_PARSER, tmpDir)) { - while (firehose.hasMore()) { - final InputRow row = firehose.nextRow(); - count++; - sum += row.getMetric(METRICS[0]).longValue(); - } - } - } - - Assert.assertEquals("count", expectedCount, count); - Assert.assertEquals("sum", expectedSum, sum); - - } - - @After - public void tearDown() throws Exception - { - FileUtils.deleteDirectory(tmpDir); - } - - private static TestCase tc( - String intervalString, - int expectedCount, - long expectedSum, - DataSegmentMaker... segmentMakers - ) - { - final File tmpDir = FileUtils.createTempDir(); - final Set segments = new HashSet<>(); - for (DataSegmentMaker segmentMaker : segmentMakers) { - segments.add(segmentMaker.make(tmpDir)); - } - - return new TestCase( - tmpDir, - Intervals.of(intervalString), - expectedCount, - expectedSum, - segments - ); - } - - private static DataSegmentMaker ds( - String intervalString, - String version, - int partitionNum, - InputRow... 
rows - ) - { - return new DataSegmentMaker(Intervals.of(intervalString), version, partitionNum, Arrays.asList(rows)); - } - - private static InputRow ir(String timeString, long metricValue) - { - return new MapBasedInputRow( - DateTimes.of(timeString).getMillis(), - Arrays.asList(DIMENSIONS), - ImmutableMap.of( - TIME_COLUMN, DateTimes.of(timeString).toString(), - DIMENSIONS[0], "bar", - METRICS[0], metricValue - ) - ); - } - - private static Map persist(File tmpDir, InputRow... rows) - { - final File persistDir = new File(tmpDir, UUID.randomUUID().toString()); - final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder() - .withMinTimestamp(JodaUtils.MIN_INSTANT) - .withDimensionsSpec(ROW_PARSER) - .withMetrics(new LongSumAggregatorFactory(METRICS[0], METRICS[0])) - .build(); - final IncrementalIndex index = new OnheapIncrementalIndex.Builder() - .setIndexSchema(schema) - .setMaxRowCount(rows.length) - .build(); - - for (InputRow row : rows) { - try { - index.add(row); - } - catch (IndexSizeExceededException e) { - throw new RuntimeException(e); - } - } - - try { - INDEX_MERGER_V9.persist(index, persistDir, new IndexSpec(), null); - } - catch (IOException e) { - throw new RuntimeException(e); - } - - return ImmutableMap.of( - "type", "local", - "path", persistDir.getAbsolutePath() - ); - } - - @Parameterized.Parameters(name = "{0}") - public static Collection constructorFeeder() - { - final List testCases = ImmutableList.of( - tc( - "2000/2000T02", 3, 7, - ds("2000/2000T01", "v1", 0, ir("2000", 1), ir("2000T00:01", 2)), - ds("2000T01/2000T02", "v1", 0, ir("2000T01", 4)) - ) /* Adjacent segments */, - tc( - "2000/2000T02", 3, 7, - ds("2000/2000T02", "v1", 0, ir("2000", 1), ir("2000T00:01", 2), ir("2000T01", 8)), - ds("2000T01/2000T02", "v2", 0, ir("2000T01:01", 4)) - ) /* 1H segment overlaid on top of 2H segment */, - tc( - "2000/2000-01-02", 4, 23, - ds("2000/2000-01-02", "v1", 0, ir("2000", 1), ir("2000T00:01", 2), ir("2000T01", 8), ir("2000T02", 16)), - ds("2000T01/2000T02", "v2", 0, ir("2000T01:01", 4)) - ) /* 1H segment overlaid on top of 1D segment */, - tc( - "2000/2000T02", 4, 15, - ds("2000/2000T02", "v1", 0, ir("2000", 1), ir("2000T00:01", 2), ir("2000T01", 8)), - ds("2000/2000T02", "v1", 1, ir("2000T01:01", 4)) - ) /* Segment set with two segments for the same interval */, - tc( - "2000T01/2000T02", 1, 2, - ds("2000/2000T03", "v1", 0, ir("2000", 1), ir("2000T01", 2), ir("2000T02", 4)) - ) /* Segment wider than desired interval */, - tc( - "2000T02/2000T04", 2, 12, - ds("2000/2000T03", "v1", 0, ir("2000", 1), ir("2000T01", 2), ir("2000T02", 4)), - ds("2000T03/2000T04", "v1", 0, ir("2000T03", 8)) - ) /* Segment intersecting desired interval */ - ); - - final List constructors = new ArrayList<>(); - - for (final TestCase testCase : testCases) { - SegmentHandoffNotifierFactory notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class); - EasyMock.replay(notifierFactory); - final SegmentCacheManagerFactory slf = new SegmentCacheManagerFactory(MAPPER); - final RetryPolicyFactory retryPolicyFactory = new RetryPolicyFactory(new RetryPolicyConfig()); - final CoordinatorClient cc = new CoordinatorClient(null, null) - { - @Override - public Collection fetchUsedSegmentsInDataSourceForIntervals( - String dataSource, - List intervals - ) - { - // Expect the interval we asked for - if (intervals.equals(ImmutableList.of(testCase.interval))) { - return ImmutableSet.copyOf(testCase.segments); - } else { - throw new IllegalArgumentException("BAD"); - } - } - - 
@Override - public DataSegment fetchUsedSegment(String dataSource, String segmentId) - { - return testCase.segments - .stream() - .filter(s -> s.getId().toString().equals(segmentId)) - .findAny() - .get(); // throwing if not found is exactly what the real code does - } - }; - final IngestSegmentFirehoseFactory factory = new IngestSegmentFirehoseFactory( - DATA_SOURCE, - testCase.interval, - null, - TrueDimFilter.instance(), - Arrays.asList(DIMENSIONS), - Arrays.asList(METRICS), - // Split as much as possible - 1L, - INDEX_IO, - cc, - slf, - retryPolicyFactory - ); - - constructors.add( - new Object[]{ - testCase.toString(), - factory, - testCase.tmpDir, - testCase.expectedCount, - testCase.expectedSum, - testCase.segments.size() - } - ); - } - - return constructors; - } - - private static class TestCase - { - final File tmpDir; - final Interval interval; - final int expectedCount; - final long expectedSum; - final Set segments; - - public TestCase( - File tmpDir, - Interval interval, - int expectedCount, - long expectedSum, - Set segments - ) - { - this.tmpDir = tmpDir; - this.interval = interval; - this.expectedCount = expectedCount; - this.expectedSum = expectedSum; - this.segments = segments; - } - - @Override - public String toString() - { - final List segmentIds = new ArrayList<>(); - for (DataSegment segment : segments) { - segmentIds.add(segment.getId()); - } - return "TestCase{" + - "interval=" + interval + - ", expectedCount=" + expectedCount + - ", expectedSum=" + expectedSum + - ", segments=" + segmentIds + - '}'; - } - } - - private static class DataSegmentMaker - { - final Interval interval; - final String version; - final int partitionNum; - final List rows; - - public DataSegmentMaker( - Interval interval, - String version, - int partitionNum, - List rows - ) - { - this.interval = interval; - this.version = version; - this.partitionNum = partitionNum; - this.rows = rows; - } - - public DataSegment make(File tmpDir) - { - final Map loadSpec = persist(tmpDir, Iterables.toArray(rows, InputRow.class)); - - return new DataSegment( - DATA_SOURCE, - interval, - version, - loadSpec, - Arrays.asList(DIMENSIONS), - Arrays.asList(METRICS), - new LinearShardSpec(partitionNum), - -1, - 2L - ); - } - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java index 2be0cc654117..c39cc9e749d0 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java @@ -37,7 +37,7 @@ import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexing.common.IndexingServiceCondition; -import org.apache.druid.indexing.common.TestRealtimeTask; +import org.apache.druid.indexing.common.TestIndexTask; import org.apache.druid.indexing.common.TestTasks; import org.apache.druid.indexing.common.TestUtils; import org.apache.druid.indexing.common.task.Task; @@ -251,7 +251,7 @@ public void testRunSameAvailabilityGroup() throws Exception { doSetup(); - TestRealtimeTask task1 = new TestRealtimeTask( + TestIndexTask task1 = new TestIndexTask( "rt1", new TaskResource("rt1", 1), "foo", @@ -262,7 +262,7 @@ public void testRunSameAvailabilityGroup() throws Exception Assert.assertTrue(taskAnnounced(task1.getId())); mockWorkerRunningTask(task1); - TestRealtimeTask task2 = new TestRealtimeTask( 
+ TestIndexTask task2 = new TestIndexTask( "rt2", new TaskResource("rt1", 1), "foo", @@ -271,7 +271,7 @@ public void testRunSameAvailabilityGroup() throws Exception ); remoteTaskRunner.run(task2); - TestRealtimeTask task3 = new TestRealtimeTask( + TestIndexTask task3 = new TestIndexTask( "rt3", new TaskResource("rt2", 1), "foo", @@ -314,7 +314,7 @@ public void testRunWithCapacity() throws Exception { doSetup(); - TestRealtimeTask task1 = new TestRealtimeTask( + TestIndexTask task1 = new TestIndexTask( "rt1", new TaskResource("rt1", 1), "foo", @@ -325,7 +325,7 @@ public void testRunWithCapacity() throws Exception Assert.assertTrue(taskAnnounced(task1.getId())); mockWorkerRunningTask(task1); - TestRealtimeTask task2 = new TestRealtimeTask( + TestIndexTask task2 = new TestIndexTask( "rt2", new TaskResource("rt2", 3), "foo", @@ -334,7 +334,7 @@ public void testRunWithCapacity() throws Exception ); remoteTaskRunner.run(task2); - TestRealtimeTask task3 = new TestRealtimeTask( + TestIndexTask task3 = new TestIndexTask( "rt3", new TaskResource("rt3", 2), "foo", @@ -406,7 +406,7 @@ public void testBootstrap() throws Exception makeRemoteTaskRunner(rtrConfig); - TestRealtimeTask task1 = new TestRealtimeTask( + TestIndexTask task1 = new TestIndexTask( "first", new TaskResource("first", 1), "foo", @@ -417,7 +417,7 @@ public void testBootstrap() throws Exception Assert.assertTrue(taskAnnounced(task1.getId())); mockWorkerRunningTask(task1); - TestRealtimeTask task = new TestRealtimeTask( + TestIndexTask task = new TestIndexTask( "second", new TaskResource("task", 2), "foo", @@ -426,7 +426,7 @@ public void testBootstrap() throws Exception ); remoteTaskRunner.run(task); - TestRealtimeTask task2 = new TestRealtimeTask( + TestIndexTask task2 = new TestIndexTask( "second", new TaskResource("second", 2), "foo", @@ -457,7 +457,7 @@ public String apply(RemoteTaskRunnerWorkItem input) public void testRunWithTaskComplete() throws Exception { doSetup(); - TestRealtimeTask task1 = new TestRealtimeTask( + TestIndexTask task1 = new TestIndexTask( "testTask", new TaskResource("testTask", 2), "foo", @@ -816,7 +816,7 @@ public void testBlacklistZKWorkers() throws Exception makeRemoteTaskRunner(rtrConfig); - TestRealtimeTask task1 = new TestRealtimeTask( + TestIndexTask task1 = new TestIndexTask( "realtime1", new TaskResource("realtime1", 1), "foo", @@ -834,7 +834,7 @@ public void testBlacklistZKWorkers() throws Exception remoteTaskRunner.findWorkerRunningTask(task1.getId()).getContinuouslyFailedTasksCount() ); - TestRealtimeTask task2 = new TestRealtimeTask( + TestIndexTask task2 = new TestIndexTask( "realtime2", new TaskResource("realtime2", 1), "foo", @@ -869,7 +869,7 @@ public void testBlacklistZKWorkers() throws Exception remoteTaskRunner.findWorkerRunningTask(task2.getId()).getContinuouslyFailedTasksCount() ); - TestRealtimeTask task3 = new TestRealtimeTask( + TestIndexTask task3 = new TestIndexTask( "realtime3", new TaskResource("realtime3", 1), "foo", @@ -908,7 +908,7 @@ public void testBlacklistZKWorkers25Percent() throws Exception for (int i = 1; i < 13; i++) { String taskId = StringUtils.format("rt-%d", i); - TestRealtimeTask task = new TestRealtimeTask( + TestIndexTask task = new TestIndexTask( taskId, new TaskResource(taskId, 1), "foo", @@ -963,7 +963,7 @@ public void testBlacklistZKWorkers50Percent() throws Exception for (int i = 1; i < 13; i++) { String taskId = StringUtils.format("rt-%d", i); - TestRealtimeTask task = new TestRealtimeTask( + TestIndexTask task = new TestIndexTask( taskId, new 
TaskResource(taskId, 1), "foo", @@ -1008,13 +1008,13 @@ public void testSuccessfulTaskOnBlacklistedWorker() throws Exception makeRemoteTaskRunner(rtrConfig); - TestRealtimeTask task1 = new TestRealtimeTask( + TestIndexTask task1 = new TestIndexTask( "realtime1", new TaskResource("realtime1", 1), "foo", TaskStatus.success("realtime1"), jsonMapper ); - TestRealtimeTask task2 = new TestRealtimeTask( + TestIndexTask task2 = new TestIndexTask( "realtime2", new TaskResource("realtime2", 1), "foo", TaskStatus.success("realtime2"), jsonMapper ); - TestRealtimeTask task3 = new TestRealtimeTask( + TestIndexTask task3 = new TestIndexTask( "realtime3", new TaskResource("realtime3", 1), "foo", TaskStatus.success("realtime3"), jsonMapper ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java index 89a4642d0998..0decdb8f740b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java @@ -26,7 +26,6 @@ import net.thisptr.jackson.jq.internal.misc.Lists; import org.apache.druid.client.indexing.SamplerResponse; import org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow; -import org.apache.druid.data.input.FirehoseFactoryToInputSourceAdaptor; import org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.InputSource; @@ -59,7 +58,6 @@ import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.granularity.GranularitySpec; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; -import org.apache.druid.segment.realtime.firehose.InlineFirehoseFactory; import org.apache.druid.segment.transform.ExpressionTransform; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -1249,11 +1247,8 @@ public void testMultipleJsonStringInOneBlock() throws IOException // first n rows are related to the first json block which fails to parse // String parseExceptionMessage; - if (useInputFormatApi) { - parseExceptionMessage = "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, met1=6}"; - } else { - parseExceptionMessage = "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, met1=6}"; - } + parseExceptionMessage = "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, met1=6}"; + for (; index < illegalRows; index++) { assertEqualsSamplerResponseRow( new SamplerResponseRow( @@ -1426,39 +1421,19 @@ private Map getParserMap(InputRowParser parser) throws IOExcepti private InputSource createInputSource(List rows, DataSchema dataSchema) { final String data = String.join("\n", rows); - if (useInputFormatApi) { - return new InlineInputSource(data); - } else { - return new FirehoseFactoryToInputSourceAdaptor( - new InlineFirehoseFactory(data), - createInputRowParser( - dataSchema == null ? new TimestampSpec(null, null, null) : dataSchema.getTimestampSpec(), - dataSchema == null ? new DimensionsSpec(null) : dataSchema.getDimensionsSpec() - ) - ); - } + return new InlineInputSource(data); } private String getUnparseableTimestampString() { - if (useInputFormatApi) { - return ParserType.STR_CSV.equals(parserType) - ? 
"Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, dim2=null, met1=6} (Line: 6)" - : "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, met1=6} (Line: 6)"; - } else { - return ParserType.STR_CSV.equals(parserType) - ? "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, dim2=null, met1=6}" - : "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, met1=6}"; - } + return ParserType.STR_CSV.equals(parserType) + ? "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, dim2=null, met1=6} (Line: 6)" + : "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, met1=6} (Line: 6)"; } private String unparseableTimestampErrorString(Map rawColumns, int line) { - if (useInputFormatApi) { - return StringUtils.format("Timestamp[null] is unparseable! Event: %s (Line: %d)", rawColumns, line); - } else { - return StringUtils.format("Timestamp[null] is unparseable! Event: %s", rawColumns); - } + return StringUtils.format("Timestamp[null] is unparseable! Event: %s (Line: %d)", rawColumns, line); } @Nullable diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/TaskAnnouncementTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/TaskAnnouncementTest.java index 3eed893b3721..c61bb40bfd76 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/TaskAnnouncementTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/TaskAnnouncementTest.java @@ -31,12 +31,10 @@ import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.RealtimeIOConfig; import org.apache.druid.segment.realtime.FireDepartment; -import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory; +import org.easymock.EasyMock; import org.junit.Assert; import org.junit.Test; -import java.io.File; - public class TaskAnnouncementTest { private final ObjectMapper jsonMapper; @@ -50,15 +48,13 @@ public TaskAnnouncementTest() @Test public void testBackwardsCompatibleSerde() throws Exception { + final RealtimeIOConfig realtimeIOConfig = EasyMock.createNiceMock(RealtimeIOConfig.class); final Task task = new RealtimeIndexTask( "theid", new TaskResource("rofl", 2), new FireDepartment( new DataSchema("foo", null, new AggregatorFactory[0], null, null, new DefaultObjectMapper()), - new RealtimeIOConfig( - new LocalFirehoseFactory(new File("lol"), "rofl", null), - (schema, config, metrics) -> null - ), + realtimeIOConfig, null ), null diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java index b6261db8983e..5c942d39ea35 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java @@ -33,7 +33,7 @@ import org.apache.druid.indexing.common.IndexingServiceCondition; import org.apache.druid.indexing.common.SegmentCacheManagerFactory; import org.apache.druid.indexing.common.TaskToolboxFactory; -import org.apache.druid.indexing.common.TestRealtimeTask; +import org.apache.druid.indexing.common.TestIndexTask; import org.apache.druid.indexing.common.TestTasks; import org.apache.druid.indexing.common.TestUtils; import org.apache.druid.indexing.common.actions.TaskActionClient; @@ -145,7 +145,7 @@ public String getBase() // Start a 
task monitor workerTaskMonitor = createTaskMonitor(); TestTasks.registerSubtypes(jsonMapper); - jsonMapper.registerSubtypes(new NamedType(TestRealtimeTask.class, "test_realtime")); + jsonMapper.registerSubtypes(new NamedType(TestIndexTask.class, "test_realtime")); workerTaskMonitor.start(); task = TestTasks.immediateSuccess("test"); diff --git a/server/src/main/java/org/apache/druid/guice/FirehoseModule.java b/server/src/main/java/org/apache/druid/guice/FirehoseModule.java index c95b0cd42eec..fe6461bf2158 100644 --- a/server/src/main/java/org/apache/druid/guice/FirehoseModule.java +++ b/server/src/main/java/org/apache/druid/guice/FirehoseModule.java @@ -25,13 +25,8 @@ import com.google.inject.Binder; import org.apache.druid.initialization.DruidModule; import org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory; -import org.apache.druid.segment.realtime.firehose.CombiningFirehoseFactory; import org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory; import org.apache.druid.segment.realtime.firehose.FixedCountFirehoseFactory; -import org.apache.druid.segment.realtime.firehose.HttpFirehoseFactory; -import org.apache.druid.segment.realtime.firehose.InlineFirehoseFactory; -import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory; -import org.apache.druid.segment.realtime.firehose.SqlFirehoseFactory; import org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory; import java.util.Collections; @@ -52,13 +47,8 @@ public List getJacksonModules() .registerSubtypes( new NamedType(ClippedFirehoseFactory.class, "clipped"), new NamedType(TimedShutoffFirehoseFactory.class, "timed"), - new NamedType(LocalFirehoseFactory.class, "local"), - new NamedType(HttpFirehoseFactory.class, "http"), new NamedType(EventReceiverFirehoseFactory.class, "receiver"), - new NamedType(CombiningFirehoseFactory.class, "combining"), - new NamedType(FixedCountFirehoseFactory.class, "fixedCount"), - new NamedType(SqlFirehoseFactory.class, "sql"), - new NamedType(InlineFirehoseFactory.class, "inline") + new NamedType(FixedCountFirehoseFactory.class, "fixedCount") ) ); } diff --git a/server/src/main/java/org/apache/druid/segment/realtime/firehose/CombiningFirehoseFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/CombiningFirehoseFactory.java deleted file mode 100644 index 6e61e19a1ea5..000000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/CombiningFirehoseFactory.java +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
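[Editor's note — illustration only, not part of this patch. CombiningFirehoseFactory, whose deletion begins just above, concatenated several delegate firehoses. Its batch use case is covered by the combining input source in core; this sketch assumes org.apache.druid.data.input.impl.CombiningInputSource with a list-of-delegates constructor, and the paths are made up:

    InputSource combined = new CombiningInputSource(
        ImmutableList.<SplittableInputSource>of(
            new LocalInputSource(new File("/data/batch-1"), "*.json"),
            new LocalInputSource(new File("/data/batch-2"), "*.json")
        )
    );

The old firehose read each delegate to exhaustion in order (see CombiningFirehose.nextFirehose() below); the input source instead exposes the delegates through the standard split mechanism.]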
- */ - -package org.apache.druid.segment.realtime.firehose; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.FirehoseFactory; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.SplitHintSpec; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.java.util.emitter.EmittingLogger; - -import javax.annotation.Nullable; -import java.io.File; -import java.io.IOException; -import java.util.Iterator; -import java.util.List; -import java.util.stream.Stream; - -/** - * Creates firehose that combines data from different Firehoses. Useful for ingesting data from multiple sources. - */ -public class CombiningFirehoseFactory implements FiniteFirehoseFactory> -{ - private static final EmittingLogger log = new EmittingLogger(CombiningFirehoseFactory.class); - - private final List delegateFactoryList; - - @JsonCreator - public CombiningFirehoseFactory( - @JsonProperty("delegates") List delegateFactoryList - ) - { - Preconditions.checkArgument(!delegateFactoryList.isEmpty()); - this.delegateFactoryList = delegateFactoryList; - } - - @Override - public Firehose connect(InputRowParser parser, File temporaryDirectory) - { - return new CombiningFirehose(parser, temporaryDirectory); - } - - @JsonProperty("delegates") - public List getDelegateFactoryList() - { - return delegateFactoryList; - } - - @Override - public boolean isSplittable() - { - return false; - } - - @Override - public Stream>> getSplits( - @Nullable SplitHintSpec splitHintSpec - ) - { - return Stream.of(new InputSplit<>(delegateFactoryList)); - } - - @Override - public int getNumSplits(@Nullable SplitHintSpec splitHintSpec) - { - return 1; - } - - @Override - public FiniteFirehoseFactory> withSplit(InputSplit> split) - { - return new CombiningFirehoseFactory(split.get()); - } - - class CombiningFirehose implements Firehose - { - private final InputRowParser parser; - private final File temporaryDirectory; - private final Iterator firehoseFactoryIterator; - private volatile Firehose currentFirehose; - - CombiningFirehose(InputRowParser parser, File temporaryDirectory) - { - this.firehoseFactoryIterator = delegateFactoryList.iterator(); - this.parser = parser; - this.temporaryDirectory = temporaryDirectory; - nextFirehose(); - } - - private void nextFirehose() - { - if (firehoseFactoryIterator.hasNext()) { - try { - if (currentFirehose != null) { - currentFirehose.close(); - } - - currentFirehose = firehoseFactoryIterator.next().connect(parser, temporaryDirectory); - } - catch (IOException e) { - if (currentFirehose != null) { - try { - currentFirehose.close(); - } - catch (IOException e2) { - log.error(e, "Unable to close currentFirehose!"); - throw new RuntimeException(e2); - } - } - throw new RuntimeException(e); - } - } - } - - @Override - public boolean hasMore() throws IOException - { - return currentFirehose.hasMore(); - } - - @Nullable - @Override - public InputRow nextRow() throws IOException - { - InputRow rv = currentFirehose.nextRow(); - if (!currentFirehose.hasMore()) { - nextFirehose(); - } - return rv; - } - - @Override - public void close() throws IOException - { - currentFirehose.close(); - } - } -} diff --git 
a/server/src/main/java/org/apache/druid/segment/realtime/firehose/EventReceiverFirehoseFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/EventReceiverFirehoseFactory.java index 23c0a58e51d4..4fad62a6d462 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/EventReceiverFirehoseFactory.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/firehose/EventReceiverFirehoseFactory.java @@ -79,6 +79,7 @@ * Builds firehoses that accept events through the {@link EventReceiver} interface. Can also register these * firehoses with an {@link ServiceAnnouncingChatHandlerProvider}. */ +@Deprecated public class EventReceiverFirehoseFactory implements FirehoseFactory>> { private static final EmittingLogger log = new EmittingLogger(EventReceiverFirehoseFactory.class); diff --git a/server/src/main/java/org/apache/druid/segment/realtime/firehose/HttpFirehoseFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/HttpFirehoseFactory.java deleted file mode 100644 index bbd797f2d3d1..000000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/HttpFirehoseFactory.java +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.firehose; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.impl.HttpEntity; -import org.apache.druid.data.input.impl.HttpInputSource; -import org.apache.druid.data.input.impl.HttpInputSourceConfig; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory; -import org.apache.druid.metadata.PasswordProvider; -import org.apache.druid.utils.CompressionUtils; - -import javax.annotation.Nullable; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Objects; - -public class HttpFirehoseFactory extends PrefetchableTextFilesFirehoseFactory -{ - private final List uris; - @Nullable - private final String httpAuthenticationUsername; - @Nullable - private final PasswordProvider httpAuthenticationPasswordProvider; - private final HttpInputSourceConfig inputSourceConfig; - - @JsonCreator - public HttpFirehoseFactory( - @JsonProperty("uris") List uris, - @JsonProperty("maxCacheCapacityBytes") Long maxCacheCapacityBytes, - @JsonProperty("maxFetchCapacityBytes") Long maxFetchCapacityBytes, - @JsonProperty("prefetchTriggerBytes") Long prefetchTriggerBytes, - @JsonProperty("fetchTimeout") Long fetchTimeout, - @JsonProperty("maxFetchRetry") Integer maxFetchRetry, - @JsonProperty("httpAuthenticationUsername") @Nullable String httpAuthenticationUsername, - @JsonProperty("httpAuthenticationPassword") @Nullable PasswordProvider httpAuthenticationPasswordProvider, - @JacksonInject HttpInputSourceConfig inputSourceConfig - ) - { - super(maxCacheCapacityBytes, maxFetchCapacityBytes, prefetchTriggerBytes, fetchTimeout, maxFetchRetry); - Preconditions.checkArgument(uris.size() > 0, "Empty URIs"); - HttpInputSource.throwIfInvalidProtocols(inputSourceConfig, uris); - this.uris = uris; - this.httpAuthenticationUsername = httpAuthenticationUsername; - this.httpAuthenticationPasswordProvider = httpAuthenticationPasswordProvider; - this.inputSourceConfig = inputSourceConfig; - } - - @Nullable - @JsonProperty - public String getHttpAuthenticationUsername() - { - return httpAuthenticationUsername; - } - - @Nullable - @JsonProperty("httpAuthenticationPassword") - public PasswordProvider getHttpAuthenticationPasswordProvider() - { - return httpAuthenticationPasswordProvider; - } - - @JsonProperty - public List getUris() - { - return uris; - } - - @Override - protected Collection initObjects() - { - return uris; - } - - @Override - protected InputStream openObjectStream(URI object) throws IOException - { - // A negative start value will ensure no bytes of the InputStream are skipped - return openObjectStream(object, 0); - } - - @Override - protected InputStream openObjectStream(URI object, long start) throws IOException - { - return HttpEntity.openInputStream(object, httpAuthenticationUsername, httpAuthenticationPasswordProvider, start); - } - - @Override - protected InputStream wrapObjectStream(URI object, InputStream stream) throws IOException - { - return CompressionUtils.decompress(stream, object.getPath()); - } - 
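// Editorial aside, as a hedged sketch rather than part of this patch: specs that
// relied on the deleted HttpFirehoseFactory are expected to migrate to the
// HttpInputSource replacement (this factory already reads via HttpEntity, which
// HttpInputSource uses as well). The constructor shape below mirrors the fields
// above but is an assumption, not a verified signature:
//
//   InputSource source = new HttpInputSource(
//       Collections.singletonList(URI.create("http://example.com/data.json")),
//       "user",                                 // httpAuthenticationUsername (nullable)
//       new DefaultPasswordProvider("secret"),  // httpAuthenticationPassword (nullable)
//       new HttpInputSourceConfig(null)         // null keeps the default http/https allow-list
//   );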
- @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - HttpFirehoseFactory that = (HttpFirehoseFactory) o; - return uris.equals(that.uris) && - Objects.equals(httpAuthenticationUsername, that.httpAuthenticationUsername) && - Objects.equals(httpAuthenticationPasswordProvider, that.httpAuthenticationPasswordProvider) && - inputSourceConfig.equals(that.inputSourceConfig); - } - - @Override - public int hashCode() - { - return Objects.hash(uris, httpAuthenticationUsername, httpAuthenticationPasswordProvider, inputSourceConfig); - } - - @Override - protected Predicate getRetryCondition() - { - return e -> e instanceof IOException; - } - - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - return new HttpFirehoseFactory( - Collections.singletonList(split.get()), - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - getMaxFetchRetry(), - getHttpAuthenticationUsername(), - httpAuthenticationPasswordProvider, - inputSourceConfig - ); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseFactory.java deleted file mode 100644 index f609cbe87f3f..000000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseFactory.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.firehose; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.SplitHintSpec; -import org.apache.druid.data.input.impl.StringInputRowParser; - -import javax.annotation.Nullable; -import java.io.File; -import java.util.Objects; -import java.util.stream.Stream; - -/** - * Creates firehose that produces data inlined in its own spec - */ -public class InlineFirehoseFactory implements FiniteFirehoseFactory -{ - private final String data; - - @VisibleForTesting - @JsonCreator - public InlineFirehoseFactory(@JsonProperty("data") String data) - { - this.data = Preconditions.checkNotNull(data, "data"); - } - - @JsonProperty - public String getData() - { - return data; - } - - @Override - public Firehose connect(StringInputRowParser parser, @Nullable File temporaryDirectory) - { - return new InlineFirehose(data, parser); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - InlineFirehoseFactory factory = (InlineFirehoseFactory) o; - return data.equals(factory.data); - } - - @Override - public int hashCode() - { - return Objects.hash(data); - } - - @Override - public boolean isSplittable() - { - return false; - } - - @Override - public Stream> getSplits(@Nullable SplitHintSpec splitHintSpec) - { - return Stream.of(new InputSplit<>(data)); - } - - @Override - public int getNumSplits(@Nullable SplitHintSpec splitHintSpec) - { - return 1; - } - - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - return new InlineFirehoseFactory(split.get()); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/firehose/LocalFirehoseFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/LocalFirehoseFactory.java deleted file mode 100644 index e11d1967b3be..000000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/LocalFirehoseFactory.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.firehose; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.filefilter.TrueFileFilter; -import org.apache.commons.io.filefilter.WildcardFileFilter; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.impl.AbstractTextFilesFirehoseFactory; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.utils.CompressionUtils; - -import javax.annotation.Nullable; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; -import java.util.List; - -/** - * Firehose that reads data from files on local disk - */ -public class LocalFirehoseFactory extends AbstractTextFilesFirehoseFactory -{ - private final File baseDir; - private final String filter; - @Nullable - private final StringInputRowParser parser; - - @JsonCreator - public LocalFirehoseFactory( - @JsonProperty("baseDir") File baseDir, - @JsonProperty("filter") String filter, - // Backwards compatible - @Nullable @JsonProperty("parser") StringInputRowParser parser - ) - { - this.baseDir = baseDir; - this.filter = filter; - this.parser = parser; - } - - @JsonProperty - public File getBaseDir() - { - return baseDir; - } - - @JsonProperty - public String getFilter() - { - return filter; - } - - @JsonProperty - @Nullable - public StringInputRowParser getParser() - { - return parser; - } - - @Override - protected Collection initObjects() - { - final Collection files = FileUtils.listFiles( - Preconditions.checkNotNull(this.baseDir, "baseDir").getAbsoluteFile(), - new WildcardFileFilter(filter), - TrueFileFilter.INSTANCE - ); - - // Sort files for consistent ordering from run to run. - final List fileList = files instanceof List ? (List) files : new ArrayList<>(files); - fileList.sort(Comparator.naturalOrder()); - return fileList; - } - - @Override - protected InputStream openObjectStream(File object) throws IOException - { - return FileUtils.openInputStream(object); - } - - @Override - protected InputStream wrapObjectStream(File object, InputStream stream) throws IOException - { - return CompressionUtils.decompress(stream, object.getPath()); - } - - @Override - public FiniteFirehoseFactory withSplit(InputSplit split) - { - final File newFile = split.get(); - final File baseDir = newFile.getParentFile(); - final String filter = newFile.getName(); - return new LocalFirehoseFactory(baseDir, filter, parser); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/firehose/PrefetchSqlFirehoseFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/PrefetchSqlFirehoseFactory.java deleted file mode 100644 index 683fef38a126..000000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/PrefetchSqlFirehoseFactory.java +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.firehose; - -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import org.apache.commons.io.LineIterator; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.SplitHintSpec; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.prefetch.CacheManager; -import org.apache.druid.data.input.impl.prefetch.FetchConfig; -import org.apache.druid.data.input.impl.prefetch.Fetcher; -import org.apache.druid.data.input.impl.prefetch.JsonIterator; -import org.apache.druid.data.input.impl.prefetch.ObjectOpenFunction; -import org.apache.druid.data.input.impl.prefetch.OpenObject; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.java.util.common.logger.Logger; - -import javax.annotation.Nullable; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.stream.Stream; - -/** - * PrefetchSqlFirehoseFactory is an abstract firehose factory for reading prefetched sql resultset data. Regardless - * of whether prefetching is enabled or not, for each sql object the entire result set is fetched into a file in the local disk. - * This class defines prefetching as caching the resultsets into local disk in case multiple sql queries are present. - * When prefetching is enabled, the following functionalities are provided: - *
- * - Caching: for the first call of {@link #connect(InputRowParser, File)}, it caches objects in a local disk
- *   up to maxCacheCapacityBytes. These caches are NOT deleted until the process terminates, and thus can be used for
- *   future reads.
- * - Fetching: when it reads all cached data, it fetches remaining objects into a local disk and reads data from
- *   them. For performance reasons, a prefetch technique is used: when the size of the remaining fetched data is
- *   smaller than {@link FetchConfig#prefetchTriggerBytes}, a background prefetch thread automatically starts to fetch
- *   the remaining objects.
- *
- * This implementation aims to avoid maintaining a persistent connection to the database by prefetching the resultset into disk.
- *
- * Prefetching can be turned on/off by setting maxFetchCapacityBytes. Depending on whether prefetching is enabled or
- * disabled, the firehose behaves as follows:
- *
- * 1. If prefetch is enabled, this firehose can fetch input objects in the background.
- * 2. When next() is called, it first checks whether there are already fetched files in local storage.
- * 2.1 If they exist, it simply chooses a fetched file and returns a {@link LineIterator} reading that file.
- * 2.2 If there are no fetched files in local storage but some objects still remain to be read, the firehose
- *     fetches one of the input objects in the background immediately. Finally, the firehose returns an iterator of {@link JsonIterator}
- *     for deserializing the saved resultset.
- * 3. If prefetch is disabled, the firehose saves the resultset to file and returns an iterator of {@link JsonIterator} - * which directly reads the stream opened by {@link #openObjectStream}. If there is an IOException, it will throw it - * and the read will fail. - */ -public abstract class PrefetchSqlFirehoseFactory - implements FiniteFirehoseFactory>, T> -{ - private static final Logger LOG = new Logger(PrefetchSqlFirehoseFactory.class); - - private final FetchConfig fetchConfig; - private final CacheManager cacheManager; - private List objects; - private ObjectMapper objectMapper; - - - public PrefetchSqlFirehoseFactory( - Long maxCacheCapacityBytes, - Long maxFetchCapacityBytes, - Long prefetchTriggerBytes, - Long fetchTimeout, - ObjectMapper objectMapper - ) - { - this.fetchConfig = new FetchConfig( - maxCacheCapacityBytes, - maxFetchCapacityBytes, - prefetchTriggerBytes, - fetchTimeout, - 0 - ); - this.cacheManager = new CacheManager<>( - fetchConfig.getMaxCacheCapacityBytes() - ); - this.objectMapper = objectMapper; - } - - @JsonProperty - public long getMaxCacheCapacityBytes() - { - return cacheManager.getMaxCacheCapacityBytes(); - } - - @JsonProperty - public long getMaxFetchCapacityBytes() - { - return fetchConfig.getMaxFetchCapacityBytes(); - } - - @JsonProperty - public long getPrefetchTriggerBytes() - { - return fetchConfig.getPrefetchTriggerBytes(); - } - - @JsonProperty - public long getFetchTimeout() - { - return fetchConfig.getFetchTimeout(); - } - - @Override - public Firehose connect(InputRowParser> firehoseParser, @Nullable File temporaryDirectory) - { - if (objects == null) { - objects = ImmutableList.copyOf(Preconditions.checkNotNull(initObjects(), "objects")); - } - if (cacheManager.isEnabled() || fetchConfig.getMaxFetchCapacityBytes() > 0) { - Preconditions.checkNotNull(temporaryDirectory, "temporaryDirectory"); - Preconditions.checkArgument( - temporaryDirectory.exists(), - "temporaryDirectory[%s] does not exist", - temporaryDirectory - ); - Preconditions.checkArgument( - temporaryDirectory.isDirectory(), - "temporaryDirectory[%s] is not a directory", - temporaryDirectory - ); - } - - LOG.info("Create a new firehose for [%d] queries", objects.size()); - - // fetchExecutor is responsible for background data fetching - final ExecutorService fetchExecutor = Execs.singleThreaded("firehose_fetch_%d"); - final Fetcher fetcher = new SqlFetcher<>( - cacheManager, - objects, - fetchExecutor, - temporaryDirectory, - fetchConfig, - new ObjectOpenFunction() - { - @Override - public InputStream open(T object, File outFile) throws IOException - { - return openObjectStream(object, outFile); - } - - @Override - public InputStream open(T object) throws IOException - { - final File outFile = File.createTempFile("sqlresults_", null, temporaryDirectory); - return openObjectStream(object, outFile); - } - } - ); - - return new SqlFirehose( - new Iterator>>() - { - @Override - public boolean hasNext() - { - return fetcher.hasNext(); - } - - @Override - public JsonIterator> next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - try { - TypeReference> type = new TypeReference>() - { - }; - final OpenObject openObject = fetcher.next(); - final InputStream stream = openObject.getObjectStream(); - return new JsonIterator<>(type, stream, openObject.getResourceCloser(), objectMapper); - } - catch (Exception ioe) { - throw new RuntimeException(ioe); - } - } - }, - firehoseParser, - () -> { - fetchExecutor.shutdownNow(); - try { - 
Preconditions.checkState(fetchExecutor.awaitTermination( - fetchConfig.getFetchTimeout(), - TimeUnit.MILLISECONDS - )); - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new ISE("Failed to shutdown fetch executor during close"); - } - } - ); - } - - protected void initializeObjectsIfNeeded() - { - if (objects == null) { - objects = ImmutableList.copyOf(Preconditions.checkNotNull(initObjects(), "initObjects")); - } - } - - public List getObjects() - { - return objects; - } - - @Override - public Stream> getSplits(@Nullable SplitHintSpec splitHintSpec) - { - initializeObjectsIfNeeded(); - return getObjects().stream().map(InputSplit::new); - } - - @Override - public int getNumSplits(@Nullable SplitHintSpec splitHintSpec) - { - initializeObjectsIfNeeded(); - return getObjects().size(); - } - - /** - * Open an input stream from the given object. The object is fetched into the file and an input - * stream to the file is provided. - * - * @param object an object to be read - * @param filename file to which the object is fetched into - * - * @return an input stream to the file - */ - protected abstract InputStream openObjectStream(T object, File filename) throws IOException; - - protected abstract Collection initObjects(); -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFetcher.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFetcher.java deleted file mode 100644 index 63385f7fdbb3..000000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFetcher.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.firehose; - -import org.apache.druid.data.input.impl.prefetch.CacheManager; -import org.apache.druid.data.input.impl.prefetch.FetchConfig; -import org.apache.druid.data.input.impl.prefetch.Fetcher; -import org.apache.druid.data.input.impl.prefetch.ObjectOpenFunction; -import org.apache.druid.data.input.impl.prefetch.OpenObject; - -import javax.annotation.Nullable; -import java.io.File; -import java.io.IOException; -import java.util.List; -import java.util.concurrent.ExecutorService; - - -/** - * A file fetcher used by {@link PrefetchSqlFirehoseFactory}. - * See the javadoc of {@link PrefetchSqlFirehoseFactory} for more details. 
- */ -public class SqlFetcher extends Fetcher -{ - private static final String FETCH_FILE_PREFIX = "sqlfetch-"; - - @Nullable - private final File temporaryDirectory; - - private final ObjectOpenFunction openObjectFunction; - - SqlFetcher( - CacheManager cacheManager, - List objects, - ExecutorService fetchExecutor, - @Nullable File temporaryDirectory, - FetchConfig fetchConfig, - ObjectOpenFunction openObjectFunction - ) - { - - super( - cacheManager, - objects, - fetchExecutor, - temporaryDirectory, - fetchConfig - ); - this.temporaryDirectory = temporaryDirectory; - this.openObjectFunction = openObjectFunction; - } - - /** - * Downloads the entire resultset object into a file. This avoids maintaining a - * persistent connection to the database. The retry is performed at the query execution layer. - * - * @param object sql query for which the resultset is to be downloaded - * @param outFile a file which the object data is stored - * - * @return size of downloaded resultset - */ - - @Override - protected long download(T object, File outFile) throws IOException - { - openObjectFunction.open(object, outFile); - return outFile.length(); - } - - /** - * Generates an instance of {@link OpenObject} for the given object. This is usually called - * when prefetching is disabled. The retry is performed at the query execution layer. - */ - - @Override - protected OpenObject generateOpenObject(T object) throws IOException - { - final File outFile = File.createTempFile(FETCH_FILE_PREFIX, null, temporaryDirectory); - return new OpenObject<>( - object, - openObjectFunction.open(object, outFile), - outFile::delete - ); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseFactory.java deleted file mode 100644 index 0b5863d671d6..000000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseFactory.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.firehose; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Preconditions; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.guice.annotations.Smile; -import org.apache.druid.metadata.MetadataStorageConnectorConfig; -import org.apache.druid.metadata.SQLFirehoseDatabaseConnector; -import org.apache.druid.metadata.input.SqlEntity; - -import javax.annotation.Nullable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -public class SqlFirehoseFactory extends PrefetchSqlFirehoseFactory -{ - @JsonProperty - private final List sqls; - @Nullable - @JsonProperty - private final MetadataStorageConnectorConfig connectorConfig; - private final ObjectMapper objectMapper; - @JsonProperty - private final SQLFirehoseDatabaseConnector sqlFirehoseDatabaseConnector; - private final boolean foldCase; - - @JsonCreator - public SqlFirehoseFactory( - @JsonProperty("sqls") List sqls, - @JsonProperty("maxCacheCapacityBytes") Long maxCacheCapacityBytes, - @JsonProperty("maxFetchCapacityBytes") Long maxFetchCapacityBytes, - @JsonProperty("prefetchTriggerBytes") Long prefetchTriggerBytes, - @JsonProperty("fetchTimeout") Long fetchTimeout, - @JsonProperty("foldCase") boolean foldCase, - @JsonProperty("database") SQLFirehoseDatabaseConnector sqlFirehoseDatabaseConnector, - @JacksonInject @Smile ObjectMapper objectMapper - ) - { - super( - maxCacheCapacityBytes, - maxFetchCapacityBytes, - prefetchTriggerBytes, - fetchTimeout, - objectMapper - ); - Preconditions.checkArgument(sqls.size() > 0, "No SQL queries provided"); - - this.sqls = sqls; - this.objectMapper = objectMapper; - this.sqlFirehoseDatabaseConnector = Preconditions.checkNotNull( - sqlFirehoseDatabaseConnector, - "SQL Metadata Connector not configured!" - ); - this.foldCase = foldCase; - this.connectorConfig = null; - } - - @Override - protected InputStream openObjectStream(String sql, File fileName) throws IOException - { - SqlEntity.openCleanableFile(sql, sqlFirehoseDatabaseConnector, objectMapper, foldCase, fileName); - return new FileInputStream(fileName); - } - - @Override - protected Collection initObjects() - { - return sqls; - } - - @Override - public FiniteFirehoseFactory>, String> withSplit(InputSplit split) - { - return new SqlFirehoseFactory( - Collections.singletonList(split.get()), - getMaxCacheCapacityBytes(), - getMaxFetchCapacityBytes(), - getPrefetchTriggerBytes(), - getFetchTimeout(), - foldCase, - sqlFirehoseDatabaseConnector, - objectMapper - ); - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/firehose/CombiningFirehoseFactoryTest.java b/server/src/test/java/org/apache/druid/segment/realtime/firehose/CombiningFirehoseFactoryTest.java deleted file mode 100644 index 14114701f2ec..000000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/firehose/CombiningFirehoseFactoryTest.java +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.firehose; - -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.FirehoseFactory; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.Row; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.parsers.ParseException; -import org.joda.time.DateTime; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import javax.annotation.Nullable; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Optional; - -public class CombiningFirehoseFactoryTest -{ - private CombiningFirehoseFactory combiningFirehoseFactory; - private List delegateFirehoses; - - @Before - public void setUp() - { - delegateFirehoses = Arrays.asList( - new ListFirehoseFactory(Arrays.asList(makeRow(1, 1), makeRow(2, 2))), - new ListFirehoseFactory(Arrays.asList(makeRow(3, 3), makeRow(4, 4), makeRow(5, 5))) - ); - combiningFirehoseFactory = new CombiningFirehoseFactory(delegateFirehoses); - } - - @Test - public void testCombiningfirehose() throws IOException - { - final Firehose firehose = combiningFirehoseFactory.connect(null, null); - for (int i = 1; i < 6; i++) { - Assert.assertTrue(firehose.hasMore()); - final InputRow inputRow = firehose.nextRow(); - Assert.assertEquals(i, inputRow.getTimestampFromEpoch()); - Assert.assertEquals(i, inputRow.getMetric("test").floatValue(), 0); - } - Assert.assertFalse(firehose.hasMore()); - } - - @Test - public void testFirehoseNotParallelizable() - { - Optional>> maybeFirehoseWithSplit = combiningFirehoseFactory.getSplits(null) - .findFirst(); - - Assert.assertTrue(maybeFirehoseWithSplit.isPresent()); - FiniteFirehoseFactory> firehoseWithSplit = combiningFirehoseFactory.withSplit( - maybeFirehoseWithSplit.get()); - Assert.assertTrue(firehoseWithSplit instanceof CombiningFirehoseFactory); - Assert.assertFalse(combiningFirehoseFactory.isSplittable()); - Assert.assertEquals(delegateFirehoses, ((CombiningFirehoseFactory) firehoseWithSplit).getDelegateFactoryList()); - } - - - private InputRow makeRow(final long timestamp, final float metricValue) - { - return new InputRow() - { - @Override - public List getDimensions() - { - return Collections.singletonList("testDim"); - } - - @Override - public long getTimestampFromEpoch() - { - return timestamp; - } - - @Override - public DateTime getTimestamp() - { - return DateTimes.utc(timestamp); - } - - @Override - public List getDimension(String dimension) - { - return new 
ArrayList<>(); - } - - @Override - public Number getMetric(String metric) - { - return metricValue; - } - - @Override - public Object getRaw(String dimension) - { - return null; - } - - @Override - public int compareTo(Row o) - { - return 0; - } - }; - } - - public static class ListFirehoseFactory implements FirehoseFactory - { - private final List rows; - - ListFirehoseFactory(List rows) - { - this.rows = rows; - } - - @Override - public Firehose connect(InputRowParser inputRowParser, File temporaryDirectory) throws ParseException - { - final Iterator iterator = rows.iterator(); - return new Firehose() - { - @Override - public boolean hasMore() - { - return iterator.hasNext(); - } - - @Nullable - @Override - public InputRow nextRow() - { - return iterator.next(); - } - - @Override - public void close() - { - // Do nothing - } - }; - } - - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/firehose/HttpFirehoseFactoryTest.java b/server/src/test/java/org/apache/druid/segment/realtime/firehose/HttpFirehoseFactoryTest.java deleted file mode 100644 index 867c49778d59..000000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/firehose/HttpFirehoseFactoryTest.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.firehose; - -import com.fasterxml.jackson.databind.InjectableValues.Std; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import org.apache.druid.data.input.impl.HttpInputSourceConfig; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.metadata.DefaultPasswordProvider; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -import java.io.IOException; -import java.net.URI; - -public class HttpFirehoseFactoryTest -{ - @Rule - public ExpectedException expectedException = ExpectedException.none(); - - @Test - public void testSerde() throws IOException - { - final HttpInputSourceConfig inputSourceConfig = new HttpInputSourceConfig(null); - final ObjectMapper mapper = new DefaultObjectMapper(); - mapper.setInjectableValues(new Std().addValue( - HttpInputSourceConfig.class, - inputSourceConfig - )); - - final DefaultPasswordProvider pwProvider = new DefaultPasswordProvider("testPassword"); - final HttpFirehoseFactory factory = new HttpFirehoseFactory( - ImmutableList.of(URI.create("http://foo/bar"), URI.create("http://foo/bar2")), - 2048L, - 1024L, - 512L, - 100L, - 5, - "testUser", - pwProvider, - inputSourceConfig - ); - - final HttpFirehoseFactory outputFact = mapper.readValue( - mapper.writeValueAsString(factory), - HttpFirehoseFactory.class - ); - - Assert.assertEquals(factory, outputFact); - } - - @Test - public void testConstructorAllowsOnlyDefaultProtocols() - { - new HttpFirehoseFactory( - ImmutableList.of(URI.create("http:///")), - null, - null, - null, - null, - null, - null, - null, - new HttpInputSourceConfig(null) - ); - - new HttpFirehoseFactory( - ImmutableList.of(URI.create("https:///")), - null, - null, - null, - null, - null, - null, - null, - new HttpInputSourceConfig(null) - ); - - expectedException.expect(IllegalArgumentException.class); - expectedException.expectMessage("Only [http, https] protocols are allowed"); - new HttpFirehoseFactory( - ImmutableList.of(URI.create("my-protocol:///")), - null, - null, - null, - null, - null, - null, - null, - new HttpInputSourceConfig(null) - ); - } - - @Test - public void testConstructorAllowsOnlyCustomProtocols() - { - final HttpInputSourceConfig customConfig = new HttpInputSourceConfig(ImmutableSet.of("druid")); - new HttpFirehoseFactory( - ImmutableList.of(URI.create("druid:///")), - null, - null, - null, - null, - null, - null, - null, - customConfig - ); - - expectedException.expect(IllegalArgumentException.class); - expectedException.expectMessage("Only [druid] protocols are allowed"); - new HttpFirehoseFactory( - ImmutableList.of(URI.create("https:///")), - null, - null, - null, - null, - null, - null, - null, - customConfig - ); - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseFactoryTest.java b/server/src/test/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseFactoryTest.java deleted file mode 100644 index 053811e452bc..000000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseFactoryTest.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.firehose; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.data.input.FiniteFirehoseFactory; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.InputSplit; -import org.apache.druid.data.input.impl.CSVParseSpec; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.io.File; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.List; -import java.util.Optional; - -@SuppressWarnings({"NullableProblems", "ConstantConditions"}) -public class InlineFirehoseFactoryTest -{ - private static final String DIMENSION_0 = "timestamp"; - private static final String DIMENSION_1 = "value"; - private static final List DIMENSIONS = Arrays.asList(DIMENSION_0, DIMENSION_1); - private static final String DELIMITER = ","; - private static final StringInputRowParser PARSER = new StringInputRowParser( - new CSVParseSpec( - new TimestampSpec( - DIMENSION_0, - "auto", - null - ), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(DIMENSIONS)), - DELIMITER, - DIMENSIONS, - false, - 0 - ), - StandardCharsets.UTF_8.name() - ); - private static final File NO_TEMP_DIR = null; - private static final String TIMESTAMP = "0"; - private static final String VALUE = "a"; - private static final String DATA = TIMESTAMP + DELIMITER + VALUE; - - private InlineFirehoseFactory target; - - @Before - public void setUp() - { - target = new InlineFirehoseFactory(DATA); - } - - @Test - public void testInterfaceImplementation() - { - Assert.assertTrue(target instanceof FiniteFirehoseFactory); - Assert.assertFalse(target.isSplittable()); - Assert.assertEquals(1, target.getNumSplits(null)); - } - - @Test(expected = NullPointerException.class) - public void testContstructorDataRequired() - { - new InlineFirehoseFactory(null); - } - - @Test - public void testGetData() - { - Assert.assertEquals(DATA, target.getData()); - } - - @Test - public void testConnect() throws IOException - { - Firehose firehose = target.connect(PARSER, NO_TEMP_DIR); - InputRow row = firehose.nextRow(); - Assert.assertNotNull(row); - List values = row.getDimension(DIMENSION_1); - Assert.assertNotNull(values); - Assert.assertEquals(1, values.size()); - Assert.assertEquals(VALUE, values.get(0)); - } - - @Test - public void testForcedSplitAndClone() - { - Optional> inputSplitOptional = target.getSplits(null).findFirst(); - Assert.assertTrue(inputSplitOptional.isPresent()); - FiniteFirehoseFactory cloneWithSplit = target.withSplit(inputSplitOptional.get()); - Assert.assertTrue(cloneWithSplit instanceof 
InlineFirehoseFactory); - Assert.assertEquals(DATA, ((InlineFirehoseFactory) cloneWithSplit).getData()); - } - - @Test - public void testSerde() throws IOException - { - final ObjectMapper objectMapper = new DefaultObjectMapper(); - InlineFirehoseFactory factory = new InlineFirehoseFactory(DATA); - String serialized = objectMapper.writeValueAsString(factory); - InlineFirehoseFactory deserialized = objectMapper.readValue(serialized, InlineFirehoseFactory.class); - Assert.assertEquals(factory, deserialized); - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/firehose/LocalFirehoseFactoryTest.java b/server/src/test/java/org/apache/druid/segment/realtime/firehose/LocalFirehoseFactoryTest.java deleted file mode 100644 index 2e4a820c1b87..000000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/firehose/LocalFirehoseFactoryTest.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.firehose; - -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.Row; -import org.apache.druid.data.input.impl.CSVParseSpec; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.testing.InitializedNullHandlingTest; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.io.IOException; -import java.io.Writer; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; - -public class LocalFirehoseFactoryTest extends InitializedNullHandlingTest -{ - @Rule - public final TemporaryFolder temporaryFolder = new TemporaryFolder(); - - private LocalFirehoseFactory factory; - - @Before - public void setup() throws IOException - { - for (int i = 0; i < 5; i++) { - try (final Writer writer = - Files.newBufferedWriter(temporaryFolder.newFile("test_" + i).toPath(), StandardCharsets.UTF_8)) { - writer.write((20171225 + i) + "," + i + "th test file\n"); - } - } - - for (int i = 0; i < 5; i++) { - try (final Writer writer = - Files.newBufferedWriter(temporaryFolder.newFile("filtered_" + i).toPath(), StandardCharsets.UTF_8)) { - writer.write((20171225 + i) + "," + i + "th filtered file\n"); - } - } - - factory = new LocalFirehoseFactory(temporaryFolder.getRoot(), "test_*", null); - } - - @Test - public void testConnect() throws IOException - { - try (final Firehose firehose = factory.connect(new StringInputRowParser( - new CSVParseSpec( - new TimestampSpec( - "timestamp", 
- "auto", - null - ), - new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(Arrays.asList("timestamp", "a")) - ), - ",", - Arrays.asList("timestamp", "a"), - false, - 0 - ), - StandardCharsets.UTF_8.name() - ), null)) { - final List rows = new ArrayList<>(); - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - - Assert.assertEquals(5, rows.size()); - rows.sort(Comparator.comparing(Row::getTimestamp)); - for (int i = 0; i < 5; i++) { - final List dimVals = rows.get(i).getDimension("a"); - Assert.assertEquals(1, dimVals.size()); - Assert.assertEquals(i + "th test file", dimVals.get(0)); - } - } - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseFactoryTest.java b/server/src/test/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseFactoryTest.java deleted file mode 100644 index bd92c16c96b9..000000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseFactoryTest.java +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.firehose; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableList; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.Row; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.java.util.common.FileUtils; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.metadata.TestDerbyConnector; -import org.apache.druid.metadata.input.SqlTestUtils; -import org.apache.druid.segment.TestHelper; -import org.apache.druid.segment.transform.TransformSpec; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; - -public class SqlFirehoseFactoryTest -{ - private static final List FIREHOSE_TMP_DIRS = new ArrayList<>(); - private static File TEST_DIR; - private final String TABLE_NAME_1 = "FOOS_TABLE_1"; - private final String TABLE_NAME_2 = "FOOS_TABLE_2"; - - private final List SQLLIST1 = ImmutableList.of("SELECT timestamp,a,b FROM FOOS_TABLE_1"); - private final List SQLLIST2 = ImmutableList.of( - "SELECT timestamp,a,b FROM FOOS_TABLE_1", - "SELECT timestamp,a,b FROM FOOS_TABLE_2" - ); - - @Rule - public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule = new TestDerbyConnector.DerbyConnectorRule(); - private final ObjectMapper mapper = TestHelper.makeSmileMapper(); - - private final InputRowParser parser = TransformSpec.NONE.decorate( - new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("timestamp", "auto", null), - new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(Arrays.asList("timestamp", "a", "b")) - ) - ) - ) - ); - private TestDerbyConnector derbyConnector; - - @BeforeClass - public static void setup() throws IOException - { - TEST_DIR = File.createTempFile(SqlFirehoseFactoryTest.class.getSimpleName(), "testDir"); - org.apache.commons.io.FileUtils.forceDelete(TEST_DIR); - FileUtils.mkdirp(TEST_DIR); - } - - @AfterClass - public static void teardown() throws IOException - { - org.apache.commons.io.FileUtils.forceDelete(TEST_DIR); - for (File dir : FIREHOSE_TMP_DIRS) { - org.apache.commons.io.FileUtils.forceDelete(dir); - } - } - - private void assertResult(List rows, List sqls) - { - Assert.assertEquals(10 * sqls.size(), rows.size()); - rows.sort(Comparator.comparing(Row::getTimestamp) - .thenComparingInt(r -> Integer.valueOf(r.getDimension("a").get(0))) - .thenComparingInt(r -> Integer.valueOf(r.getDimension("b").get(0)))); - int rowCount = 0; - for (int i = 0; i < 10; i++) { - for (int j = 0; j < sqls.size(); j++) { - final Row row = rows.get(rowCount); - String timestampSt = StringUtils.format("2011-01-12T00:0%s:00.000Z", i); - Assert.assertEquals(timestampSt, row.getTimestamp().toString()); - Assert.assertEquals(i, Integer.valueOf(row.getDimension("a").get(0)).intValue()); - Assert.assertEquals(i, Integer.valueOf(row.getDimension("b").get(0)).intValue()); - rowCount++; - } - } - } - - private void assertNumRemainingCacheFiles(File firehoseTmpDir, int expectedNumFiles) - { - final String[] files = 
firehoseTmpDir.list(); - Assert.assertNotNull(files); - Assert.assertEquals(expectedNumFiles, files.length); - } - - private File createFirehoseTmpDir(String dirSuffix) throws IOException - { - final File firehoseTempDir = File.createTempFile( - SqlFirehoseFactoryTest.class.getSimpleName(), - dirSuffix - ); - org.apache.commons.io.FileUtils.forceDelete(firehoseTempDir); - FileUtils.mkdirp(firehoseTempDir); - FIREHOSE_TMP_DIRS.add(firehoseTempDir); - return firehoseTempDir; - } - - @Test - public void testWithoutCacheAndFetch() throws Exception - { - derbyConnector = derbyConnectorRule.getConnector(); - SqlTestUtils testUtils = new SqlTestUtils(derbyConnector); - testUtils.createAndUpdateTable(TABLE_NAME_1, 10); - final SqlFirehoseFactory factory = - new SqlFirehoseFactory( - SQLLIST1, - 0L, - 0L, - 0L, - 0L, - true, - testUtils.getDerbyFirehoseConnector(), - mapper - ); - - final List rows = new ArrayList<>(); - final File firehoseTmpDir = createFirehoseTmpDir("testWithoutCacheAndFetch"); - try (Firehose firehose = factory.connect(parser, firehoseTmpDir)) { - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - - assertResult(rows, SQLLIST1); - assertNumRemainingCacheFiles(firehoseTmpDir, 0); - testUtils.dropTable(TABLE_NAME_1); - } - - - @Test - public void testWithoutCache() throws IOException - { - derbyConnector = derbyConnectorRule.getConnector(); - SqlTestUtils testUtils = new SqlTestUtils(derbyConnector); - testUtils.createAndUpdateTable(TABLE_NAME_1, 10); - final SqlFirehoseFactory factory = - new SqlFirehoseFactory( - SQLLIST1, - 0L, - null, - null, - null, - true, - testUtils.getDerbyFirehoseConnector(), - mapper - ); - - - final List rows = new ArrayList<>(); - final File firehoseTmpDir = createFirehoseTmpDir("testWithoutCache"); - try (Firehose firehose = factory.connect(parser, firehoseTmpDir)) { - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - - assertResult(rows, SQLLIST1); - assertNumRemainingCacheFiles(firehoseTmpDir, 0); - testUtils.dropTable(TABLE_NAME_1); - } - - - @Test - public void testWithCacheAndFetch() throws IOException - { - derbyConnector = derbyConnectorRule.getConnector(); - SqlTestUtils testUtils = new SqlTestUtils(derbyConnector); - testUtils.createAndUpdateTable(TABLE_NAME_1, 10); - testUtils.createAndUpdateTable(TABLE_NAME_2, 10); - - final SqlFirehoseFactory factory = new - SqlFirehoseFactory( - SQLLIST2, - null, - null, - 0L, - null, - true, - testUtils.getDerbyFirehoseConnector(), - mapper - ); - - final List rows = new ArrayList<>(); - final File firehoseTmpDir = createFirehoseTmpDir("testWithCacheAndFetch"); - try (Firehose firehose = factory.connect(parser, firehoseTmpDir)) { - while (firehose.hasMore()) { - rows.add(firehose.nextRow()); - } - } - - assertResult(rows, SQLLIST2); - assertNumRemainingCacheFiles(firehoseTmpDir, 2); - testUtils.dropTable(TABLE_NAME_1); - testUtils.dropTable(TABLE_NAME_2); - - } -} diff --git a/services/src/test/java/org/apache/druid/cli/validate/DruidJsonValidatorTest.java b/services/src/test/java/org/apache/druid/cli/validate/DruidJsonValidatorTest.java index 1d309e0925cb..5f792b892b2b 100644 --- a/services/src/test/java/org/apache/druid/cli/validate/DruidJsonValidatorTest.java +++ b/services/src/test/java/org/apache/druid/cli/validate/DruidJsonValidatorTest.java @@ -19,26 +19,21 @@ package org.apache.druid.cli.validate; -import com.fasterxml.jackson.databind.Module; import com.fasterxml.jackson.databind.ObjectMapper; import com.github.rvesse.airline.Cli; import 
com.google.inject.Injector; -import org.apache.druid.guice.FirehoseModule; +import org.apache.druid.data.input.impl.JsonInputFormat; +import org.apache.druid.data.input.impl.LocalInputSource; import org.apache.druid.guice.GuiceInjectors; -import org.apache.druid.indexing.common.task.RealtimeIndexTask; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.indexing.common.task.IndexTask; import org.apache.druid.indexing.common.task.TaskResource; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeIOConfig; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; -import org.apache.druid.segment.realtime.FireDepartment; -import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory; -import org.apache.druid.timeline.partition.NoneShardSpec; -import org.joda.time.Period; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -134,14 +129,10 @@ public void write(String str) public void testTaskValidator() throws Exception { final ObjectMapper jsonMapper = new DefaultObjectMapper(); - for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) { - jsonMapper.registerModule(jacksonModule); - } - - final RealtimeIndexTask task = new RealtimeIndexTask( + final IndexTask task = new IndexTask( null, new TaskResource("rofl", 2), - new FireDepartment( + new IndexTask.IndexIngestionSpec( new DataSchema( "foo", null, @@ -150,28 +141,35 @@ public void testTaskValidator() throws Exception null, jsonMapper ), - new RealtimeIOConfig( - new LocalFirehoseFactory(new File("lol"), "rofl", null), - (schema, config, metrics) -> null + new IndexTask.IndexIOConfig( + null, + new LocalInputSource(new File("lol"), "rofl"), + new JsonInputFormat(null, null, null), + false, + false ), - new RealtimeTuningConfig( + new IndexTask.IndexTuningConfig( null, - 1, null, null, - new Period("PT10M"), + 10, null, null, null, null, - 1, - NoneShardSpec.instance(), - new IndexSpec(), + null, + null, + new DynamicPartitionsSpec(10000, null), new IndexSpec(), - 0, - 0, - true, + null, + 3, + false, + null, + null, + null, + null, + null, null, null, null, From 23f370b934cc8058ce210fc22ee6160e79d4ffd7 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Tue, 2 Aug 2022 17:53:58 +0530 Subject: [PATCH 02/33] nit --- .../org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java index 5c942d39ea35..975d932da2d1 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java @@ -145,7 +145,7 @@ public String getBase() // Start a task monitor workerTaskMonitor = createTaskMonitor(); TestTasks.registerSubtypes(jsonMapper); - jsonMapper.registerSubtypes(new NamedType(TestIndexTask.class, "test_realtime")); + jsonMapper.registerSubtypes(new NamedType(TestIndexTask.class, "test_index")); workerTaskMonitor.start(); task 
= TestTasks.immediateSuccess("test"); From d11e740724b43f137a0afa942f53fb5587be949e Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Tue, 2 Aug 2022 18:31:46 +0530 Subject: [PATCH 03/33] assumption for sampler API --- .../indexing/seekablestream/SeekableStreamSamplerSpec.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java index 424511c8e8e0..389d7c2c7521 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java @@ -44,6 +44,7 @@ public abstract class SeekableStreamSamplerSpec recordSupplier; @@ -83,10 +84,6 @@ public SamplerResponse sample() recordSupplier, ioConfig.isUseEarliestSequenceNumber() ); - inputFormat = Preconditions.checkNotNull( - ioConfig.getInputFormat(), - "[spec.ioConfig.inputFormat] is required" - ); return inputSourceSampler.sample(inputSource, inputFormat, dataSchema, samplerConfig); } From 10ac5b31d502398d223e4350f13d90dbff8f2e19 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Wed, 3 Aug 2022 11:13:40 +0530 Subject: [PATCH 04/33] removes com.fasterxml.jackson.module:jackson-module-guice dependency for druid-cloudfiles-extensions --- extensions-contrib/cloudfiles-extensions/pom.xml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/extensions-contrib/cloudfiles-extensions/pom.xml b/extensions-contrib/cloudfiles-extensions/pom.xml index a608423d893c..d6dd4d619a9d 100644 --- a/extensions-contrib/cloudfiles-extensions/pom.xml +++ b/extensions-contrib/cloudfiles-extensions/pom.xml @@ -143,10 +143,6 @@ rackspace-cloudfiles-uk ${jclouds.version} - - com.fasterxml.jackson.module - jackson-module-guice - From e07d4ee6374bd7cec4defdeae5c602b0dfb9c288 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Tue, 9 Aug 2022 23:18:44 +0530 Subject: [PATCH 05/33] Modifies firehose configuration to input source in IT index tests, ingestion docs and SeekableStreamSamplerSpec.sample(..) 
to use new SeekableStreamSamplerInputSource --- .../input/impl/FileIteratingFirehose.java | 118 ------ .../FirehoseToInputSourceReaderAdaptor.java | 123 ------- .../input/impl/FileIteratingFirehoseTest.java | 171 --------- docs/configuration/index.md | 5 +- docs/development/extensions-core/google.md | 2 - docs/development/extensions-core/mysql.md | 34 +- .../development/extensions-core/postgresql.md | 34 +- docs/ingestion/native-batch-firehose.md | 341 ------------------ docs/ingestion/native-batch.md | 2 +- .../SeekableStreamSamplerSpec.java | 197 +++++++++- .../parallel/HashPartitionTaskKillTest.java | 10 +- .../ITCombiningFirehoseFactoryIndexTest.java | 2 +- ...ia_combining_input_source_index_task.json} | 50 ++- ...ipedia_index_task_with_dimension_spec.json | 48 ++- ...edia_index_task_with_granularity_spec.json | 28 +- .../wikipedia_index_task_with_transform.json | 46 ++- ...ia_index_with_merge_column_limit_task.json | 48 ++- .../indexer/wikipedia_merge_index_task.json | 26 +- .../indexer/wikipedia_merge_reindex_task.json | 30 +- .../wikipedia_parallel_index_task.json | 48 ++- ...ia_parallel_ingest_segment_index_task.json | 30 +- .../wikipedia_parallel_reindex_task.json | 48 ++- .../indexer/wikipedia_reindex_task.json | 26 +- ...ikipedia_reindex_task_with_transforms.json | 98 +++-- .../wikipedia_with_timestamp_index_task.json | 50 ++- .../indexer/wikiticker_index_task.json | 33 +- .../realtime/firehose/InlineFirehose.java | 91 ----- .../realtime/firehose/SqlFirehose.java | 95 ----- .../realtime/firehose/InlineFirehoseTest.java | 218 ----------- .../realtime/firehose/SqlFirehoseTest.java | 269 -------------- 30 files changed, 512 insertions(+), 1809 deletions(-) delete mode 100644 core/src/main/java/org/apache/druid/data/input/impl/FileIteratingFirehose.java delete mode 100644 core/src/main/java/org/apache/druid/data/input/impl/FirehoseToInputSourceReaderAdaptor.java delete mode 100644 core/src/test/java/org/apache/druid/data/input/impl/FileIteratingFirehoseTest.java delete mode 100644 docs/ingestion/native-batch-firehose.md rename integration-tests/src/test/resources/indexer/{wikipedia_combining_firehose_index_task.json => wikipedia_combining_input_source_index_task.json} (70%) delete mode 100644 server/src/main/java/org/apache/druid/segment/realtime/firehose/InlineFirehose.java delete mode 100644 server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFirehose.java delete mode 100644 server/src/test/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseTest.java delete mode 100644 server/src/test/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseTest.java diff --git a/core/src/main/java/org/apache/druid/data/input/impl/FileIteratingFirehose.java b/core/src/main/java/org/apache/druid/data/input/impl/FileIteratingFirehose.java deleted file mode 100644 index 42c627df2138..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/impl/FileIteratingFirehose.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl; - -import org.apache.commons.io.LineIterator; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.InputRowListPlusRawValues; -import org.apache.druid.java.util.common.parsers.ParseException; - -import javax.annotation.Nullable; -import java.io.Closeable; -import java.io.IOException; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public class FileIteratingFirehose implements Firehose -{ - private final Iterator lineIterators; - private final StringInputRowParser parser; - - private LineIterator lineIterator = null; - - private final Closeable closer; - - public FileIteratingFirehose( - Iterator lineIterators, - StringInputRowParser parser - ) - { - this(lineIterators, parser, null); - } - - public FileIteratingFirehose( - Iterator lineIterators, - StringInputRowParser parser, - Closeable closer - ) - { - this.lineIterators = lineIterators; - this.parser = parser; - this.closer = closer; - } - - @Override - public boolean hasMore() throws IOException - { - while ((lineIterator == null || !lineIterator.hasNext()) && lineIterators.hasNext()) { - lineIterator = getNextLineIterator(); - } - - return lineIterator != null && lineIterator.hasNext(); - } - - @Nullable - @Override - public InputRow nextRow() throws IOException - { - if (!hasMore()) { - throw new NoSuchElementException(); - } - - return parser.parse(lineIterator.next()); - } - - @Override - public InputRowListPlusRawValues nextRowWithRaw() throws IOException - { - if (!hasMore()) { - throw new NoSuchElementException(); - } - - String raw = lineIterator.next(); - try { - return InputRowListPlusRawValues.of(parser.parse(raw), parser.parseString(raw)); - } - catch (ParseException e) { - return InputRowListPlusRawValues.of(parser.parseString(raw), e); - } - } - - private LineIterator getNextLineIterator() throws IOException - { - if (lineIterator != null) { - lineIterator.close(); - } - - final LineIterator iterator = lineIterators.next(); - parser.startFileFromBeginning(); - return iterator; - } - - @Override - public void close() throws IOException - { - try (Closeable ignore = closer; - Closeable ignore2 = lineIterator) { - // close both via try-with-resources - } - } -} diff --git a/core/src/main/java/org/apache/druid/data/input/impl/FirehoseToInputSourceReaderAdaptor.java b/core/src/main/java/org/apache/druid/data/input/impl/FirehoseToInputSourceReaderAdaptor.java deleted file mode 100644 index 697c64fe7a37..000000000000 --- a/core/src/main/java/org/apache/druid/data/input/impl/FirehoseToInputSourceReaderAdaptor.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl; - -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.FirehoseFactory; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.InputRowListPlusRawValues; -import org.apache.druid.data.input.InputSourceReader; -import org.apache.druid.java.util.common.parsers.CloseableIterator; - -import java.io.File; -import java.io.IOException; -import java.io.UncheckedIOException; - -public class FirehoseToInputSourceReaderAdaptor implements InputSourceReader -{ - private final FirehoseFactory firehoseFactory; - private final InputRowParser inputRowParser; - private final File temporaryDirectory; - - public FirehoseToInputSourceReaderAdaptor( - FirehoseFactory firehoseFactory, - InputRowParser inputRowParser, - File temporaryDirectory - ) - { - this.firehoseFactory = firehoseFactory; - this.inputRowParser = inputRowParser; - this.temporaryDirectory = temporaryDirectory; - } - - @Override - public CloseableIterator read() throws IOException - { - return new CloseableIterator() - { - final Firehose firehose = firehoseFactory.connect(inputRowParser, temporaryDirectory); - - @Override - public boolean hasNext() - { - try { - return firehose.hasMore(); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Override - public InputRow next() - { - try { - return firehose.nextRow(); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Override - public void close() throws IOException - { - firehose.close(); - } - }; - } - - @Override - public CloseableIterator sample() throws IOException - { - return new CloseableIterator() - { - final Firehose firehose = firehoseFactory.connectForSampler(inputRowParser, temporaryDirectory); - - @Override - public boolean hasNext() - { - try { - return firehose.hasMore(); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Override - public InputRowListPlusRawValues next() - { - try { - return firehose.nextRowWithRaw(); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Override - public void close() throws IOException - { - firehose.close(); - } - }; - } -} diff --git a/core/src/test/java/org/apache/druid/data/input/impl/FileIteratingFirehoseTest.java b/core/src/test/java/org/apache/druid/data/input/impl/FileIteratingFirehoseTest.java deleted file mode 100644 index 14c2382c89ad..000000000000 --- a/core/src/test/java/org/apache/druid/data/input/impl/FileIteratingFirehoseTest.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.data.input.impl; - -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableList; -import org.apache.commons.io.LineIterator; -import org.apache.druid.common.config.NullHandlingTest; -import org.apache.druid.data.input.InputRow; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -import java.io.Closeable; -import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - -@RunWith(Parameterized.class) -public class FileIteratingFirehoseTest extends NullHandlingTest -{ - @Parameters(name = "{0}, {1}") - public static Collection constructorFeeder() - { - final List> inputTexts = ImmutableList.of( - ImmutableList.of("2000,foo"), - ImmutableList.of("2000,foo\n2000,bar\n"), - ImmutableList.of("2000,foo\n2000,bar\n", "2000,baz"), - ImmutableList.of("2000,foo\n2000,bar\n", "", "2000,baz"), - ImmutableList.of("2000,foo\n2000,bar\n", "", "2000,baz", ""), - ImmutableList.of("2000,foo\n2000,bar\n2000,baz", "", "2000,baz", "2000,foo\n2000,bar\n3000,baz"), - ImmutableList.of(""), - ImmutableList.of() - ); - - final List args = new ArrayList<>(); - for (int numSkipHeadRows = 0; numSkipHeadRows < 3; numSkipHeadRows++) { - for (List texts : inputTexts) { - args.add(new Object[] {texts, numSkipHeadRows}); - } - } - - return args; - } - - private static final char[] LINE_CHARS = "\n".toCharArray(); - - private final StringInputRowParser parser; - private final List inputs; - private final List expectedResults; - - public FileIteratingFirehoseTest(List texts, int numSkipHeaderRows) - { - parser = new StringInputRowParser( - new CSVParseSpec( - new TimestampSpec("ts", "auto", null), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x"))), - ",", - ImmutableList.of("ts", "x"), - false, - numSkipHeaderRows - ), - null - ); - - this.inputs = texts; - this.expectedResults = inputs.stream() - .map(input -> input.split("\n")) - .flatMap(lines -> { - final List filteredLines = Arrays - .stream(lines) - .filter(line -> line.length() > 0) - .map(line -> line.split(",")[1]) - .collect(Collectors.toList()); - - final int numRealSkippedRows = Math.min(filteredLines.size(), numSkipHeaderRows); - IntStream.range(0, numRealSkippedRows).forEach(i -> filteredLines.set(i, null)); - return filteredLines.stream(); - }) - .collect(Collectors.toList()); - } - - @Test - public void testFirehose() throws Exception - { - final List lineIterators = inputs.stream() - .map(s -> new LineIterator(new StringReader(s))) - .collect(Collectors.toList()); - - try (final FileIteratingFirehose firehose = new FileIteratingFirehose(lineIterators.iterator(), parser)) { - final List results = new ArrayList<>(); - - while (firehose.hasMore()) { - final InputRow inputRow = firehose.nextRow(); - if (inputRow == null) { - 
results.add(null); - } else { - results.add(Joiner.on("|").join(inputRow.getDimension("x"))); - } - } - - Assert.assertEquals(expectedResults, results); - } - } - - @Test(expected = RuntimeException.class) - public void testClose() throws IOException - { - final LineIterator lineIterator = new LineIterator(new Reader() - { - @Override - public int read(char[] cbuf, int off, int len) - { - System.arraycopy(LINE_CHARS, 0, cbuf, 0, LINE_CHARS.length); - return LINE_CHARS.length; - } - - @Override - public void close() - { - throw new RuntimeException("close test for FileIteratingFirehose"); - } - }); - - final TestCloseable closeable = new TestCloseable(); - final FileIteratingFirehose firehose = new FileIteratingFirehose( - ImmutableList.of(lineIterator).iterator(), - parser, - closeable - ); - firehose.hasMore(); // initialize lineIterator - firehose.close(); - Assert.assertTrue(closeable.closed); - } - - private static final class TestCloseable implements Closeable - { - private boolean closed; - - @Override - public void close() - { - closed = true; - } - } -} diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 2246727ad288..65cfda0b89a4 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -555,7 +555,7 @@ This deep storage is used to interface with Cassandra. Note that the `druid-cas #### HDFS input source You can set the following property to specify permissible protocols for -the [HDFS input source](../ingestion/native-batch-input-source.md#hdfs-input-source) and the [HDFS firehose](../ingestion/native-batch-firehose.md#hdfsfirehose). +the [HDFS input source](../ingestion/native-batch-input-source.md#hdfs-input-source). |Property|Possible Values|Description|Default| |--------|---------------|-----------|-------| @@ -565,7 +565,7 @@ the [HDFS input source](../ingestion/native-batch-input-source.md#hdfs-input-sou #### HTTP input source You can set the following property to specify permissible protocols for -the [HTTP input source](../ingestion/native-batch-input-source.md#http-input-source) and the [HTTP firehose](../ingestion/native-batch-firehose.md#httpfirehose). +the [HTTP input source](../ingestion/native-batch-input-source.md#http-input-source). |Property|Possible Values|Description|Default| |--------|---------------|-----------|-------| @@ -578,7 +578,6 @@ the [HTTP input source](../ingestion/native-batch-input-source.md#http-input-sou You can use the following properties to specify permissible JDBC options for: - [SQL input source](../ingestion/native-batch-input-source.md#sql-input-source) -- [SQL firehose](../ingestion/native-batch-firehose.md#sqlfirehose), - [globally cached JDBC lookups](../development/extensions-core/lookups-cached-global.md#jdbc-lookup) - [JDBC Data Fetcher for per-lookup caching](../development/extensions-core/druid-lookups.md#data-fetcher-layer). diff --git a/docs/development/extensions-core/google.md b/docs/development/extensions-core/google.md index eaed4eb567c5..813f9827e90c 100644 --- a/docs/development/extensions-core/google.md +++ b/docs/development/extensions-core/google.md @@ -40,8 +40,6 @@ The [Google Cloud Storage input source](../../ingestion/native-batch-input-sourc to read objects directly from Google Cloud Storage. If you use the [Hadoop task](../../ingestion/hadoop.md), you can read data from Google Cloud Storage by specifying the paths in your [`inputSpec`](../../ingestion/hadoop.md#inputspec). 
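+Objects can also be read directly from Google Cloud Storage via the [Google Cloud Storage input source](../../ingestion/native-batch-input-source.md#google-cloud-storage-input-source). A minimal sketch of the equivalent `inputSource` stanza (the bucket and object names below are placeholders, not paths from this repository): + +```json +"inputSource" : { + "type" : "google", + "uris": ["gs://foo/bar/file.json", "gs://bar/foo/file2.json"] +} +``` +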
-Objects can also be read directly from Google Cloud Storage via the [StaticGoogleBlobStoreFirehose](../../ingestion/native-batch-firehose.md#staticgoogleblobstorefirehose) - ### Deep Storage Deep storage can be written to Google Cloud Storage either via this extension or the [druid-hdfs-storage extension](../extensions-core/hdfs.md). diff --git a/docs/development/extensions-core/mysql.md b/docs/development/extensions-core/mysql.md index 7cb013cbdaf2..076d0015a340 100644 --- a/docs/development/extensions-core/mysql.md +++ b/docs/development/extensions-core/mysql.md @@ -125,7 +125,7 @@ If using the MariaDB connector library, set `druid.metadata.mysql.driver.driverC |`druid.metadata.mysql.ssl.enabledSSLCipherSuites`|Overrides the existing cipher suites with these cipher suites.|none|no| |`druid.metadata.mysql.ssl.enabledTLSProtocols`|Overrides the TLS protocols with these protocols.|none|no| -### MySQL InputSource and Firehose +### MySQL InputSource ```json { @@ -133,22 +133,17 @@ If using the MariaDB connector library, set `druid.metadata.mysql.driver.driverC "spec": { "dataSchema": { "dataSource": "some_datasource", - "parser": { - "parseSpec": { - "format": "timeAndDims", - "dimensionsSpec": { - "dimensionExclusions": [], - "dimensions": [ - "dim1", - "dim2", - "dim3" - ] - }, - "timestampSpec": { - "format": "auto", - "column": "ts" - } - } + "dimensionsSpec": { + "dimensionExclusions": [], + "dimensions": [ + "dim1", + "dim2", + "dim3" + ] + }, + "timestampSpec": { + "format": "auto", + "column": "ts" }, "metricsSpec": [], "granularitySpec": { @@ -167,7 +162,7 @@ If using the MariaDB connector library, set `druid.metadata.mysql.driver.driverC }, "ioConfig": { "type": "index_parallel", - "firehose": { + "inputSource": { "type": "sql", "database": { "type": "mysql", @@ -180,6 +175,9 @@ "sqls": [ "SELECT * FROM some_table" ] + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/docs/development/extensions-core/postgresql.md b/docs/development/extensions-core/postgresql.md index b495fc262d24..73a33aa0f3b9 100644 --- a/docs/development/extensions-core/postgresql.md +++ b/docs/development/extensions-core/postgresql.md @@ -87,7 +87,7 @@ In most cases, the configuration options map directly to the [postgres JDBC conn -### PostgreSQL Firehose -The PostgreSQL extension provides an implementation of an [SqlFirehose](../../ingestion/native-batch-firehose.md) which can be used to ingest data into Druid from a PostgreSQL database. +### PostgreSQL InputSource +The PostgreSQL extension provides an implementation of an [SQL input source](../../ingestion/native-batch-input-source.md) which can be used to ingest data into Druid from a PostgreSQL database.
```json { @@ -95,22 +95,17 @@ The PostgreSQL extension provides an implementation of an [SqlFirehose](../../in "spec": { "dataSchema": { "dataSource": "some_datasource", - "parser": { - "parseSpec": { - "format": "timeAndDims", - "dimensionsSpec": { - "dimensionExclusions": [], - "dimensions": [ - "dim1", - "dim2", - "dim3" - ] - }, - "timestampSpec": { - "format": "auto", - "column": "ts" - } - } + "dimensionsSpec": { + "dimensionExclusions": [], + "dimensions": [ + "dim1", + "dim2", + "dim3" + ] + }, + "timestampSpec": { + "format": "auto", + "column": "ts" }, "metricsSpec": [], "granularitySpec": { @@ -129,7 +124,7 @@ The PostgreSQL extension provides an implementation of an [SqlFirehose](../../in }, "ioConfig": { "type": "index_parallel", - "firehose": { + "inputSource": { "type": "sql", "database": { "type": "postgresql", @@ -142,6 +137,9 @@ "sqls": [ "SELECT * FROM some_table" ] + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/docs/ingestion/native-batch-firehose.md b/docs/ingestion/native-batch-firehose.md deleted file mode 100644 index a6c76b53ccc8..000000000000 --- a/docs/ingestion/native-batch-firehose.md +++ /dev/null @@ -1,341 +0,0 @@ ---- -id: native-batch-firehose -title: "Native batch ingestion with firehose" -sidebar_label: "Firehose" ---- - - - - -Firehoses are deprecated in 0.17.0. It's highly recommended to use the [Native batch ingestion input sources](./native-batch-input-source.md) instead. - -There are several firehoses readily available in Druid, some are meant for examples, others can be used directly in a production environment. - -## StaticS3Firehose - -> You need to include the [`druid-s3-extensions`](../development/extensions-core/s3.md) as an extension to use the StaticS3Firehose. - -This firehose ingests events from a predefined list of S3 objects. -This firehose is _splittable_ and can be used by the [Parallel task](./native-batch.md). -Since each split represents an object in this firehose, each worker task of `index_parallel` will read an object. - -Sample spec: - -```json -"firehose" : { - "type" : "static-s3", - "uris": ["s3://foo/bar/file.gz", "s3://bar/foo/file2.gz"] -} -``` - -This firehose provides caching and prefetching features. In the Simple task, a firehose can be read twice if intervals or -shardSpecs are not specified, and, in this case, caching can be useful. Prefetching is preferred when direct scan of objects is slow. -Note that prefetching or caching isn't that useful in the Parallel task. - -|property|description|default|required?| -|--------|-----------|-------|---------| -|type|This should be `static-s3`.|None|yes| -|uris|JSON array of URIs where s3 files to be ingested are located.|None|`uris` or `prefixes` must be set| -|prefixes|JSON array of URI prefixes for the locations of s3 files to be ingested.|None|`uris` or `prefixes` must be set| -|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824|no| -|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. 
Prefetched files are removed immediately once they are read.|1073741824|no| -|prefetchTriggerBytes|Threshold to trigger prefetching s3 objects.|maxFetchCapacityBytes / 2|no| -|fetchTimeout|Timeout for fetching an s3 object.|60000|no| -|maxFetchRetry|Maximum retry for fetching an s3 object.|3|no| - -## StaticGoogleBlobStoreFirehose - -> You need to include the [`druid-google-extensions`](../development/extensions-core/google.md) as an extension to use the StaticGoogleBlobStoreFirehose. - -This firehose ingests events, similar to the StaticS3Firehose, but from an Google Cloud Store. - -As with the S3 blobstore, it is assumed to be gzipped if the extension ends in .gz - -This firehose is _splittable_ and can be used by the [Parallel task](./native-batch.md). -Since each split represents an object in this firehose, each worker task of `index_parallel` will read an object. - -Sample spec: - -```json -"firehose" : { - "type" : "static-google-blobstore", - "blobs": [ - { - "bucket": "foo", - "path": "/path/to/your/file.json" - }, - { - "bucket": "bar", - "path": "/another/path.json" - } - ] -} -``` - -This firehose provides caching and prefetching features. In the Simple task, a firehose can be read twice if intervals or -shardSpecs are not specified, and, in this case, caching can be useful. Prefetching is preferred when direct scan of objects is slow. -Note that prefetching or caching isn't that useful in the Parallel task. - -|property|description|default|required?| -|--------|-----------|-------|---------| -|type|This should be `static-google-blobstore`.|None|yes| -|blobs|JSON array of Google Blobs.|None|yes| -|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824|no| -|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824|no| -|prefetchTriggerBytes|Threshold to trigger prefetching Google Blobs.|maxFetchCapacityBytes / 2|no| -|fetchTimeout|Timeout for fetching a Google Blob.|60000|no| -|maxFetchRetry|Maximum retry for fetching a Google Blob.|3|no| - -Google Blobs: - -|property|description|default|required?| -|--------|-----------|-------|---------| -|bucket|Name of the Google Cloud bucket|None|yes| -|path|The path where data is located.|None|yes| - -## HDFSFirehose - -> You need to include the [`druid-hdfs-storage`](../development/extensions-core/hdfs.md) as an extension to use the HDFSFirehose. - -This firehose ingests events from a predefined list of files from the HDFS storage. -This firehose is _splittable_ and can be used by the [Parallel task](./native-batch.md). -Since each split represents an HDFS file, each worker task of `index_parallel` will read files. - -Sample spec: - -```json -"firehose" : { - "type" : "hdfs", - "paths": "/foo/bar,/foo/baz" -} -``` - -This firehose provides caching and prefetching features. During native batch indexing, a firehose can be read twice if -`intervals` are not specified, and, in this case, caching can be useful. Prefetching is preferred when direct scanning -of files is slow. -Note that prefetching or caching isn't that useful in the Parallel task. - -|Property|Description|Default| -|--------|-----------|-------| -|type|This should be `hdfs`.|none (required)| -|paths|HDFS paths. Can be either a JSON array or comma-separated string of paths. 
Wildcards like `*` are supported in these paths.|none (required)| -|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824| -|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824| -|prefetchTriggerBytes|Threshold to trigger prefetching files.|maxFetchCapacityBytes / 2| -|fetchTimeout|Timeout for fetching each file.|60000| -|maxFetchRetry|Maximum number of retries for fetching each file.|3| - -You can also ingest from other storage using the HDFS firehose if the HDFS client supports that storage. -However, if you want to ingest from cloud storage, consider using the service-specific input source for your data storage. -If you want to use a non-hdfs protocol with the HDFS firehose, you need to include the protocol you want -in `druid.ingestion.hdfs.allowedProtocols`. See [HDFS firehose security configuration](../configuration/index.md#hdfs-input-source) for more details. - -## LocalFirehose - -This Firehose can be used to read the data from files on local disk, and is mainly intended for proof-of-concept testing, and works with `string` typed parsers. -This Firehose is _splittable_ and can be used by [native parallel index tasks](native-batch.md). -Since each split represents a file in this Firehose, each worker task of `index_parallel` will read a file. -A sample local Firehose spec is shown below: - -```json -{ - "type": "local", - "filter" : "*.csv", - "baseDir": "/data/directory" -} -``` - -|property|description|required?| -|--------|-----------|---------| -|type|This should be "local".|yes| -|filter|A wildcard filter for files. See [here](http://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/WildcardFileFilter) for more information.|yes| -|baseDir|directory to search recursively for files to be ingested. |yes| - - - -## HttpFirehose - -This Firehose can be used to read the data from remote sites via HTTP, and works with `string` typed parsers. -This Firehose is _splittable_ and can be used by [native parallel index tasks](native-batch.md). -Since each split represents a file in this Firehose, each worker task of `index_parallel` will read a file. -A sample HTTP Firehose spec is shown below: - -```json -{ - "type": "http", - "uris": ["http://example.com/uri1", "http://example2.com/uri2"] -} -``` - -You can only use protocols listed in the `druid.ingestion.http.allowedProtocols` property as HTTP firehose input sources. -The `http` and `https` protocols are allowed by default. See [HTTP firehose security configuration](../configuration/index.md#http-input-source) for more details. - -The below configurations can be optionally used if the URIs specified in the spec require a Basic Authentication Header. -Omitting these fields from your spec will result in HTTP requests with no Basic Authentication Header. 
- -|property|description|default| -|--------|-----------|-------| -|httpAuthenticationUsername|Username to use for authentication with specified URIs|None| -|httpAuthenticationPassword|PasswordProvider to use with specified URIs|None| - -Example with authentication fields using the DefaultPassword provider (this requires the password to be in the ingestion spec): - -```json -{ - "type": "http", - "uris": ["http://example.com/uri1", "http://example2.com/uri2"], - "httpAuthenticationUsername": "username", - "httpAuthenticationPassword": "password123" -} -``` - -You can also use the other existing Druid PasswordProviders. Here is an example using the EnvironmentVariablePasswordProvider: - -```json -{ - "type": "http", - "uris": ["http://example.com/uri1", "http://example2.com/uri2"], - "httpAuthenticationUsername": "username", - "httpAuthenticationPassword": { - "type": "environment", - "variable": "HTTP_FIREHOSE_PW" - } -} -``` - -The below configurations can optionally be used for tuning the Firehose performance. -Note that prefetching or caching isn't that useful in the Parallel task. - -|property|description|default| -|--------|-----------|-------| -|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824| -|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824| -|prefetchTriggerBytes|Threshold to trigger prefetching HTTP objects.|maxFetchCapacityBytes / 2| -|fetchTimeout|Timeout for fetching an HTTP object.|60000| -|maxFetchRetry|Maximum retries for fetching an HTTP object.|3| - - - -## IngestSegmentFirehose - -This Firehose can be used to read the data from existing druid segments, potentially using a new schema and changing the name, dimensions, metrics, rollup, etc. of the segment. -This Firehose is _splittable_ and can be used by [native parallel index tasks](native-batch.md). -This firehose will accept any type of parser, but will only utilize the list of dimensions and the timestamp specification. - A sample ingest Firehose spec is shown below: - -```json -{ - "type": "ingestSegment", - "dataSource": "wikipedia", - "interval": "2013-01-01/2013-01-02" -} -``` - -|property|description|required?| -|--------|-----------|---------| -|type|This should be "ingestSegment".|yes| -|dataSource|A String defining the data source to fetch rows from, very similar to a table in a relational database|yes| -|interval|A String representing the ISO-8601 interval. This defines the time range to fetch the data over.|yes| -|dimensions|The list of dimensions to select. If left empty, no dimensions are returned. If left null or not defined, all dimensions are returned. |no| -|metrics|The list of metrics to select. If left empty, no metrics are returned. If left null or not defined, all metrics are selected.|no| -|filter| See [Filters](../querying/filters.md)|no| -|maxInputSegmentBytesPerTask|Deprecated. Use [Segments Split Hint Spec](./native-batch.md#segments-split-hint-spec) instead. When used with the native parallel index task, the maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks). Defaults to 150MB.|no| - - - -## SqlFirehose - -This Firehose can be used to ingest events residing in an RDBMS. 
The database connection information is provided as part of the ingestion spec. -For each query, the results are fetched locally and indexed. -If there are multiple queries from which data needs to be indexed, queries are prefetched in the background, up to `maxFetchCapacityBytes` bytes. -This Firehose is _splittable_ and can be used by [native parallel index tasks](native-batch.md). -This firehose will accept any type of parser, but will only utilize the list of dimensions and the timestamp specification. See the extension documentation for more detailed ingestion examples. - -Requires one of the following extensions: - * [MySQL Metadata Store](../development/extensions-core/mysql.md). - * [PostgreSQL Metadata Store](../development/extensions-core/postgresql.md). - - -```json -{ - "type": "sql", - "database": { - "type": "mysql", - "connectorConfig": { - "connectURI": "jdbc:mysql://host:port/schema", - "user": "user", - "password": "password" - } - }, - "sqls": ["SELECT * FROM table1", "SELECT * FROM table2"] -} -``` - -|property|description|default|required?| -|--------|-----------|-------|---------| -|type|This should be "sql".||Yes| -|database|Specifies the database connection details. The database type corresponds to the extension that supplies the `connectorConfig` support. The specified extension must be loaded into Druid:

  • [mysql-metadata-storage](../development/extensions-core/mysql.md) for `mysql`
  • [postgresql-metadata-storage](../development/extensions-core/postgresql.md) extension for `postgresql`.


You can selectively allow JDBC properties in `connectURI`. See [JDBC connections security config](../configuration/index.md#jdbc-connections-to-external-databases) for more details.||Yes| -|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824|No| -|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824|No| -|prefetchTriggerBytes|Threshold to trigger prefetching SQL result objects.|maxFetchCapacityBytes / 2|No| -|fetchTimeout|Timeout for fetching the result set.|60000|No| -|foldCase|Toggle case folding of database column names. This may be enabled in cases where the database returns case insensitive column names in query results.|false|No| -|sqls|List of SQL queries where each SQL query would retrieve the data to be indexed.||Yes| - -### Database - -|property|description|default|required?| -|--------|-----------|-------|---------| -|type|The type of database to query. Valid values are `mysql` and `postgresql`_||Yes| -|connectorConfig|Specify the database connection properties via `connectURI`, `user` and `password`||Yes| - -## InlineFirehose - -This Firehose can be used to read the data inlined in its own spec. -It can be used for demos or for quickly testing out parsing and schema, and works with `string` typed parsers. -A sample inline Firehose spec is shown below: - -```json -{ - "type": "inline", - "data": "0,values,formatted\n1,as,CSV" -} -``` - -|property|description|required?| -|--------|-----------|---------| -|type|This should be "inline".|yes| -|data|Inlined data to ingest.|yes| - -## CombiningFirehose - -This Firehose can be used to combine and merge data from a list of different Firehoses. - -```json -{ - "type": "combining", - "delegates": [ { firehose1 }, { firehose2 }, ... ] -} -``` - -|property|description|required?| -|--------|-----------|---------| -|type|This should be "combining"|yes| -|delegates|List of Firehoses to combine data from|yes| \ No newline at end of file diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index aba390b228ce..42268c1362f1 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -261,7 +261,7 @@ The size-based split hint spec affects all splittable input sources except for t #### Segments Split Hint Spec -The segments split hint spec is used only for [`DruidInputSource`](./native-batch-input-source.md) and legacy `IngestSegmentFirehose`. +The segments split hint spec is used only for [`DruidInputSource`](./native-batch-input-source.md). 
|property|description|default|required?| |--------|-----------|-------|---------| diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java index 389d7c2c7521..a815caaa66c0 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java @@ -23,9 +23,20 @@ import com.google.common.base.Throwables; import org.apache.druid.client.indexing.SamplerResponse; import org.apache.druid.client.indexing.SamplerSpec; +import org.apache.druid.data.input.AbstractInputSource; +import org.apache.druid.data.input.InputEntity; import org.apache.druid.data.input.InputFormat; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowListPlusRawValues; +import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.InputSource; +import org.apache.druid.data.input.InputSourceReader; +import org.apache.druid.data.input.InputSplit; +import org.apache.druid.data.input.SplitHintSpec; import org.apache.druid.data.input.impl.ByteEntity; +import org.apache.druid.data.input.impl.InputRowParser; +import org.apache.druid.data.input.impl.SplittableInputSource; +import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.indexing.overlord.sampler.InputSourceSampler; import org.apache.druid.indexing.overlord.sampler.SamplerConfig; import org.apache.druid.indexing.overlord.sampler.SamplerException; @@ -33,9 +44,17 @@ import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIOConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorTuningConfig; +import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.java.util.common.parsers.ParseException; import org.apache.druid.segment.indexing.DataSchema; import javax.annotation.Nullable; +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; public abstract class SeekableStreamSamplerSpec implements SamplerSpec { @@ -44,7 +63,6 @@ public abstract class SeekableStreamSamplerSpec recordSupplier; - RecordSupplier recordSupplier; + try { + recordSupplier = createRecordSupplier(); + } + catch (Exception e) { + throw new SamplerException(e, "Unable to create RecordSupplier: %s", Throwables.getRootCause(e).getMessage()); + } - try { - recordSupplier = createRecordSupplier(); + inputSource = new RecordSupplierInputSource<>( + ioConfig.getStream(), + recordSupplier, + ioConfig.isUseEarliestSequenceNumber() + ); + inputFormat = Preconditions.checkNotNull( + ioConfig.getInputFormat(), + "[spec.ioConfig.inputFormat] is required" + ); } - catch (Exception e) { - throw new SamplerException(e, "Unable to create RecordSupplier: %s", Throwables.getRootCause(e).getMessage()); - } - - inputSource = new RecordSupplierInputSource<>( - ioConfig.getStream(), - recordSupplier, - ioConfig.isUseEarliestSequenceNumber() - ); return inputSourceSampler.sample(inputSource, inputFormat, dataSchema, samplerConfig); } protected abstract RecordSupplier createRecordSupplier(); + private class SeekableStreamSamplerInputSource extends 
AbstractInputSource implements SplittableInputSource + { + private final InputRowParser parser; + + public SeekableStreamSamplerInputSource(InputRowParser parser) + { + this.parser = parser; + } + + public InputRowParser getParser() + { + return parser; + } + + @Override + public boolean isSplittable() + { + return false; + } + + @Override + public Stream createSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec) + throws IOException + { + throw new UnsupportedOperationException(); + } + + @Override + public int estimateNumSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec) throws IOException + { + throw new UnsupportedOperationException(); + } + + @Override + public SplittableInputSource withSplit(InputSplit split) + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean needsFormat() + { + return false; + } + + @Override + protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory) + { + return new SeekableStreamSamplerInputSourceReader(parser); + } + } + + private class SeekableStreamSamplerInputSourceReader implements InputSourceReader + { + private final InputRowParser parser; + private final CloseableIterator entityIterator; + + public SeekableStreamSamplerInputSourceReader(InputRowParser parser) + { + this.parser = parser; + if (parser instanceof StringInputRowParser) { + ((StringInputRowParser) parser).startFileFromBeginning(); + } + + RecordSupplierInputSource inputSource = new RecordSupplierInputSource<>( + ioConfig.getStream(), + createRecordSupplier(), + ioConfig.isUseEarliestSequenceNumber() + ); + this.entityIterator = inputSource.createEntityIterator(); + } + + @Override + public CloseableIterator read() throws IOException + { + return new CloseableIterator() + { + + @Override + public boolean hasNext() + { + return entityIterator.hasNext(); + } + + @Override + public InputRow next() + { + throw new UnsupportedOperationException(); + } + + @Override + public void close() throws IOException + { + entityIterator.close(); + } + }; + } + + @Override + public CloseableIterator sample() throws IOException + { + return new CloseableIterator() + { + @Override + public boolean hasNext() + { + return entityIterator.hasNext(); + } + + @Override + public InputRowListPlusRawValues next() + { + final ByteBuffer bb = ((ByteEntity) entityIterator.next()).getBuffer(); + + final Map rawColumns; + try { + if (parser instanceof StringInputRowParser) { + rawColumns = ((StringInputRowParser) parser).buildStringKeyMap(bb); + } else { + rawColumns = null; + } + } + catch (ParseException e) { + return InputRowListPlusRawValues.of(null, e); + } + + try { + final List rows = parser.parseBatch(bb); + return InputRowListPlusRawValues.of(rows.isEmpty() ? 
null : rows, rawColumns); + } + catch (ParseException e) { + return InputRowListPlusRawValues.of(rawColumns, e); + } + } + + @Override + public void close() throws IOException + { + entityIterator.close(); + } + }; + } + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java index 7dc464093ee2..39eaa4af62a1 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java @@ -26,7 +26,6 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.LocalInputSource; import org.apache.druid.data.input.impl.ParseSpec; -import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatus; @@ -276,16 +275,13 @@ private ParallelIndexSupervisorTask createTestTask( ingestionSpec = new ParallelIndexIngestionSpec( new DataSchema( "dataSource", - getObjectMapper().convertValue( - new StringInputRowParser(parseSpec, null), - Map.class - ), + parseSpec.getTimestampSpec(), + parseSpec.getDimensionsSpec(), new AggregatorFactory[]{ new LongSumAggregatorFactory("val", "val") }, granularitySpec, - null, - getObjectMapper() + null ), ioConfig, tuningConfig diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java index c1350f550684..c53d92562ba6 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java @@ -38,7 +38,7 @@ public class ITCombiningFirehoseFactoryIndexTest extends AbstractITBatchIndexTes private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; private static final String INDEX_DATASOURCE = "wikipedia_index_test"; - private static final String COMBINING_INDEX_TASK = "/indexer/wikipedia_combining_firehose_index_task.json"; + private static final String COMBINING_INDEX_TASK = "/indexer/wikipedia_combining_input_source_index_task.json"; private static final String COMBINING_QUERIES_RESOURCE = "/indexer/wikipedia_combining_firehose_index_queries.json"; private static final String COMBINING_INDEX_DATASOURCE = "wikipedia_comb_index_test"; diff --git a/integration-tests/src/test/resources/indexer/wikipedia_combining_firehose_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json similarity index 70% rename from integration-tests/src/test/resources/indexer/wikipedia_combining_firehose_index_task.json rename to integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json index 1e7deffc6b5b..5e88b33a7237 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_combining_firehose_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json @@ -3,6 +3,25 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": 
"timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "count", @@ -43,35 +62,11 @@ "segmentGranularity": "DAY", "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "combining", "delegates": [ { @@ -80,11 +75,14 @@ "filter": "wikipedia_combining_index_data.json" }, { - "type": "ingestSegment", + "type": "druid", "dataSource": "%%COMBINING_DATASOURCE%%", "interval": "2013-08-31/2013-09-02" } ] + }, + "inputFormat": { + "format" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index 1fa8b4eba321..30841e90047a 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -3,6 +3,25 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "count", @@ -43,38 +62,17 @@ "segmentGranularity": "DAY", "queryGranularity": "DAY", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data*" + }, + "inputFormat": { + "format" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json index 544d191d9257..18d1c8d2ced3 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json @@ -3,6 +3,14 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + {"type": "string", "name": "language", "createBitmapIndex": false} + ] + }, "metricsSpec": [ { "type": "count", @@ -39,27 +47,17 @@ "fieldName": "user" } ], - "granularitySpec": 
%%GRANULARITYSPEC%%, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - {"type": "string", "name": "language", "createBitmapIndex": false} - ] - } - } - } + "granularitySpec": %%GRANULARITYSPEC%% }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data*" + }, + "inputFormat": { + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json index 9f3128fb303e..a34bc9313f1a 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json @@ -3,6 +3,24 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "count", @@ -49,29 +67,6 @@ "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-02" ] }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } - }, "transformSpec": { "transforms": [ { @@ -89,10 +84,13 @@ }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data*" + }, + "inputFormat": { + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json b/integration-tests/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json index 35b115c9f191..2ac1d503eedc 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json @@ -3,6 +3,25 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "count", @@ -43,38 +62,17 @@ "segmentGranularity": "DAY", "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": 
"/resources/data/batch_index/json", "filter": "wikipedia_index_data*" + }, + "inputFormat": { + "format" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_merge_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_merge_index_task.json index 43264a8c6751..9b16e3d338dd 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_merge_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_merge_index_task.json @@ -3,6 +3,14 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "continent" + ] + }, "metricsSpec": [ { "type": "count", @@ -38,27 +46,17 @@ "segmentGranularity": "DAY", "queryGranularity": "DAY", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "continent" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data*" + }, + "inputFormat": { + "format" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_merge_reindex_task.json b/integration-tests/src/test/resources/indexer/wikipedia_merge_reindex_task.json index 127461dd117c..a4aa97777163 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_merge_reindex_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_merge_reindex_task.json @@ -3,6 +3,15 @@ "spec": { "dataSchema": { "dataSource": "%%REINDEX_DATASOURCE%%", + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec": { + "dimensions": [ + "continent" + ] + }, "metricsSpec": [ { "type": "doubleSum", @@ -34,28 +43,17 @@ "segmentGranularity": "DAY", "queryGranularity": "DAY", "intervals" : [ "2013-08-31/2013-09-01" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp", - "format": "iso" - }, - "dimensionsSpec": { - "dimensions": [ - "continent" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { - "type": "ingestSegment", + "inputSource": { + "type": "druid", "dataSource": "%%DATASOURCE%%", "interval": "2013-08-31/2013-09-01" + }, + "inputFormat": { + "format" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json index e83b1109da02..faca3a788baf 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json @@ -3,6 +3,25 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "count", @@ -28,38 +47,17 @@ "segmentGranularity": "DAY", "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - 
{"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } } }, "ioConfig": { "type": "index_parallel", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data*" + }, + "inputFormat": { + "format" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json index e995a0f299e9..5376fb4957c7 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json @@ -3,6 +3,15 @@ "spec": { "dataSchema": { "dataSource": "%%REINDEX_DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensionExclusions": [ + "robot", + "continent" + ] + }, "metricsSpec": [ { "type": "count", @@ -30,29 +39,18 @@ "intervals": [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format": "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensionExclusions": [ - "robot", - "continent" - ] - } - } } }, "ioConfig": { "type": "index_parallel", - "firehose": { - "type": "ingestSegment", + "inputSource": { + "type": "druid", "dataSource": "%%DATASOURCE%%", "interval": "2013-08-31/2013-09-02", "maxInputSegmentBytesPerTask": 1 + }, + "inputFormat": { + "format": "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json index 9ab153c6862d..e6630c166947 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json @@ -3,6 +3,25 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "count", @@ -27,38 +46,17 @@ "granularitySpec": { "segmentGranularity": "DAY", "queryGranularity": "second" - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } } }, "ioConfig": { "type": "index_parallel", - "firehose": { + "inputsource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data2*" + }, + "inputFormat": { + "format" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task.json index e277a9127f49..b5bc2df46b30 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task.json +++ 
b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task.json @@ -3,6 +3,13 @@ "spec": { "dataSchema": { "dataSource": "%%REINDEX_DATASOURCE%%", + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec": { + "dimensionExclusions" : ["robot", "continent"] + }, "metricsSpec": [ { "type": "doubleSum", @@ -24,26 +31,17 @@ "segmentGranularity": "DAY", "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-01" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp", - "format": "iso" - }, - "dimensionsSpec": { - "dimensionExclusions" : ["robot", "continent"] - } - } } }, "ioConfig": { "type": "index", - "firehose": { - "type": "ingestSegment", + "inputSource": { + "type": "druid", "dataSource": "%%DATASOURCE%%", "interval": "2013-08-31/2013-09-01" + }, + "inputFormat": { + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json index 029b136d441b..076b7643b686 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json @@ -3,6 +3,24 @@ "spec": { "dataSchema": { "dataSource": "%%REINDEX_DATASOURCE%%", + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec": { + "dimensions": [ + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "page", + "newPage", + "anonymous", + "namespace", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "doubleSum", @@ -40,65 +58,45 @@ "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-01" ] }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp", - "format": "iso" + "transformSpec": { + "transforms": [ + { + "type": "expression", + "name": "newPage", + "expression": "page" + }, + { + "type": "expression", + "name": "city", + "expression": "concat('city-', city)" }, - "dimensionsSpec": { - "dimensions": [ - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "page", - "newPage", - "anonymous", - "namespace", - "country", - "region", - "city" - ] + { + "type": "expression", + "name": "one-plus-triple-added", + "expression": "\"triple-added\" + 1" }, - "transformSpec": { - "transforms": [ - { - "type": "expression", - "name": "newPage", - "expression": "page" - }, - { - "type": "expression", - "name": "city", - "expression": "concat('city-', city)" - }, - { - "type": "expression", - "name": "one-plus-triple-added", - "expression": "\"triple-added\" + 1" - }, - { - "type": "expression", - "name": "delta", - "expression": "\"delta\" / 2" - }, - { - "type": "expression", - "name": "double-deleted", - "expression": "deleted * 2" - } - ] + { + "type": "expression", + "name": "delta", + "expression": "\"delta\" / 2" + }, + { + "type": "expression", + "name": "double-deleted", + "expression": "deleted * 2" } - } + ] } }, "ioConfig": { "type": "index", - "firehose": { - "type": "ingestSegment", + "inputSource": { + "type": "druid", "dataSource": "%%DATASOURCE%%", "interval": "2013-08-31/2013-09-01" + }, + "inputFormat": { + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json 
b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index 4f13b70cb725..447e90e9a39f 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -3,6 +3,26 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city", + "timestamp" + ] + }, "metricsSpec": [ { "type": "count", @@ -43,39 +63,17 @@ "segmentGranularity": "DAY", "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city", - "timestamp" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data*" + }, + "inputFormat": { + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikiticker_index_task.json b/integration-tests/src/test/resources/indexer/wikiticker_index_task.json index d450c7b9458b..b24e7ee6e59c 100644 --- a/integration-tests/src/test/resources/indexer/wikiticker_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikiticker_index_task.json @@ -3,6 +3,17 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "dimensionsSpec": { + "dimensions": [ + "channel", + "page", + "user" + ] + }, + "timestampSpec": { + "format": "auto", + "column": "time" + }, "granularitySpec": { "type": "uniform", "segmentGranularity": "day", @@ -11,23 +22,6 @@ "2015-09-12/2015-09-13" ] }, - "parser": { - "type": "hadoopyString", - "parseSpec": { - "format": "json", - "dimensionsSpec": { - "dimensions": [ - "channel", - "page", - "user" - ] - }, - "timestampSpec": { - "format": "auto", - "column": "time" - } - } - }, - "metricsSpec": [ { "name": "count", @@ -52,10 +46,13 @@ }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/shared/wikiticker-it", "filter": "wikiticker-2015-09-12-sampled.json.gz" + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/server/src/main/java/org/apache/druid/segment/realtime/firehose/InlineFirehose.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/InlineFirehose.java deleted file mode 100644 index 75897f4d8a5f..000000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/InlineFirehose.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.firehose; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.io.LineIterator; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.InputRowListPlusRawValues; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.java.util.common.parsers.ParseException; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.Charset; -import java.util.NoSuchElementException; - -/** - * Firehose that produces data from its own spec - */ -public class InlineFirehose implements Firehose -{ - private final StringInputRowParser parser; - private final LineIterator lineIterator; - - InlineFirehose(String data, StringInputRowParser parser) - { - this.parser = parser; - - Charset charset = Charset.forName(parser.getEncoding()); - InputStream stream = new ByteArrayInputStream(data.getBytes(charset)); - lineIterator = IOUtils.lineIterator(stream, charset); - } - - @Override - public boolean hasMore() - { - return lineIterator.hasNext(); - } - - @Override - public InputRow nextRow() - { - return parser.parse(nextRaw()); - } - - private String nextRaw() - { - if (!hasMore()) { - throw new NoSuchElementException(); - } - - return lineIterator.next(); - } - - @Override - public InputRowListPlusRawValues nextRowWithRaw() - { - String raw = nextRaw(); - try { - return InputRowListPlusRawValues.of(parser.parse(raw), parser.parseString(raw)); - } - catch (ParseException e) { - return InputRowListPlusRawValues.of(parser.parseString(raw), e); - } - } - - @Override - public void close() throws IOException - { - lineIterator.close(); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFirehose.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFirehose.java deleted file mode 100644 index 02c1dca387b8..000000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/SqlFirehose.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.firehose; - -import com.google.common.collect.Iterators; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.prefetch.JsonIterator; -import org.apache.druid.segment.transform.TransformSpec; -import org.apache.druid.segment.transform.Transformer; - -import javax.annotation.Nullable; -import java.io.Closeable; -import java.io.IOException; -import java.util.Iterator; -import java.util.Map; - -public class SqlFirehose implements Firehose -{ - private final Iterator>> resultIterator; - private final MapInputRowParser parser; - private final Closeable closer; - @Nullable - private JsonIterator> lineIterator = null; - private final Transformer transformer; - - public SqlFirehose( - Iterator>> lineIterators, - InputRowParser parser, - Closeable closer - ) - { - this.resultIterator = lineIterators; - this.parser = new MapInputRowParser(parser.getParseSpec()); - // transformer is created from the original decorated parser (which should always be decorated) - this.transformer = TransformSpec.fromInputRowParser(parser).toTransformer(); - this.closer = closer; - } - - @Override - public boolean hasMore() - { - while ((lineIterator == null || !lineIterator.hasNext()) && resultIterator.hasNext()) { - lineIterator = getNextLineIterator(); - } - - return lineIterator != null && lineIterator.hasNext(); - } - - @Nullable - @Override - public InputRow nextRow() - { - assert lineIterator != null; - final Map mapToParse = lineIterator.next(); - return transformer.transform(Iterators.getOnlyElement(parser.parseBatch(mapToParse).iterator())); - } - - private JsonIterator> getNextLineIterator() - { - if (lineIterator != null) { - lineIterator = null; - } - - return resultIterator.next(); - } - - @Override - public void close() throws IOException - { - if (lineIterator != null) { - lineIterator.close(); - } - closer.close(); - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseTest.java b/server/src/test/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseTest.java deleted file mode 100644 index fc777b1021ab..000000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/firehose/InlineFirehoseTest.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.firehose; - -import com.google.common.collect.Iterables; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.InputRowListPlusRawValues; -import org.apache.druid.data.input.impl.CSVParseSpec; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.junit.Assert; -import org.junit.Test; - -import java.io.IOException; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; - -@SuppressWarnings("ConstantConditions") -public class InlineFirehoseTest -{ - private static final String DIMENSION_0 = "timestamp"; - private static final String DIMENSION_1 = "value"; - private static final List DIMENSIONS = Arrays.asList(DIMENSION_0, DIMENSION_1); - private static final String DELIMITER = ","; - private static final Charset CHARSET = StandardCharsets.UTF_8; - private static final StringInputRowParser PARSER = new StringInputRowParser( - new CSVParseSpec( - new TimestampSpec( - DIMENSION_0, - "auto", - null - ), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(DIMENSIONS)), - DELIMITER, - DIMENSIONS, - false, - 0 - ), - CHARSET.name() - ); - private static final String EMPTY = ""; - private static final String TIMESTAMP_0 = "0"; - private static final String VALUE_0 = "a"; - private static final String NOT_EMPTY = TIMESTAMP_0 + DELIMITER + VALUE_0; - private static final String PARSEABLE = NOT_EMPTY; - private static final String NOT_PARSEABLE = VALUE_0 + DELIMITER + TIMESTAMP_0; - private static final String TIMESTAMP_1 = "1"; - private static final String VALUE_1 = "b"; - private static final String LINE_0 = TIMESTAMP_0 + DELIMITER + VALUE_0; - private static final String LINE_1 = TIMESTAMP_1 + DELIMITER + VALUE_1; - private static final String MULTILINE = LINE_0 + "\n" + LINE_1; - - @Test - public void testHasMoreEmpty() - { - InlineFirehose target = create(EMPTY); - Assert.assertFalse(target.hasMore()); - } - - @Test - public void testHasMoreNotEmpty() - { - InlineFirehose target = create(NOT_EMPTY); - Assert.assertTrue(target.hasMore()); - } - - @Test(expected = NoSuchElementException.class) - public void testNextRowEmpty() - { - InlineFirehose target = create(EMPTY); - target.nextRow(); - } - - @Test - public void testNextRowNotEmpty() - { - InlineFirehose target = create(NOT_EMPTY); - InputRow row = target.nextRow(); - assertRowValue(VALUE_0, row); - } - - @Test(expected = NoSuchElementException.class) - public void testNextRowWithRawEmpty() - { - InlineFirehose target = create(EMPTY); - target.nextRowWithRaw(); - } - - @Test - public void testNextRowWithRawParseable() - { - final String data = PARSEABLE; - InlineFirehose target = create(data); - InputRowListPlusRawValues rowPlusRaw = target.nextRowWithRaw(); - - InputRow row = Iterables.getOnlyElement(rowPlusRaw.getInputRows()); - assertRowValue(VALUE_0, row); - - Map raw = rowPlusRaw.getRawValues(); - Map expected = new HashMap<>(); - expected.put("timestamp", TIMESTAMP_0); - expected.put("value", VALUE_0); - Assert.assertEquals(expected, raw); - - Assert.assertNull(rowPlusRaw.getParseException()); - } - - @Test - public void testNextRowWithRawNotParseable() - { - final String data = NOT_PARSEABLE; - InlineFirehose target = create(data); - InputRowListPlusRawValues 
rowPlusRaw = target.nextRowWithRaw(); - - Assert.assertNull(rowPlusRaw.getInputRows()); - - Map raw = rowPlusRaw.getRawValues(); - Map expected = new HashMap<>(); - expected.put("timestamp", VALUE_0); - expected.put("value", TIMESTAMP_0); - Assert.assertEquals(expected, raw); - - - Assert.assertNotNull(rowPlusRaw.getParseException()); - } - - @Test - public void testCloseOpen() throws IOException - { - InlineFirehose target = create(NOT_EMPTY); - target.close(); - try { - target.nextRow(); - Assert.fail("Should not be able to read from closed firehose"); - } - catch (NoSuchElementException ignored) { - } - } - - @Test - public void testCloseNotOpen() - { - InlineFirehose target = create(NOT_EMPTY); - try { - target.close(); - } - catch (IOException e) { - Assert.fail("Should be able to close an opened firehose"); - } - try { - target.close(); - } - catch (IOException e) { - Assert.fail("Should be able to close a closed firehose"); - } - } - - @Test - public void testMultiline() - { - InlineFirehose target = create(MULTILINE); - - // First line - Assert.assertTrue(target.hasMore()); - InputRow row0 = target.nextRow(); - assertRowValue(VALUE_0, row0); - - // Second line - InputRowListPlusRawValues rowPlusRaw = target.nextRowWithRaw(); - assertRowValue(VALUE_1, Iterables.getOnlyElement(rowPlusRaw.getInputRows())); - Map raw = rowPlusRaw.getRawValues(); - Map expected = new HashMap<>(); - expected.put("timestamp", TIMESTAMP_1); - expected.put("value", VALUE_1); - Assert.assertEquals(expected, raw); - Assert.assertNull(rowPlusRaw.getParseException()); - - Assert.assertFalse(target.hasMore()); - } - - private static InlineFirehose create(String data) - { - return new InlineFirehose(data, PARSER); - } - - private static void assertRowValue(String expected, InputRow row) - { - Assert.assertNotNull(row); - List values = row.getDimension(DIMENSION_1); - Assert.assertNotNull(values); - Assert.assertEquals(1, values.size()); - Assert.assertEquals(expected, values.get(0)); - } -} - diff --git a/server/src/test/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseTest.java b/server/src/test/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseTest.java deleted file mode 100644 index 026b442881f7..000000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/firehose/SqlFirehoseTest.java +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.firehose; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.dataformat.smile.SmileFactory; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.data.input.impl.prefetch.JsonIterator; -import org.apache.druid.java.util.common.FileUtils; -import org.apache.druid.java.util.common.jackson.JacksonUtils; -import org.apache.druid.math.expr.ExprMacroTable; -import org.apache.druid.segment.transform.ExpressionTransform; -import org.apache.druid.segment.transform.TransformSpec; -import org.apache.druid.segment.transform.TransformingStringInputRowParser; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -public class SqlFirehoseTest -{ - private static final TypeReference> TYPE_REF = new TypeReference>() - { - }; - private static File TEST_DIR; - private List> inputs; - private List fileList; - private InputRowParser parser; - private ObjectMapper objectMapper; - - - @Before - public void setup() throws IOException - { - TEST_DIR = File.createTempFile(SqlFirehose.class.getSimpleName(), "testDir"); - org.apache.commons.io.FileUtils.forceDelete(TEST_DIR); - FileUtils.mkdirp(TEST_DIR); - - final List> inputTexts = ImmutableList.of( - ImmutableMap.of("x", "foostring1", "timestamp", 2000), - ImmutableMap.of("x", "foostring2", "timestamp", 2000) - ); - List testFile = new ArrayList<>(); - this.objectMapper = new ObjectMapper(new SmileFactory()); - int i = 0; - for (Map m : inputTexts) { - File file = new File(TEST_DIR, "test_" + i++); - try (FileOutputStream fos = new FileOutputStream(file)) { - final JsonGenerator jg = objectMapper.getFactory().createGenerator(fos); - final SerializerProvider serializers = objectMapper.getSerializerProviderInstance(); - jg.writeStartArray(); - JacksonUtils.writeObjectUsingSerializerProvider(jg, serializers, m); - jg.writeEndArray(); - jg.close(); - testFile.add(new FileInputStream(file)); - } - } - - this.fileList = testFile; - parser = TransformSpec.NONE.decorate( - new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("timestamp", "auto", null), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x"))) - ) - ) - ); - - this.inputs = inputTexts; - } - - @Test - public void testFirehose() throws Exception - { - final TestCloseable closeable = new TestCloseable(); - List expectedResults = new ArrayList<>(); - for (Map map : inputs) { - expectedResults.add(map.get("x")); - } - final List>> lineIterators = - fileList.stream() - .map(s -> new JsonIterator>(TYPE_REF, s, closeable, 
objectMapper)) - .collect(Collectors.toList()); - - try (final SqlFirehose firehose = new SqlFirehose(lineIterators.iterator(), parser, closeable)) { - final List results = new ArrayList<>(); - - while (firehose.hasMore()) { - final InputRow inputRow = firehose.nextRow(); - if (inputRow == null) { - results.add(null); - } else { - results.add(inputRow.getDimension("x").get(0)); - } - } - - Assert.assertEquals(expectedResults, results); - } - } - - @Test - public void testFirehoseStringParser() throws Exception - { - final TestCloseable closeable = new TestCloseable(); - List expectedResults = new ArrayList<>(); - for (Map map : inputs) { - expectedResults.add(map.get("x")); - } - - final List>> lineIterators = - fileList.stream() - .map(s -> new JsonIterator>(TYPE_REF, s, closeable, objectMapper)) - .collect(Collectors.toList()); - - final InputRowParser stringParser = TransformSpec.NONE.decorate( - new StringInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("timestamp", "auto", null), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x"))) - ), - Charset.defaultCharset().name() - ) - ); - try (final SqlFirehose firehose = new SqlFirehose(lineIterators.iterator(), stringParser, closeable)) { - final List results = new ArrayList<>(); - - while (firehose.hasMore()) { - final InputRow inputRow = firehose.nextRow(); - if (inputRow == null) { - results.add(null); - } else { - results.add(inputRow.getDimension("x").get(0)); - } - } - - Assert.assertEquals(expectedResults, results); - } - } - - @Test - public void testFirehoseTransformingParser() throws Exception - { - final TestCloseable closeable = new TestCloseable(); - List expectedResults = new ArrayList<>(); - for (Map map : inputs) { - expectedResults.add(map.get("x") + "foo"); - } - - final List>> lineIterators = - fileList.stream() - .map(s -> new JsonIterator>(TYPE_REF, s, closeable, objectMapper)) - .collect(Collectors.toList()); - - final InputRowParser stringParser = new TransformingStringInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("timestamp", "auto", null), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x"))) - ), - Charset.defaultCharset().name(), - new TransformSpec( - null, - ImmutableList.of( - new ExpressionTransform("xfoo", "concat(x,'foo')", ExprMacroTable.nil()) - ) - ) - ); - - try (final SqlFirehose firehose = new SqlFirehose(lineIterators.iterator(), stringParser, closeable)) { - final List results = new ArrayList<>(); - - while (firehose.hasMore()) { - final InputRow inputRow = firehose.nextRow(); - if (inputRow == null) { - results.add(null); - } else { - results.add(inputRow.getDimension("xfoo").get(0)); - } - } - - Assert.assertEquals(expectedResults, results); - } - } - - @Test - public void testClose() throws IOException - { - File file = File.createTempFile("test", "", TEST_DIR); - final TestCloseable closeable = new TestCloseable(); - try (FileOutputStream fos = new FileOutputStream(file)) { - final JsonGenerator jg = objectMapper.getFactory().createGenerator(fos); - jg.writeStartArray(); - jg.writeEndArray(); - jg.close(); - } - - final JsonIterator> jsonIterator = new JsonIterator<>( - TYPE_REF, - new FileInputStream(file), - closeable, - objectMapper - ); - - final SqlFirehose firehose = new SqlFirehose( - ImmutableList.of(jsonIterator).iterator(), - parser, - closeable - ); - firehose.hasMore(); // initialize lineIterator - firehose.close(); - Assert.assertTrue(closeable.closed); - } - - @After - public void teardown() throws 
IOException - { - org.apache.commons.io.FileUtils.forceDelete(TEST_DIR); - } - - private static final class TestCloseable implements Closeable - { - private boolean closed; - - @Override - public void close() - { - closed = true; - } - } -} - From 67825958c131f4477e9a84b50708f638439fd338 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Thu, 11 Aug 2022 18:43:44 +0530 Subject: [PATCH 06/33] improves test coverage --- .../indexing/kafka/KafkaSamplerSpecTest.java | 88 ++++- .../kinesis/KinesisSamplerSpecTest.java | 116 +++++- .../SeekableStreamSamplerSpec.java | 8 +- .../ParallelIndexSupervisorTaskTest.java | 81 ++++ .../SeekableStreamSamplerSpecTest.java | 348 ++++++++++++++++++ .../segment/transform/TransformSpec.java | 15 - 6 files changed, 622 insertions(+), 34 deletions(-) create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java index 92ab899bbf55..40bbe08ca441 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java @@ -24,11 +24,15 @@ import com.google.common.collect.ImmutableMap; import org.apache.curator.test.TestingCluster; import org.apache.druid.client.indexing.SamplerResponse; +import org.apache.druid.client.indexing.SamplerSpec; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.FloatDimensionSchema; +import org.apache.druid.data.input.impl.InputRowParser; +import org.apache.druid.data.input.impl.JSONParseSpec; import org.apache.druid.data.input.impl.JsonInputFormat; import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.indexing.kafka.supervisor.KafkaSupervisorIOConfig; import org.apache.druid.indexing.kafka.supervisor.KafkaSupervisorSpec; @@ -37,6 +41,7 @@ import org.apache.druid.indexing.overlord.sampler.SamplerConfig; import org.apache.druid.indexing.overlord.sampler.SamplerException; import org.apache.druid.indexing.overlord.sampler.SamplerTestUtils; +import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.parsers.JSONPathSpec; @@ -56,9 +61,11 @@ import org.junit.Test; import org.junit.rules.ExpectedException; +import java.io.IOException; import java.util.Arrays; import java.util.Iterator; import java.util.List; +import java.util.Map; public class KafkaSamplerSpecTest extends InitializedNullHandlingTest { @@ -165,6 +172,85 @@ public void testSample() OBJECT_MAPPER ); + runSamplerAndCompareResponse(samplerSpec, true); + } + + @Test + public void testWithInputRowParser() throws IOException + { + insertData(generateRecords(TOPIC)); + + ObjectMapper objectMapper = new DefaultObjectMapper(); + TimestampSpec timestampSpec = new TimestampSpec("timestamp", "iso", null); + DimensionsSpec dimensionsSpec = new DimensionsSpec( + Arrays.asList( + new StringDimensionSchema("dim1"), + new 
StringDimensionSchema("dim1t"), + new StringDimensionSchema("dim2"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ) + ); + InputRowParser parser = new StringInputRowParser(new JSONParseSpec(timestampSpec, dimensionsSpec, JSONPathSpec.DEFAULT, null, null), "UTF8"); + + DataSchema dataSchema = new DataSchema( + "test_ds", + objectMapper.readValue(objectMapper.writeValueAsBytes(parser), Map.class), + new AggregatorFactory[]{ + new DoubleSumAggregatorFactory("met1sum", "met1"), + new CountAggregatorFactory("rows") + }, + new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), + null, + objectMapper + ); + + KafkaSupervisorSpec supervisorSpec = new KafkaSupervisorSpec( + null, + dataSchema, + null, + new KafkaSupervisorIOConfig( + TOPIC, + null, + null, + null, + null, + kafkaServer.consumerProperties(), + null, + null, + null, + null, + true, + null, + null, + null, + null + ), + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ); + + KafkaSamplerSpec samplerSpec = new KafkaSamplerSpec( + supervisorSpec, + new SamplerConfig(5, null), + new InputSourceSampler(), + OBJECT_MAPPER + ); + + runSamplerAndCompareResponse(samplerSpec, false); + } + + private static void runSamplerAndCompareResponse(SamplerSpec samplerSpec, boolean useInputFormat) + { SamplerResponse response = samplerSpec.sample(); Assert.assertEquals(5, response.getNumRowsRead()); @@ -256,7 +342,7 @@ public void testSample() null, null, true, - "Unable to parse row [unparseable] into JSON" + "Unable to parse row [unparseable]" + (useInputFormat ? " into JSON" : "") ), it.next()); Assert.assertFalse(it.hasNext()); diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java index 918b25d49004..2f9d381120c2 100644 --- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java +++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java @@ -24,14 +24,18 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.druid.client.indexing.SamplerResponse; +import org.apache.druid.client.indexing.SamplerSpec; import org.apache.druid.common.aws.AWSCredentialsConfig; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.impl.ByteEntity; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.FloatDimensionSchema; +import org.apache.druid.data.input.impl.InputRowParser; +import org.apache.druid.data.input.impl.JSONParseSpec; import org.apache.druid.data.input.impl.JsonInputFormat; import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisorIOConfig; import org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisorSpec; @@ -40,6 +44,7 @@ import org.apache.druid.indexing.overlord.sampler.SamplerTestUtils; import org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord; import org.apache.druid.indexing.seekablestream.common.StreamPartition; +import 
org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.parsers.JSONPathSpec; @@ -53,10 +58,12 @@ import org.junit.Assert; import org.junit.Test; +import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Map; public class KinesisSamplerSpecTest extends EasyMockSupport { @@ -111,30 +118,93 @@ private static List> gene } @Test(timeout = 10_000L) - public void testSample() throws Exception + public void testSample() throws InterruptedException { - EasyMock.expect(recordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID)).once(); - - recordSupplier.assign(ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID))); - EasyMock.expectLastCall().once(); + KinesisSupervisorSpec supervisorSpec = new KinesisSupervisorSpec( + null, + DATA_SCHEMA, + null, + new KinesisSupervisorIOConfig( + STREAM, + new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of()), ImmutableMap.of(), false), + null, + null, + null, + null, + null, + null, + null, + true, + null, + null, + null, + null, + null, + null, + null, + null, + null, + false + ), + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ); - recordSupplier.seekToEarliest(ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID))); - EasyMock.expectLastCall().once(); + KinesisSamplerSpec samplerSpec = new TestableKinesisSamplerSpec( + supervisorSpec, + new SamplerConfig(5, null), + new InputSourceSampler(), + null + ); - EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(STREAM)).once(); + runSamplerAndCompareResponse(samplerSpec, true); - recordSupplier.close(); - EasyMock.expectLastCall().once(); + } - replayAll(); + @Test + public void testWithInputRowParser() throws IOException, InterruptedException + { + ObjectMapper objectMapper = new DefaultObjectMapper(); + TimestampSpec timestampSpec = new TimestampSpec("timestamp", "iso", null); + DimensionsSpec dimensionsSpec = new DimensionsSpec( + Arrays.asList( + new StringDimensionSchema("dim1"), + new StringDimensionSchema("dim1t"), + new StringDimensionSchema("dim2"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ) + ); + InputRowParser parser = new StringInputRowParser(new JSONParseSpec(timestampSpec, dimensionsSpec, JSONPathSpec.DEFAULT, null, null), "UTF8"); - replayAll(); + DataSchema dataSchema = new DataSchema( + "test_ds", + objectMapper.readValue(objectMapper.writeValueAsBytes(parser), Map.class), + new AggregatorFactory[]{ + new DoubleSumAggregatorFactory("met1sum", "met1"), + new CountAggregatorFactory("rows") + }, + new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), + null, + objectMapper + ); KinesisSupervisorSpec supervisorSpec = new KinesisSupervisorSpec( null, - DATA_SCHEMA, + dataSchema, null, new KinesisSupervisorIOConfig( STREAM, - new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of()), ImmutableMap.of(), false), + null, null, null, null, @@ -174,6 +244,26 @@ public void testSample() throws Exception new InputSourceSampler(), null ); + + runSamplerAndCompareResponse(samplerSpec, false); + } + + private void runSamplerAndCompareResponse(SamplerSpec samplerSpec, boolean useInputFormat) throws InterruptedException + { + EasyMock.expect(recordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID)).once(); + + recordSupplier.assign(ImmutableSet.of(StreamPartition.of(STREAM, 
SHARD_ID))); + EasyMock.expectLastCall().once(); + + recordSupplier.seekToEarliest(ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID))); + EasyMock.expectLastCall().once(); + + EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(STREAM)).once(); + + recordSupplier.close(); + EasyMock.expectLastCall().once(); + + replayAll(); SamplerResponse response = samplerSpec.sample(); @@ -268,7 +356,7 @@ public void testSample() throws Exception null, null, true, - "Unable to parse row [unparseable] into JSON" + "Unable to parse row [unparseable]" + (useInputFormat ? " into JSON" : "") ), it.next()); Assert.assertFalse(it.hasNext()); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java index a815caaa66c0..894e66d338dd 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java @@ -138,13 +138,12 @@ public boolean isSplittable() @Override public Stream createSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec) - throws IOException { throw new UnsupportedOperationException(); } @Override - public int estimateNumSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec) throws IOException + public int estimateNumSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec) { throw new UnsupportedOperationException(); } @@ -189,7 +188,7 @@ public SeekableStreamSamplerInputSourceReader(InputRowParser parser) } @Override - public CloseableIterator read() throws IOException + public CloseableIterator read() { return new CloseableIterator() { @@ -215,7 +214,7 @@ public void close() throws IOException } @Override - public CloseableIterator sample() throws IOException + public CloseableIterator sample() { return new CloseableIterator() { @@ -243,6 +242,7 @@ public InputRowListPlusRawValues next() } try { + bb.position(0); final List rows = parser.parseBatch(bb); return InputRowListPlusRawValues.of(rows.isEmpty() ? 
null : rows, rawColumns); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java index 212914556560..e6ebf92043df 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java @@ -27,11 +27,15 @@ import org.apache.druid.data.input.InputSource; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.InlineInputSource; +import org.apache.druid.data.input.impl.JSONParseSpec; import org.apache.druid.data.input.impl.JsonInputFormat; +import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.http.client.response.StringFullResponseHolder; import org.apache.druid.rpc.HttpResponseException; @@ -291,6 +295,83 @@ public void testFailToConstructWhenBothAppendToExistingAndForceGuaranteedRollupA null ); } + + @Test + public void testFailToConstructWhenBothInputSourceAndParserAreSet() + { + final ObjectMapper mapper = new DefaultObjectMapper(); + final ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig( + null, + new InlineInputSource("test"), + null, + false, + null + ); + final ParallelIndexTuningConfig tuningConfig = new ParallelIndexTuningConfig( + null, + null, + null, + 10, + 1000L, + null, + null, + null, + null, + new HashedPartitionsSpec(null, 10, null), + new IndexSpec( + new RoaringBitmapSerdeFactory(true), + CompressionStrategy.UNCOMPRESSED, + CompressionStrategy.LZF, + LongEncodingStrategy.LONGS + ), + new IndexSpec(), + 1, + true, + true, + 10000L, + OffHeapMemorySegmentWriteOutMediumFactory.instance(), + null, + 10, + 100, + 20L, + new Duration(3600), + 128, + null, + null, + false, + null, + null, + null, + null, + null + ); + + expectedException.expect(IAE.class); + expectedException.expectMessage("Cannot use parser and inputSource together. Try using inputFormat instead of parser."); + new ParallelIndexIngestionSpec( + new DataSchema( + "datasource", + mapper.convertValue( + new StringInputRowParser( + new JSONParseSpec( + new TimestampSpec(null, null, null), + DimensionsSpec.EMPTY, + null, + null, + null + ) + ), + Map.class + ), + null, + null, + null, + mapper + ), + ioConfig, + tuningConfig + ); + } } public static class StaticUtilsTest diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java new file mode 100644 index 000000000000..b5e06d81e491 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java @@ -0,0 +1,348 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.seekablestream; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.apache.druid.client.indexing.SamplerResponse; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.InputFormat; +import org.apache.druid.data.input.impl.ByteEntity; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.FloatDimensionSchema; +import org.apache.druid.data.input.impl.JSONParseSpec; +import org.apache.druid.data.input.impl.LongDimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.data.input.impl.StringInputRowParser; +import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.indexing.overlord.sampler.InputSourceSampler; +import org.apache.druid.indexing.overlord.sampler.SamplerConfig; +import org.apache.druid.indexing.overlord.sampler.SamplerTestUtils; +import org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord; +import org.apache.druid.indexing.seekablestream.common.RecordSupplier; +import org.apache.druid.indexing.seekablestream.common.StreamPartition; +import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIOConfig; +import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; +import org.apache.druid.indexing.seekablestream.supervisor.autoscaler.AutoScalerConfig; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.parsers.JSONPathSpec; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; +import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; +import org.easymock.EasyMock; +import org.easymock.EasyMockSupport; +import org.joda.time.DateTime; +import org.joda.time.Period; +import org.junit.Assert; +import org.junit.Test; + +import javax.annotation.Nullable; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +public class SeekableStreamSamplerSpecTest extends EasyMockSupport +{ + private static final ObjectMapper OBJECT_MAPPER = new DefaultObjectMapper(); + private static final String STREAM = "sampling"; + private static final String SHARD_ID = "1"; + private static final DataSchema DATA_SCHEMA = new DataSchema( + "test_ds", 
+ OBJECT_MAPPER.convertValue( + new StringInputRowParser( + new JSONParseSpec( + new TimestampSpec("timestamp", "iso", null), + new DimensionsSpec( + Arrays.asList( + new StringDimensionSchema("dim1"), + new StringDimensionSchema("dim1t"), + new StringDimensionSchema("dim2"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ) + ), + new JSONPathSpec(true, ImmutableList.of()), + ImmutableMap.of(), + false + ) + ), + Map.class + ), + new AggregatorFactory[]{ + new DoubleSumAggregatorFactory("met1sum", "met1"), + new CountAggregatorFactory("rows") + }, + new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), + null, + OBJECT_MAPPER + ); + + private final SeekableStreamSupervisorSpec supervisorSpec = mock(SeekableStreamSupervisorSpec.class); + private final SeekableStreamSupervisorIOConfig supervisorIOConfig = new TestableSeekableStreamSupervisorIOConfig( + STREAM, + null, + null, + null, + null, + null, + null, + true, + null, + null, + null, + null, + null + ); + + static { + NullHandling.initializeForTests(); + } + + private final RecordSupplier recordSupplier = mock(RecordSupplier.class); + + private static List> generateRecords(String stream) + { + return ImmutableList.of( + new OrderedPartitionableRecord<>(stream, "1", "0", jb("2008", "a", "y", "10", "20.0", "1.0")), + new OrderedPartitionableRecord<>(stream, "1", "1", jb("2009", "b", "y", "10", "20.0", "1.0")), + new OrderedPartitionableRecord<>(stream, "1", "2", jb("2010", "c", "y", "10", "20.0", "1.0")), + new OrderedPartitionableRecord<>( + stream, + "1", + "5", + jb("246140482-04-24T15:36:27.903Z", "x", "z", "10", "20.0", "1.0") + ), + new OrderedPartitionableRecord<>( + stream, + "1", + "6", + Collections.singletonList(new ByteEntity(StringUtils.toUtf8("unparseable"))) + ), + new OrderedPartitionableRecord<>(stream, "1", "8", Collections.singletonList(new ByteEntity(StringUtils.toUtf8("{}")))) + ); + } + + @Test(timeout = 10_000L) + public void testSample() throws Exception + { + EasyMock.expect(recordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID)).once(); + EasyMock.expect(supervisorSpec.getDataSchema()).andReturn(DATA_SCHEMA).once(); + EasyMock.expect(supervisorSpec.getIoConfig()).andReturn(supervisorIOConfig).once(); + EasyMock.expect(supervisorSpec.getTuningConfig()).andReturn(null).once(); + + recordSupplier.assign(ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID))); + EasyMock.expectLastCall().once(); + + recordSupplier.seekToEarliest(ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID))); + EasyMock.expectLastCall().once(); + + EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(STREAM)).once(); + + recordSupplier.close(); + EasyMock.expectLastCall().once(); + + replayAll(); + + SeekableStreamSamplerSpec samplerSpec = new TestableSeekableStreamSamplerSpec( + supervisorSpec, + new SamplerConfig(5, null), + new InputSourceSampler() + ); + + SamplerResponse response = samplerSpec.sample(); + + verifyAll(); + + Assert.assertEquals(5, response.getNumRowsRead()); + Assert.assertEquals(3, response.getNumRowsIndexed()); + Assert.assertEquals(5, response.getData().size()); + + Iterator it = response.getData().iterator(); + + Assert.assertEquals(new SamplerResponse.SamplerResponseRow( + ImmutableMap.builder() + .put("timestamp", "2008") + .put("dim1", "a") + .put("dim2", "y") + .put("dimLong", "10") + .put("dimFloat", "20.0") + .put("met1", "1.0") + .build(), + new SamplerTestUtils.MapAllowingNullValuesBuilder() + .put("__time", 
1199145600000L) + .put("dim1", "a") + .put("dim1t", null) + .put("dim2", "y") + .put("dimLong", 10L) + .put("dimFloat", 20.0F) + .put("rows", 1L) + .put("met1sum", 1.0) + .build(), + null, + null + ), it.next()); + Assert.assertEquals(new SamplerResponse.SamplerResponseRow( + ImmutableMap.builder() + .put("timestamp", "2009") + .put("dim1", "b") + .put("dim2", "y") + .put("dimLong", "10") + .put("dimFloat", "20.0") + .put("met1", "1.0") + .build(), + new SamplerTestUtils.MapAllowingNullValuesBuilder() + .put("__time", 1230768000000L) + .put("dim1", "b") + .put("dim1t", null) + .put("dim2", "y") + .put("dimLong", 10L) + .put("dimFloat", 20.0F) + .put("rows", 1L) + .put("met1sum", 1.0) + .build(), + null, + null + ), it.next()); + Assert.assertEquals(new SamplerResponse.SamplerResponseRow( + ImmutableMap.builder() + .put("timestamp", "2010") + .put("dim1", "c") + .put("dim2", "y") + .put("dimLong", "10") + .put("dimFloat", "20.0") + .put("met1", "1.0") + .build(), + new SamplerTestUtils.MapAllowingNullValuesBuilder() + .put("__time", 1262304000000L) + .put("dim1", "c") + .put("dim1t", null) + .put("dim2", "y") + .put("dimLong", 10L) + .put("dimFloat", 20.0F) + .put("rows", 1L) + .put("met1sum", 1.0) + .build(), + null, + null + ), it.next()); + Assert.assertEquals(new SamplerResponse.SamplerResponseRow( + ImmutableMap.builder() + .put("timestamp", "246140482-04-24T15:36:27.903Z") + .put("dim1", "x") + .put("dim2", "z") + .put("dimLong", "10") + .put("dimFloat", "20.0") + .put("met1", "1.0") + .build(), + null, + true, + "Encountered row with timestamp[246140482-04-24T15:36:27.903Z] that cannot be represented as a long: [{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}]" + ), it.next()); + Assert.assertEquals(new SamplerResponse.SamplerResponseRow( + null, + null, + true, + "Unable to parse row [unparseable] into JSON" + ), it.next()); + + Assert.assertFalse(it.hasNext()); + } + + private static List jb(String ts, String dim1, String dim2, String dimLong, String dimFloat, String met1) + { + try { + return Collections.singletonList(new ByteEntity(new ObjectMapper().writeValueAsBytes( + ImmutableMap.builder() + .put("timestamp", ts) + .put("dim1", dim1) + .put("dim2", dim2) + .put("dimLong", dimLong) + .put("dimFloat", dimFloat) + .put("met1", met1) + .build() + ))); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + private class TestableSeekableStreamSamplerSpec extends SeekableStreamSamplerSpec + { + private TestableSeekableStreamSamplerSpec( + SeekableStreamSupervisorSpec ingestionSpec, + SamplerConfig samplerConfig, + InputSourceSampler inputSourceSampler + ) + { + super(ingestionSpec, samplerConfig, inputSourceSampler); + } + + @Override + protected RecordSupplier createRecordSupplier() + { + return recordSupplier; + } + } + + private static class TestableSeekableStreamSupervisorIOConfig extends SeekableStreamSupervisorIOConfig + { + private TestableSeekableStreamSupervisorIOConfig( + String stream, + @Nullable InputFormat inputFormat, + Integer replicas, + Integer taskCount, + Period taskDuration, + Period startDelay, + Period period, + Boolean useEarliestSequenceNumber, + Period completionTimeout, + Period lateMessageRejectionPeriod, + Period earlyMessageRejectionPeriod, + @Nullable AutoScalerConfig autoScalerConfig, + DateTime lateMessageRejectionStartDateTime + ) + { + super( + stream, + inputFormat, + replicas, + taskCount, + taskDuration, + startDelay, + period, + useEarliestSequenceNumber, + completionTimeout, + 
lateMessageRejectionPeriod, + earlyMessageRejectionPeriod, + autoScalerConfig, + lateMessageRejectionStartDateTime + ); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/transform/TransformSpec.java b/processing/src/main/java/org/apache/druid/segment/transform/TransformSpec.java index 1391da394b01..62e85f2c84f7 100644 --- a/processing/src/main/java/org/apache/druid/segment/transform/TransformSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/transform/TransformSpec.java @@ -66,21 +66,6 @@ public TransformSpec( } } - public static TransformSpec fromInputRowParser(final InputRowParser parser) - { - // Hack: some firehoses and input specs must extract transformSpec from the parser, since they do not - - actually use the parser, but still must respect the transformSpec. This method should extract whatever - // transformSpec "decorate" had put in. - - if (parser instanceof TransformingInputRowParser) { - return ((TransformingInputRowParser) parser).getTransformSpec(); - } else if (parser instanceof TransformingStringInputRowParser) { - return ((TransformingStringInputRowParser) parser).getTransformSpec(); - } else { - throw new ISE("Parser was not decorated, but should have been"); - } - } - @JsonProperty @Nullable public DimFilter getFilter() From 62a32621ebc6bf03e9d22fd75435a5d26a4f4eef Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Thu, 11 Aug 2022 18:50:23 +0530 Subject: [PATCH 07/33] unremoves native-batch-firehose --- docs/ingestion/native-batch-firehose.md | 341 ++++++++++++++++++++++++ 1 file changed, 341 insertions(+) create mode 100644 docs/ingestion/native-batch-firehose.md diff --git a/docs/ingestion/native-batch-firehose.md b/docs/ingestion/native-batch-firehose.md new file mode 100644 index 000000000000..a6c76b53ccc8 --- /dev/null +++ b/docs/ingestion/native-batch-firehose.md @@ -0,0 +1,341 @@ +--- +id: native-batch-firehose +title: "Native batch ingestion with firehose" +sidebar_label: "Firehose" +--- + + + + +Firehoses are deprecated in 0.17.0. It's highly recommended to use the [Native batch ingestion input sources](./native-batch-input-source.md) instead. + +There are several firehoses readily available in Druid; some are meant as examples, while others can be used directly in a production environment. + +## StaticS3Firehose + +> You need to include the [`druid-s3-extensions`](../development/extensions-core/s3.md) as an extension to use the StaticS3Firehose. + +This firehose ingests events from a predefined list of S3 objects. +This firehose is _splittable_ and can be used by the [Parallel task](./native-batch.md). +Since each split represents an object in this firehose, each worker task of `index_parallel` will read an object. + +Sample spec: + +```json +"firehose" : { + "type" : "static-s3", + "uris": ["s3://foo/bar/file.gz", "s3://bar/foo/file2.gz"] +} +``` + +This firehose provides caching and prefetching features. In the Simple task, a firehose can be read twice if `intervals` or +`shardSpecs` are not specified; in that case, caching can be useful. Prefetching is preferred when direct scanning of objects is slow. +Note that prefetching or caching isn't that useful in the Parallel task.
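+ +For example, a sketch of a spec that reads by prefix and tunes the cache and prefetch thresholds; the property values here are illustrative, and each property is described in the table below: + +```json +"firehose" : { + "type" : "static-s3", + "prefixes": ["s3://foo/bar/", "s3://bar/foo/"], + "maxCacheCapacityBytes": 1073741824, + "maxFetchCapacityBytes": 1073741824, + "prefetchTriggerBytes": 536870912, + "fetchTimeout": 60000, + "maxFetchRetry": 3 +} +```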
+ +|property|description|default|required?| +|--------|-----------|-------|---------| +|type|This should be `static-s3`.|None|yes| +|uris|JSON array of URIs where s3 files to be ingested are located.|None|`uris` or `prefixes` must be set| +|prefixes|JSON array of URI prefixes for the locations of s3 files to be ingested.|None|`uris` or `prefixes` must be set| +|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824|no| +|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824|no| +|prefetchTriggerBytes|Threshold to trigger prefetching s3 objects.|maxFetchCapacityBytes / 2|no| +|fetchTimeout|Timeout for fetching an s3 object.|60000|no| +|maxFetchRetry|Maximum number of retries for fetching an s3 object.|3|no| + +## StaticGoogleBlobStoreFirehose + +> You need to include the [`druid-google-extensions`](../development/extensions-core/google.md) as an extension to use the StaticGoogleBlobStoreFirehose. + +This firehose ingests events, similar to the StaticS3Firehose, but from Google Cloud Storage. + +As with the S3 blobstore, an object is assumed to be gzipped if its filename ends in `.gz`. + +This firehose is _splittable_ and can be used by the [Parallel task](./native-batch.md). +Since each split represents an object in this firehose, each worker task of `index_parallel` will read an object. + +Sample spec: + +```json +"firehose" : { + "type" : "static-google-blobstore", + "blobs": [ + { + "bucket": "foo", + "path": "/path/to/your/file.json" + }, + { + "bucket": "bar", + "path": "/another/path.json" + } + ] +} +``` + +This firehose provides caching and prefetching features. In the Simple task, a firehose can be read twice if `intervals` or +`shardSpecs` are not specified; in that case, caching can be useful. Prefetching is preferred when direct scanning of objects is slow. +Note that prefetching or caching isn't that useful in the Parallel task. + +|property|description|default|required?| +|--------|-----------|-------|---------| +|type|This should be `static-google-blobstore`.|None|yes| +|blobs|JSON array of Google Blobs.|None|yes| +|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824|no| +|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824|no| +|prefetchTriggerBytes|Threshold to trigger prefetching Google Blobs.|maxFetchCapacityBytes / 2|no| +|fetchTimeout|Timeout for fetching a Google Blob.|60000|no| +|maxFetchRetry|Maximum number of retries for fetching a Google Blob.|3|no| + +Google Blobs: + +|property|description|default|required?| +|--------|-----------|-------|---------| +|bucket|Name of the Google Cloud bucket.|None|yes| +|path|The path where data is located.|None|yes| + +## HDFSFirehose + +> You need to include the [`druid-hdfs-storage`](../development/extensions-core/hdfs.md) as an extension to use the HDFSFirehose. + +This firehose ingests events from a predefined list of files in HDFS storage. +This firehose is _splittable_ and can be used by the [Parallel task](./native-batch.md). +Since each split represents an HDFS file, each worker task of `index_parallel` will read files.
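+ +The `paths` property (described in the table below) accepts either a comma-separated string or a JSON array, and supports wildcards; here is a sketch of the array form with illustrative paths (the comma-separated form appears in the sample spec below): + +```json +"firehose" : { + "type" : "hdfs", + "paths": ["/foo/bar/*.json", "/foo/baz/*.json"] +} +```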
+ +Sample spec: + +```json +"firehose" : { + "type" : "hdfs", + "paths": "/foo/bar,/foo/baz" +} +``` + +This firehose provides caching and prefetching features. During native batch indexing, a firehose can be read twice if +`intervals` are not specified; in that case, caching can be useful. Prefetching is preferred when direct scanning +of files is slow. +Note that prefetching or caching isn't that useful in the Parallel task. + +|Property|Description|Default| +|--------|-----------|-------| +|type|This should be `hdfs`.|none (required)| +|paths|HDFS paths. Can be either a JSON array or comma-separated string of paths. Wildcards like `*` are supported in these paths.|none (required)| +|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824| +|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824| +|prefetchTriggerBytes|Threshold to trigger prefetching files.|maxFetchCapacityBytes / 2| +|fetchTimeout|Timeout for fetching each file.|60000| +|maxFetchRetry|Maximum number of retries for fetching each file.|3| + +You can also ingest from other storage using the HDFS firehose if the HDFS client supports that storage. +However, if you want to ingest from cloud storage, consider using the service-specific input source for your data storage. +If you want to use a non-HDFS protocol with the HDFS firehose, you need to include that protocol +in `druid.ingestion.hdfs.allowedProtocols`. See [HDFS firehose security configuration](../configuration/index.md#hdfs-input-source) for more details. + +## LocalFirehose + +This Firehose can be used to read the data from files on local disk. It is mainly intended for proof-of-concept testing and works with `string` typed parsers. +This Firehose is _splittable_ and can be used by [native parallel index tasks](native-batch.md). +Since each split represents a file in this Firehose, each worker task of `index_parallel` will read a file. +A sample local Firehose spec is shown below: + +```json +{ + "type": "local", + "filter" : "*.csv", + "baseDir": "/data/directory" +} +``` + +|property|description|required?| +|--------|-----------|---------| +|type|This should be "local".|yes| +|filter|A wildcard filter for files. See [here](http://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/WildcardFileFilter) for more information.|yes| +|baseDir|Directory to search recursively for files to be ingested.|yes| + + + +## HttpFirehose + +This Firehose can be used to read data from remote sites via HTTP and works with `string` typed parsers. +This Firehose is _splittable_ and can be used by [native parallel index tasks](native-batch.md). +Since each split represents a file in this Firehose, each worker task of `index_parallel` will read a file. +A sample HTTP Firehose spec is shown below: + +```json +{ + "type": "http", + "uris": ["http://example.com/uri1", "http://example2.com/uri2"] +} +``` + +You can only use protocols listed in the `druid.ingestion.http.allowedProtocols` property as HTTP firehose input sources. +The `http` and `https` protocols are allowed by default. See [HTTP firehose security configuration](../configuration/index.md#http-input-source) for more details. + +The configurations below can optionally be used if the URIs specified in the spec require a Basic Authentication header.
+Omitting these fields from your spec will result in HTTP requests with no Basic Authentication header. + +|property|description|default| +|--------|-----------|-------| +|httpAuthenticationUsername|Username to use for authentication with specified URIs.|None| +|httpAuthenticationPassword|PasswordProvider to use with specified URIs.|None| + +Example with authentication fields using the DefaultPassword provider (this requires the password to be in the ingestion spec): + +```json +{ + "type": "http", + "uris": ["http://example.com/uri1", "http://example2.com/uri2"], + "httpAuthenticationUsername": "username", + "httpAuthenticationPassword": "password123" +} +``` + +You can also use the other existing Druid PasswordProviders. Here is an example using the EnvironmentVariablePasswordProvider: + +```json +{ + "type": "http", + "uris": ["http://example.com/uri1", "http://example2.com/uri2"], + "httpAuthenticationUsername": "username", + "httpAuthenticationPassword": { + "type": "environment", + "variable": "HTTP_FIREHOSE_PW" + } +} +``` + +The configurations below can optionally be used for tuning the Firehose performance. +Note that prefetching or caching isn't that useful in the Parallel task. + +|property|description|default| +|--------|-----------|-------| +|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824| +|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824| +|prefetchTriggerBytes|Threshold to trigger prefetching HTTP objects.|maxFetchCapacityBytes / 2| +|fetchTimeout|Timeout for fetching an HTTP object.|60000| +|maxFetchRetry|Maximum number of retries for fetching an HTTP object.|3| + + + +## IngestSegmentFirehose + +This Firehose can be used to read the data from existing Druid segments, potentially using a new schema and changing the name, dimensions, metrics, rollup, etc. of the segment. +This Firehose is _splittable_ and can be used by [native parallel index tasks](native-batch.md). +This firehose will accept any type of parser, but will only utilize the list of dimensions and the timestamp specification. +A sample ingest Firehose spec is shown below: + +```json +{ + "type": "ingestSegment", + "dataSource": "wikipedia", + "interval": "2013-01-01/2013-01-02" +} +``` + +|property|description|required?| +|--------|-----------|---------| +|type|This should be "ingestSegment".|yes| +|dataSource|A String defining the data source to fetch rows from, very similar to a table in a relational database.|yes| +|interval|A String representing the ISO-8601 interval. This defines the time range to fetch the data over.|yes| +|dimensions|The list of dimensions to select. If left empty, no dimensions are returned. If left null or not defined, all dimensions are returned.|no| +|metrics|The list of metrics to select. If left empty, no metrics are returned. If left null or not defined, all metrics are selected.|no| +|filter|See [Filters](../querying/filters.md).|no| +|maxInputSegmentBytesPerTask|Deprecated. Use [Segments Split Hint Spec](./native-batch.md#segments-split-hint-spec) instead. When used with the native parallel index task, the maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks). Defaults to 150MB.|no|
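+ +For example, here is a sketch of a reindexing spec that keeps only a few columns and filters rows; the dimension, metric, and filter values are illustrative: + +```json +{ + "type": "ingestSegment", + "dataSource": "wikipedia", + "interval": "2013-01-01/2013-01-02", + "dimensions": ["page", "user"], + "metrics": ["added"], + "filter": { "type": "selector", "dimension": "channel", "value": "#en.wikipedia" } +} +```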
+ + + +## SqlFirehose + +This Firehose can be used to ingest events residing in an RDBMS. The database connection information is provided as part of the ingestion spec. +For each query, the results are fetched locally and indexed. +If there are multiple queries from which data needs to be indexed, queries are prefetched in the background, up to `maxFetchCapacityBytes` bytes. +This Firehose is _splittable_ and can be used by [native parallel index tasks](native-batch.md). +This firehose will accept any type of parser, but will only utilize the list of dimensions and the timestamp specification. See the extension documentation for more detailed ingestion examples. + +Requires one of the following extensions: + * [MySQL Metadata Store](../development/extensions-core/mysql.md). + * [PostgreSQL Metadata Store](../development/extensions-core/postgresql.md). + + +```json +{ + "type": "sql", + "database": { + "type": "mysql", + "connectorConfig": { + "connectURI": "jdbc:mysql://host:port/schema", + "user": "user", + "password": "password" + } + }, + "sqls": ["SELECT * FROM table1", "SELECT * FROM table2"] +} +``` + +|property|description|default|required?| +|--------|-----------|-------|---------| +|type|This should be "sql".||Yes| +|database|Specifies the database connection details. The database type corresponds to the extension that supplies the `connectorConfig` support. The specified extension must be loaded into Druid:<br/><br/>&bull; [mysql-metadata-storage](../development/extensions-core/mysql.md) for `mysql`<br/><br/>&bull; [postgresql-metadata-storage](../development/extensions-core/postgresql.md) extension for `postgresql`.<br/><br/>You can selectively allow JDBC properties in `connectURI`. See [JDBC connections security config](../configuration/index.md#jdbc-connections-to-external-databases) for more details.||Yes|
+|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824|No| +|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824|No| +|prefetchTriggerBytes|Threshold to trigger prefetching SQL result objects.|maxFetchCapacityBytes / 2|No| +|fetchTimeout|Timeout for fetching the result set.|60000|No| +|foldCase|Toggle case folding of database column names. This may be enabled in cases where the database returns case-insensitive column names in query results.|false|No| +|sqls|List of SQL queries, each of which retrieves data to be indexed.||Yes| + +### Database + +|property|description|default|required?| +|--------|-----------|-------|---------| +|type|The type of database to query. Valid values are `mysql` and `postgresql`.||Yes| +|connectorConfig|Specifies the database connection properties via `connectURI`, `user`, and `password`.||Yes| + +## InlineFirehose + +This Firehose can be used to read the data inlined in its own spec. +It can be used for demos or for quickly testing out parsing and schema, and works with `string` typed parsers. +A sample inline Firehose spec is shown below: + +```json +{ + "type": "inline", + "data": "0,values,formatted\n1,as,CSV" +} +``` + +|property|description|required?| +|--------|-----------|---------| +|type|This should be "inline".|yes| +|data|Inlined data to ingest.|yes| + +## CombiningFirehose + +This Firehose can be used to combine and merge data from a list of different Firehoses. + +```json +{ + "type": "combining", + "delegates": [ { firehose1 }, { firehose2 }, ...
] +} +``` + +|property|description|required?| +|--------|-----------|---------| +|type|This should be "combining"|yes| +|delegates|List of Firehoses to combine data from|yes| \ No newline at end of file From 6c3e09f486fa2c9909c66f73524c115270336abe Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Thu, 11 Aug 2022 21:37:19 +0530 Subject: [PATCH 08/33] nit --- .../SeekableStreamSamplerSpecTest.java | 98 ++++++++++--------- .../transform/TransformingInputRowParser.java | 5 - .../TransformingStringInputRowParser.java | 5 - 3 files changed, 50 insertions(+), 58 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java index b5e06d81e491..3d764c8314b5 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java @@ -71,53 +71,8 @@ public class SeekableStreamSamplerSpecTest extends EasyMockSupport private static final ObjectMapper OBJECT_MAPPER = new DefaultObjectMapper(); private static final String STREAM = "sampling"; private static final String SHARD_ID = "1"; - private static final DataSchema DATA_SCHEMA = new DataSchema( - "test_ds", - OBJECT_MAPPER.convertValue( - new StringInputRowParser( - new JSONParseSpec( - new TimestampSpec("timestamp", "iso", null), - new DimensionsSpec( - Arrays.asList( - new StringDimensionSchema("dim1"), - new StringDimensionSchema("dim1t"), - new StringDimensionSchema("dim2"), - new LongDimensionSchema("dimLong"), - new FloatDimensionSchema("dimFloat") - ) - ), - new JSONPathSpec(true, ImmutableList.of()), - ImmutableMap.of(), - false - ) - ), - Map.class - ), - new AggregatorFactory[]{ - new DoubleSumAggregatorFactory("met1sum", "met1"), - new CountAggregatorFactory("rows") - }, - new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), - null, - OBJECT_MAPPER - ); private final SeekableStreamSupervisorSpec supervisorSpec = mock(SeekableStreamSupervisorSpec.class); - private final SeekableStreamSupervisorIOConfig supervisorIOConfig = new TestableSeekableStreamSupervisorIOConfig( - STREAM, - null, - null, - null, - null, - null, - null, - true, - null, - null, - null, - null, - null - ); static { NullHandling.initializeForTests(); @@ -148,10 +103,57 @@ private static List> gene } @Test(timeout = 10_000L) - public void testSample() throws Exception + public void testSampleWithInputRowParser() throws Exception { + final DataSchema dataSchema = new DataSchema( + "test_ds", + OBJECT_MAPPER.convertValue( + new StringInputRowParser( + new JSONParseSpec( + new TimestampSpec("timestamp", "iso", null), + new DimensionsSpec( + Arrays.asList( + new StringDimensionSchema("dim1"), + new StringDimensionSchema("dim1t"), + new StringDimensionSchema("dim2"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ) + ), + new JSONPathSpec(true, ImmutableList.of()), + ImmutableMap.of(), + false + ) + ), + Map.class + ), + new AggregatorFactory[]{ + new DoubleSumAggregatorFactory("met1sum", "met1"), + new CountAggregatorFactory("rows") + }, + new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), + null, + OBJECT_MAPPER + ); + + final SeekableStreamSupervisorIOConfig supervisorIOConfig = new TestableSeekableStreamSupervisorIOConfig( + STREAM, + null, + null, + null, 
+ null, + null, + null, + true, + null, + null, + null, + null, + null + ); + EasyMock.expect(recordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID)).once(); - EasyMock.expect(supervisorSpec.getDataSchema()).andReturn(DATA_SCHEMA).once(); + EasyMock.expect(supervisorSpec.getDataSchema()).andReturn(dataSchema).once(); EasyMock.expect(supervisorSpec.getIoConfig()).andReturn(supervisorIOConfig).once(); EasyMock.expect(supervisorSpec.getTuningConfig()).andReturn(null).once(); @@ -267,7 +269,7 @@ public void testSample() throws Exception null, null, true, - "Unable to parse row [unparseable] into JSON" + "Unable to parse row [unparseable]" ), it.next()); Assert.assertFalse(it.hasNext()); diff --git a/processing/src/main/java/org/apache/druid/segment/transform/TransformingInputRowParser.java b/processing/src/main/java/org/apache/druid/segment/transform/TransformingInputRowParser.java index ea6da0b36a63..53204314f4d9 100644 --- a/processing/src/main/java/org/apache/druid/segment/transform/TransformingInputRowParser.java +++ b/processing/src/main/java/org/apache/druid/segment/transform/TransformingInputRowParser.java @@ -57,9 +57,4 @@ public InputRowParser withParseSpec(final ParseSpec parseSpec) { return new TransformingInputRowParser<>(parser.withParseSpec(parseSpec), transformSpec); } - - public TransformSpec getTransformSpec() - { - return transformSpec; - } } diff --git a/processing/src/main/java/org/apache/druid/segment/transform/TransformingStringInputRowParser.java b/processing/src/main/java/org/apache/druid/segment/transform/TransformingStringInputRowParser.java index d9a0aa2d954a..cba455d46915 100644 --- a/processing/src/main/java/org/apache/druid/segment/transform/TransformingStringInputRowParser.java +++ b/processing/src/main/java/org/apache/druid/segment/transform/TransformingStringInputRowParser.java @@ -62,9 +62,4 @@ public StringInputRowParser withParseSpec(final ParseSpec parseSpec) { return new TransformingStringInputRowParser(parseSpec, getEncoding(), transformSpec); } - - public TransformSpec getTransformSpec() - { - return transformSpec; - } } From 01e0c90a104ad79f65d20c0cd4ed180568d3de60 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Fri, 12 Aug 2022 00:25:08 +0530 Subject: [PATCH 09/33] nit --- docs/development/extensions-core/mysql.md | 2 +- docs/development/extensions-core/postgresql.md | 2 +- docs/ingestion/native-batch.md | 2 +- .../indexer/wikipedia_combining_input_source_index_task.json | 2 +- .../indexer/wikipedia_index_task_with_dimension_spec.json | 2 +- .../indexer/wikipedia_index_task_with_granularity_spec.json | 2 +- .../resources/indexer/wikipedia_index_task_with_transform.json | 2 +- .../indexer/wikipedia_index_with_merge_column_limit_task.json | 2 +- .../src/test/resources/indexer/wikipedia_merge_index_task.json | 2 +- .../test/resources/indexer/wikipedia_merge_reindex_task.json | 2 +- .../test/resources/indexer/wikipedia_parallel_index_task.json | 2 +- .../indexer/wikipedia_parallel_ingest_segment_index_task.json | 2 +- .../test/resources/indexer/wikipedia_parallel_reindex_task.json | 2 +- .../src/test/resources/indexer/wikipedia_reindex_task.json | 2 +- .../indexer/wikipedia_reindex_task_with_transforms.json | 2 +- .../resources/indexer/wikipedia_with_timestamp_index_task.json | 2 +- .../src/test/resources/indexer/wikiticker_index_task.json | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/development/extensions-core/mysql.md b/docs/development/extensions-core/mysql.md index 
076d0015a340..f7c300c16ac0 100644 --- a/docs/development/extensions-core/mysql.md +++ b/docs/development/extensions-core/mysql.md @@ -177,7 +177,7 @@ If using the MariaDB connector library, set `druid.metadata.mysql.driver.driverC ] }, "inputFormat": { - "format": "json" + "type": "json" } }, "tuningConfig": { diff --git a/docs/development/extensions-core/postgresql.md b/docs/development/extensions-core/postgresql.md index 73a33aa0f3b9..b9037cce0e47 100644 --- a/docs/development/extensions-core/postgresql.md +++ b/docs/development/extensions-core/postgresql.md @@ -139,7 +139,7 @@ The PostgreSQL extension provides an implementation of an [SQL input source](../ ] }, "inputFormat": { - "format": "json" + "type": "json" } }, "tuningConfig": { diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index 42268c1362f1..2552c452c275 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -588,7 +588,7 @@ An example of the result is "filter": "lineitem.tbl.5" }, "inputFormat": { - "format": "tsv", + "type": "tsv", "delimiter": "|", "columns": [ "l_orderkey", diff --git a/integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json index 5e88b33a7237..8c15b54e2052 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json @@ -82,7 +82,7 @@ ] }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index 30841e90047a..ac63d7f892c5 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -72,7 +72,7 @@ "filter": "wikipedia_index_data*" }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json index 18d1c8d2ced3..f594792ff56b 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json @@ -57,7 +57,7 @@ "filter": "wikipedia_index_data*" }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json index a34bc9313f1a..ff9423b7db66 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json @@ -90,7 +90,7 @@ "filter": "wikipedia_index_data*" }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json b/integration-tests/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json index 
2ac1d503eedc..49957a512d46 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json @@ -72,7 +72,7 @@ "filter": "wikipedia_index_data*" }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_merge_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_merge_index_task.json index 9b16e3d338dd..268a3aef4a85 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_merge_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_merge_index_task.json @@ -56,7 +56,7 @@ "filter": "wikipedia_index_data*" }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_merge_reindex_task.json b/integration-tests/src/test/resources/indexer/wikipedia_merge_reindex_task.json index a4aa97777163..040fff005ce7 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_merge_reindex_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_merge_reindex_task.json @@ -53,7 +53,7 @@ "interval": "2013-08-31/2013-09-01" }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json index faca3a788baf..ba259fd830f4 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json @@ -57,7 +57,7 @@ "filter": "wikipedia_index_data*" }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json index 5376fb4957c7..3749923cd20c 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json @@ -50,7 +50,7 @@ "maxInputSegmentBytesPerTask": 1 }, "inputFormat": { - "format": "json" + "type": "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json index e6630c166947..1843d63901f9 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json @@ -56,7 +56,7 @@ "filter": "wikipedia_index_data2*" }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task.json index b5bc2df46b30..861f08ab81b6 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task.json @@ -41,7 +41,7 @@ "interval": "2013-08-31/2013-09-01" }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git 
a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json index 076b7643b686..071e66133f37 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json @@ -96,7 +96,7 @@ "interval": "2013-08-31/2013-09-01" }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index 447e90e9a39f..28a038bfcebb 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -73,7 +73,7 @@ "filter": "wikipedia_index_data*" }, "inputFormat": { - "format" : "json" + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikiticker_index_task.json b/integration-tests/src/test/resources/indexer/wikiticker_index_task.json index b24e7ee6e59c..9ee4b0ee151d 100644 --- a/integration-tests/src/test/resources/indexer/wikiticker_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikiticker_index_task.json @@ -52,7 +52,7 @@ "filter": "wikiticker-2015-09-12-sampled.json.gz" }, "inputFormat": { - "format": "json" + "type": "json" } }, "tuningConfig": { From c9f816ccd6c0433faf286db5bce28a19f483186c Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Fri, 12 Aug 2022 07:45:32 +0530 Subject: [PATCH 10/33] nit --- .../test/resources/indexer/wikipedia_parallel_reindex_task.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json index 1843d63901f9..21d064843b3b 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json @@ -50,7 +50,7 @@ }, "ioConfig": { "type": "index_parallel", - "inputsource": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data2*" From 2aa663982a51578fa3b7f68da56223a8d6b9ecf1 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Fri, 12 Aug 2022 11:34:48 +0530 Subject: [PATCH 11/33] removes ITCombiningFirehoseFactoryIndexTest --- .../ITCombiningFirehoseFactoryIndexTest.java | 100 ------------------ ...dia_combining_input_source_index_task.json | 93 ---------------- 2 files changed, 193 deletions(-) delete mode 100644 integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java delete mode 100644 integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java deleted file mode 100644 index c53d92562ba6..000000000000 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software 
Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.tests.indexer; - -import com.fasterxml.jackson.core.JsonProcessingException; -import org.apache.druid.java.util.common.Pair; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.testing.guice.DruidTestModuleFactory; -import org.apache.druid.tests.TestNGGroup; -import org.testng.annotations.Guice; -import org.testng.annotations.Test; - -import java.io.Closeable; -import java.util.function.Function; - -@Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.QUICKSTART_COMPATIBLE}) -@Guice(moduleFactory = DruidTestModuleFactory.class) -public class ITCombiningFirehoseFactoryIndexTest extends AbstractITBatchIndexTest -{ - private static final String INDEX_TASK = "/indexer/wikipedia_index_task.json"; - private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; - private static final String INDEX_DATASOURCE = "wikipedia_index_test"; - - private static final String COMBINING_INDEX_TASK = "/indexer/wikipedia_combining_input_source_index_task.json"; - private static final String COMBINING_QUERIES_RESOURCE = "/indexer/wikipedia_combining_firehose_index_queries.json"; - private static final String COMBINING_INDEX_DATASOURCE = "wikipedia_comb_index_test"; - - @Test - public void testIndexData() throws Exception - { - try ( - final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); - final Closeable ignored2 = unloader(COMBINING_INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); - ) { - final Function combiningFirehoseSpecTransform = spec -> { - try { - return StringUtils.replace( - spec, - "%%COMBINING_DATASOURCE%%", - INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix() - ); - } - catch (Exception e) { - throw new RuntimeException(e); - } - }; - final Function transform = spec -> { - try { - return StringUtils.replace( - spec, - "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", - jsonMapper.writeValueAsString("0") - ); - } - catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - }; - - doIndexTest( - INDEX_DATASOURCE, - INDEX_TASK, - transform, - INDEX_QUERIES_RESOURCE, - false, - true, - true, - new Pair<>(false, false) - ); - doIndexTest( - COMBINING_INDEX_DATASOURCE, - COMBINING_INDEX_TASK, - combiningFirehoseSpecTransform, - COMBINING_QUERIES_RESOURCE, - false, - true, - true, - new Pair<>(false, false) - ); - } - } - -} diff --git a/integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json deleted file mode 100644 index 8c15b54e2052..000000000000 --- a/integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json +++ /dev/null @@ 
-1,93 +0,0 @@ -{ - "type": "index", - "spec": { - "dataSchema": { - "dataSource": "%%DATASOURCE%%", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - }, - "metricsSpec": [ - { - "type": "count", - "name": "count" - }, - { - "type": "doubleSum", - "name": "added", - "fieldName": "added" - }, - { - "type": "doubleSum", - "name": "deleted", - "fieldName": "deleted" - }, - { - "type": "doubleSum", - "name": "delta", - "fieldName": "delta" - }, - { - "name": "thetaSketch", - "type": "thetaSketch", - "fieldName": "user" - }, - { - "name": "quantilesDoublesSketch", - "type": "quantilesDoublesSketch", - "fieldName": "delta" - }, - { - "name": "HLLSketchBuild", - "type": "HLLSketchBuild", - "fieldName": "user" - } - ], - "granularitySpec": { - "segmentGranularity": "DAY", - "queryGranularity": "second", - "intervals" : [ "2013-08-31/2013-09-02" ] - } - }, - "ioConfig": { - "type": "index", - "inputSource": { - "type": "combining", - "delegates": [ - { - "type": "local", - "baseDir": "/resources/indexer", - "filter": "wikipedia_combining_index_data.json" - }, - { - "type": "druid", - "dataSource": "%%COMBINING_DATASOURCE%%", - "interval": "2013-08-31/2013-09-02" - } - ] - }, - "inputFormat": { - "type" : "json" - } - }, - "tuningConfig": { - "type": "index", - "maxRowsPerSegment": 3 - } - } -} From a1a9ce65c57285b350e1451beb69ef76ccf13106 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Fri, 12 Aug 2022 23:23:24 +0530 Subject: [PATCH 12/33] deprecates FirehoseFactory implementations --- .../AppenderatorDriverRealtimeIndexTask.java | 1 + .../common/task/RealtimeIndexTask.java | 1 + .../overlord/RemoteTaskRunnerTest.java | 24 +++++++++---------- .../firehose/ClippedFirehoseFactory.java | 1 + .../firehose/TimedShutoffFirehoseFactory.java | 1 + 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index 7eb60b27ccbd..c9e11a61c971 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -112,6 +112,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +@Deprecated public class AppenderatorDriverRealtimeIndexTask extends AbstractTask implements ChatHandler { private static final String CTX_KEY_LOOKUP_TIER = "lookupTier"; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RealtimeIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RealtimeIndexTask.java index 65c578c49a0c..681ac50dff8c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RealtimeIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RealtimeIndexTask.java @@ -78,6 +78,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.ThreadLocalRandom; +@Deprecated public class RealtimeIndexTask extends AbstractTask { public static final String CTX_KEY_LOOKUP_TIER = "lookupTier"; diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java index c39cc9e749d0..b5e7a043d510 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java @@ -817,10 +817,10 @@ public void testBlacklistZKWorkers() throws Exception makeRemoteTaskRunner(rtrConfig); TestIndexTask task1 = new TestIndexTask( - "realtime1", - new TaskResource("realtime1", 1), + "test_index1", + new TaskResource("test_index1", 1), "foo", - TaskStatus.success("realtime1"), + TaskStatus.success("test_index1"), jsonMapper ); Future taskFuture1 = remoteTaskRunner.run(task1); @@ -835,10 +835,10 @@ public void testBlacklistZKWorkers() throws Exception ); TestIndexTask task2 = new TestIndexTask( - "realtime2", - new TaskResource("realtime2", 1), + "test_index2", + new TaskResource("test_index2", 1), "foo", - TaskStatus.running("realtime2"), + TaskStatus.running("test_index2"), jsonMapper ); Future taskFuture2 = remoteTaskRunner.run(task2); @@ -870,10 +870,10 @@ public void testBlacklistZKWorkers() throws Exception ); TestIndexTask task3 = new TestIndexTask( - "realtime3", - new TaskResource("realtime3", 1), + "test_index3", + new TaskResource("test_index3", 1), "foo", - TaskStatus.running("realtime3"), + TaskStatus.running("test_index3"), jsonMapper ); Future taskFuture3 = remoteTaskRunner.run(task3); @@ -1009,13 +1009,13 @@ public void testSuccessfulTaskOnBlacklistedWorker() throws Exception makeRemoteTaskRunner(rtrConfig); TestIndexTask task1 = new TestIndexTask( - "realtime1", new TaskResource("realtime1", 1), "foo", TaskStatus.success("realtime1"), jsonMapper + "test_index1", new TaskResource("test_index1", 1), "foo", TaskStatus.success("test_index1"), jsonMapper ); TestIndexTask task2 = new TestIndexTask( - "realtime2", new TaskResource("realtime2", 1), "foo", TaskStatus.success("realtime2"), jsonMapper + "test_index2", new TaskResource("test_index2", 1), "foo", TaskStatus.success("test_index2"), jsonMapper ); TestIndexTask task3 = new TestIndexTask( - "realtime3", new TaskResource("realtime3", 1), "foo", TaskStatus.success("realtime3"), jsonMapper + "test_index3", new TaskResource("test_index3", 1), "foo", TaskStatus.success("test_index3"), jsonMapper ); Future taskFuture1 = remoteTaskRunner.run(task1); diff --git a/server/src/main/java/org/apache/druid/segment/realtime/firehose/ClippedFirehoseFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/ClippedFirehoseFactory.java index 9654d1f11b52..9ca595ce5c28 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/ClippedFirehoseFactory.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/firehose/ClippedFirehoseFactory.java @@ -34,6 +34,7 @@ /** * Creates firehoses clipped to a particular time interval. Useful for enforcing min time, max time, and time windows. 
*/ +@Deprecated public class ClippedFirehoseFactory implements FirehoseFactory { private final FirehoseFactory delegate; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/firehose/TimedShutoffFirehoseFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/firehose/TimedShutoffFirehoseFactory.java index 88d6d6ede228..9bfda42c8aac 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/firehose/TimedShutoffFirehoseFactory.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/firehose/TimedShutoffFirehoseFactory.java @@ -43,6 +43,7 @@ * Each {@link Firehose} created by this factory spins up and manages one thread for calling {@link Firehose#close()} * asynchronously at the specified {@link #shutoffTime}. */ +@Deprecated public class TimedShutoffFirehoseFactory implements FirehoseFactory { private static final EmittingLogger log = new EmittingLogger(FirehoseFactory.class); From 0682657e4aeacef03d1e15503ebd999b220c5564 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Tue, 27 Sep 2022 22:11:13 +0530 Subject: [PATCH 13/33] merge latest changes --- .../apache/druid/indexing/kafka/KafkaSamplerSpecTest.java | 5 +++-- .../druid/indexing/kinesis/KinesisSamplerSpecTest.java | 4 ++-- .../seekablestream/SeekableStreamSamplerSpecTest.java | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java index 757131f1bde0..76cb4633ab2d 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java @@ -225,6 +225,7 @@ public void testWithInputRowParser() throws IOException null, null, null, + null, null ), null, @@ -242,8 +243,8 @@ public void testWithInputRowParser() throws IOException KafkaSamplerSpec samplerSpec = new KafkaSamplerSpec( supervisorSpec, - new SamplerConfig(5, null), - new InputSourceSampler(), + new SamplerConfig(5, null, null, null), + new InputSourceSampler(new DefaultObjectMapper()), OBJECT_MAPPER ); diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java index ca0384fe5677..912b67985f3f 100644 --- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java +++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java @@ -240,8 +240,8 @@ public void testWithInputRowParser() throws IOException KinesisSamplerSpec samplerSpec = new TestableKinesisSamplerSpec( supervisorSpec, - new SamplerConfig(5, null), - new InputSourceSampler(), + new SamplerConfig(5, null, null, null), + new InputSourceSampler(new DefaultObjectMapper()), null ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java index 3d764c8314b5..4ddd60b5e23f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java @@ -172,8 +172,8 @@ public void testSampleWithInputRowParser() throws Exception SeekableStreamSamplerSpec samplerSpec = new TestableSeekableStreamSamplerSpec( supervisorSpec, - new SamplerConfig(5, null), - new InputSourceSampler() + new SamplerConfig(5, null, null, null), + new InputSourceSampler(new DefaultObjectMapper()) ); SamplerResponse response = samplerSpec.sample(); From ed89707b3eacfc8fc4d5fb29a412b478a121340b Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Wed, 28 Sep 2022 03:12:49 +0530 Subject: [PATCH 14/33] merge latest changes --- .../java/org/apache/druid/indexing/common/TestIndexTask.java | 2 +- .../apache/druid/indexing/common/task/IngestionTestBase.java | 2 +- .../org/apache/druid/cli/validate/DruidJsonValidatorTest.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/TestIndexTask.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/TestIndexTask.java index 72b0a610156f..4c3f9651dba3 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/TestIndexTask.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/TestIndexTask.java @@ -60,7 +60,7 @@ public TestIndexTask( new IndexTask.IndexIOConfig( null, new LocalInputSource(new File("lol"), "rofl"), - new JsonInputFormat(null, null, null), + new JsonInputFormat(null, null, null, null, null), false, false ), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index 11196d86876b..7c321fe26390 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -244,7 +244,7 @@ public static InputFormat createInputFormatFromParseSpec(ParseSpec parseSpec) { if (parseSpec instanceof JSONParseSpec) { JSONParseSpec jsonParseSpec = (JSONParseSpec) parseSpec; - return new JsonInputFormat(jsonParseSpec.getFlattenSpec(), jsonParseSpec.getFeatureSpec(), jsonParseSpec.getKeepNullColumns()); + return new JsonInputFormat(jsonParseSpec.getFlattenSpec(), jsonParseSpec.getFeatureSpec(), jsonParseSpec.getKeepNullColumns(), null, null); } else if (parseSpec instanceof CSVParseSpec) { CSVParseSpec csvParseSpec = (CSVParseSpec) parseSpec; boolean getColumnsFromHeader = csvParseSpec.isHasHeaderRow() && csvParseSpec.getSkipHeaderRows() == 0; diff --git a/services/src/test/java/org/apache/druid/cli/validate/DruidJsonValidatorTest.java b/services/src/test/java/org/apache/druid/cli/validate/DruidJsonValidatorTest.java index 5f792b892b2b..3b920dfa656b 100644 --- a/services/src/test/java/org/apache/druid/cli/validate/DruidJsonValidatorTest.java +++ b/services/src/test/java/org/apache/druid/cli/validate/DruidJsonValidatorTest.java @@ -144,7 +144,7 @@ public void testTaskValidator() throws Exception new IndexTask.IndexIOConfig( null, new LocalInputSource(new File("lol"), "rofl"), - new JsonInputFormat(null, null, null), + new JsonInputFormat(null, null, null, null, null), false, false ), From 98383e41681e7b5da89744ef173eabfdd269cc9b Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Wed, 8 Feb 2023 13:06:24 +0530 Subject: [PATCH 15/33] fix conflicts --- 
.../apache/druid/indexing/common/task/TaskSerdeTest.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java index 88ebdf01042f..aaebc28920ae 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java @@ -24,7 +24,10 @@ import com.fasterxml.jackson.databind.jsontype.NamedType; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.druid.data.input.Firehose; +import org.apache.druid.data.input.FirehoseFactory; import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.InputRowParser; import org.apache.druid.data.input.impl.LocalInputSource; import org.apache.druid.data.input.impl.NoopInputFormat; import org.apache.druid.data.input.impl.TimestampSpec; @@ -43,13 +46,11 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.RealtimeIOConfig; import org.apache.druid.segment.indexing.RealtimeTuningConfig; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.realtime.FireDepartment; -import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory; import org.apache.druid.server.security.AuthTestUtils; import org.apache.druid.timeline.partition.NoneShardSpec; import org.hamcrest.CoreMatchers; @@ -64,7 +65,6 @@ public class TaskSerdeTest { private final ObjectMapper jsonMapper; - private final RowIngestionMetersFactory rowIngestionMetersFactory; private final IndexSpec indexSpec = new IndexSpec(); @Rule @@ -74,7 +74,6 @@ public TaskSerdeTest() { TestUtils testUtils = new TestUtils(); jsonMapper = testUtils.getTestObjectMapper(); - rowIngestionMetersFactory = testUtils.getRowIngestionMetersFactory(); for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) { jsonMapper.registerModule(jacksonModule); @@ -402,7 +401,7 @@ public void testRealtimeIndexTaskSerde() throws Exception jsonMapper ), new RealtimeIOConfig( - new LocalFirehoseFactory(new File("lol"), "rofl", null), + new MockFirehoseFactory(), (schema, config, metrics) -> null ), From 87a865484280618df4e086796646e60ea04f718c Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Wed, 8 Feb 2023 13:18:49 +0530 Subject: [PATCH 16/33] fix conflicts --- .../seekablestream/SeekableStreamSamplerSpec.java | 7 +++++++ .../seekablestream/SeekableStreamSamplerSpecTest.java | 8 ++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java index 07641bb74d9e..1c3ed81a0878 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java @@ -32,6 +32,7 @@ import 
org.apache.druid.data.input.InputSource; import org.apache.druid.data.input.InputSourceReader; import org.apache.druid.data.input.InputSplit; +import org.apache.druid.data.input.InputStats; import org.apache.druid.data.input.SplitHintSpec; import org.apache.druid.data.input.impl.ByteEntity; import org.apache.druid.data.input.impl.InputRowParser; @@ -215,6 +216,12 @@ public void close() throws IOException }; } + @Override + public CloseableIterator read(InputStats inputStats) throws IOException + { + return null; + } + @Override public CloseableIterator sample() { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java index 4ddd60b5e23f..8bc91f7c1fa6 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpecTest.java @@ -40,6 +40,7 @@ import org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord; import org.apache.druid.indexing.seekablestream.common.RecordSupplier; import org.apache.druid.indexing.seekablestream.common.StreamPartition; +import org.apache.druid.indexing.seekablestream.supervisor.IdleConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIOConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; import org.apache.druid.indexing.seekablestream.supervisor.autoscaler.AutoScalerConfig; @@ -149,6 +150,7 @@ public void testSampleWithInputRowParser() throws Exception null, null, null, + null, null ); @@ -327,7 +329,8 @@ private TestableSeekableStreamSupervisorIOConfig( Period lateMessageRejectionPeriod, Period earlyMessageRejectionPeriod, @Nullable AutoScalerConfig autoScalerConfig, - DateTime lateMessageRejectionStartDateTime + DateTime lateMessageRejectionStartDateTime, + @Nullable IdleConfig idleConfig ) { super( @@ -343,7 +346,8 @@ private TestableSeekableStreamSupervisorIOConfig( lateMessageRejectionPeriod, earlyMessageRejectionPeriod, autoScalerConfig, - lateMessageRejectionStartDateTime + lateMessageRejectionStartDateTime, + idleConfig ); } } From d5c7c2452f4a1bb8bc023c17cb9540b00f39c381 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Wed, 8 Feb 2023 14:17:25 +0530 Subject: [PATCH 17/33] fix conflicts --- .../org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java index d3bf222d7aa1..47327dbeee14 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java @@ -227,6 +227,7 @@ public void testWithInputRowParser() throws IOException null, null, null, + null, null ), null, From 6f531fc2d8a73d4f1571d3ab5401387752e1544c Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Wed, 8 Feb 2023 19:00:44 +0530 Subject: [PATCH 18/33] register MockFirehoseFactory --- .../org/apache/druid/indexing/common/task/TaskSerdeTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java index aaebc28920ae..7bc6548dc209 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java @@ -80,7 +80,8 @@ public TaskSerdeTest() } jsonMapper.registerSubtypes( new NamedType(ParallelIndexTuningConfig.class, "index_parallel"), - new NamedType(IndexTuningConfig.class, "index") + new NamedType(IndexTuningConfig.class, "index"), + new NamedType(MockFirehoseFactory.class, "mock") ); } From 411cb1b7100977e4d9466272b2532b55d254ca26 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Mon, 20 Feb 2023 22:11:22 +0530 Subject: [PATCH 19/33] processedBytes val correction while using parse spec --- .../indexing/common/task/AbstractBatchIndexTask.java | 1 - .../apache/druid/indexing/common/task/IndexTaskTest.java | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index d211e11af3be..6d4609ed3d93 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -82,7 +82,6 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index 3e498c2444ea..5b6dcbfcc421 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -1620,13 +1620,12 @@ public void testMultipleParseExceptionsSuccess() throws Exception IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); - final int processedBytes = useInputFormatApi ? 657 : 0; Map expectedMetrics = ImmutableMap.of( RowIngestionMeters.DETERMINE_PARTITIONS, ImmutableMap.of( RowIngestionMeters.PROCESSED_WITH_ERROR, 0, RowIngestionMeters.PROCESSED, 4, - RowIngestionMeters.PROCESSED_BYTES, processedBytes, + RowIngestionMeters.PROCESSED_BYTES, 657, RowIngestionMeters.UNPARSEABLE, 4, RowIngestionMeters.THROWN_AWAY, 1 ), @@ -1634,7 +1633,7 @@ public void testMultipleParseExceptionsSuccess() throws Exception ImmutableMap.of( RowIngestionMeters.PROCESSED_WITH_ERROR, 3, RowIngestionMeters.PROCESSED, 1, - RowIngestionMeters.PROCESSED_BYTES, processedBytes, + RowIngestionMeters.PROCESSED_BYTES, 657, RowIngestionMeters.UNPARSEABLE, 4, RowIngestionMeters.THROWN_AWAY, 1 ) @@ -1829,7 +1828,7 @@ public void testMultipleParseExceptionsFailure() throws Exception ImmutableMap.of( RowIngestionMeters.PROCESSED_WITH_ERROR, 0, RowIngestionMeters.PROCESSED, 1, - RowIngestionMeters.PROCESSED_BYTES, useInputFormatApi ? 
182 : 0, + RowIngestionMeters.PROCESSED_BYTES, 182, RowIngestionMeters.UNPARSEABLE, 3, RowIngestionMeters.THROWN_AWAY, 1 ) @@ -1954,7 +1953,7 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc ImmutableMap.of( RowIngestionMeters.PROCESSED_WITH_ERROR, 0, RowIngestionMeters.PROCESSED, 1, - RowIngestionMeters.PROCESSED_BYTES, useInputFormatApi ? 182 : 0, + RowIngestionMeters.PROCESSED_BYTES, 182, RowIngestionMeters.UNPARSEABLE, 3, RowIngestionMeters.THROWN_AWAY, 1 ), From d7820c1276422939644767e372538950df5c3e3e Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Thu, 23 Feb 2023 22:48:01 +0530 Subject: [PATCH 20/33] update revised ITs spec files --- ...a => ITCombiningInputSourceIndexTest.java} | 10 +- ...CombiningInputSourceParallelIndexTest.java | 2 +- .../testsEx/indexer/ITTransformTest.java | 54 --------- ...combining_input_source_index_queries.json} | 0 ...ia_combining_input_source_index_task.json} | 50 ++++---- ...ipedia_index_task_with_dimension_spec.json | 48 ++++---- ...edia_index_task_with_granularity_spec.json | 28 +++-- .../wikipedia_index_task_with_transform.json | 103 ----------------- ...ia_index_with_merge_column_limit_task.json | 48 ++++---- .../indexer/wikipedia_merge_index_task.json | 26 ++--- .../indexer/wikipedia_merge_reindex_task.json | 30 +++-- .../wikipedia_parallel_index_task.json | 48 ++++---- ...ia_parallel_ingest_segment_index_task.json | 30 +++-- .../wikipedia_parallel_reindex_task.json | 48 ++++---- .../indexer/wikipedia_reindex_task.json | 26 ++--- ...ikipedia_reindex_task_with_transforms.json | 108 ------------------ .../wikipedia_with_timestamp_index_task.json | 54 ++++----- .../indexer/wikiticker_index_task.json | 33 +++--- ...combining_input_source_index_queries.json} | 0 .../duty/ITAutoCompactionTest.java | 6 + ...CombiningInputSourceParallelIndexTest.java | 2 +- .../druid/tests/indexer/ITTransformTest.java | 51 --------- ...combining_input_source_index_queries.json} | 0 .../wikipedia_index_task_with_transform.json | 101 ---------------- ...ikipedia_reindex_task_with_transforms.json | 106 ----------------- 25 files changed, 236 insertions(+), 776 deletions(-) rename integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/{ITCombiningFirehoseFactoryIndexTest.java => ITCombiningInputSourceIndexTest.java} (91%) rename integration-tests-ex/cases/src/test/resources/indexer/{wikipedia_combining_firehose_index_queries.json => wikipedia_combining_input_source_index_queries.json} (100%) rename integration-tests-ex/cases/src/test/resources/indexer/{wikipedia_combining_firehose_index_task.json => wikipedia_combining_input_source_index_task.json} (70%) delete mode 100644 integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_transform.json delete mode 100644 integration-tests-ex/cases/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json rename integration-tests-ex/cases/src/test/resources/multi-stage-query/{wikipedia_combining_firehose_index_queries.json => wikipedia_combining_input_source_index_queries.json} (100%) rename integration-tests/src/test/resources/indexer/{wikipedia_combining_firehose_index_queries.json => wikipedia_combining_input_source_index_queries.json} (100%) delete mode 100644 integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json delete mode 100644 integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json diff --git 
a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningFirehoseFactoryIndexTest.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningInputSourceIndexTest.java similarity index 91% rename from integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningFirehoseFactoryIndexTest.java rename to integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningInputSourceIndexTest.java index 8d394a0297ee..3e53648b3c3a 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningFirehoseFactoryIndexTest.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningInputSourceIndexTest.java @@ -33,14 +33,14 @@ @RunWith(DruidTestRunner.class) @Category(BatchIndex.class) -public class ITCombiningFirehoseFactoryIndexTest extends AbstractITBatchIndexTest +public class ITCombiningInputSourceIndexTest extends AbstractITBatchIndexTest { private static final String INDEX_TASK = "/indexer/wikipedia_index_task.json"; private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; private static final String INDEX_DATASOURCE = "wikipedia_index_test"; - private static final String COMBINING_INDEX_TASK = "/indexer/wikipedia_combining_firehose_index_task.json"; - private static final String COMBINING_QUERIES_RESOURCE = "/indexer/wikipedia_combining_firehose_index_queries.json"; + private static final String COMBINING_INDEX_TASK = "/indexer/wikipedia_combining_input_source_index_task.json"; + private static final String COMBINING_QUERIES_RESOURCE = "/indexer/wikipedia_combining_input_source_index_queries.json"; private static final String COMBINING_INDEX_DATASOURCE = "wikipedia_comb_index_test"; @Test @@ -50,7 +50,7 @@ public void testIndexData() throws Exception final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); final Closeable ignored2 = unloader(COMBINING_INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); ) { - final Function combiningFirehoseSpecTransform = spec -> { + final Function combiningInputSourceSpecTransform = spec -> { try { return StringUtils.replace( spec, @@ -88,7 +88,7 @@ public void testIndexData() throws Exception doIndexTest( COMBINING_INDEX_DATASOURCE, COMBINING_INDEX_TASK, - combiningFirehoseSpecTransform, + combiningInputSourceSpecTransform, COMBINING_QUERIES_RESOURCE, false, true, diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningInputSourceParallelIndexTest.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningInputSourceParallelIndexTest.java index 5ec9ea0dbc45..75b5a0e2815e 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningInputSourceParallelIndexTest.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningInputSourceParallelIndexTest.java @@ -42,7 +42,7 @@ public class ITCombiningInputSourceParallelIndexTest extends AbstractITBatchInde private static final String INDEX_DATASOURCE = "wikipedia_index_test"; private static final String COMBINING_INDEX_TASK = "/indexer/wikipedia_combining_input_source_index_parallel_task.json"; - private static final String COMBINING_QUERIES_RESOURCE = "/indexer/wikipedia_combining_firehose_index_queries.json"; + private static final String COMBINING_QUERIES_RESOURCE = 
"/indexer/wikipedia_combining_input_source_index_queries.json"; private static final String COMBINING_INDEX_DATASOURCE = "wikipedia_comb_index_test"; @Test diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITTransformTest.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITTransformTest.java index 61cb33fd2c60..f8769461f6a5 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITTransformTest.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITTransformTest.java @@ -22,7 +22,6 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.testsEx.categories.BatchIndex; import org.apache.druid.testsEx.config.DruidTestRunner; -import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; @@ -34,12 +33,10 @@ @Category(BatchIndex.class) public class ITTransformTest extends AbstractITBatchIndexTest { - private static final String INDEX_TASK_WITH_FIREHOSE = "/indexer/wikipedia_index_task_with_transform.json"; private static final String INDEX_TASK_WITH_INPUT_SOURCE = "/indexer/wikipedia_index_task_with_inputsource_transform.json"; private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries_with_transform.json"; private static final String INDEX_DATASOURCE = "wikipedia_index_test"; - private static final String REINDEX_TASK = "/indexer/wikipedia_reindex_task_with_transforms.json"; private static final String REINDEX_TASK_WITH_DRUID_INPUT_SOURCE = "/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json"; private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries_with_transforms.json"; private static final String REINDEX_DATASOURCE = "wikipedia_reindex_test"; @@ -71,55 +68,4 @@ public void testIndexAndReIndexWithTransformSpec() throws IOException ); } } - - @Test - @Ignore - public void testIndexAndReIndexUsingIngestSegmentWithTransforms() throws IOException - { - // TODO: re-instate this test when https://github.com/apache/druid/issues/9591 is fixed - // Move the re-index step into testIndexAndReIndexWithTransformSpec for faster tests! 
- final String reindexDatasource = REINDEX_DATASOURCE + "-testIndexData"; - try ( - final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); - final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix()) - ) { - doIndexTest( - INDEX_DATASOURCE, - INDEX_TASK_WITH_INPUT_SOURCE, - INDEX_QUERIES_RESOURCE, - false, - true, - true, - new Pair<>(false, false) - ); - doReindexTest( - INDEX_DATASOURCE, - reindexDatasource, - REINDEX_TASK, - REINDEX_QUERIES_RESOURCE, - new Pair<>(false, false) - ); - } - } - - @Test - @Ignore - public void testIndexWithFirehoseAndTransforms() throws IOException - { - // TODO: re-instate this test when https://github.com/apache/druid/issues/9589 is fixed - final String indexDatasource = INDEX_DATASOURCE + "-firehose"; - try ( - final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); - ) { - doIndexTest( - indexDatasource, - INDEX_TASK_WITH_FIREHOSE, - INDEX_QUERIES_RESOURCE, - false, - true, - true, - new Pair<>(false, false) - ); - } - } } diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_firehose_index_queries.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_input_source_index_queries.json similarity index 100% rename from integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_firehose_index_queries.json rename to integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_input_source_index_queries.json diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_firehose_index_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json similarity index 70% rename from integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_firehose_index_task.json rename to integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json index 1e7deffc6b5b..50e2dece004b 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_firehose_index_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json @@ -3,6 +3,25 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "count", @@ -43,35 +62,11 @@ "segmentGranularity": "DAY", "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "combining", "delegates": [ { @@ -80,11 +75,14 @@ "filter": "wikipedia_combining_index_data.json" }, { - "type": "ingestSegment", + "type": "druid", "dataSource": "%%COMBINING_DATASOURCE%%", "interval": "2013-08-31/2013-09-02" } ] + }, + "inputFormat": { + "type": "json" } }, 
"tuningConfig": { diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index 1fa8b4eba321..1d6dc6ed3004 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -3,6 +3,25 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "count", @@ -43,38 +62,17 @@ "segmentGranularity": "DAY", "queryGranularity": "DAY", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data*" + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json index 544d191d9257..832b43b7da7f 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_granularity_spec.json @@ -3,6 +3,14 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + {"type": "string", "name": "language", "createBitmapIndex": false} + ] + }, "metricsSpec": [ { "type": "count", @@ -39,27 +47,17 @@ "fieldName": "user" } ], - "granularitySpec": %%GRANULARITYSPEC%%, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - {"type": "string", "name": "language", "createBitmapIndex": false} - ] - } - } - } + "granularitySpec": %%GRANULARITYSPEC%% }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data*" + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_transform.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_transform.json deleted file mode 100644 index 9f3128fb303e..000000000000 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_transform.json +++ /dev/null @@ -1,103 +0,0 @@ -{ - "type": "index", - "spec": { - "dataSchema": { - "dataSource": "%%DATASOURCE%%", - "metricsSpec": [ - { - "type": "count", - "name": "count" - }, - { - "type": "doubleSum", - "name": 
"added", - "fieldName": "added" - }, - { - "type": "doubleSum", - "name": "triple-added", - "fieldName": "triple-added" - }, - { - "type": "doubleSum", - "name": "deleted", - "fieldName": "deleted" - }, - { - "type": "doubleSum", - "name": "delta", - "fieldName": "delta" - }, - { - "name": "thetaSketch", - "type": "thetaSketch", - "fieldName": "user" - }, - { - "name": "quantilesDoublesSketch", - "type": "quantilesDoublesSketch", - "fieldName": "delta" - }, - { - "name": "HLLSketchBuild", - "type": "HLLSketchBuild", - "fieldName": "user" - } - ], - "granularitySpec": { - "segmentGranularity": "DAY", - "queryGranularity": "second", - "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } - }, - "transformSpec": { - "transforms": [ - { - "type": "expression", - "name": "language", - "expression": "concat('language-', language)" - }, - { - "type": "expression", - "name": "triple-added", - "expression": "added * 3" - } - ] - } - }, - "ioConfig": { - "type": "index", - "firehose": { - "type": "local", - "baseDir": "/resources/data/batch_index/json", - "filter": "wikipedia_index_data*" - } - }, - "tuningConfig": { - "type": "index", - "maxRowsPerSegment": 3 - } - } -} \ No newline at end of file diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json index 35b115c9f191..3f49cc9f120c 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_with_merge_column_limit_task.json @@ -3,6 +3,25 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "count", @@ -43,38 +62,17 @@ "segmentGranularity": "DAY", "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data*" + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_merge_index_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_merge_index_task.json index 43264a8c6751..36bb6a9c27ad 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_merge_index_task.json +++ 
b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_merge_index_task.json @@ -3,6 +3,14 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "continent" + ] + }, "metricsSpec": [ { "type": "count", @@ -38,27 +46,17 @@ "segmentGranularity": "DAY", "queryGranularity": "DAY", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "continent" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data*" + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_merge_reindex_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_merge_reindex_task.json index 127461dd117c..4a8b60e9466f 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_merge_reindex_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_merge_reindex_task.json @@ -3,6 +3,15 @@ "spec": { "dataSchema": { "dataSource": "%%REINDEX_DATASOURCE%%", + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec": { + "dimensions": [ + "continent" + ] + }, "metricsSpec": [ { "type": "doubleSum", @@ -34,28 +43,17 @@ "segmentGranularity": "DAY", "queryGranularity": "DAY", "intervals" : [ "2013-08-31/2013-09-01" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp", - "format": "iso" - }, - "dimensionsSpec": { - "dimensions": [ - "continent" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { - "type": "ingestSegment", + "inputSource": { + "type": "druid", "dataSource": "%%DATASOURCE%%", "interval": "2013-08-31/2013-09-01" + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_index_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_index_task.json index e83b1109da02..ba259fd830f4 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_index_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_index_task.json @@ -3,6 +3,25 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "count", @@ -28,38 +47,17 @@ "segmentGranularity": "DAY", "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } } }, "ioConfig": { "type": "index_parallel", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", 
"filter": "wikipedia_index_data*" + }, + "inputFormat": { + "type" : "json" } }, "tuningConfig": { diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json index e995a0f299e9..3749923cd20c 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json @@ -3,6 +3,15 @@ "spec": { "dataSchema": { "dataSource": "%%REINDEX_DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensionExclusions": [ + "robot", + "continent" + ] + }, "metricsSpec": [ { "type": "count", @@ -30,29 +39,18 @@ "intervals": [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format": "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensionExclusions": [ - "robot", - "continent" - ] - } - } } }, "ioConfig": { "type": "index_parallel", - "firehose": { - "type": "ingestSegment", + "inputSource": { + "type": "druid", "dataSource": "%%DATASOURCE%%", "interval": "2013-08-31/2013-09-02", "maxInputSegmentBytesPerTask": 1 + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_reindex_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_reindex_task.json index 9ab153c6862d..8e3a5bebd458 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_reindex_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_parallel_reindex_task.json @@ -3,6 +3,25 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, "metricsSpec": [ { "type": "count", @@ -27,38 +46,17 @@ "granularitySpec": { "segmentGranularity": "DAY", "queryGranularity": "second" - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - } - } } }, "ioConfig": { "type": "index_parallel", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data2*" + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_reindex_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_reindex_task.json index e277a9127f49..8a526715f41f 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_reindex_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_reindex_task.json @@ -3,6 +3,13 @@ "spec": { "dataSchema": { "dataSource": "%%REINDEX_DATASOURCE%%", + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec": { + "dimensionExclusions" : ["robot", 
"continent"] + }, "metricsSpec": [ { "type": "doubleSum", @@ -24,26 +31,17 @@ "segmentGranularity": "DAY", "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-01" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp", - "format": "iso" - }, - "dimensionsSpec": { - "dimensionExclusions" : ["robot", "continent"] - } - } } }, "ioConfig": { "type": "index", - "firehose": { - "type": "ingestSegment", + "inputSource": { + "type": "druid", "dataSource": "%%DATASOURCE%%", "interval": "2013-08-31/2013-09-01" + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json deleted file mode 100644 index 029b136d441b..000000000000 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json +++ /dev/null @@ -1,108 +0,0 @@ -{ - "type": "index", - "spec": { - "dataSchema": { - "dataSource": "%%REINDEX_DATASOURCE%%", - "metricsSpec": [ - { - "type": "doubleSum", - "name": "added", - "fieldName": "added" - }, - { - "type": "doubleSum", - "name": "triple-added", - "fieldName": "triple-added" - }, - { - "type": "doubleSum", - "name": "one-plus-triple-added", - "fieldName": "one-plus-triple-added" - }, - { - "type": "doubleSum", - "name": "deleted", - "fieldName": "deleted" - }, - { - "type": "doubleSum", - "name": "double-deleted", - "fieldName": "double-deleted" - }, - { - "type": "doubleSum", - "name": "delta", - "fieldName": "delta" - } - ], - "granularitySpec": { - "segmentGranularity": "DAY", - "queryGranularity": "second", - "intervals" : [ "2013-08-31/2013-09-01" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp", - "format": "iso" - }, - "dimensionsSpec": { - "dimensions": [ - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "page", - "newPage", - "anonymous", - "namespace", - "country", - "region", - "city" - ] - }, - "transformSpec": { - "transforms": [ - { - "type": "expression", - "name": "newPage", - "expression": "page" - }, - { - "type": "expression", - "name": "city", - "expression": "concat('city-', city)" - }, - { - "type": "expression", - "name": "one-plus-triple-added", - "expression": "\"triple-added\" + 1" - }, - { - "type": "expression", - "name": "delta", - "expression": "\"delta\" / 2" - }, - { - "type": "expression", - "name": "double-deleted", - "expression": "deleted * 2" - } - ] - } - } - } - }, - "ioConfig": { - "type": "index", - "firehose": { - "type": "ingestSegment", - "dataSource": "%%DATASOURCE%%", - "interval": "2013-08-31/2013-09-01" - } - }, - "tuningConfig": { - "type": "index" - } - } -} diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index 4f13b70cb725..ec29d49e43d1 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -3,6 +3,27 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp", + "format": "auto" //test + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": 
"language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city", + "timestamp" + ] + }, "metricsSpec": [ { "type": "count", @@ -43,40 +64,19 @@ "segmentGranularity": "DAY", "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "parser": { - "parseSpec": { - "format" : "json", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city", - "timestamp" - ] - } - } } }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/resources/data/batch_index/json", "filter": "wikipedia_index_data*" - } + }, + "inputFormat": { + "type": "json" + }, + "appendToExisting": false // test }, "tuningConfig": { "type": "index", diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikiticker_index_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikiticker_index_task.json index d450c7b9458b..9ee4b0ee151d 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikiticker_index_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikiticker_index_task.json @@ -3,6 +3,17 @@ "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", + "dimensionsSpec": { + "dimensions": [ + "channel", + "page", + "user" + ] + }, + "timestampSpec": { + "format": "auto", + "column": "time" + }, "granularitySpec": { "type": "uniform", "segmentGranularity": "day", @@ -11,23 +22,6 @@ "2015-09-12/2015-09-13" ] }, - "parser": { - "type": "hadoopyString", - "parseSpec": { - "format": "json", - "dimensionsSpec": { - "dimensions": [ - "channel", - "page", - "user" - ] - }, - "timestampSpec": { - "format": "auto", - "column": "time" - } - } - }, "metricsSpec": [ { "name": "count", @@ -52,10 +46,13 @@ }, "ioConfig": { "type": "index", - "firehose": { + "inputSource": { "type": "local", "baseDir": "/shared/wikiticker-it", "filter": "wikiticker-2015-09-12-sampled.json.gz" + }, + "inputFormat": { + "type": "json" } }, "tuningConfig": { diff --git a/integration-tests-ex/cases/src/test/resources/multi-stage-query/wikipedia_combining_firehose_index_queries.json b/integration-tests-ex/cases/src/test/resources/multi-stage-query/wikipedia_combining_input_source_index_queries.json similarity index 100% rename from integration-tests-ex/cases/src/test/resources/multi-stage-query/wikipedia_combining_firehose_index_queries.json rename to integration-tests-ex/cases/src/test/resources/multi-stage-query/wikipedia_combining_input_source_index_queries.json diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index 3df5c230fc48..8abac846ff83 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -1523,6 +1523,10 @@ private void loadData(String indexTask) throws Exception private void loadData(String indexTask, Map specs) throws Exception { + LOG.info("Number of segments present before loading data for datasource %s - %s", + fullDatasourceName, 
coordinator.getSegments(fullDatasourceName)); + LOG.info("Old segments metadata for datasource %s - %s", + fullDatasourceName, coordinator.getFullSegmentsMetadata(fullDatasourceName)); String taskSpec = getResourceAsString(indexTask); taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); taskSpec = StringUtils.replace( @@ -1699,6 +1703,8 @@ private void verifySegmentsCount(int numExpectedSegments) () -> { int metadataSegmentCount = coordinator.getSegments(fullDatasourceName).size(); LOG.info("Current metadata segment count: %d, expected: %d", metadataSegmentCount, numExpectedSegments); + LOG.info("Segments metadata for datasource %s - %s", + fullDatasourceName, coordinator.getFullSegmentsMetadata(fullDatasourceName)); return metadataSegmentCount == numExpectedSegments; }, "Compaction segment count check" diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java index ce49b457b0aa..7619bcf8b6e0 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java @@ -41,7 +41,7 @@ public class ITCombiningInputSourceParallelIndexTest extends AbstractITBatchInde private static final String INDEX_DATASOURCE = "wikipedia_index_test"; private static final String COMBINING_INDEX_TASK = "/indexer/wikipedia_combining_input_source_index_parallel_task.json"; - private static final String COMBINING_QUERIES_RESOURCE = "/indexer/wikipedia_combining_firehose_index_queries.json"; + private static final String COMBINING_QUERIES_RESOURCE = "/indexer/wikipedia_combining_input_source_index_queries.json"; private static final String COMBINING_INDEX_DATASOURCE = "wikipedia_comb_index_test"; @Test diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java index e98623dcd899..045450a04eab 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -32,12 +32,10 @@ @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITTransformTest extends AbstractITBatchIndexTest { - private static final String INDEX_TASK_WITH_FIREHOSE = "/indexer/wikipedia_index_task_with_transform.json"; private static final String INDEX_TASK_WITH_INPUT_SOURCE = "/indexer/wikipedia_index_task_with_inputsource_transform.json"; private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries_with_transform.json"; private static final String INDEX_DATASOURCE = "wikipedia_index_test"; - private static final String REINDEX_TASK = "/indexer/wikipedia_reindex_task_with_transforms.json"; private static final String REINDEX_TASK_WITH_DRUID_INPUT_SOURCE = "/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json"; private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries_with_transforms.json"; private static final String REINDEX_DATASOURCE = "wikipedia_reindex_test"; @@ -69,53 +67,4 @@ public void testIndexAndReIndexWithTransformSpec() throws IOException ); } } - - @Test(enabled = false) - public void testIndexAndReIndexUsingIngestSegmentWithTransforms() throws 
IOException - { - // TODO: re-instate this test when https://github.com/apache/druid/issues/9591 is fixed - // Move the re-index step into testIndexAndReIndexWithTransformSpec for faster tests! - final String reindexDatasource = REINDEX_DATASOURCE + "-testIndexData"; - try ( - final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); - final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix()) - ) { - doIndexTest( - INDEX_DATASOURCE, - INDEX_TASK_WITH_INPUT_SOURCE, - INDEX_QUERIES_RESOURCE, - false, - true, - true, - new Pair<>(false, false) - ); - doReindexTest( - INDEX_DATASOURCE, - reindexDatasource, - REINDEX_TASK, - REINDEX_QUERIES_RESOURCE, - new Pair<>(false, false) - ); - } - } - - @Test(enabled = false) - public void testIndexWithFirehoseAndTransforms() throws IOException - { - // TODO: re-instate this test when https://github.com/apache/druid/issues/9589 is fixed - final String indexDatasource = INDEX_DATASOURCE + "-firehose"; - try ( - final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); - ) { - doIndexTest( - indexDatasource, - INDEX_TASK_WITH_FIREHOSE, - INDEX_QUERIES_RESOURCE, - false, - true, - true, - new Pair<>(false, false) - ); - } - } } diff --git a/integration-tests/src/test/resources/indexer/wikipedia_combining_firehose_index_queries.json b/integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_queries.json similarity index 100% rename from integration-tests/src/test/resources/indexer/wikipedia_combining_firehose_index_queries.json rename to integration-tests/src/test/resources/indexer/wikipedia_combining_input_source_index_queries.json diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json deleted file mode 100644 index ff9423b7db66..000000000000 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json +++ /dev/null @@ -1,101 +0,0 @@ -{ - "type": "index", - "spec": { - "dataSchema": { - "dataSource": "%%DATASOURCE%%", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - }, - "metricsSpec": [ - { - "type": "count", - "name": "count" - }, - { - "type": "doubleSum", - "name": "added", - "fieldName": "added" - }, - { - "type": "doubleSum", - "name": "triple-added", - "fieldName": "triple-added" - }, - { - "type": "doubleSum", - "name": "deleted", - "fieldName": "deleted" - }, - { - "type": "doubleSum", - "name": "delta", - "fieldName": "delta" - }, - { - "name": "thetaSketch", - "type": "thetaSketch", - "fieldName": "user" - }, - { - "name": "quantilesDoublesSketch", - "type": "quantilesDoublesSketch", - "fieldName": "delta" - }, - { - "name": "HLLSketchBuild", - "type": "HLLSketchBuild", - "fieldName": "user" - } - ], - "granularitySpec": { - "segmentGranularity": "DAY", - "queryGranularity": "second", - "intervals" : [ "2013-08-31/2013-09-02" ] - }, - "transformSpec": { - "transforms": [ - { - "type": "expression", - "name": "language", - "expression": "concat('language-', language)" - }, - { - "type": "expression", - "name": "triple-added", - "expression": "added * 3" - } - ] - } - }, - "ioConfig": { - "type": "index", - 
"inputSource": { - "type": "local", - "baseDir": "/resources/data/batch_index/json", - "filter": "wikipedia_index_data*" - }, - "inputFormat": { - "type" : "json" - } - }, - "tuningConfig": { - "type": "index", - "maxRowsPerSegment": 3 - } - } -} \ No newline at end of file diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json deleted file mode 100644 index 071e66133f37..000000000000 --- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json +++ /dev/null @@ -1,106 +0,0 @@ -{ - "type": "index", - "spec": { - "dataSchema": { - "dataSource": "%%REINDEX_DATASOURCE%%", - "timestampSpec": { - "column": "timestamp", - "format": "iso" - }, - "dimensionsSpec": { - "dimensions": [ - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "page", - "newPage", - "anonymous", - "namespace", - "country", - "region", - "city" - ] - }, - "metricsSpec": [ - { - "type": "doubleSum", - "name": "added", - "fieldName": "added" - }, - { - "type": "doubleSum", - "name": "triple-added", - "fieldName": "triple-added" - }, - { - "type": "doubleSum", - "name": "one-plus-triple-added", - "fieldName": "one-plus-triple-added" - }, - { - "type": "doubleSum", - "name": "deleted", - "fieldName": "deleted" - }, - { - "type": "doubleSum", - "name": "double-deleted", - "fieldName": "double-deleted" - }, - { - "type": "doubleSum", - "name": "delta", - "fieldName": "delta" - } - ], - "granularitySpec": { - "segmentGranularity": "DAY", - "queryGranularity": "second", - "intervals" : [ "2013-08-31/2013-09-01" ] - }, - "transformSpec": { - "transforms": [ - { - "type": "expression", - "name": "newPage", - "expression": "page" - }, - { - "type": "expression", - "name": "city", - "expression": "concat('city-', city)" - }, - { - "type": "expression", - "name": "one-plus-triple-added", - "expression": "\"triple-added\" + 1" - }, - { - "type": "expression", - "name": "delta", - "expression": "\"delta\" / 2" - }, - { - "type": "expression", - "name": "double-deleted", - "expression": "deleted * 2" - } - ] - } - }, - "ioConfig": { - "type": "index", - "inputSource": { - "type": "druid", - "dataSource": "%%DATASOURCE%%", - "interval": "2013-08-31/2013-09-01" - }, - "inputFormat": { - "type" : "json" - } - }, - "tuningConfig": { - "type": "index" - } - } -} From 1b293b297bdbe3a1683bb69d3bccba3f95452967 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Fri, 24 Feb 2023 01:14:04 +0530 Subject: [PATCH 21/33] test compaction IT failures fix --- .github/workflows/reusable-revised-its.yml | 2 +- .../ITCombiningInputSourceIndexTest.java | 101 ------------------ ...dia_combining_input_source_index_task.json | 93 ---------------- ...ipedia_index_task_with_dimension_spec.json | 6 +- .../wikipedia_with_timestamp_index_task.json | 4 +- .../duty/ITAutoCompactionTest.java | 4 - .../tests/indexer/ITCompactionTaskTest.java | 2 + ...ipedia_index_task_with_dimension_spec.json | 6 +- .../wikipedia_with_timestamp_index_task.json | 6 +- 9 files changed, 17 insertions(+), 207 deletions(-) delete mode 100644 integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningInputSourceIndexTest.java delete mode 100644 integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json diff --git a/.github/workflows/reusable-revised-its.yml b/.github/workflows/reusable-revised-its.yml 
index dc3482bb5f53..8d0b57abf047 100644 --- a/.github/workflows/reusable-revised-its.yml +++ b/.github/workflows/reusable-revised-its.yml @@ -95,7 +95,7 @@ jobs: source ./integration-tests-ex/image/target/env.sh docker tag $DRUID_IT_IMAGE_NAME $DRUID_IT_IMAGE_NAME-jdk${{ matrix.jdk }} echo $DRUID_IT_IMAGE_NAME - docker save "$DRUID_IT_IMAGE_NAME" | gzip > druid-container-jdk${{ matrix.jdk }}.tar.gz + docker save "$DRUID_IT_IMAGE_NAME" | gzip > druid-container-jdk${{ inputs.build_jdk }}.tar.gz - name: Load docker image run: | diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningInputSourceIndexTest.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningInputSourceIndexTest.java deleted file mode 100644 index 3e53648b3c3a..000000000000 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/ITCombiningInputSourceIndexTest.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.testsEx.indexer; - -import com.fasterxml.jackson.core.JsonProcessingException; -import org.apache.druid.java.util.common.Pair; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.testsEx.categories.BatchIndex; -import org.apache.druid.testsEx.config.DruidTestRunner; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; - -import java.io.Closeable; -import java.util.function.Function; - -@RunWith(DruidTestRunner.class) -@Category(BatchIndex.class) -public class ITCombiningInputSourceIndexTest extends AbstractITBatchIndexTest -{ - private static final String INDEX_TASK = "/indexer/wikipedia_index_task.json"; - private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; - private static final String INDEX_DATASOURCE = "wikipedia_index_test"; - - private static final String COMBINING_INDEX_TASK = "/indexer/wikipedia_combining_input_source_index_task.json"; - private static final String COMBINING_QUERIES_RESOURCE = "/indexer/wikipedia_combining_input_source_index_queries.json"; - private static final String COMBINING_INDEX_DATASOURCE = "wikipedia_comb_index_test"; - - @Test - public void testIndexData() throws Exception - { - try ( - final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); - final Closeable ignored2 = unloader(COMBINING_INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); - ) { - final Function combiningInputSourceSpecTransform = spec -> { - try { - return StringUtils.replace( - spec, - "%%COMBINING_DATASOURCE%%", - INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix() - ); - } - catch (Exception e) { - throw new RuntimeException(e); - } - }; - final Function transform = spec -> { - try { - return StringUtils.replace( - spec, - "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", - jsonMapper.writeValueAsString("0") - ); - } - catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - }; - - doIndexTest( - INDEX_DATASOURCE, - INDEX_TASK, - transform, - INDEX_QUERIES_RESOURCE, - false, - true, - true, - new Pair<>(false, false) - ); - doIndexTest( - COMBINING_INDEX_DATASOURCE, - COMBINING_INDEX_TASK, - combiningInputSourceSpecTransform, - COMBINING_QUERIES_RESOURCE, - false, - true, - true, - new Pair<>(false, false) - ); - } - } - -} diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json deleted file mode 100644 index 50e2dece004b..000000000000 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_combining_input_source_index_task.json +++ /dev/null @@ -1,93 +0,0 @@ -{ - "type": "index", - "spec": { - "dataSchema": { - "dataSource": "%%DATASOURCE%%", - "timestampSpec": { - "column": "timestamp" - }, - "dimensionsSpec": { - "dimensions": [ - "page", - {"type": "string", "name": "language", "createBitmapIndex": false}, - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city" - ] - }, - "metricsSpec": [ - { - "type": "count", - "name": "count" - }, - { - "type": "doubleSum", - "name": "added", - "fieldName": "added" - }, - { - "type": "doubleSum", - "name": "deleted", - "fieldName": "deleted" - }, - { - "type": "doubleSum", - "name": "delta", - "fieldName": "delta" - }, - { - "name": "thetaSketch", - "type": "thetaSketch", - "fieldName": 
"user" - }, - { - "name": "quantilesDoublesSketch", - "type": "quantilesDoublesSketch", - "fieldName": "delta" - }, - { - "name": "HLLSketchBuild", - "type": "HLLSketchBuild", - "fieldName": "user" - } - ], - "granularitySpec": { - "segmentGranularity": "DAY", - "queryGranularity": "second", - "intervals" : [ "2013-08-31/2013-09-02" ] - } - }, - "ioConfig": { - "type": "index", - "inputSource": { - "type": "combining", - "delegates": [ - { - "type": "local", - "baseDir": "/resources/indexer", - "filter": "wikipedia_combining_index_data.json" - }, - { - "type": "druid", - "dataSource": "%%COMBINING_DATASOURCE%%", - "interval": "2013-08-31/2013-09-02" - } - ] - }, - "inputFormat": { - "type": "json" - } - }, - "tuningConfig": { - "type": "index", - "maxRowsPerSegment": 3 - } - } -} diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index 1d6dc6ed3004..1ad3d46885b4 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -4,7 +4,8 @@ "dataSchema": { "dataSource": "%%DATASOURCE%%", "timestampSpec": { - "column": "timestamp" + "column": "timestamp", + "format": "auto" }, "dimensionsSpec": { "dimensions": [ @@ -73,7 +74,8 @@ }, "inputFormat": { "type": "json" - } + }, + "appendToExisting": false }, "tuningConfig": { "type": "index", diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index ec29d49e43d1..9f573fcb5faf 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -5,7 +5,7 @@ "dataSource": "%%DATASOURCE%%", "timestampSpec": { "column": "timestamp", - "format": "auto" //test + "format": "auto" }, "dimensionsSpec": { "dimensions": [ @@ -76,7 +76,7 @@ "inputFormat": { "type": "json" }, - "appendToExisting": false // test + "appendToExisting": false }, "tuningConfig": { "type": "index", diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index 8abac846ff83..6371482a765c 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -1523,10 +1523,6 @@ private void loadData(String indexTask) throws Exception private void loadData(String indexTask, Map specs) throws Exception { - LOG.info("Number of segments present before loading data for datasource %s - %s", - fullDatasourceName, coordinator.getSegments(fullDatasourceName)); - LOG.info("Old segments metadata for datasource %s - %s", - fullDatasourceName, coordinator.getFullSegmentsMetadata(fullDatasourceName)); String taskSpec = getResourceAsString(indexTask); taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); taskSpec = StringUtils.replace( diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java 
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java index 957c8a5522c4..ba77cb0a3505 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java @@ -299,6 +299,8 @@ private void checkNumberOfSegments(int numExpectedSegments) () -> { int metadataSegmentCount = coordinator.getSegments(fullDatasourceName).size(); LOG.info("Current metadata segment count: %d, expected: %d", metadataSegmentCount, numExpectedSegments); + LOG.info("Segments metadata for datasource %s - %s", + fullDatasourceName, coordinator.getFullSegmentsMetadata(fullDatasourceName)); return metadataSegmentCount == numExpectedSegments; }, "Segment count check" diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index ac63d7f892c5..acde3cc54368 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -4,7 +4,8 @@ "dataSchema": { "dataSource": "%%DATASOURCE%%", "timestampSpec": { - "column": "timestamp" + "column": "timestamp", + "format": "auto" }, "dimensionsSpec": { "dimensions": [ @@ -73,7 +74,8 @@ }, "inputFormat": { "type" : "json" - } + }, + "appendToExisting": false }, "tuningConfig": { "type": "index", diff --git a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index 28a038bfcebb..6b64ee2f8211 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -4,7 +4,8 @@ "dataSchema": { "dataSource": "%%DATASOURCE%%", "timestampSpec": { - "column": "timestamp" + "column": "timestamp", + "format": "auto" }, "dimensionsSpec": { "dimensions": [ @@ -74,7 +75,8 @@ }, "inputFormat": { "type" : "json" - } + }, + "appendToExisting": false }, "tuningConfig": { "type": "index", From ea9eff0e0f9374e84c658d043e0f03fc6e325d49 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Fri, 24 Feb 2023 03:18:55 +0530 Subject: [PATCH 22/33] fix BatchIndex IT kill task lock revoked failure --- .../testsEx/indexer/AbstractITBatchIndexTest.java | 3 +++ .../druid/testsEx/indexer/AbstractIndexerTest.java | 13 +++++-------- .../coordinator/duty/ITAutoCompactionTest.java | 9 ++++----- .../tests/indexer/AbstractITBatchIndexTest.java | 3 +++ .../druid/tests/indexer/AbstractIndexerTest.java | 13 +++++-------- .../druid/tests/indexer/ITCompactionTaskTest.java | 10 +++++----- 6 files changed, 25 insertions(+), 26 deletions(-) diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractITBatchIndexTest.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractITBatchIndexTest.java index 2edb13a04999..fb02d8f8dad6 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractITBatchIndexTest.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractITBatchIndexTest.java @@ -456,6 +456,9 @@ protected void submitTaskAndWait( Pair segmentAvailabilityConfirmationPair ) { + // Wait for any existing kill tasks to 
complete before submitting a new index task; otherwise, + // kill tasks can fail with a revoked interval lock. + waitForAllTasksToCompleteForDataSource(dataSourceName); final List oldVersions = waitForNewVersion ? coordinator.getAvailableSegments(dataSourceName) : null; long startSubTaskCount = -1; diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java index 5e53330e705b..9032c7f4a604 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java @@ -103,6 +103,9 @@ protected void unloadAndKillData(final String dataSource) protected String submitIndexTask(String indexTask, final String fullDatasourceName) throws Exception { + // Wait for any existing kill tasks to complete before submitting a new index task; otherwise, + // kill tasks can fail with a revoked interval lock. + waitForAllTasksToCompleteForDataSource(fullDatasourceName); String taskSpec = getResourceAsString(indexTask); taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); taskSpec = StringUtils.replace( @@ -135,14 +138,8 @@ private void unloadAndKillData(final String dataSource, String start, String end Interval interval = Intervals.of(start + "/" + end); coordinator.unloadSegmentsForDataSource(dataSource); ITRetryUtil.retryUntilFalse( - new Callable() - { - @Override - public Boolean call() - { - return coordinator.areSegmentsLoaded(dataSource); - } - }, "Segment Unloading" + () -> coordinator.areSegmentsLoaded(dataSource), + "Segment Unloading" ); coordinator.deleteSegmentsDataSource(dataSource, interval); waitForAllTasksToCompleteForDataSource(dataSource); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index 6371482a765c..b72eda5ec430 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -1697,11 +1697,10 @@ private void verifySegmentsCount(int numExpectedSegments) { ITRetryUtil.retryUntilTrue( () -> { - int metadataSegmentCount = coordinator.getSegments(fullDatasourceName).size(); - LOG.info("Current metadata segment count: %d, expected: %d", metadataSegmentCount, numExpectedSegments); - LOG.info("Segments metadata for datasource %s - %s", - fullDatasourceName, coordinator.getFullSegmentsMetadata(fullDatasourceName)); - return metadataSegmentCount == numExpectedSegments; + List metadataSegments = coordinator.getFullSegmentsMetadata(fullDatasourceName); + LOG.info("Current metadata segment count: %d, expected: %d", metadataSegments.size(), numExpectedSegments); + LOG.info("Segments metadata - %s", metadataSegments.stream().map(x->x.getId() + " - " + x.getSize())); + return metadataSegments.size() == numExpectedSegments; }, "Compaction segment count check" );
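Note: the waitForAllTasksToCompleteForDataSource() call added in the hunks above reduces to polling the Overlord until it reports no incomplete tasks for the datasource. A minimal sketch of such a helper is below; getUncompletedTasksForDataSource() and the retry message are illustrative assumptions about the IT client, not taken from this patch:

    // Minimal sketch, assuming indexer.getUncompletedTasksForDataSource(dataSource)
    // returns the pending/running/waiting tasks for the datasource (hypothetical name).
    protected void waitForAllTasksToCompleteForDataSource(final String dataSource)
    {
      ITRetryUtil.retryUntilTrue(
          () -> indexer.getUncompletedTasksForDataSource(dataSource).isEmpty(),
          StringUtils.format("Waiting for all tasks of [%s] to complete", dataSource)
      );
    }

The same retry primitive already appears in these diffs (retryUntilTrue/retryUntilFalse), so the wait is a bounded poll rather than a new synchronization mechanism.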
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java index 8bcfaa0df134..0f160580db51 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java @@ -340,6 +340,9 @@ protected void submitTaskAndWait( Pair segmentAvailabilityConfirmationPair ) { + // Wait for any existing kill tasks to complete before submitting a new index task; otherwise, + // kill tasks can fail with a revoked interval lock. + waitForAllTasksToCompleteForDataSource(dataSourceName); final List oldVersions = waitForNewVersion ? coordinator.getAvailableSegments(dataSourceName) : null; long startSubTaskCount = -1; diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java index 1bb6e8e12c8d..d613534bb511 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java @@ -110,6 +110,9 @@ protected void unloadAndKillData(final String dataSource) protected String submitIndexTask(String indexTask, final String fullDatasourceName) throws Exception { + // Wait for any existing kill tasks to complete before submitting a new index task; otherwise, + // kill tasks can fail with a revoked interval lock. + waitForAllTasksToCompleteForDataSource(fullDatasourceName); String taskSpec = getResourceAsString(indexTask); taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); taskSpec = StringUtils.replace( @@ -142,14 +145,8 @@ private void unloadAndKillData(final String dataSource, String start, String end Interval interval = Intervals.of(start + "/" + end); coordinator.unloadSegmentsForDataSource(dataSource); ITRetryUtil.retryUntilFalse( - new Callable() - { - @Override - public Boolean call() - { - return coordinator.areSegmentsLoaded(dataSource); - } - }, "Segment Unloading" + () -> coordinator.areSegmentsLoaded(dataSource), + "Segment Unloading" ); coordinator.deleteSegmentsDataSource(dataSource, interval); waitForAllTasksToCompleteForDataSource(dataSource); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java index ba77cb0a3505..6e832dc1e3bc 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java @@ -29,6 +29,7 @@ import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.testing.utils.ITRetryUtil; import org.apache.druid.tests.TestNGGroup; +import org.apache.druid.timeline.DataSegment; import org.joda.time.Interval; import org.joda.time.chrono.ISOChronology; import org.testng.annotations.BeforeMethod; @@ -297,11 +298,10 @@ private void checkNumberOfSegments(int numExpectedSegments) { ITRetryUtil.retryUntilTrue( () -> { - int metadataSegmentCount = coordinator.getSegments(fullDatasourceName).size(); - LOG.info("Current metadata segment count: %d, 
expected: %d", metadataSegments.size(), numExpectedSegments); + LOG.info("Segments metadata - %s", metadataSegments.stream().map(x->x.getId() + " - " + x.getSize())); + return metadataSegments.size() == numExpectedSegments; }, "Segment count check" ); From 71b2c41e26d172912d4c1133fb322f9fcd972804 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Fri, 24 Feb 2023 10:54:59 +0530 Subject: [PATCH 23/33] use files instead of filter, test query granularity --- .../tests/coordinator/duty/ITAutoCompactionTest.java | 4 +++- .../druid/tests/indexer/ITCompactionTaskTest.java | 4 +++- .../wikipedia_index_task_with_dimension_spec.json | 11 +++++++---- .../indexer/wikipedia_with_timestamp_index_task.json | 7 +++++-- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index b72eda5ec430..55236fc22f31 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -89,6 +89,7 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.stream.Collectors; @Test(groups = {TestNGGroup.COMPACTION}) @Guice(moduleFactory = DruidTestModuleFactory.class) @@ -1699,7 +1700,8 @@ private void verifySegmentsCount(int numExpectedSegments) () -> { List metadataSegments = coordinator.getFullSegmentsMetadata(fullDatasourceName); LOG.info("Current metadata segment count: %d, expected: %d", metadataSegments.size(), numExpectedSegments); - LOG.info("Segments metadata - %s", metadataSegments.stream().map(x->x.getId() + " - " + x.getSize())); + LOG.info("Segments metadata - %s", metadataSegments.stream().map(x->x.getId() + " - " + x.getSize()).collect( + Collectors.toList())); return metadataSegments.size() == numExpectedSegments; }, "Compaction segment count check" diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java index 6e832dc1e3bc..172d458479d5 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java @@ -47,6 +47,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; @Test(groups = {TestNGGroup.COMPACTION, TestNGGroup.QUICKSTART_COMPATIBLE}) @Guice(moduleFactory = DruidTestModuleFactory.class) @@ -300,7 +301,8 @@ private void checkNumberOfSegments(int numExpectedSegments) () -> { List metadataSegments = coordinator.getFullSegmentsMetadata(fullDatasourceName); LOG.info("Current metadata segment count: %d, expected: %d", metadataSegments.size(), numExpectedSegments); - LOG.info("Segments metadata - %s", metadataSegments.stream().map(x->x.getId() + " - " + x.getSize())); + LOG.info("Segments metadata - %s", metadataSegments.stream().map(x->x.getId() + " - " + x.getSize()).collect( + Collectors.toList())); return metadataSegments.size() == numExpectedSegments; }, "Segment count check" diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index 
acde3cc54368..a34cd31f939e 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -61,7 +61,7 @@ ], "granularitySpec": { "segmentGranularity": "DAY", - "queryGranularity": "DAY", + "queryGranularity": "second", "intervals" : [ "2013-08-31/2013-09-02" ] } }, @@ -69,11 +69,14 @@ "type": "index", "inputSource": { "type": "local", - "baseDir": "/resources/data/batch_index/json", - "filter": "wikipedia_index_data*" + "files": [ + "/resources/data/batch_index/json/wikipedia_index_data1.json", + "/resources/data/batch_index/json/wikipedia_index_data2.json", + "/resources/data/batch_index/json/wikipedia_index_data3.json" + ] }, "inputFormat": { - "type" : "json" + "type": "json" }, "appendToExisting": false }, diff --git a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index 6b64ee2f8211..9957d3934762 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -70,8 +70,11 @@ "type": "index", "inputSource": { "type": "local", - "baseDir": "/resources/data/batch_index/json", - "filter": "wikipedia_index_data*" + "files": [ + "/resources/data/batch_index/json/wikipedia_index_data1.json", + "/resources/data/batch_index/json/wikipedia_index_data2.json", + "/resources/data/batch_index/json/wikipedia_index_data3.json" + ] }, "inputFormat": { "type" : "json" From 19ad33b5efd24d5856122b1265edc3de5f58421a Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Sat, 25 Feb 2023 03:06:56 +0530 Subject: [PATCH 24/33] debug compaction failures --- .github/workflows/standard-its.yml | 228 +++++++++--------- .../unit-and-integration-tests-unified.yml | 56 ++--- .../duty/ITAutoCompactionTest.java | 27 ++- ...ipedia_index_task_with_dimension_spec.json | 2 +- 4 files changed, 167 insertions(+), 146 deletions(-) diff --git a/.github/workflows/standard-its.yml b/.github/workflows/standard-its.yml index 9909ce012fe2..7f3797f0992c 100644 --- a/.github/workflows/standard-its.yml +++ b/.github/workflows/standard-its.yml @@ -24,7 +24,7 @@ jobs: fail-fast: false matrix: jdk: [8, 11] - testing_group: [batch-index, input-format, input-source, perfect-rollup-parallel-batch-index, kafka-index, kafka-index-slow, kafka-transactional-index, kafka-transactional-index-slow, kafka-data-format, ldap-security, realtime-index, append-ingestion, compaction] + testing_group: [compaction] uses: ./.github/workflows/reusable-standard-its.yml with: build_jdk: 8 @@ -38,7 +38,7 @@ jobs: fail-fast: false matrix: jdk: [8, 11] - testing_group: [input-source, perfect-rollup-parallel-batch-index, kafka-index, kafka-transactional-index, kafka-index-slow, kafka-transactional-index-slow, kafka-data-format, append-ingestion, compaction] + testing_group: [compaction] uses: ./.github/workflows/reusable-standard-its.yml with: build_jdk: 8 @@ -47,115 +47,115 @@ jobs: use_indexer: indexer group: ${{ matrix.testing_group }} - integration-query-tests-middleManager: - strategy: - fail-fast: false - matrix: - jdk: [8, 11] - testing_group: [query, query-retry, query-error, security, high-availability] - uses: ./.github/workflows/reusable-standard-its.yml - with: - build_jdk: 8 - runtime_jdk: ${{ matrix.jdk }} - testing_groups: -Dgroups=${{ 
matrix.testing_group }} - use_indexer: middleManager - override_config_path: ./environment-configs/test-groups/prepopulated-data - group: ${{ matrix.testing_group }} - - integration-query-tests-middleManager-mariaDB: - strategy: - fail-fast: false - matrix: - jdk: [8, 11] - uses: ./.github/workflows/reusable-standard-its.yml - with: - build_jdk: 8 - runtime_jdk: ${{ matrix.jdk }} - testing_groups: -Dgroups=query - use_indexer: middleManager - mysql_driver: org.mariadb.jdbc.Driver - override_config_path: ./environment-configs/test-groups/prepopulated-data - group: query - - integration-shuffle-deep-store-tests: - strategy: - fail-fast: false - matrix: - indexer: [indexer, middleManager] - uses: ./.github/workflows/reusable-standard-its.yml - with: - build_jdk: 8 - runtime_jdk: 8 - testing_groups: -Dgroups=shuffle-deep-store - use_indexer: ${{ matrix.indexer }} - override_config_path: ./environment-configs/test-groups/shuffle-deep-store - group: shuffle deep store - - integration-custom-coordinator-duties-tests: - uses: ./.github/workflows/reusable-standard-its.yml - with: - build_jdk: 8 - runtime_jdk: 8 - testing_groups: -Dgroups=custom-coordinator-duties - use_indexer: middleManager - override_config_path: ./environment-configs/test-groups/custom-coordinator-duties - group: custom coordinator duties - - integration-k8s-leadership-tests: - name: (Compile=openjdk8, Run=openjdk8, Cluster Build On K8s) ITNestedQueryPushDownTest integration test - runs-on: ubuntu-22.04 - env: - MVN: mvn --no-snapshot-updates - MAVEN_SKIP: -P skip-static-checks -Dweb.console.skip=true -Dmaven.javadoc.skip=true - CONFIG_FILE: k8s_run_config_file.json - IT_TEST: -Dit.test=ITNestedQueryPushDownTest - POD_NAME: int-test - POD_NAMESPACE: default - BUILD_DRUID_CLUSTER: true - steps: - - name: Checkout branch - uses: actions/checkout@v3 - - - name: Setup java - run: export JAVA_HOME=$JAVA_HOME_8_X64 - - - name: Restore Maven repository - id: maven-restore - uses: actions/cache/restore@v3 - with: - path: ~/.m2/repository - key: maven-${{ runner.os }}-8-${{ github.sha }} - - - name: Maven build - if: steps.maven-restore.outputs.cache-hit != 'true' - run: | - ./it.sh ci - - - name: Run IT - id: test - run: | - # Debug echo - echo "MAVEN_OPTS='-Xmx2048m' ${MVN} verify -pl integration-tests -P int-tests-config-file ${IT_TEST} ${MAVEN_SKIP} -Dpod.name=${POD_NAME} -Dpod.namespace=${POD_NAMESPACE} -Dbuild.druid.cluster=${BUILD_DRUID_CLUSTER}" - MAVEN_OPTS='-Xmx2048m' ${MVN} verify -pl integration-tests -P int-tests-config-file ${IT_TEST} ${MAVEN_SKIP} -Dpod.name=${POD_NAME} -Dpod.namespace=${POD_NAMESPACE} -Dbuild.druid.cluster=${BUILD_DRUID_CLUSTER} - - - name: Debug on failure - if: ${{ failure() && steps.test.conclusion == 'failure' }} - run: | - for v in broker middlemanager router coordinator historical ; do - echo "------------------------druid-tiny-cluster-"$v"s-0-------------------------"; - sudo /usr/local/bin/kubectl logs --tail 1000 druid-tiny-cluster-"$v"s-0; - done - - integration-other-tests: - strategy: - fail-fast: false - matrix: - jdk: [8, 11] - indexer: [middleManager, indexer] - uses: ./.github/workflows/reusable-standard-its.yml - with: - build_jdk: 8 - runtime_jdk: ${{ matrix.jdk }} - testing_groups: 
-DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties - use_indexer: ${{ matrix.indexer }} - group: other +# integration-query-tests-middleManager: +# strategy: +# fail-fast: false +# matrix: +# jdk: [8, 11] +# testing_group: [query, query-retry, query-error, security, high-availability] +# uses: ./.github/workflows/reusable-standard-its.yml +# with: +# build_jdk: 8 +# runtime_jdk: ${{ matrix.jdk }} +# testing_groups: -Dgroups=${{ matrix.testing_group }} +# use_indexer: middleManager +# override_config_path: ./environment-configs/test-groups/prepopulated-data +# group: ${{ matrix.testing_group }} +# +# integration-query-tests-middleManager-mariaDB: +# strategy: +# fail-fast: false +# matrix: +# jdk: [8, 11] +# uses: ./.github/workflows/reusable-standard-its.yml +# with: +# build_jdk: 8 +# runtime_jdk: ${{ matrix.jdk }} +# testing_groups: -Dgroups=query +# use_indexer: middleManager +# mysql_driver: org.mariadb.jdbc.Driver +# override_config_path: ./environment-configs/test-groups/prepopulated-data +# group: query +# +# integration-shuffle-deep-store-tests: +# strategy: +# fail-fast: false +# matrix: +# indexer: [indexer, middleManager] +# uses: ./.github/workflows/reusable-standard-its.yml +# with: +# build_jdk: 8 +# runtime_jdk: 8 +# testing_groups: -Dgroups=shuffle-deep-store +# use_indexer: ${{ matrix.indexer }} +# override_config_path: ./environment-configs/test-groups/shuffle-deep-store +# group: shuffle deep store +# +# integration-custom-coordinator-duties-tests: +# uses: ./.github/workflows/reusable-standard-its.yml +# with: +# build_jdk: 8 +# runtime_jdk: 8 +# testing_groups: -Dgroups=custom-coordinator-duties +# use_indexer: middleManager +# override_config_path: ./environment-configs/test-groups/custom-coordinator-duties +# group: custom coordinator duties +# +# integration-k8s-leadership-tests: +# name: (Compile=openjdk8, Run=openjdk8, Cluster Build On K8s) ITNestedQueryPushDownTest integration test +# runs-on: ubuntu-22.04 +# env: +# MVN: mvn --no-snapshot-updates +# MAVEN_SKIP: -P skip-static-checks -Dweb.console.skip=true -Dmaven.javadoc.skip=true +# CONFIG_FILE: k8s_run_config_file.json +# IT_TEST: -Dit.test=ITNestedQueryPushDownTest +# POD_NAME: int-test +# POD_NAMESPACE: default +# BUILD_DRUID_CLUSTER: true +# steps: +# - name: Checkout branch +# uses: actions/checkout@v3 +# +# - name: Setup java +# run: export JAVA_HOME=$JAVA_HOME_8_X64 +# +# - name: Restore Maven repository +# id: maven-restore +# uses: actions/cache/restore@v3 +# with: +# path: ~/.m2/repository +# key: maven-${{ runner.os }}-8-${{ github.sha }} +# +# - name: Maven build +# if: steps.maven-restore.outputs.cache-hit != 'true' +# run: | +# ./it.sh ci +# +# - name: Run IT +# id: test +# run: | +# # Debug echo +# echo "MAVEN_OPTS='-Xmx2048m' ${MVN} verify -pl integration-tests -P int-tests-config-file ${IT_TEST} ${MAVEN_SKIP} -Dpod.name=${POD_NAME} 
-Dpod.namespace=${POD_NAMESPACE} -Dbuild.druid.cluster=${BUILD_DRUID_CLUSTER}" +# MAVEN_OPTS='-Xmx2048m' ${MVN} verify -pl integration-tests -P int-tests-config-file ${IT_TEST} ${MAVEN_SKIP} -Dpod.name=${POD_NAME} -Dpod.namespace=${POD_NAMESPACE} -Dbuild.druid.cluster=${BUILD_DRUID_CLUSTER} +# +# - name: Debug on failure +# if: ${{ failure() && steps.test.conclusion == 'failure' }} +# run: | +# for v in broker middlemanager router coordinator historical ; do +# echo "------------------------druid-tiny-cluster-"$v"s-0-------------------------"; +# sudo /usr/local/bin/kubectl logs --tail 1000 druid-tiny-cluster-"$v"s-0; +# done +# +# integration-other-tests: +# strategy: +# fail-fast: false +# matrix: +# jdk: [8, 11] +# indexer: [middleManager, indexer] +# uses: ./.github/workflows/reusable-standard-its.yml +# with: +# build_jdk: 8 +# runtime_jdk: ${{ matrix.jdk }} +# testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties +# use_indexer: ${{ matrix.indexer }} +# group: other diff --git a/.github/workflows/unit-and-integration-tests-unified.yml b/.github/workflows/unit-and-integration-tests-unified.yml index 2555aa48a935..1e1f0e45fc6c 100644 --- a/.github/workflows/unit-and-integration-tests-unified.yml +++ b/.github/workflows/unit-and-integration-tests-unified.yml @@ -106,35 +106,35 @@ jobs: echo $DRUID_IT_IMAGE_NAME docker save "$DRUID_IT_IMAGE_NAME" | gzip > druid-container-jdk${{ matrix.jdk }}.tar.gz - unit-tests: - strategy: - fail-fast: false - matrix: - sql_compatibility: [ false, true ] - name: unit tests (jdk8, sql-compat=${{ matrix.sql_compatibility }}) - uses: ./.github/workflows/unit-tests.yml - needs: build - with: - jdk: 8 - sql_compatibility: ${{ matrix.sql_compatibility }} - - unit-tests-phase2: - strategy: - fail-fast: false - matrix: - jdk: [11, 17] - sql_compatibility: [ false, true ] - name: unit tests (jdk${{ matrix.jdk }}, sql-compat=${{ matrix.sql_compatibility }}) - uses: ./.github/workflows/unit-tests.yml - needs: unit-tests - with: - jdk: ${{ matrix.jdk }} - sql_compatibility: ${{ matrix.sql_compatibility }} +# unit-tests: +# strategy: +# fail-fast: false +# matrix: +# sql_compatibility: [ false, true ] +# name: unit tests (jdk8, sql-compat=${{ matrix.sql_compatibility }}) +# uses: ./.github/workflows/unit-tests.yml +# needs: build +# with: +# jdk: 8 +# sql_compatibility: ${{ matrix.sql_compatibility }} +# +# unit-tests-phase2: +# strategy: +# fail-fast: false +# matrix: +# jdk: [11, 17] +# sql_compatibility: [ false, true ] +# name: unit tests (jdk${{ matrix.jdk }}, sql-compat=${{ matrix.sql_compatibility }}) +# uses: ./.github/workflows/unit-tests.yml +# needs: unit-tests +# with: +# jdk: ${{ matrix.jdk }} +# sql_compatibility: ${{ matrix.sql_compatibility }} standard-its: - needs: unit-tests + needs: build uses: ./.github/workflows/standard-its.yml - revised-its: - needs: unit-tests - uses: 
./.github/workflows/revised-its.yml +# revised-its: +# needs: unit-tests +# uses: ./.github/workflows/revised-its.yml diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index 55236fc22f31..898af7c6642e 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -1298,7 +1298,7 @@ public void testAutoCompactionDutyWithDimensionsSpec() throws Exception { // Index data with dimensions "page", "language", "user", "unpatrolled", "newPage", "robot", "anonymous", // "namespace", "continent", "country", "region", "city" - loadData(INDEX_TASK_WITH_DIMENSION_SPEC); + loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC); try (final Closeable ignored = unloader(fullDatasourceName)) { final List intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName); intervalsBeforeCompaction.sort(null); @@ -1345,7 +1345,7 @@ public void testAutoCompactionDutyWithDimensionsSpec() throws Exception @Test public void testAutoCompactionDutyWithFilter() throws Exception { - loadData(INDEX_TASK_WITH_DIMENSION_SPEC); + loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC); try (final Closeable ignored = unloader(fullDatasourceName)) { final List intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName); intervalsBeforeCompaction.sort(null); @@ -1393,7 +1393,7 @@ public void testAutoCompactionDutyWithFilter() throws Exception @Test public void testAutoCompactionDutyWithMetricsSpec() throws Exception { - loadData(INDEX_TASK_WITH_DIMENSION_SPEC); + loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC); try (final Closeable ignored = unloader(fullDatasourceName)) { final List intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName); intervalsBeforeCompaction.sort(null); @@ -1548,6 +1548,27 @@ private void loadData(String indexTask, Map specs) throws Except ); } + private void loadDataWithReportLog(String indexTask) throws Exception + { + String taskSpec = getResourceAsString(indexTask); + taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); + taskSpec = StringUtils.replace( + taskSpec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("0") + ); + final String taskID = indexer.submitTask(taskSpec); + LOG.info("TaskID for loading index task %s", taskID); + indexer.waitUntilTaskCompletes(taskID); + LOG.info("Task log for %s", taskID); + LOG.info(indexer.getTaskLog(taskID)); + + ITRetryUtil.retryUntilTrue( + () -> coordinator.areSegmentsLoaded(fullDatasourceName), + "Segment Load" + ); + } + private void verifyQuery(String queryResource) throws Exception { verifyQuery(queryResource, ImmutableMap.of()); diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index a34cd31f939e..35b7e61daae8 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -61,7 +61,7 @@ ], "granularitySpec": { "segmentGranularity": "DAY", - "queryGranularity": "second", + "queryGranularity": "DAY", "intervals" : [ "2013-08-31/2013-09-02" ] } }, From 
020076720d7fc806cb8b29b59811d13b8bfa1fc7 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Sat, 25 Feb 2023 15:16:05 +0530 Subject: [PATCH 25/33] debug --- .../druid/tests/coordinator/duty/ITAutoCompactionTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index 898af7c6642e..f85feaa7d0c9 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -1561,7 +1561,11 @@ private void loadDataWithReportLog(String indexTask) throws Exception LOG.info("TaskID for loading index task %s", taskID); indexer.waitUntilTaskCompletes(taskID); LOG.info("Task log for %s", taskID); - LOG.info(indexer.getTaskLog(taskID)); + try { + LOG.info(indexer.getTaskReport(taskID).toString()); + } catch (Exception e) { + LOG.info("Task reports not found"); + } ITRetryUtil.retryUntilTrue( () -> coordinator.areSegmentsLoaded(fullDatasourceName), From 4c678f0cfa09023eba02cd2d52f8bce66244a915 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Sat, 25 Feb 2023 17:06:04 +0530 Subject: [PATCH 26/33] revert debug changes --- .../duty/ITAutoCompactionTest.java | 23 +++++++++++-------- ...ipedia_index_task_with_dimension_spec.json | 6 ++--- .../wikipedia_with_timestamp_index_task.json | 6 ++--- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index f85feaa7d0c9..2122dbd0fe5d 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -1298,7 +1298,7 @@ public void testAutoCompactionDutyWithDimensionsSpec() throws Exception { // Index data with dimensions "page", "language", "user", "unpatrolled", "newPage", "robot", "anonymous", // "namespace", "continent", "country", "region", "city" - loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC); + loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC, ImmutableMap.of()); try (final Closeable ignored = unloader(fullDatasourceName)) { final List intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName); intervalsBeforeCompaction.sort(null); @@ -1345,7 +1345,7 @@ public void testAutoCompactionDutyWithDimensionsSpec() throws Exception @Test public void testAutoCompactionDutyWithFilter() throws Exception { - loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC); + loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC, ImmutableMap.of()); try (final Closeable ignored = unloader(fullDatasourceName)) { final List intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName); intervalsBeforeCompaction.sort(null); @@ -1393,7 +1393,7 @@ public void testAutoCompactionDutyWithFilter() throws Exception @Test public void testAutoCompactionDutyWithMetricsSpec() throws Exception { - loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC); + loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC, ImmutableMap.of()); try (final Closeable ignored = unloader(fullDatasourceName)) { final List 
intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName); intervalsBeforeCompaction.sort(null); @@ -1548,7 +1548,7 @@ private void loadData(String indexTask, Map specs) throws Except ); } - private void loadDataWithReportLog(String indexTask) throws Exception + private void loadDataWithReportLog(String indexTask, Map specs) throws Exception { String taskSpec = getResourceAsString(indexTask); taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); @@ -1557,6 +1557,13 @@ private void loadDataWithReportLog(String indexTask) throws Exception "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", jsonMapper.writeValueAsString("0") ); + for (Map.Entry entry : specs.entrySet()) { + taskSpec = StringUtils.replace( + taskSpec, + entry.getKey(), + jsonMapper.writeValueAsString(entry.getValue()) + ); + } final String taskID = indexer.submitTask(taskSpec); LOG.info("TaskID for loading index task %s", taskID); indexer.waitUntilTaskCompletes(taskID); @@ -1723,11 +1730,9 @@ private void verifySegmentsCount(int numExpectedSegments) { ITRetryUtil.retryUntilTrue( () -> { - List metadataSegments = coordinator.getFullSegmentsMetadata(fullDatasourceName); - LOG.info("Current metadata segment count: %d, expected: %d", metadataSegments.size(), numExpectedSegments); - LOG.info("Segments metadata - %s", metadataSegments.stream().map(x->x.getId() + " - " + x.getSize()).collect( - Collectors.toList())); - return metadataSegments.size() == numExpectedSegments; + int metadataSegmentCount = coordinator.getSegments(fullDatasourceName).size(); + LOG.info("Current metadata segment count: %d, expected: %d", metadataSegmentCount, numExpectedSegments); + return metadataSegmentCount == numExpectedSegments; }, "Compaction segment count check" ); diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index 35b7e61daae8..cedf2b3b5185 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -4,8 +4,7 @@ "dataSchema": { "dataSource": "%%DATASOURCE%%", "timestampSpec": { - "column": "timestamp", - "format": "auto" + "column": "timestamp" }, "dimensionsSpec": { "dimensions": [ @@ -77,8 +76,7 @@ }, "inputFormat": { "type": "json" - }, - "appendToExisting": false + } }, "tuningConfig": { "type": "index", diff --git a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index 9957d3934762..933b96f33c0e 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -4,8 +4,7 @@ "dataSchema": { "dataSource": "%%DATASOURCE%%", "timestampSpec": { - "column": "timestamp", - "format": "auto" + "column": "timestamp" }, "dimensionsSpec": { "dimensions": [ @@ -78,8 +77,7 @@ }, "inputFormat": { "type" : "json" - }, - "appendToExisting": false + } }, "tuningConfig": { "type": "index", From 8fda4879f3357643618c5354cb924f2578f4fd13 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Sat, 25 Feb 2023 19:01:15 +0530 Subject: [PATCH 27/33] revert debug changes --- .../indexer/wikipedia_index_task_with_dimension_spec.json | 6 ++---- .../indexer/wikipedia_with_timestamp_index_task.json | 
6 ++---- .../tests/coordinator/duty/ITAutoCompactionTest.java | 6 ------ .../apache/druid/tests/indexer/ITCompactionTaskTest.java | 8 +++----- .../indexer/wikipedia_index_task_with_dimension_spec.json | 7 ++----- .../indexer/wikipedia_with_timestamp_index_task.json | 7 ++----- 6 files changed, 11 insertions(+), 29 deletions(-) diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index 1ad3d46885b4..1d6dc6ed3004 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -4,8 +4,7 @@ "dataSchema": { "dataSource": "%%DATASOURCE%%", "timestampSpec": { - "column": "timestamp", - "format": "auto" + "column": "timestamp" }, "dimensionsSpec": { "dimensions": [ @@ -74,8 +73,7 @@ }, "inputFormat": { "type": "json" - }, - "appendToExisting": false + } }, "tuningConfig": { "type": "index", diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index 9f573fcb5faf..b3ca26dbce57 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -4,8 +4,7 @@ "dataSchema": { "dataSource": "%%DATASOURCE%%", "timestampSpec": { - "column": "timestamp", - "format": "auto" + "column": "timestamp" }, "dimensionsSpec": { "dimensions": [ @@ -75,8 +74,7 @@ }, "inputFormat": { "type": "json" - }, - "appendToExisting": false + } }, "tuningConfig": { "type": "index", diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index 2122dbd0fe5d..de583f8d05c6 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -1567,12 +1567,6 @@ private void loadDataWithReportLog(String indexTask, Map specs) final String taskID = indexer.submitTask(taskSpec); LOG.info("TaskID for loading index task %s", taskID); indexer.waitUntilTaskCompletes(taskID); - LOG.info("Task log for %s", taskID); - try { - LOG.info(indexer.getTaskReport(taskID).toString()); - } catch (Exception e) { - LOG.info("Task reports not found"); - } ITRetryUtil.retryUntilTrue( () -> coordinator.areSegmentsLoaded(fullDatasourceName), diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java index 172d458479d5..32abfc9a5f5f 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java @@ -299,11 +299,9 @@ private void checkNumberOfSegments(int numExpectedSegments) { ITRetryUtil.retryUntilTrue( () -> { - List metadataSegments = coordinator.getFullSegmentsMetadata(fullDatasourceName); - LOG.info("Current metadata segment count: %d, expected: %d", metadataSegments.size(), 
numExpectedSegments); - LOG.info("Segments metadata - %s", metadataSegments.stream().map(x->x.getId() + " - " + x.getSize()).collect( - Collectors.toList())); - return metadataSegments.size() == numExpectedSegments; + int metadataSegmentCount = coordinator.getSegments(fullDatasourceName).size(); + LOG.info("Current metadata segment count: %d, expected: %d", metadataSegmentCount, numExpectedSegments); + return metadataSegmentCount == numExpectedSegments; }, "Segment count check" ); diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index cedf2b3b5185..1d6dc6ed3004 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -68,11 +68,8 @@ "type": "index", "inputSource": { "type": "local", - "files": [ - "/resources/data/batch_index/json/wikipedia_index_data1.json", - "/resources/data/batch_index/json/wikipedia_index_data2.json", - "/resources/data/batch_index/json/wikipedia_index_data3.json" - ] + "baseDir": "/resources/data/batch_index/json", + "filter": "wikipedia_index_data*" }, "inputFormat": { "type": "json" diff --git a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index 933b96f33c0e..28a038bfcebb 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -69,11 +69,8 @@ "type": "index", "inputSource": { "type": "local", - "files": [ - "/resources/data/batch_index/json/wikipedia_index_data1.json", - "/resources/data/batch_index/json/wikipedia_index_data2.json", - "/resources/data/batch_index/json/wikipedia_index_data3.json" - ] + "baseDir": "/resources/data/batch_index/json", + "filter": "wikipedia_index_data*" }, "inputFormat": { "type" : "json" From 265df19eb21bf6adede2a9bce2cf122afc1436f9 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Sat, 25 Feb 2023 19:55:13 +0530 Subject: [PATCH 28/33] debug --- .../apache/druid/tests/indexer/ITCompactionTaskTest.java | 8 +++++--- .../indexer/wikipedia_index_task_with_dimension_spec.json | 7 +++++-- .../indexer/wikipedia_with_timestamp_index_task.json | 7 +++++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java index 32abfc9a5f5f..8d6bdc45a7a1 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java @@ -299,9 +299,11 @@ private void checkNumberOfSegments(int numExpectedSegments) { ITRetryUtil.retryUntilTrue( () -> { - int metadataSegmentCount = coordinator.getSegments(fullDatasourceName).size(); - LOG.info("Current metadata segment count: %d, expected: %d", metadataSegmentCount, numExpectedSegments); - return metadataSegmentCount == numExpectedSegments; + List metadataSegments = coordinator.getFullSegmentsMetadata(fullDatasourceName); + LOG.info("Current metadata segment count: %d, expected: %d", metadataSegments.size(), numExpectedSegments); + LOG.info("Segments 
metadata"); + LOG.info(metadataSegments.toString()); + return metadataSegments.size() == numExpectedSegments; }, "Segment count check" ); diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index 1d6dc6ed3004..cedf2b3b5185 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -68,8 +68,11 @@ "type": "index", "inputSource": { "type": "local", - "baseDir": "/resources/data/batch_index/json", - "filter": "wikipedia_index_data*" + "files": [ + "/resources/data/batch_index/json/wikipedia_index_data1.json", + "/resources/data/batch_index/json/wikipedia_index_data2.json", + "/resources/data/batch_index/json/wikipedia_index_data3.json" + ] }, "inputFormat": { "type": "json" diff --git a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index 28a038bfcebb..933b96f33c0e 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -69,8 +69,11 @@ "type": "index", "inputSource": { "type": "local", - "baseDir": "/resources/data/batch_index/json", - "filter": "wikipedia_index_data*" + "files": [ + "/resources/data/batch_index/json/wikipedia_index_data1.json", + "/resources/data/batch_index/json/wikipedia_index_data2.json", + "/resources/data/batch_index/json/wikipedia_index_data3.json" + ] }, "inputFormat": { "type" : "json" From 80ef4c2812dfba748d44b1158af925645b5e3532 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Sat, 25 Feb 2023 22:32:47 +0530 Subject: [PATCH 29/33] cleanup --- .github/workflows/standard-its.yml | 228 +++++++++--------- .../unit-and-integration-tests-unified.yml | 56 ++--- ...ipedia_index_task_with_dimension_spec.json | 13 +- .../wikipedia_with_timestamp_index_task.json | 13 +- .../duty/ITAutoCompactionTest.java | 33 +-- .../tests/indexer/ITCompactionTaskTest.java | 9 +- ...ipedia_index_task_with_dimension_spec.json | 6 +- .../wikipedia_with_timestamp_index_task.json | 8 +- 8 files changed, 175 insertions(+), 191 deletions(-) diff --git a/.github/workflows/standard-its.yml b/.github/workflows/standard-its.yml index 7f3797f0992c..9909ce012fe2 100644 --- a/.github/workflows/standard-its.yml +++ b/.github/workflows/standard-its.yml @@ -24,7 +24,7 @@ jobs: fail-fast: false matrix: jdk: [8, 11] - testing_group: [compaction] + testing_group: [batch-index, input-format, input-source, perfect-rollup-parallel-batch-index, kafka-index, kafka-index-slow, kafka-transactional-index, kafka-transactional-index-slow, kafka-data-format, ldap-security, realtime-index, append-ingestion, compaction] uses: ./.github/workflows/reusable-standard-its.yml with: build_jdk: 8 @@ -38,7 +38,7 @@ jobs: fail-fast: false matrix: jdk: [8, 11] - testing_group: [compaction] + testing_group: [input-source, perfect-rollup-parallel-batch-index, kafka-index, kafka-transactional-index, kafka-index-slow, kafka-transactional-index-slow, kafka-data-format, append-ingestion, compaction] uses: ./.github/workflows/reusable-standard-its.yml with: build_jdk: 8 @@ -47,115 +47,115 @@ jobs: use_indexer: indexer group: ${{ matrix.testing_group }} -# 
integration-query-tests-middleManager: -# strategy: -# fail-fast: false -# matrix: -# jdk: [8, 11] -# testing_group: [query, query-retry, query-error, security, high-availability] -# uses: ./.github/workflows/reusable-standard-its.yml -# with: -# build_jdk: 8 -# runtime_jdk: ${{ matrix.jdk }} -# testing_groups: -Dgroups=${{ matrix.testing_group }} -# use_indexer: middleManager -# override_config_path: ./environment-configs/test-groups/prepopulated-data -# group: ${{ matrix.testing_group }} -# -# integration-query-tests-middleManager-mariaDB: -# strategy: -# fail-fast: false -# matrix: -# jdk: [8, 11] -# uses: ./.github/workflows/reusable-standard-its.yml -# with: -# build_jdk: 8 -# runtime_jdk: ${{ matrix.jdk }} -# testing_groups: -Dgroups=query -# use_indexer: middleManager -# mysql_driver: org.mariadb.jdbc.Driver -# override_config_path: ./environment-configs/test-groups/prepopulated-data -# group: query -# -# integration-shuffle-deep-store-tests: -# strategy: -# fail-fast: false -# matrix: -# indexer: [indexer, middleManager] -# uses: ./.github/workflows/reusable-standard-its.yml -# with: -# build_jdk: 8 -# runtime_jdk: 8 -# testing_groups: -Dgroups=shuffle-deep-store -# use_indexer: ${{ matrix.indexer }} -# override_config_path: ./environment-configs/test-groups/shuffle-deep-store -# group: shuffle deep store -# -# integration-custom-coordinator-duties-tests: -# uses: ./.github/workflows/reusable-standard-its.yml -# with: -# build_jdk: 8 -# runtime_jdk: 8 -# testing_groups: -Dgroups=custom-coordinator-duties -# use_indexer: middleManager -# override_config_path: ./environment-configs/test-groups/custom-coordinator-duties -# group: custom coordinator duties -# -# integration-k8s-leadership-tests: -# name: (Compile=openjdk8, Run=openjdk8, Cluster Build On K8s) ITNestedQueryPushDownTest integration test -# runs-on: ubuntu-22.04 -# env: -# MVN: mvn --no-snapshot-updates -# MAVEN_SKIP: -P skip-static-checks -Dweb.console.skip=true -Dmaven.javadoc.skip=true -# CONFIG_FILE: k8s_run_config_file.json -# IT_TEST: -Dit.test=ITNestedQueryPushDownTest -# POD_NAME: int-test -# POD_NAMESPACE: default -# BUILD_DRUID_CLUSTER: true -# steps: -# - name: Checkout branch -# uses: actions/checkout@v3 -# -# - name: Setup java -# run: export JAVA_HOME=$JAVA_HOME_8_X64 -# -# - name: Restore Maven repository -# id: maven-restore -# uses: actions/cache/restore@v3 -# with: -# path: ~/.m2/repository -# key: maven-${{ runner.os }}-8-${{ github.sha }} -# -# - name: Maven build -# if: steps.maven-restore.outputs.cache-hit != 'true' -# run: | -# ./it.sh ci -# -# - name: Run IT -# id: test -# run: | -# # Debug echo -# echo "MAVEN_OPTS='-Xmx2048m' ${MVN} verify -pl integration-tests -P int-tests-config-file ${IT_TEST} ${MAVEN_SKIP} -Dpod.name=${POD_NAME} -Dpod.namespace=${POD_NAMESPACE} -Dbuild.druid.cluster=${BUILD_DRUID_CLUSTER}" -# MAVEN_OPTS='-Xmx2048m' ${MVN} verify -pl integration-tests -P int-tests-config-file ${IT_TEST} ${MAVEN_SKIP} -Dpod.name=${POD_NAME} -Dpod.namespace=${POD_NAMESPACE} -Dbuild.druid.cluster=${BUILD_DRUID_CLUSTER} -# -# - name: Debug on failure -# if: ${{ failure() && steps.test.conclusion == 'failure' }} -# run: | -# for v in broker middlemanager router coordinator historical ; do -# echo "------------------------druid-tiny-cluster-"$v"s-0-------------------------"; -# sudo /usr/local/bin/kubectl logs --tail 1000 druid-tiny-cluster-"$v"s-0; -# done -# -# integration-other-tests: -# strategy: -# fail-fast: false -# matrix: -# jdk: [8, 11] -# indexer: [middleManager, indexer] -# uses: 
./.github/workflows/reusable-standard-its.yml -# with: -# build_jdk: 8 -# runtime_jdk: ${{ matrix.jdk }} -# testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties -# use_indexer: ${{ matrix.indexer }} -# group: other + integration-query-tests-middleManager: + strategy: + fail-fast: false + matrix: + jdk: [8, 11] + testing_group: [query, query-retry, query-error, security, high-availability] + uses: ./.github/workflows/reusable-standard-its.yml + with: + build_jdk: 8 + runtime_jdk: ${{ matrix.jdk }} + testing_groups: -Dgroups=${{ matrix.testing_group }} + use_indexer: middleManager + override_config_path: ./environment-configs/test-groups/prepopulated-data + group: ${{ matrix.testing_group }} + + integration-query-tests-middleManager-mariaDB: + strategy: + fail-fast: false + matrix: + jdk: [8, 11] + uses: ./.github/workflows/reusable-standard-its.yml + with: + build_jdk: 8 + runtime_jdk: ${{ matrix.jdk }} + testing_groups: -Dgroups=query + use_indexer: middleManager + mysql_driver: org.mariadb.jdbc.Driver + override_config_path: ./environment-configs/test-groups/prepopulated-data + group: query + + integration-shuffle-deep-store-tests: + strategy: + fail-fast: false + matrix: + indexer: [indexer, middleManager] + uses: ./.github/workflows/reusable-standard-its.yml + with: + build_jdk: 8 + runtime_jdk: 8 + testing_groups: -Dgroups=shuffle-deep-store + use_indexer: ${{ matrix.indexer }} + override_config_path: ./environment-configs/test-groups/shuffle-deep-store + group: shuffle deep store + + integration-custom-coordinator-duties-tests: + uses: ./.github/workflows/reusable-standard-its.yml + with: + build_jdk: 8 + runtime_jdk: 8 + testing_groups: -Dgroups=custom-coordinator-duties + use_indexer: middleManager + override_config_path: ./environment-configs/test-groups/custom-coordinator-duties + group: custom coordinator duties + + integration-k8s-leadership-tests: + name: (Compile=openjdk8, Run=openjdk8, Cluster Build On K8s) ITNestedQueryPushDownTest integration test + runs-on: ubuntu-22.04 + env: + MVN: mvn --no-snapshot-updates + MAVEN_SKIP: -P skip-static-checks -Dweb.console.skip=true -Dmaven.javadoc.skip=true + CONFIG_FILE: k8s_run_config_file.json + IT_TEST: -Dit.test=ITNestedQueryPushDownTest + POD_NAME: int-test + POD_NAMESPACE: default + BUILD_DRUID_CLUSTER: true + steps: + - name: Checkout branch + uses: actions/checkout@v3 + + - name: Setup java + run: export JAVA_HOME=$JAVA_HOME_8_X64 + + - name: Restore Maven repository + id: maven-restore + uses: actions/cache/restore@v3 + with: + path: ~/.m2/repository + key: maven-${{ runner.os }}-8-${{ github.sha }} + + - name: Maven build + if: steps.maven-restore.outputs.cache-hit != 'true' + run: | + ./it.sh ci + + - name: Run IT + id: test + run: | + # Debug echo + echo "MAVEN_OPTS='-Xmx2048m' ${MVN} verify -pl integration-tests -P int-tests-config-file ${IT_TEST} ${MAVEN_SKIP} 
-Dpod.name=${POD_NAME} -Dpod.namespace=${POD_NAMESPACE} -Dbuild.druid.cluster=${BUILD_DRUID_CLUSTER}" + MAVEN_OPTS='-Xmx2048m' ${MVN} verify -pl integration-tests -P int-tests-config-file ${IT_TEST} ${MAVEN_SKIP} -Dpod.name=${POD_NAME} -Dpod.namespace=${POD_NAMESPACE} -Dbuild.druid.cluster=${BUILD_DRUID_CLUSTER} + + - name: Debug on failure + if: ${{ failure() && steps.test.conclusion == 'failure' }} + run: | + for v in broker middlemanager router coordinator historical ; do + echo "------------------------druid-tiny-cluster-"$v"s-0-------------------------"; + sudo /usr/local/bin/kubectl logs --tail 1000 druid-tiny-cluster-"$v"s-0; + done + + integration-other-tests: + strategy: + fail-fast: false + matrix: + jdk: [8, 11] + indexer: [middleManager, indexer] + uses: ./.github/workflows/reusable-standard-its.yml + with: + build_jdk: 8 + runtime_jdk: ${{ matrix.jdk }} + testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties + use_indexer: ${{ matrix.indexer }} + group: other diff --git a/.github/workflows/unit-and-integration-tests-unified.yml b/.github/workflows/unit-and-integration-tests-unified.yml index 1e1f0e45fc6c..2555aa48a935 100644 --- a/.github/workflows/unit-and-integration-tests-unified.yml +++ b/.github/workflows/unit-and-integration-tests-unified.yml @@ -106,35 +106,35 @@ jobs: echo $DRUID_IT_IMAGE_NAME docker save "$DRUID_IT_IMAGE_NAME" | gzip > druid-container-jdk${{ matrix.jdk }}.tar.gz -# unit-tests: -# strategy: -# fail-fast: false -# matrix: -# sql_compatibility: [ false, true ] -# name: unit tests (jdk8, sql-compat=${{ matrix.sql_compatibility }}) -# uses: ./.github/workflows/unit-tests.yml -# needs: build -# with: -# jdk: 8 -# sql_compatibility: ${{ matrix.sql_compatibility }} -# -# unit-tests-phase2: -# strategy: -# fail-fast: false -# matrix: -# jdk: [11, 17] -# sql_compatibility: [ false, true ] -# name: unit tests (jdk${{ matrix.jdk }}, sql-compat=${{ matrix.sql_compatibility }}) -# uses: ./.github/workflows/unit-tests.yml -# needs: unit-tests -# with: -# jdk: ${{ matrix.jdk }} -# sql_compatibility: ${{ matrix.sql_compatibility }} + unit-tests: + strategy: + fail-fast: false + matrix: + sql_compatibility: [ false, true ] + name: unit tests (jdk8, sql-compat=${{ matrix.sql_compatibility }}) + uses: ./.github/workflows/unit-tests.yml + needs: build + with: + jdk: 8 + sql_compatibility: ${{ matrix.sql_compatibility }} + + unit-tests-phase2: + strategy: + fail-fast: false + matrix: + jdk: [11, 17] + sql_compatibility: [ false, true ] + name: unit tests (jdk${{ matrix.jdk }}, sql-compat=${{ matrix.sql_compatibility }}) + uses: ./.github/workflows/unit-tests.yml + needs: unit-tests + with: + jdk: ${{ matrix.jdk }} + sql_compatibility: ${{ matrix.sql_compatibility }} standard-its: - needs: build + needs: unit-tests uses: ./.github/workflows/standard-its.yml -# revised-its: -# needs: unit-tests -# uses: 
./.github/workflows/revised-its.yml + revised-its: + needs: unit-tests + uses: ./.github/workflows/revised-its.yml diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index 1d6dc6ed3004..35b7e61daae8 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -4,7 +4,8 @@ "dataSchema": { "dataSource": "%%DATASOURCE%%", "timestampSpec": { - "column": "timestamp" + "column": "timestamp", + "format": "auto" }, "dimensionsSpec": { "dimensions": [ @@ -68,12 +69,16 @@ "type": "index", "inputSource": { "type": "local", - "baseDir": "/resources/data/batch_index/json", - "filter": "wikipedia_index_data*" + "files": [ + "/resources/data/batch_index/json/wikipedia_index_data1.json", + "/resources/data/batch_index/json/wikipedia_index_data2.json", + "/resources/data/batch_index/json/wikipedia_index_data3.json" + ] }, "inputFormat": { "type": "json" - } + }, + "appendToExisting": false }, "tuningConfig": { "type": "index", diff --git a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index b3ca26dbce57..00a06ee52604 100644 --- a/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests-ex/cases/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -4,7 +4,8 @@ "dataSchema": { "dataSource": "%%DATASOURCE%%", "timestampSpec": { - "column": "timestamp" + "column": "timestamp", + "format": "auto" }, "dimensionsSpec": { "dimensions": [ @@ -69,12 +70,16 @@ "type": "index", "inputSource": { "type": "local", - "baseDir": "/resources/data/batch_index/json", - "filter": "wikipedia_index_data*" + "files": [ + "/resources/data/batch_index/json/wikipedia_index_data1.json", + "/resources/data/batch_index/json/wikipedia_index_data2.json", + "/resources/data/batch_index/json/wikipedia_index_data3.json" + ] }, "inputFormat": { "type": "json" - } + }, + "appendToExisting": false }, "tuningConfig": { "type": "index", diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index de583f8d05c6..3df5c230fc48 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -89,7 +89,6 @@ import java.util.Map; import java.util.Set; import java.util.UUID; -import java.util.stream.Collectors; @Test(groups = {TestNGGroup.COMPACTION}) @Guice(moduleFactory = DruidTestModuleFactory.class) @@ -1298,7 +1297,7 @@ public void testAutoCompactionDutyWithDimensionsSpec() throws Exception { // Index data with dimensions "page", "language", "user", "unpatrolled", "newPage", "robot", "anonymous", // "namespace", "continent", "country", "region", "city" - loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC, ImmutableMap.of()); + loadData(INDEX_TASK_WITH_DIMENSION_SPEC); try (final Closeable ignored = unloader(fullDatasourceName)) { final List intervalsBeforeCompaction = 
coordinator.getSegmentIntervals(fullDatasourceName); intervalsBeforeCompaction.sort(null); @@ -1345,7 +1344,7 @@ public void testAutoCompactionDutyWithDimensionsSpec() throws Exception @Test public void testAutoCompactionDutyWithFilter() throws Exception { - loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC, ImmutableMap.of()); + loadData(INDEX_TASK_WITH_DIMENSION_SPEC); try (final Closeable ignored = unloader(fullDatasourceName)) { final List intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName); intervalsBeforeCompaction.sort(null); @@ -1393,7 +1392,7 @@ public void testAutoCompactionDutyWithFilter() throws Exception @Test public void testAutoCompactionDutyWithMetricsSpec() throws Exception { - loadDataWithReportLog(INDEX_TASK_WITH_DIMENSION_SPEC, ImmutableMap.of()); + loadData(INDEX_TASK_WITH_DIMENSION_SPEC); try (final Closeable ignored = unloader(fullDatasourceName)) { final List intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName); intervalsBeforeCompaction.sort(null); @@ -1548,32 +1547,6 @@ private void loadData(String indexTask, Map specs) throws Exception ); } - private void loadDataWithReportLog(String indexTask, Map specs) throws Exception - { - String taskSpec = getResourceAsString(indexTask); - taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); - taskSpec = StringUtils.replace( - taskSpec, - "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", - jsonMapper.writeValueAsString("0") - ); - for (Map.Entry entry : specs.entrySet()) { - taskSpec = StringUtils.replace( - taskSpec, - entry.getKey(), - jsonMapper.writeValueAsString(entry.getValue()) - ); - } - final String taskID = indexer.submitTask(taskSpec); - LOG.info("TaskID for loading index task %s", taskID); - indexer.waitUntilTaskCompletes(taskID); - - ITRetryUtil.retryUntilTrue( - () -> coordinator.areSegmentsLoaded(fullDatasourceName), - "Segment Load" - ); - } - private void verifyQuery(String queryResource) throws Exception { verifyQuery(queryResource, ImmutableMap.of()); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java index 8d6bdc45a7a1..d6bc0e2249f3 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java @@ -47,7 +47,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; @Test(groups = {TestNGGroup.COMPACTION, TestNGGroup.QUICKSTART_COMPATIBLE}) @Guice(moduleFactory = DruidTestModuleFactory.class) @@ -299,11 +298,9 @@ private void checkNumberOfSegments(int numExpectedSegments) { ITRetryUtil.retryUntilTrue( () -> { - List<DataSegment> metadataSegments = coordinator.getFullSegmentsMetadata(fullDatasourceName); - LOG.info("Current metadata segment count: %d, expected: %d", metadataSegments.size(), numExpectedSegments); - LOG.info("Segments metadata"); - LOG.info(metadataSegments.toString()); - return metadataSegments.size() == numExpectedSegments; + int metadataSegmentCount = coordinator.getSegments(fullDatasourceName).size(); + LOG.info("Current metadata segment count: %d, expected: %d", metadataSegmentCount, numExpectedSegments); + return metadataSegmentCount == numExpectedSegments; }, "Segment count check" ); diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json 
b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json index cedf2b3b5185..35b7e61daae8 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_dimension_spec.json @@ -4,7 +4,8 @@ "dataSchema": { "dataSource": "%%DATASOURCE%%", "timestampSpec": { - "column": "timestamp" + "column": "timestamp", + "format": "auto" }, "dimensionsSpec": { "dimensions": [ @@ -76,7 +77,8 @@ }, "inputFormat": { "type": "json" - } + }, + "appendToExisting": false }, "tuningConfig": { "type": "index", diff --git a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json index 933b96f33c0e..00a06ee52604 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json @@ -4,7 +4,8 @@ "dataSchema": { "dataSource": "%%DATASOURCE%%", "timestampSpec": { - "column": "timestamp" + "column": "timestamp", + "format": "auto" }, "dimensionsSpec": { "dimensions": [ @@ -76,8 +77,9 @@ ] }, "inputFormat": { - "type" : "json" - } + "type": "json" + }, + "appendToExisting": false }, "tuningConfig": { "type": "index", From fee5c156283a544da1fa1d36039c83f89876be07 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Sun, 26 Feb 2023 02:00:35 +0530 Subject: [PATCH 30/33] final cleanup --- .../kinesis/KinesisSamplerSpecTest.java | 4 ++- .../common/task/AbstractBatchIndexTask.java | 2 +- .../parallel/ParallelIndexSupervisorTask.java | 6 ++-- ...bstractMultiPhaseParallelIndexingTest.java | 2 +- .../sampler/InputSourceSamplerTest.java | 32 +++++++++---------- .../tests/indexer/ITCompactionTaskTest.java | 1 - 6 files changed, 24 insertions(+), 23 deletions(-) diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java index ebe8cc675497..a166863b0ded 100644 --- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java +++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java @@ -171,7 +171,7 @@ public void testSample() throws InterruptedException } @Test - public void testWithInputRowParser() throws IOException + public void testSampleWithInputRowParser() throws IOException, InterruptedException { ObjectMapper objectMapper = new DefaultObjectMapper(); TimestampSpec timestampSpec = new TimestampSpec("timestamp", "iso", null); @@ -244,6 +244,8 @@ public void testWithInputRowParser() throws IOException new InputSourceSampler(new DefaultObjectMapper()), null ); + + runSamplerAndCompareResponse(samplerSpec, false); } private void runSamplerAndCompareResponse(SamplerSpec samplerSpec, boolean useInputFormat) throws InterruptedException diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index 6d4609ed3d93..a4cd183c01bb 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -567,7 +567,7 @@ static Granularity findGranularityFromSegments(List segments) /** *

- * This task will overwrite some segments with data read from some input source outside of Druid. + * This task will overwrite some segments with data read from an input source outside of Druid. * As a result, only the segments falling in intervalsToRead should be locked. *

* The order of segments within the returned list is unspecified, but each segment is guaranteed to appear in the list diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 02531e9a1557..5d3ebc2ca246 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -120,9 +120,9 @@ import java.util.stream.Collectors; /** - * ParallelIndexSupervisorTask is capable of running multiple subTasks for parallel indexing. - * While this task is running, it can submit multiple child tasks to overlords. This task - * succeeds only when all its child tasks succeed; otherwise it fails. + * ParallelIndexSupervisorTask is capable of running multiple subTasks for parallel indexing. This is + * applicable if the input {@link InputSource} is splittable. While this task is running, it can submit + * multiple child tasks to overlords. This task succeeds only when all its child tasks succeed; otherwise it fails. * * @see ParallelIndexTaskRunner */ diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java index 9c6733d2543e..ef62971e9a22 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java @@ -239,7 +239,7 @@ protected ParallelIndexSupervisorTask createTask( tuningConfig ); } else { - Preconditions.checkArgument(inputFormat == null); + Preconditions.checkArgument(inputFormat == null && parseSpec != null); ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig( null, new LocalInputSource(inputDirectory, filter), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java index 321685031013..5f6375e62974 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java @@ -173,7 +173,7 @@ public void testNoParams() @Test public void testNoDataSchema() { - final InputSource inputSource = createInputSource(getTestRows(), null); + final InputSource inputSource = createInputSource(getTestRows()); final SamplerResponse response = inputSourceSampler.sample(inputSource, createInputFormat(), null, null); Assert.assertEquals(6, response.getNumRowsRead()); @@ -241,7 +241,7 @@ public void testNoDataSchema() @Test public void testNoDataSchemaNumRows() { - final InputSource inputSource = createInputSource(getTestRows(), null); + final InputSource inputSource = createInputSource(getTestRows()); final SamplerResponse response = inputSourceSampler.sample( inputSource, createInputFormat(), @@ -290,7 +290,7 @@ public void testMissingValueTimestampSpec() throws IOException final TimestampSpec timestampSpec = 
new TimestampSpec(null, null, DateTimes.of("1970")); final DimensionsSpec dimensionsSpec = new DimensionsSpec(null); final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, null, null, null); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null); @@ -399,7 +399,7 @@ public void testWithTimestampSpec() throws IOException final TimestampSpec timestampSpec = new TimestampSpec("t", null, null); final DimensionsSpec dimensionsSpec = new DimensionsSpec(null); final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, null, null, null); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null); @@ -499,7 +499,7 @@ public void testWithDimensionSpec() throws IOException ImmutableList.of(StringDimensionSchema.create("dim1"), StringDimensionSchema.create("met1")) ); final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, null, null, null); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null); @@ -605,7 +605,7 @@ public void testWithNoRollup() throws IOException granularitySpec, null ); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null); @@ -716,7 +716,7 @@ public void testWithRollup() throws IOException granularitySpec, null ); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null); @@ -799,7 +799,7 @@ public void testWithMoreRollup() throws IOException granularitySpec, null ); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null); @@ -870,7 +870,7 @@ public void testWithTransformsAutoDimensions() throws IOException granularitySpec, transformSpec ); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null); @@ -959,7 +959,7 @@ public void testWithTransformsDimensionsSpec() throws IOException granularitySpec, transformSpec ); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = 
createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null); @@ -1027,7 +1027,7 @@ public void testWithFilter() throws IOException granularitySpec, transformSpec ); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null); @@ -1114,7 +1114,7 @@ public void testIndexParseException() throws IOException "2019-04-22T12:00,foo2,,invalidNumber" : OBJECT_MAPPER.writeValueAsString(rawColumns4ParseExceptionRow)); - final InputSource inputSource = createInputSource(inputTestRows, dataSchema); + final InputSource inputSource = createInputSource(inputTestRows); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null); @@ -1346,7 +1346,7 @@ public void testRowLimiting() throws IOException granularitySpec, null ); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample( @@ -1381,7 +1381,7 @@ public void testMaxBytesInMemoryLimiting() throws IOException granularitySpec, null ); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample( @@ -1415,7 +1415,7 @@ public void testMaxClientResponseBytesLimiting() throws IOException granularitySpec, null ); - final InputSource inputSource = createInputSource(getTestRows(), dataSchema); + final InputSource inputSource = createInputSource(getTestRows()); final InputFormat inputFormat = createInputFormat(); SamplerResponse response = inputSourceSampler.sample( @@ -1526,7 +1526,7 @@ private Map getParserMap(InputRowParser parser) throws IOExcepti return OBJECT_MAPPER.readValue(OBJECT_MAPPER.writeValueAsBytes(parser), Map.class); } - private InputSource createInputSource(List rows, DataSchema dataSchema) + private InputSource createInputSource(List rows) { final String data = String.join("\n", rows); return new InlineInputSource(data); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java index d6bc0e2249f3..957c8a5522c4 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java @@ -29,7 +29,6 @@ import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.testing.utils.ITRetryUtil; import org.apache.druid.tests.TestNGGroup; -import org.apache.druid.timeline.DataSegment; import org.joda.time.Interval; import org.joda.time.chrono.ISOChronology; import org.testng.annotations.BeforeMethod; From 455f559b3ceb04734c2bb01f050d4464885ee5d0 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Mon, 27 Feb 2023 12:00:13 +0530 Subject: [PATCH 31/33] nit checkstyle --- .../org/apache/druid/testsEx/indexer/AbstractIndexerTest.java | 1 - 
.../java/org/apache/druid/tests/indexer/AbstractIndexerTest.java | 1 - 2 files changed, 2 deletions(-) diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java index 9032c7f4a604..37fc9ec651cd 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java @@ -44,7 +44,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.concurrent.Callable; public abstract class AbstractIndexerTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java index d613534bb511..8a382287d43b 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java @@ -47,7 +47,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.concurrent.Callable; public abstract class AbstractIndexerTest { From 62e6f7c2969456287596aa7d70bbb4b9490b83a1 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Mon, 27 Feb 2023 15:02:09 +0530 Subject: [PATCH 32/33] nit intellij inspections fix --- .../seekablestream/SeekableStreamSamplerSpec.java | 2 +- .../org/apache/druid/data/input/FirehoseFactory.java | 12 ------------ .../data/input/impl/prefetch/ObjectOpenFunction.java | 7 ------- 3 files changed, 1 insertion(+), 20 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java index 1c3ed81a0878..af850dd1050b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamSamplerSpec.java @@ -217,7 +217,7 @@ public void close() throws IOException } @Override - public CloseableIterator read(InputStats inputStats) throws IOException + public CloseableIterator read(InputStats inputStats) { return null; } diff --git a/processing/src/main/java/org/apache/druid/data/input/FirehoseFactory.java b/processing/src/main/java/org/apache/druid/data/input/FirehoseFactory.java index e1598519bff1..6ec8657d435a 100644 --- a/processing/src/main/java/org/apache/druid/data/input/FirehoseFactory.java +++ b/processing/src/main/java/org/apache/druid/data/input/FirehoseFactory.java @@ -73,18 +73,6 @@ default Firehose connect(T parser, @Nullable File temporaryDirectory) throws IOE return connect(parser); } - /** - * Initialization method that connects up the firehose. This method is intended for use by the sampler, and allows - * implementors to return a more efficient firehose, knowing that only a small number of rows will be read. 
- * - * @param parser an input row parser - * @param temporaryDirectory a directory where temporary files are stored - */ - default Firehose connectForSampler(T parser, @Nullable File temporaryDirectory) throws IOException, ParseException - { - return connect(parser, temporaryDirectory); - } - @SuppressWarnings("unused") @JsonIgnore default boolean isSplittable() diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/prefetch/ObjectOpenFunction.java b/processing/src/main/java/org/apache/druid/data/input/impl/prefetch/ObjectOpenFunction.java index 6af64fb18cf0..6941e82df981 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/prefetch/ObjectOpenFunction.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/prefetch/ObjectOpenFunction.java @@ -19,7 +19,6 @@ package org.apache.druid.data.input.impl.prefetch; -import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -31,10 +30,4 @@ default InputStream open(T object, long start) throws IOException { return open(object); } - - default InputStream open(T object, File outFile) throws IOException - { - return open(object); - } - } From ec135781a521736ec5cb4aa958f99e4e2ac372a7 Mon Sep 17 00:00:00 2001 From: "tejaswini.bandlamudi" Date: Mon, 27 Feb 2023 15:58:18 +0530 Subject: [PATCH 33/33] revert fix flaky BatchIndex IT failures. --- .../testsEx/indexer/AbstractITBatchIndexTest.java | 3 --- .../druid/testsEx/indexer/AbstractIndexerTest.java | 14 +++++++++----- .../tests/indexer/AbstractITBatchIndexTest.java | 3 --- .../druid/tests/indexer/AbstractIndexerTest.java | 14 +++++++++----- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractITBatchIndexTest.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractITBatchIndexTest.java index fb02d8f8dad6..2edb13a04999 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractITBatchIndexTest.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractITBatchIndexTest.java @@ -456,9 +456,6 @@ protected void submitTaskAndWait( Pair segmentAvailabilityConfirmationPair ) { - // Wait for any existing kill tasks to complete before submitting new index task otherwise - // kill tasks can fail with interval lock revoked. - waitForAllTasksToCompleteForDataSource(dataSourceName); final List oldVersions = waitForNewVersion ? 
coordinator.getAvailableSegments(dataSourceName) : null; long startSubTaskCount = -1; diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java index 37fc9ec651cd..5e53330e705b 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/indexer/AbstractIndexerTest.java @@ -44,6 +44,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.concurrent.Callable; public abstract class AbstractIndexerTest { @@ -102,9 +103,6 @@ protected void unloadAndKillData(final String dataSource) protected String submitIndexTask(String indexTask, final String fullDatasourceName) throws Exception { - // Wait for any existing kill tasks to complete before submitting new index task otherwise - // kill tasks can fail with interval lock revoked. - waitForAllTasksToCompleteForDataSource(fullDatasourceName); String taskSpec = getResourceAsString(indexTask); taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); taskSpec = StringUtils.replace( @@ -137,8 +135,14 @@ private void unloadAndKillData(final String dataSource, String start, String end) Interval interval = Intervals.of(start + "/" + end); coordinator.unloadSegmentsForDataSource(dataSource); ITRetryUtil.retryUntilFalse( - () -> coordinator.areSegmentsLoaded(dataSource), - "Segment Unloading" + new Callable<Boolean>() + { + @Override + public Boolean call() + { + return coordinator.areSegmentsLoaded(dataSource); + } + }, "Segment Unloading" ); coordinator.deleteSegmentsDataSource(dataSource, interval); waitForAllTasksToCompleteForDataSource(dataSource); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java index 0f160580db51..8bcfaa0df134 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java @@ -340,9 +340,6 @@ protected void submitTaskAndWait( Pair segmentAvailabilityConfirmationPair ) { - // Wait for any existing kill tasks to complete before submitting new index task otherwise - // kill tasks can fail with interval lock revoked. - waitForAllTasksToCompleteForDataSource(dataSourceName); final List oldVersions = waitForNewVersion ? 
coordinator.getAvailableSegments(dataSourceName) : null; long startSubTaskCount = -1; diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java index 8a382287d43b..1bb6e8e12c8d 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java @@ -47,6 +47,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.concurrent.Callable; public abstract class AbstractIndexerTest { @@ -109,9 +110,6 @@ protected void unloadAndKillData(final String dataSource) protected String submitIndexTask(String indexTask, final String fullDatasourceName) throws Exception { - // Wait for any existing kill tasks to complete before submitting new index task otherwise - // kill tasks can fail with interval lock revoked. - waitForAllTasksToCompleteForDataSource(fullDatasourceName); String taskSpec = getResourceAsString(indexTask); taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); taskSpec = StringUtils.replace( @@ -144,8 +142,14 @@ private void unloadAndKillData(final String dataSource, String start, String end) Interval interval = Intervals.of(start + "/" + end); coordinator.unloadSegmentsForDataSource(dataSource); ITRetryUtil.retryUntilFalse( - () -> coordinator.areSegmentsLoaded(dataSource), - "Segment Unloading" + new Callable<Boolean>() + { + @Override + public Boolean call() + { + return coordinator.areSegmentsLoaded(dataSource); + } + }, "Segment Unloading" ); coordinator.deleteSegmentsDataSource(dataSource, interval); waitForAllTasksToCompleteForDataSource(dataSource);
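
Patch 33 above restores the anonymous Callable<Boolean> form of the segment-unloading retry that an earlier cleanup had converted to a lambda. For readers unfamiliar with the helper, the following minimal Java sketch shows the retry-until-false pattern these integration tests rely on; the two-argument shape mirrors the ITRetryUtil.retryUntilFalse(condition, label) calls in the hunks above, but the class name, retry bound, poll interval, and method body are illustrative assumptions, not Druid's actual implementation.

    import java.util.concurrent.Callable;

    // Hypothetical, simplified stand-in for the retry helper used by the ITs above.
    public final class RetryUntilFalseSketch
    {
      private static final int MAX_ATTEMPTS = 30;       // assumed retry bound
      private static final long SLEEP_MILLIS = 10_000L; // assumed poll interval

      // Polls the condition until it turns false, e.g. until all segments are unloaded.
      public static void retryUntilFalse(Callable<Boolean> condition, String label) throws Exception
      {
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
          if (!condition.call()) {
            return;
          }
          System.out.printf("[%s] condition still true on attempt %d, retrying%n", label, attempt);
          Thread.sleep(SLEEP_MILLIS);
        }
        throw new IllegalStateException(label + " was still true after " + MAX_ATTEMPTS + " attempts");
      }
    }

Against a signature like this, the lambda () -> coordinator.areSegmentsLoaded(dataSource) and the anonymous Callable<Boolean> restored by the revert are interchangeable at the call site; the revert trades lambda syntax back for the explicit class to match the surrounding test style, without changing behavior.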