From d9e7f9bd2b5cd1b2e8df07b66ff9b7aea1e41443 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Wed, 9 Dec 2020 08:07:19 -0600 Subject: [PATCH 01/28] Add ability to wait for segment availability for batch jobs --- .../apache/druid/indexer/IngestionState.java | 1 + docs/ingestion/hadoop.md | 1 + docs/ingestion/native-batch.md | 2 + .../MaterializedViewSupervisorSpec.java | 3 +- .../indexing/kafka/KafkaIndexTaskTest.java | 4 +- .../kinesis/KinesisIndexTaskTest.java | 4 +- .../druid/indexer/HadoopTuningConfig.java | 26 ++- .../indexer/BatchDeltaIngestionTest.java | 1 + .../DetermineHashedPartitionsJobTest.java | 1 + .../indexer/DeterminePartitionsJobTest.java | 1 + .../indexer/HadoopDruidIndexerConfigTest.java | 1 + .../druid/indexer/HadoopTuningConfigTest.java | 2 + .../druid/indexer/IndexGeneratorJobTest.java | 1 + .../apache/druid/indexer/JobHelperTest.java | 1 + .../indexer/path/GranularityPathSpecTest.java | 1 + ...IngestionStatsAndErrorsTaskReportData.java | 25 ++- .../druid/indexing/common/TaskToolbox.java | 2 +- .../indexing/common/TaskToolboxFactory.java | 2 +- .../common/task/AbstractBatchIndexTask.java | 74 +++++++++ .../AppenderatorDriverRealtimeIndexTask.java | 3 +- .../indexing/common/task/CompactionTask.java | 3 +- .../indexing/common/task/HadoopIndexTask.java | 29 +++- .../druid/indexing/common/task/IndexTask.java | 61 +++++-- .../parallel/ParallelIndexSupervisorTask.java | 88 +++++++++- .../parallel/ParallelIndexTuningConfig.java | 10 +- .../SeekableStreamIndexTaskRunner.java | 3 +- .../indexing/common/TaskToolboxTest.java | 2 +- ...penderatorDriverRealtimeIndexTaskTest.java | 4 +- .../ClientCompactionTaskQuerySerdeTest.java | 1 + .../common/task/CompactionTaskRunTest.java | 1 + .../common/task/CompactionTaskTest.java | 6 + .../common/task/IndexTaskSerdeTest.java | 18 ++- .../indexing/common/task/IndexTaskTest.java | 12 +- .../common/task/RealtimeIndexTaskTest.java | 4 +- .../common/task/TaskReportSerdeTest.java | 3 +- 
.../indexing/common/task/TaskSerdeTest.java | 4 +- ...stractParallelIndexSupervisorTaskTest.java | 2 + .../ParallelIndexSupervisorTaskKillTest.java | 1 + ...rallelIndexSupervisorTaskResourceTest.java | 1 + .../ParallelIndexSupervisorTaskSerdeTest.java | 1 + .../ParallelIndexSupervisorTaskTest.java | 1 + .../parallel/ParallelIndexTestingFactory.java | 3 +- .../ParallelIndexTuningConfigTest.java | 7 + .../SinglePhaseParallelIndexingTest.java | 1 + .../IngestSegmentFirehoseFactoryTest.java | 2 +- ...estSegmentFirehoseFactoryTimelineTest.java | 2 +- .../indexing/overlord/TaskLifecycleTest.java | 8 +- .../worker/WorkerTaskManagerTest.java | 2 +- .../worker/WorkerTaskMonitorTest.java | 2 +- .../clients/OverlordResourceTestClient.java | 25 +++ .../AbstractAzureInputHadoopIndexTest.java | 6 +- .../AbstractGcsInputHadoopIndexTest.java | 6 +- .../AbstractS3InputHadoopIndexTest.java | 6 +- .../ITAzureInputToAzureHadoopIndexTest.java | 3 +- .../ITAzureInputToHdfsHadoopIndexTest.java | 3 +- .../ITGcsInputToGcsHadoopIndexTest.java | 3 +- .../ITGcsInputToHdfsHadoopIndexTest.java | 3 +- .../druid/tests/hadoop/ITHadoopIndexTest.java | 151 +++++++++++++++++- .../ITS3InputToHdfsHadoopIndexTest.java | 3 +- .../hadoop/ITS3InputToS3HadoopIndexTest.java | 3 +- ...ractAzureInputSourceParallelIndexTest.java | 8 +- ...stractGcsInputSourceParallelIndexTest.java | 8 +- ...tractHdfsInputSourceParallelIndexTest.java | 9 +- .../indexer/AbstractITBatchIndexTest.java | 70 ++++++-- ...ractLocalInputSourceParallelIndexTest.java | 17 +- ...stractOssInputSourceParallelIndexTest.java | 8 +- ...bstractS3InputSourceParallelIndexTest.java | 8 +- .../tests/indexer/ITAppendBatchIndexTest.java | 11 +- .../ITAzureToAzureParallelIndexTest.java | 2 +- .../ITAzureToHdfsParallelIndexTest.java | 2 +- .../ITBestEffortRollupParallelIndexTest.java | 140 +++++++++++++++- .../ITCombiningFirehoseFactoryIndexTest.java | 7 +- ...CombiningInputSourceParallelIndexTest.java | 7 +- .../indexer/ITGcsToGcsParallelIndexTest.java 
| 2 +- .../indexer/ITGcsToHdfsParallelIndexTest.java | 2 +- .../ITHdfsToAzureParallelIndexTest.java | 2 +- .../indexer/ITHdfsToGcsParallelIndexTest.java | 2 +- .../ITHdfsToHdfsParallelIndexTest.java | 6 +- .../indexer/ITHdfsToS3ParallelIndexTest.java | 2 +- .../tests/indexer/ITHttpInputSourceTest.java | 4 +- .../druid/tests/indexer/ITIndexerTest.java | 138 ++++++++++++++-- .../ITLocalInputSourceAllInputFormatTest.java | 15 +- .../indexer/ITOssToOssParallelIndexTest.java | 2 +- .../ITPerfectRollupParallelIndexTest.java | 11 +- .../ITS3OverrideCredentialsIndexTest.java | 4 +- .../indexer/ITS3ToHdfsParallelIndexTest.java | 2 +- .../indexer/ITS3ToS3ParallelIndexTest.java | 2 +- .../tests/indexer/ITSqlInputSourceTest.java | 4 +- .../indexer/ITTestCoordinatorPausedTest.java | 4 +- .../druid/tests/indexer/ITTransformTest.java | 16 +- .../hadoop/wikipedia_hadoop_index_task.json | 1 + .../hadoop/wikipedia_hadoop_reindex_task.json | 1 + .../indexer/wikipedia_index_task.json | 3 +- .../wikipedia_parallel_index_task.json | 3 +- ...oordinatorBasedSegmentHandoffNotifier.java | 2 +- ...atorBasedSegmentHandoffNotifierConfig.java | 2 +- ...torBasedSegmentHandoffNotifierFactory.java | 2 +- .../SegmentHandoffNotifier.java | 2 +- .../SegmentHandoffNotifierFactory.java | 2 +- .../druid/segment/indexing/TuningConfig.java | 1 + .../appenderator/AppenderatorPlumber.java | 2 +- .../AppenderatorPlumberSchool.java | 2 +- .../StreamAppenderatorDriver.java | 4 +- .../NoopSegmentHandoffNotifierFactory.java | 2 + .../realtime/plumber/RealtimePlumber.java | 1 + .../plumber/RealtimePlumberSchool.java | 1 + ...inatorBasedSegmentHandoffNotifierTest.java | 2 +- .../appenderator/AppenderatorPlumberTest.java | 4 +- .../StreamAppenderatorDriverTest.java | 4 +- .../plumber/RealtimePlumberSchoolTest.java | 2 + .../java/org/apache/druid/cli/CliPeon.java | 6 +- website/.spelling | 1 + 112 files changed, 1045 insertions(+), 170 deletions(-) rename 
server/src/main/java/org/apache/druid/segment/{realtime/plumber => handoff}/CoordinatorBasedSegmentHandoffNotifier.java (99%) rename server/src/main/java/org/apache/druid/segment/{realtime/plumber => handoff}/CoordinatorBasedSegmentHandoffNotifierConfig.java (95%) rename server/src/main/java/org/apache/druid/segment/{realtime/plumber => handoff}/CoordinatorBasedSegmentHandoffNotifierFactory.java (96%) rename server/src/main/java/org/apache/druid/segment/{realtime/plumber => handoff}/SegmentHandoffNotifier.java (97%) rename server/src/main/java/org/apache/druid/segment/{realtime/plumber => handoff}/SegmentHandoffNotifierFactory.java (94%) rename server/src/test/java/org/apache/druid/segment/{realtime/plumber => handoff}/CoordinatorBasedSegmentHandoffNotifierTest.java (99%) diff --git a/core/src/main/java/org/apache/druid/indexer/IngestionState.java b/core/src/main/java/org/apache/druid/indexer/IngestionState.java index 26d46166674e..ddbd0a995776 100644 --- a/core/src/main/java/org/apache/druid/indexer/IngestionState.java +++ b/core/src/main/java/org/apache/druid/indexer/IngestionState.java @@ -24,5 +24,6 @@ public enum IngestionState NOT_STARTED, DETERMINE_PARTITIONS, BUILD_SEGMENTS, + SEGMENT_AVAILABILITY_WAIT, COMPLETED } diff --git a/docs/ingestion/hadoop.md b/docs/ingestion/hadoop.md index 088bdceee76d..94654b950dbc 100644 --- a/docs/ingestion/hadoop.md +++ b/docs/ingestion/hadoop.md @@ -336,6 +336,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon |logParseExceptions|Boolean|If true, log an error message when a parsing exception occurs, containing information about the row where the error occurred.|no(default = false)| |maxParseExceptions|Integer|The maximum number of parse exceptions that can occur before the task halts ingestion and fails. 
Overrides `ignoreInvalidRows` if `maxParseExceptions` is defined.|no(default = unlimited)| |useYarnRMJobStatusFallback|Boolean|If the Hadoop jobs created by the indexing task are unable to retrieve their completion status from the JobHistory server, and this parameter is true, the indexing task will try to fetch the application status from `http:///ws/v1/cluster/apps/`, where `` is the value of `yarn.resourcemanager.webapp.address` in your Hadoop configuration. This flag is intended as a fallback for cases where an indexing task's jobs succeed, but the JobHistory server is unavailable, causing the indexing task to fail because it cannot determine the job statuses.|no (default = true)| +|awaitSegmentAvailabilityTimeoutMillis|Long|Milliseconds to wait for the newly indexed segments to become available for query after ingestion completes. If `<= 0`, no wait will occur. If `> 0`, the task will wait for the Coordinator to indicate that the new segments are available for querying. If the timeout expires, the task will exit as successful, but the segments were not confirmed to have become available for query.|no (default = 0)| ### `jobProperties` diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index e87197a05913..1f781b54c8bb 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -171,6 +171,7 @@ A sample task is shown below: |id|The task ID. If this is not explicitly specified, Druid generates the task ID using task type, data source name, interval, and date-time stamp. |no| |spec|The ingestion spec including the data schema, IOConfig, and TuningConfig. See below for more details. |yes| |context|Context containing various task configuration parameters. See below for more details.|no| +|awaitSegmentAvailabilityTimeoutMillis|Long|Milliseconds to wait for the newly indexed segments to become available for query after ingestion completes. If `<= 0`, no wait will occur. 
If `> 0`, the task will wait for the Coordinator to indicate that the new segments are available for querying. If the timeout expires, the task will exit as successful, but the segments were not confirmed to have become available for query.|no (default = 0)| ### `dataSchema` @@ -222,6 +223,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon |taskStatusCheckPeriodMs|Polling period in milliseconds to check running task statuses.|1000|no| |chatHandlerTimeout|Timeout for reporting the pushed segments in worker tasks.|PT10S|no| |chatHandlerNumRetries|Retries for reporting the pushed segments in worker tasks.|5|no| +|awaitSegmentAvailabilityTimeoutMillis|Long|Milliseconds to wait for the newly indexed segments to become available for query after ingestion completes. If `<= 0`, no wait will occur. If `> 0`, the task will wait for the Coordinator to indicate that the new segments are available for querying. If the timeout expires, the task will exit as successful, but the segments were not confirmed to have become available for query.|no (default = 0)| ### Split Hint Spec diff --git a/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorSpec.java b/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorSpec.java index db63a7316d1f..be21a6558fff 100644 --- a/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorSpec.java +++ b/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorSpec.java @@ -199,7 +199,8 @@ public HadoopIndexTask createTask(Interval interval, String version, List allowedHadoopPrefix, final @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, final @JsonProperty("maxParseExceptions") @Nullable 
Integer maxParseExceptions, - final @JsonProperty("useYarnRMJobStatusFallback") @Nullable Boolean useYarnRMJobStatusFallback + final @JsonProperty("useYarnRMJobStatusFallback") @Nullable Boolean useYarnRMJobStatusFallback, + final @JsonProperty("awaitSegmentAvailabilityTimeoutMillis") @Nullable Long awaitSegmentAvailabilityTimeoutMillis ) { this.workingPath = workingPath; @@ -176,6 +179,12 @@ public HadoopTuningConfig( this.logParseExceptions = logParseExceptions == null ? TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; this.useYarnRMJobStatusFallback = useYarnRMJobStatusFallback == null ? true : useYarnRMJobStatusFallback; + + if (awaitSegmentAvailabilityTimeoutMillis == null || awaitSegmentAvailabilityTimeoutMillis < 0) { + this.awaitSegmentAvailabilityTimeoutMillis = DEFAULT_AWAIT_SEGMENT_AVAILABILITY_TIMEOUT_MILLIS; + } else { + this.awaitSegmentAvailabilityTimeoutMillis = awaitSegmentAvailabilityTimeoutMillis; + } } @Nullable @@ -334,6 +343,12 @@ public boolean isUseYarnRMJobStatusFallback() return useYarnRMJobStatusFallback; } + @JsonProperty + public long getAwaitSegmentAvailabilityTimeoutMillis() + { + return awaitSegmentAvailabilityTimeoutMillis; + } + public HadoopTuningConfig withWorkingPath(String path) { return new HadoopTuningConfig( @@ -361,7 +376,8 @@ public HadoopTuningConfig withWorkingPath(String path) allowedHadoopPrefix, logParseExceptions, maxParseExceptions, - useYarnRMJobStatusFallback + useYarnRMJobStatusFallback, + awaitSegmentAvailabilityTimeoutMillis ); } @@ -392,7 +408,8 @@ public HadoopTuningConfig withVersion(String ver) allowedHadoopPrefix, logParseExceptions, maxParseExceptions, - useYarnRMJobStatusFallback + useYarnRMJobStatusFallback, + awaitSegmentAvailabilityTimeoutMillis ); } @@ -423,7 +440,8 @@ public HadoopTuningConfig withShardSpecs(Map> specs allowedHadoopPrefix, logParseExceptions, maxParseExceptions, - useYarnRMJobStatusFallback + useYarnRMJobStatusFallback, + awaitSegmentAvailabilityTimeoutMillis ); } } 
diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java index 644ae240262d..404d5ed67b8f 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java @@ -481,6 +481,7 @@ private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig( null, null, null, + null, null ) ) diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java index a127d519cc2b..1bb672cb36c4 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java @@ -235,6 +235,7 @@ public DetermineHashedPartitionsJobTest( null, null, null, + null, null ) ); diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java index 047c1049c0ff..b73b47cacc1b 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java @@ -344,6 +344,7 @@ public DeterminePartitionsJobTest( null, null, null, + null, null ) ) diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java index 9eabd4187a32..86e35d08ab9d 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java @@ -257,6 
+257,7 @@ HadoopIngestionSpec build() null, null, null, + null, null ); diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java index 277f590cecc7..230c6c0a949d 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java @@ -63,6 +63,7 @@ public void testSerde() throws Exception null, null, null, + null, null ); @@ -86,6 +87,7 @@ public void testSerde() throws Exception Assert.assertEquals(0, actual.getNumBackgroundPersistThreads()); Assert.assertEquals(true, actual.isForceExtendableShardSpecs()); Assert.assertEquals(true, actual.isUseExplicitVersion()); + Assert.assertEquals(0, actual.getAwaitSegmentAvailabilityTimeoutMillis()); } public static T jsonReadWriteRead(String s, Class klass) diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java index 8d59554909b6..9c4a65f4c015 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java @@ -547,6 +547,7 @@ public void setUp() throws Exception null, null, null, + null, null ) ) diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java index 3188762bc0b0..9115eddfc086 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java @@ -187,6 +187,7 @@ public void setup() throws Exception null, null, null, + null, null ) ) diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java 
b/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java index 68505be91cf1..8fc3e6c28084 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java @@ -79,6 +79,7 @@ public class GranularityPathSpecTest null, null, null, + null, null ); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java index 3b7b780561ef..bb149e54b50f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java @@ -41,17 +41,22 @@ public class IngestionStatsAndErrorsTaskReportData @Nullable private String errorMsg; + @JsonProperty + private boolean segmentAvailabilityConfirmed; + public IngestionStatsAndErrorsTaskReportData( @JsonProperty("ingestionState") IngestionState ingestionState, @JsonProperty("unparseableEvents") Map unparseableEvents, @JsonProperty("rowStats") Map rowStats, - @JsonProperty("errorMsg") @Nullable String errorMsg + @JsonProperty("errorMsg") @Nullable String errorMsg, + @JsonProperty("segmentAvailabilityConfirmed") boolean segmentAvailabilityConfirmed ) { this.ingestionState = ingestionState; this.unparseableEvents = unparseableEvents; this.rowStats = rowStats; this.errorMsg = errorMsg; + this.segmentAvailabilityConfirmed = segmentAvailabilityConfirmed; } @JsonProperty @@ -79,6 +84,12 @@ public String getErrorMsg() return errorMsg; } + @JsonProperty + public boolean isSegmentAvailabilityConfirmed() + { + return segmentAvailabilityConfirmed; + } + public static IngestionStatsAndErrorsTaskReportData getPayloadFromTaskReports( Map taskReports ) @@ -100,13 +111,20 @@ 
public boolean equals(Object o) return getIngestionState() == that.getIngestionState() && Objects.equals(getUnparseableEvents(), that.getUnparseableEvents()) && Objects.equals(getRowStats(), that.getRowStats()) && - Objects.equals(getErrorMsg(), that.getErrorMsg()); + Objects.equals(getErrorMsg(), that.getErrorMsg()) && + Objects.equals(isSegmentAvailabilityConfirmed(), that.isSegmentAvailabilityConfirmed()); } @Override public int hashCode() { - return Objects.hash(getIngestionState(), getUnparseableEvents(), getRowStats(), getErrorMsg()); + return Objects.hash( + getIngestionState(), + getUnparseableEvents(), + getRowStats(), + getErrorMsg(), + isSegmentAvailabilityConfirmed() + ); } @Override @@ -117,6 +135,7 @@ public String toString() ", unparseableEvents=" + unparseableEvents + ", rowStats=" + rowStats + ", errorMsg='" + errorMsg + '\'' + + ", segmentAvailabilityConfirmed=" + segmentAvailabilityConfirmed + '}'; } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolbox.java index cbdfc68975da..bc1e00b3d04e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolbox.java @@ -49,6 +49,7 @@ import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.join.JoinableFactory; import org.apache.druid.segment.loading.DataSegmentArchiver; @@ -59,7 +60,6 @@ import org.apache.druid.segment.loading.SegmentLoadingException; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.firehose.ChatHandlerProvider; -import 
org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.server.coordination.DataSegmentServerAnnouncer; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolboxFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolboxFactory.java index 2ef1f88ac0a7..5112fa9f4eb5 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolboxFactory.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolboxFactory.java @@ -47,6 +47,7 @@ import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.join.JoinableFactory; import org.apache.druid.segment.loading.DataSegmentArchiver; @@ -55,7 +56,6 @@ import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.firehose.ChatHandlerProvider; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.server.coordination.DataSegmentServerAnnouncer; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index a316ce8d7867..2159fe0888c1 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -49,7 +49,9 @@ import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.granularity.GranularityType; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.indexing.DataSchema; @@ -74,6 +76,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.ExecutorService; import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; @@ -89,6 +92,9 @@ public abstract class AbstractBatchIndexTask extends AbstractTask { private static final Logger log = new Logger(AbstractBatchIndexTask.class); + private final Object availabilityCondition = new Object(); + protected boolean segmentAvailabilityConfirmationCompleted = false; + @GuardedBy("this") private final TaskResourceCleaner resourceCloserOnAbnormalExit = new TaskResourceCleaner(); @@ -576,6 +582,74 @@ protected static List findInputSegments( } } + /** + * Wait for segments to become available on the cluster. If waitTimeout is reached, give up on waiting. This is a + * QoS method that can be used to make Batch Ingest tasks wait to finish until their ingested data is available on + * the cluster. Doing so gives an end user assurance that a Successful task status means their data is available + * for querying. + * + * @param toolbox {@link TaskToolbox} object for assisting with task work. + * @param segmentsToWaitFor {@link List} of segments to wait for availability.
+ * @param waitTimeout Millis to wait before giving up + * @return True if all segments became available, otherwise False. + */ + protected boolean waitForSegmentAvailability(TaskToolbox toolbox, ExecutorService exec, List segmentsToWaitFor, long waitTimeout) + { + if (segmentsToWaitFor.isEmpty()) { + log.info("Asked to wait for segments to be available, but I wasn't provided with any segments!?"); + } + log.info("Waiting for segments to be loaded by the cluster..."); + + SegmentHandoffNotifier notifier = toolbox.getSegmentHandoffNotifierFactory() + .createSegmentHandoffNotifier(segmentsToWaitFor.get(0).getDataSource()); + + notifier.start(); + for (DataSegment s : segmentsToWaitFor) { + notifier.registerSegmentHandoffCallback( + new SegmentDescriptor(s.getInterval(), s.getVersion(), s.getShardSpec().getPartitionNum()), + exec, + () -> { + log.info( + "Confirmed availability for [%s]. Removing from list of segments to wait for", + s.getId() + ); + synchronized (segmentsToWaitFor) { + segmentsToWaitFor.remove(s); + } + synchronized (availabilityCondition) { + availabilityCondition.notifyAll(); + } + } + ); + } + + long forceEndWaitTime = System.currentTimeMillis() + waitTimeout; + try { + synchronized (availabilityCondition) { + while (!segmentsToWaitFor.isEmpty()) { + log.info("[%d] segments still unavailable.", segmentsToWaitFor.size()); + long curr = System.currentTimeMillis(); + if (forceEndWaitTime - curr > 0) { + availabilityCondition.wait(forceEndWaitTime - curr); + } else { + log.warn("Segment Availability Wait Timeout. 
[%d] segments might not have become available for " + + "query", + segmentsToWaitFor.size() + ); + return false; + } + } + } + } + catch (InterruptedException e) { + throw new RuntimeException(e); + } + finally { + notifier.close(); + } + return true; + } + private static class LockGranularityDetermineResult { private final LockGranularity lockGranularity; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index 064d2caa3350..9729d37eec38 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -584,7 +584,8 @@ private Map getTaskCompletionReports() ingestionState, getTaskCompletionUnparseableEvents(), getTaskCompletionRowStats(), - errorMsg + errorMsg, + false ) ) ); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java index e773b6f95d33..45a5b2c6cd58 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java @@ -243,7 +243,8 @@ static ParallelIndexTuningConfig getTuningConfig(TuningConfig tuningConfig) null, indexTuningConfig.isLogParseExceptions(), indexTuningConfig.getMaxParseExceptions(), - indexTuningConfig.getMaxSavedParseExceptions() + indexTuningConfig.getMaxSavedParseExceptions(), + indexTuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() ); } else { throw new ISE( diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java index 1236e359bd3d..04a08c82a4d3 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java @@ -53,6 +53,7 @@ import org.apache.druid.indexing.hadoop.OverlordActionBasedUsedSegmentsRetriever; import org.apache.druid.java.util.common.JodaUtils; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.incremental.RowIngestionMeters; @@ -79,10 +80,12 @@ import java.io.File; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.SortedSet; +import java.util.concurrent.ExecutorService; public class HadoopIndexTask extends HadoopTask implements ChatHandler { @@ -442,8 +445,29 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception ); if (buildSegmentsStatus.getDataSegments() != null) { - ingestionState = IngestionState.COMPLETED; toolbox.publishSegments(buildSegmentsStatus.getDataSegments()); + + // Try to wait for segments to be loaded by the cluster if the tuning config specifies a non-zero value + // for awaitSegmentAvailabilityTimeoutMillis + if (spec.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() > 0) { + ingestionState = IngestionState.SEGMENT_AVAILABILITY_WAIT; + ArrayList segmentsToWaitFor = new ArrayList<>(buildSegmentsStatus.getDataSegments()); + ExecutorService availabilityExec = + Execs.singleThreaded("HadoopTaskAvailabilityWaitExec"); + try { + segmentAvailabilityConfirmationCompleted = waitForSegmentAvailability( + toolbox, + availabilityExec, + 
segmentsToWaitFor, + spec.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() + ); + } + finally { + availabilityExec.shutdownNow(); + } + } + + ingestionState = IngestionState.COMPLETED; toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports()); return TaskStatus.success(getId()); } else { @@ -536,7 +560,8 @@ private Map getTaskCompletionReports() ingestionState, null, getTaskCompletionRowStats(), - errorMsg + errorMsg, + segmentAvailabilityConfirmationCompleted ) ) ); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 69484b54f13a..7361f88ac9a8 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -69,6 +69,7 @@ import org.apache.druid.java.util.common.JodaUtils; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.UOE; +import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.java.util.common.logger.Logger; @@ -125,6 +126,7 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.function.Function; @@ -522,7 +524,8 @@ private Map getTaskCompletionReports() ingestionState, getTaskCompletionUnparseableEvents(), getTaskCompletionRowStats(), - errorMsg + errorMsg, + segmentAvailabilityConfirmationCompleted ) ) ); @@ -909,6 +912,26 @@ private TaskStatus generateAndPublishSegments( awaitPublish(driver.publishAll(inputSegments, publisher, annotateFunction), pushTimeout); 
appenderator.close(); + // Try to wait for segments to be loaded by the cluster if the tuning config specifies a non-zero value + // for awaitSegmentAvailabilityTimeoutMillis + if (tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() > 0) { + ingestionState = IngestionState.SEGMENT_AVAILABILITY_WAIT; + ArrayList segmentsToWaitFor = new ArrayList<>(published.getSegments()); + ExecutorService availabilityExec = + Execs.singleThreaded("IndexTaskAvailabilityWaitExec"); + try { + segmentAvailabilityConfirmationCompleted = waitForSegmentAvailability( + toolbox, + availabilityExec, + segmentsToWaitFor, + tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() + ); + } + finally { + availabilityExec.shutdownNow(); + } + } + ingestionState = IngestionState.COMPLETED; if (published == null) { log.error("Failed to publish segments, aborting!"); @@ -1143,6 +1166,7 @@ public static class IndexTuningConfig implements AppenderatorConfig private final boolean logParseExceptions; private final int maxParseExceptions; private final int maxSavedParseExceptions; + private final long awaitSegmentAvailabilityTimeoutMillis; @Nullable private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; @@ -1210,7 +1234,8 @@ public IndexTuningConfig( SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, - @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions, + @JsonProperty("awaitSegmentAvailabilityTimeoutMillis") @Nullable Long awaitSegmentAvailabilityTimeoutMillis ) { this( @@ -1235,7 +1260,8 @@ public IndexTuningConfig( segmentWriteOutMediumFactory, logParseExceptions, maxParseExceptions, - maxSavedParseExceptions + maxSavedParseExceptions, + awaitSegmentAvailabilityTimeoutMillis ); Preconditions.checkArgument( @@ 
-1246,7 +1272,7 @@ public IndexTuningConfig( private IndexTuningConfig() { - this(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null); + this(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null); } private IndexTuningConfig( @@ -1264,7 +1290,8 @@ private IndexTuningConfig( @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, @Nullable Boolean logParseExceptions, @Nullable Integer maxParseExceptions, - @Nullable Integer maxSavedParseExceptions + @Nullable Integer maxSavedParseExceptions, + @Nullable Long awaitSegmentAvailabilityTimeoutMillis ) { this.appendableIndexSpec = appendableIndexSpec == null ? DEFAULT_APPENDABLE_INDEX : appendableIndexSpec; @@ -1300,6 +1327,11 @@ private IndexTuningConfig( this.logParseExceptions = logParseExceptions == null ? TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; + if (awaitSegmentAvailabilityTimeoutMillis == null || awaitSegmentAvailabilityTimeoutMillis < 0) { + this.awaitSegmentAvailabilityTimeoutMillis = DEFAULT_AWAIT_SEGMENT_AVAILABILITY_TIMEOUT_MILLIS; + } else { + this.awaitSegmentAvailabilityTimeoutMillis = awaitSegmentAvailabilityTimeoutMillis; + } } @Override @@ -1320,7 +1352,8 @@ public IndexTuningConfig withBasePersistDirectory(File dir) segmentWriteOutMediumFactory, logParseExceptions, maxParseExceptions, - maxSavedParseExceptions + maxSavedParseExceptions, + awaitSegmentAvailabilityTimeoutMillis ); } @@ -1341,7 +1374,8 @@ public IndexTuningConfig withPartitionsSpec(PartitionsSpec partitionsSpec) segmentWriteOutMediumFactory, logParseExceptions, maxParseExceptions, - maxSavedParseExceptions + maxSavedParseExceptions, + awaitSegmentAvailabilityTimeoutMillis ); } @@ -1519,6 +1553,12 @@ public Period getIntermediatePersistPeriod() return new Period(Integer.MAX_VALUE); // intermediate persist doesn't make much sense for batch jobs } + @JsonProperty + public long getAwaitSegmentAvailabilityTimeoutMillis() + { + 
return awaitSegmentAvailabilityTimeoutMillis; + } + @Override public boolean equals(Object o) { @@ -1543,7 +1583,8 @@ public boolean equals(Object o) Objects.equals(indexSpec, that.indexSpec) && Objects.equals(indexSpecForIntermediatePersists, that.indexSpecForIntermediatePersists) && Objects.equals(basePersistDirectory, that.basePersistDirectory) && - Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory); + Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory) && + Objects.equals(awaitSegmentAvailabilityTimeoutMillis, that.awaitSegmentAvailabilityTimeoutMillis); } @Override @@ -1564,7 +1605,8 @@ public int hashCode() logParseExceptions, maxParseExceptions, maxSavedParseExceptions, - segmentWriteOutMediumFactory + segmentWriteOutMediumFactory, + awaitSegmentAvailabilityTimeoutMillis ); } @@ -1586,6 +1628,7 @@ public String toString() ", maxParseExceptions=" + maxParseExceptions + ", maxSavedParseExceptions=" + maxSavedParseExceptions + ", segmentWriteOutMediumFactory=" + segmentWriteOutMediumFactory + + ", awaitSegmentAvailabilityTimeoutMillis=" + awaitSegmentAvailabilityTimeoutMillis + '}'; } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 3d272b43813c..0f346e91360e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -36,14 +36,18 @@ import org.apache.druid.data.input.FiniteFirehoseFactory; import org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputSource; +import org.apache.druid.indexer.IngestionState; import org.apache.druid.indexer.TaskState; import 
org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.common.Counters; +import org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; +import org.apache.druid.indexing.common.TaskReport; import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.actions.LockListAction; import org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction; @@ -66,6 +70,7 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.indexing.TuningConfig; @@ -111,6 +116,7 @@ import java.util.Set; import java.util.SortedSet; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiFunction; @@ -491,6 +497,31 @@ private boolean isParallelMode() return isParallelMode(baseInputSource, ingestionSchema.getTuningConfig()); } + /** + * Attempt to wait for indexed segments to become available on the cluster. + * @param reportsMap Map containing information with published segments that we are going to wait for. 
+ */ + private void waitForSegmentAvailability(Map reportsMap) + { + ArrayList segmentsToWaitFor = new ArrayList<>(); + reportsMap.values() + .forEach(report -> { + segmentsToWaitFor.addAll(report.getNewSegments()); + }); + ExecutorService availabilityExec = Execs.singleThreaded("ParallelTaskAvailabilityWaitExec"); + try { + segmentAvailabilityConfirmationCompleted = waitForSegmentAvailability( + toolbox, + availabilityExec, + segmentsToWaitFor, + ingestionSchema.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() + ); + } + finally { + availabilityExec.shutdownNow(); + } + } + /** * Run the single phase parallel indexing for best-effort rollup. In this mode, each sub task created by * the supervisor task reads data and generates segments individually. @@ -506,8 +537,16 @@ private TaskStatus runSinglePhaseParallel(TaskToolbox toolbox) throws Exception if (state.isSuccess()) { //noinspection ConstantConditions publishSegments(toolbox, runner.getReports()); + if (ingestionSchema.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() > 0) { + waitForSegmentAvailability(runner.getReports()); + } } - return TaskStatus.fromCode(getId(), state); + TaskStatus taskStatus = TaskStatus.fromCode(getId(), state); + toolbox.getTaskReportFileWriter().write( + getId(), + getTaskCompletionReports(taskStatus, segmentAvailabilityConfirmationCompleted) + ); + return taskStatus; } /** @@ -644,9 +683,17 @@ private TaskStatus runHashPartitionMultiPhaseParallel(TaskToolbox toolbox) throw if (state.isSuccess()) { //noinspection ConstantConditions publishSegments(toolbox, mergeRunner.getReports()); + if (ingestionSchema.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() > 0) { + waitForSegmentAvailability(mergeRunner.getReports()); + } } - return TaskStatus.fromCode(getId(), state); + TaskStatus taskStatus = TaskStatus.fromCode(getId(), state); + toolbox.getTaskReportFileWriter().write( + getId(), + getTaskCompletionReports(taskStatus, 
segmentAvailabilityConfirmationCompleted) + ); + return taskStatus; } private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) throws Exception @@ -706,9 +753,17 @@ private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) thro TaskState mergeState = runNextPhase(mergeRunner); if (mergeState.isSuccess()) { publishSegments(toolbox, mergeRunner.getReports()); + if (ingestionSchema.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() > 0) { + waitForSegmentAvailability(mergeRunner.getReports()); + } } - return TaskStatus.fromCode(getId(), mergeState); + TaskStatus taskStatus = TaskStatus.fromCode(getId(), mergeState); + toolbox.getTaskReportFileWriter().write( + getId(), + getTaskCompletionReports(taskStatus, segmentAvailabilityConfirmationCompleted) + ); + return taskStatus; } private static Map mergeCardinalityReports(Collection reports) @@ -968,6 +1023,30 @@ private TaskStatus runSequential(TaskToolbox toolbox) throws Exception } } + /** + * Generate an IngestionStatsAndErrorsTaskReport for the task. + * + * @param taskStatus {@link TaskStatus} + * @param segmentAvailabilityConfirmed Whether or not the segments were confirmed to be available for query when + * when the task completed. 
+ * @return + */ + private Map getTaskCompletionReports(TaskStatus taskStatus, boolean segmentAvailabilityConfirmed) + { + return TaskReport.buildTaskReports( + new IngestionStatsAndErrorsTaskReport( + getId(), + new IngestionStatsAndErrorsTaskReportData( + IngestionState.COMPLETED, + new HashMap<>(), + new HashMap<>(), + taskStatus.getErrorMsg(), + segmentAvailabilityConfirmed + ) + ) + ); + } + private static IndexTuningConfig convertToIndexTuningConfig(ParallelIndexTuningConfig tuningConfig) { return new IndexTuningConfig( @@ -991,7 +1070,8 @@ private static IndexTuningConfig convertToIndexTuningConfig(ParallelIndexTuningC tuningConfig.getSegmentWriteOutMediumFactory(), tuningConfig.isLogParseExceptions(), tuningConfig.getMaxParseExceptions(), - tuningConfig.getMaxSavedParseExceptions() + tuningConfig.getMaxSavedParseExceptions(), + tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfig.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfig.java index ca7fdf13ffed..64375dc15e00 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfig.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfig.java @@ -99,6 +99,7 @@ public static ParallelIndexTuningConfig defaultConfig() null, null, null, + null, null ); } @@ -131,7 +132,8 @@ public ParallelIndexTuningConfig( @JsonProperty("totalNumMergeTasks") @Nullable Integer totalNumMergeTasks, @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, - @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions, + 
@JsonProperty("awaitSegmentAvailabilityTimeoutMillis") @Nullable Long awaitSegmentAvailabilityTimeoutMillis ) { super( @@ -155,7 +157,8 @@ public ParallelIndexTuningConfig( segmentWriteOutMediumFactory, logParseExceptions, maxParseExceptions, - maxSavedParseExceptions + maxSavedParseExceptions, + awaitSegmentAvailabilityTimeoutMillis ); if (maxNumSubTasks != null && maxNumConcurrentSubTasks != null) { @@ -276,7 +279,8 @@ public ParallelIndexTuningConfig withPartitionsSpec(PartitionsSpec partitionsSpe getTotalNumMergeTasks(), isLogParseExceptions(), getMaxParseExceptions(), - getMaxSavedParseExceptions() + getMaxSavedParseExceptions(), + getAwaitSegmentAvailabilityTimeoutMillis() ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java index 2a2bf2d7f05f..a283c5ae9061 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java @@ -1058,7 +1058,8 @@ private Map getTaskCompletionReports(@Nullable String errorM ingestionState, getTaskCompletionUnparseableEvents(), getTaskCompletionRowStats(), - errorMsg + errorMsg, + false ) ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java index 3dc99883998c..227ddb7914c4 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java @@ -37,6 +37,7 @@ import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; +import 
org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.loading.DataSegmentArchiver; import org.apache.druid.segment.loading.DataSegmentKiller; @@ -45,7 +46,6 @@ import org.apache.druid.segment.loading.SegmentLoaderLocalCacheManager; import org.apache.druid.segment.loading.SegmentLoadingException; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.server.coordination.DataSegmentServerAnnouncer; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java index bfe7f40c51c5..000407e7f3dd 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java @@ -109,14 +109,14 @@ import org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory; import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.RealtimeIOConfig; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; -import 
org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifier; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.segment.transform.ExpressionTransform; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.server.DruidNode; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java index 0daffc66bbe8..a25ae37648ae 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java @@ -238,6 +238,7 @@ public void testCompactionTaskToClientCompactionTaskQuery() throws IOException 100, null, null, + null, null ) ) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java index 1fce094d110f..b466f3546c47 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java @@ -298,6 +298,7 @@ public void testRunWithHashPartitioning() throws Exception null, null, null, + null, null ) ) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java index 14c0840be4bd..2e056034b4be 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java @@ -334,6 +334,7 @@ private static 
ParallelIndexTuningConfig createTuningConfig() null, null, null, + null, null ); } @@ -462,6 +463,7 @@ public void testSerdeWithOldTuningConfigSuccessfullyDeserializeToNewOne() throws null, null, null, + null, null ), null, @@ -614,6 +616,7 @@ public void testCreateIngestionSchemaWithTargetPartitionSize() throws IOExceptio null, null, null, + null, null ); final List ingestionSpecs = CompactionTask.createIngestionSchema( @@ -682,6 +685,7 @@ public void testCreateIngestionSchemaWithMaxTotalRows() throws IOException, Segm null, null, null, + null, null ); final List ingestionSpecs = CompactionTask.createIngestionSchema( @@ -750,6 +754,7 @@ public void testCreateIngestionSchemaWithNumShards() throws IOException, Segment null, null, null, + null, null ); final List ingestionSpecs = CompactionTask.createIngestionSchema( @@ -1139,6 +1144,7 @@ private void assertIngestionSchema( null, null, null, + null, null ), expectedSegmentGranularity diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskSerdeTest.java index bf2e44428f29..7c19ad16abc9 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskSerdeTest.java @@ -82,7 +82,8 @@ public void testSerdeTuningConfigWithDynamicPartitionsSpec() throws IOException OffHeapMemorySegmentWriteOutMediumFactory.instance(), true, 10, - 100 + 100, + null ); assertSerdeTuningConfig(tuningConfig); } @@ -116,7 +117,8 @@ public void testSerdeTuningConfigWithHashedPartitionsSpec() throws IOException OffHeapMemorySegmentWriteOutMediumFactory.instance(), true, 10, - 100 + 100, + -1L ); assertSerdeTuningConfig(tuningConfig); } @@ -150,7 +152,8 @@ public void testSerdeTuningConfigWithDeprecatedDynamicPartitionsSpec() throws IO OffHeapMemorySegmentWriteOutMediumFactory.instance(), true, 
10, - 100 + 100, + 1L ); assertSerdeTuningConfig(tuningConfig); } @@ -184,7 +187,8 @@ public void testSerdeTuningConfigWithDeprecatedHashedPartitionsSpec() throws IOE OffHeapMemorySegmentWriteOutMediumFactory.instance(), true, 10, - 100 + 100, + null ); assertSerdeTuningConfig(tuningConfig); } @@ -220,7 +224,8 @@ public void testForceGuaranteedRollupWithDynamicPartitionsSpec() OffHeapMemorySegmentWriteOutMediumFactory.instance(), true, 10, - 100 + 100, + null ); } @@ -255,7 +260,8 @@ public void testBestEffortRollupWithHashedPartitionsSpec() OffHeapMemorySegmentWriteOutMediumFactory.instance(), true, 10, - 100 + 100, + null ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index a04e94b17e00..2baece1be254 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -1135,7 +1135,8 @@ public void testMultipleParseExceptionsSuccess() throws Exception null, true, 7, - 7 + 7, + null ); final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null); @@ -1264,7 +1265,8 @@ public void testMultipleParseExceptionsFailure() throws Exception null, true, 2, - 5 + 5, + null ); final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null); @@ -1385,7 +1387,8 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc null, true, 2, - 5 + 5, + null ); final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null); @@ -1830,7 +1833,8 @@ static IndexTuningConfig createTuningConfig( null, null, null, - 1 + 1, + null ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RealtimeIndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RealtimeIndexTaskTest.java index 
2038441acfa7..3e284077a3ce 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RealtimeIndexTaskTest.java @@ -97,6 +97,8 @@ import org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory; import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.RealtimeIOConfig; import org.apache.druid.segment.indexing.RealtimeTuningConfig; @@ -104,8 +106,6 @@ import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.realtime.FireDepartment; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifier; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.segment.realtime.plumber.ServerTimeRejectionPolicyFactory; import org.apache.druid.segment.transform.ExpressionTransform; import org.apache.druid.segment.transform.TransformSpec; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java index d133b3ac8da2..f00df38c40d7 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java @@ -55,7 +55,8 @@ public void testSerde() throws Exception ImmutableMap.of( "number", 1234 ), - "an error message" + "an error message", + false ) ); String report1serialized = jsonMapper.writeValueAsString(report1); diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java index aa3d33bf2506..0c85410a4e94 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java @@ -267,7 +267,8 @@ public void testIndexTaskSerde() throws Exception null, null, null, - null + null, + 1L ) ), null @@ -348,6 +349,7 @@ public void testIndexTaskwithResourceSerde() throws Exception null, null, null, + null, null ) ), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index 45b0f383b110..4d638a9204a4 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -167,6 +167,7 @@ public class AbstractParallelIndexSupervisorTaskTest extends IngestionTestBase null, null, null, + null, null ); @@ -248,6 +249,7 @@ protected ParallelIndexTuningConfig newTuningConfig( null, null, null, + null, null ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java index fc193b0e1321..ee231b50d780 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java @@ -188,6 +188,7 @@ private ParallelIndexSupervisorTask newTask( null, null, null, + null, null ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java index be6b0a86737e..a4937774eebf 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java @@ -437,6 +437,7 @@ private TestSupervisorTask newTask( null, null, null, + null, null ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java index b4a712580062..70c34f9b94c8 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java @@ -271,6 +271,7 @@ ParallelIndexIngestionSpec build() null, null, null, + null, null ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java index 60882358cb1f..46663c6fba12 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java @@ -224,6 +224,7 @@ public void testFailToConstructWhenBothAppendToExistingAndForceGuaranteedRollupA null, false, null, + null, null ); final ParallelIndexIngestionSpec indexIngestionSpec = new ParallelIndexIngestionSpec( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java index c2f712320b91..aacb6b0111fb 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java @@ -182,7 +182,8 @@ ParallelIndexTuningConfig build() 22, logParseExceptions, maxParseExceptions, - 25 + 25, + null ); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfigTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfigTest.java index a826ae9cd42d..cf862ff6a9e7 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfigTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfigTest.java @@ -99,6 +99,7 @@ public void testSerdeWithMaxRowsPerSegment() null, false, null, + null, null ); final byte[] json = mapper.writeValueAsBytes(tuningConfig); @@ -142,6 +143,7 @@ public void testSerdeWithMaxNumConcurrentSubTasks() throws IOException null, false, null, + null, null ); final byte[] json = mapper.writeValueAsBytes(tuningConfig); @@ -185,6 +187,7 @@ public void testSerdeWithMaxNumSubTasks() throws IOException null, false, null, + null, 
null ); final byte[] json = mapper.writeValueAsBytes(tuningConfig); @@ -230,6 +233,7 @@ public void testSerdeWithMaxNumSubTasksAndMaxNumConcurrentSubTasks() null, false, null, + null, null ); } @@ -272,6 +276,7 @@ public void testConstructorWithHashedPartitionsSpecAndNonForceGuaranteedRollupFa null, false, null, + null, null ); } @@ -314,6 +319,7 @@ public void testConstructorWithSingleDimensionPartitionsSpecAndNonForceGuarantee null, false, null, + null, null ); } @@ -356,6 +362,7 @@ public void testConstructorWithDynamicPartitionsSpecAndForceGuaranteedRollupFail null, false, null, + null, null ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java index e444f8091839..d27c19bc1e19 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java @@ -362,6 +362,7 @@ public void testWith1MaxNumConcurrentSubTasks() null, null, null, + null, null ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java index 6ab41617614c..ca375610b5ae 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java @@ -71,12 +71,12 @@ import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import 
org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.loading.LocalDataSegmentPuller; import org.apache.druid.segment.loading.LocalLoadSpec; import org.apache.druid.segment.realtime.firehose.CombiningFirehoseFactory; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.segment.transform.ExpressionTransform; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.server.metrics.NoopServiceEmitter; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java index 06ebc56e9ec7..afe866f849c4 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java @@ -48,10 +48,10 @@ import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.IndexSizeExceededException; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java index 48f75d7a82c3..4ef816b3e748 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java @@ -113,6 +113,8 @@ import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.RealtimeIOConfig; import org.apache.druid.segment.indexing.RealtimeTuningConfig; @@ -128,8 +130,6 @@ import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.appenderator.UnifiedIndexerAppenderatorsManager; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifier; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.server.coordination.DataSegmentServerAnnouncer; @@ -764,6 +764,7 @@ public void testIndexTask() throws Exception null, null, null, + null, null ) ), @@ -845,6 +846,7 @@ public void testIndexTaskFailure() throws Exception null, null, null, + null, null ) ), @@ -1271,6 +1273,7 @@ public void testResumeTasks() throws Exception null, null, null, + null, null ) ), @@ -1379,6 +1382,7 @@ public void testUnifiedAppenderatorsManagerCleanup() throws Exception null, null, null, + null, null ) ), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java index 8e5984c37183..8362fc99e779 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java @@ -42,9 +42,9 @@ import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.server.coordination.ChangeRequestHistory; import org.apache.druid.server.coordination.ChangeRequestsSnapshot; import org.apache.druid.server.security.AuthTestUtils; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java index b0bdbc89cc07..d46ff99ac7cf 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java @@ -49,9 +49,9 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.server.DruidNode; import org.apache.druid.server.initialization.IndexerZkConfig; import org.apache.druid.server.initialization.ServerConfig; diff --git a/integration-tests/src/main/java/org/apache/druid/testing/clients/OverlordResourceTestClient.java 
b/integration-tests/src/main/java/org/apache/druid/testing/clients/OverlordResourceTestClient.java index 2be6eb3c9156..d30726c695a4 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/clients/OverlordResourceTestClient.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/clients/OverlordResourceTestClient.java @@ -28,6 +28,7 @@ import org.apache.druid.indexer.TaskStatusPlus; import org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport; import org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import org.apache.druid.indexing.common.TaskReport; import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManager; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.RetryUtils; @@ -428,6 +429,30 @@ public void resumeSupervisor(String id) } } + public TaskReport getTaskReport(String taskID) + { + try { + StatusResponseHolder response = makeRequest( + HttpMethod.GET, + StringUtils.format( + "%stask/%s/reports", + getIndexerURL(), + StringUtils.urlEncode(taskID) + ) + ); + Map taskReportMap = jsonMapper.readValue( + response.getContent(), + new TypeReference>() + { + } + ); + return taskReportMap.get("ingestionStatsAndErrors"); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + private StatusResponseHolder makeRequest(HttpMethod method, String url) { try { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractAzureInputHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractAzureInputHadoopIndexTest.java index 2a6894ef91d0..a9c1cc1286ce 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractAzureInputHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractAzureInputHadoopIndexTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.hadoop; +import org.apache.druid.java.util.common.Pair; import 
org.apache.druid.java.util.common.StringUtils; import org.apache.druid.tests.indexer.AbstractITBatchIndexTest; @@ -31,7 +32,7 @@ public abstract class AbstractAzureInputHadoopIndexTest extends AbstractITBatchI private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_azure_input_index_task.json"; private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; - void doTest() throws Exception + void doTest(Pair segmentAvailabilityConfirmationPair) throws Exception { final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID(); try ( @@ -76,7 +77,8 @@ void doTest() throws Exception INDEX_QUERIES_RESOURCE, false, true, - true + true, + segmentAvailabilityConfirmationPair ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractGcsInputHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractGcsInputHadoopIndexTest.java index 244a2f5438d0..0f7f1d16ff3d 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractGcsInputHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractGcsInputHadoopIndexTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.hadoop; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.tests.indexer.AbstractITBatchIndexTest; @@ -31,7 +32,7 @@ public abstract class AbstractGcsInputHadoopIndexTest extends AbstractITBatchInd private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_gcs_input_index_task.json"; private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; - void doTest() throws Exception + void doTest(Pair segmentAvailabilityConfirmationPair) throws Exception { final String indexDatasource = "wikipedia_hadoop_index_test_" + UUID.randomUUID(); try ( @@ -72,7 +73,8 @@ void doTest() throws Exception INDEX_QUERIES_RESOURCE, false, true, - 
true + true, + segmentAvailabilityConfirmationPair ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractS3InputHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractS3InputHadoopIndexTest.java index 86dcdfaacff7..13f0dd9e869d 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractS3InputHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractS3InputHadoopIndexTest.java @@ -21,6 +21,7 @@ import com.google.inject.Inject; import org.apache.druid.common.aws.AWSCredentialsConfig; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.tests.indexer.AbstractITBatchIndexTest; @@ -36,7 +37,7 @@ public abstract class AbstractS3InputHadoopIndexTest extends AbstractITBatchInde private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_s3_input_index_task.json"; private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; - void doTest() throws Exception + void doTest(Pair segmentAvailabilityConfirmationPair) throws Exception { final String indexDatasource = "wikipedia_hadoop_index_test_" + UUID.randomUUID(); try ( @@ -89,7 +90,8 @@ void doTest() throws Exception INDEX_QUERIES_RESOURCE, false, true, - true + true, + segmentAvailabilityConfirmationPair ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToAzureHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToAzureHadoopIndexTest.java index 47a5d9f8a858..16321337890d 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToAzureHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToAzureHadoopIndexTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.hadoop; +import 
org.apache.druid.java.util.common.Pair; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import org.testng.annotations.Guice; @@ -45,6 +46,6 @@ public class ITAzureInputToAzureHadoopIndexTest extends AbstractAzureInputHadoop { public void testGcsIndexData() throws Exception { - doTest(); + doTest(new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToHdfsHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToHdfsHadoopIndexTest.java index 915c14c408f1..45cbf06c46dc 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToHdfsHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToHdfsHadoopIndexTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.hadoop; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import org.testng.annotations.Guice; @@ -45,6 +46,6 @@ public class ITAzureInputToHdfsHadoopIndexTest extends AbstractAzureInputHadoopI { public void testGcsIndexData() throws Exception { - doTest(); + doTest(new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToGcsHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToGcsHadoopIndexTest.java index 5b0e4ec95579..ca986b1dd089 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToGcsHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToGcsHadoopIndexTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.hadoop; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import 
org.testng.annotations.Guice; @@ -46,6 +47,6 @@ public class ITGcsInputToGcsHadoopIndexTest extends AbstractGcsInputHadoopIndexT { public void testGcsIndexData() throws Exception { - doTest(); + doTest(new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToHdfsHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToHdfsHadoopIndexTest.java index 314d5d7164d6..c39792b5bf8c 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToHdfsHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToHdfsHadoopIndexTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.hadoop; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import org.testng.annotations.Guice; @@ -46,6 +47,6 @@ public class ITGcsInputToHdfsHadoopIndexTest extends AbstractGcsInputHadoopIndex { public void testGcsIndexData() throws Exception { - doTest(); + doTest(new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java index 898c7d58845d..f3b6d0d69e2e 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java @@ -20,12 +20,17 @@ package org.apache.druid.tests.hadoop; import com.google.common.collect.ImmutableList; +import com.google.inject.Inject; import org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec; import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; +import org.apache.druid.java.util.common.Pair; import 
org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.server.coordinator.CoordinatorDynamicConfig; +import org.apache.druid.testing.clients.CoordinatorResourceTestClient; import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.testing.utils.ITRetryUtil; import org.apache.druid.tests.TestNGGroup; import org.apache.druid.tests.indexer.AbstractITBatchIndexTest; import org.apache.druid.timeline.partition.HashPartitionFunction; @@ -68,6 +73,14 @@ public class ITHadoopIndexTest extends AbstractITBatchIndexTest private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries.json"; private static final String REINDEX_DATASOURCE = "wikipedia_hadoop_reindex_test"; + private static final CoordinatorDynamicConfig DYNAMIC_CONFIG_PAUSED = + CoordinatorDynamicConfig.builder().withPauseCoordination(true).build(); + private static final CoordinatorDynamicConfig DYNAMIC_CONFIG_DEFAULT = + CoordinatorDynamicConfig.builder().build(); + + @Inject + CoordinatorResourceTestClient coordinatorClient; + @DataProvider public static Object[][] resources() { @@ -114,7 +127,8 @@ public void testLegacyITHadoopIndexTest() throws Exception BATCH_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); } } @@ -142,6 +156,75 @@ public void testIndexData(DimensionBasedPartitionsSpec partitionsSpec) throws Ex "%%PARTITIONS_SPEC%%", jsonMapper.writeValueAsString(partitionsSpec) ); + spec = StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString(0) + ); + + return spec; + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + indexDatasource, + INDEX_TASK, + specPathsTransform, + INDEX_QUERIES_RESOURCE, + false, + true, + true, + new Pair<>(false, false) + ); + + doReindexTest( + indexDatasource, + reindexDatasource, + REINDEX_TASK, + REINDEX_QUERIES_RESOURCE, + new Pair<>(false, 
false) + ); + } + } + + /** + * Test Hadoop Batch Ingestion with a non-zero value for awaitSegmentAvailabilityTimeoutMillis. This will confirm that + * the report for the task indicates segments were confirmed to be available on the cluster before finishing the job. + * + * @throws Exception + */ + public void testIndexDataAwaitSegmentAvailability() throws Exception + { + String indexDatasource = INDEX_DATASOURCE + "_" + UUID.randomUUID(); + String reindexDatasource = REINDEX_DATASOURCE + "_" + UUID.randomUUID(); + + try ( + final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); + final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix()); + ) { + final Function specPathsTransform = spec -> { + try { + String path = "/batch_index/json"; + spec = StringUtils.replace( + spec, + "%%INPUT_PATHS%%", + path + ); + spec = StringUtils.replace( + spec, + "%%PARTITIONS_SPEC%%", + jsonMapper.writeValueAsString( + new HashedPartitionsSpec(3, null, null) + ) + ); + spec = StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString(600000) + ); return spec; } @@ -157,14 +240,76 @@ public void testIndexData(DimensionBasedPartitionsSpec partitionsSpec) throws Ex INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(true, true) ); doReindexTest( indexDatasource, reindexDatasource, REINDEX_TASK, - REINDEX_QUERIES_RESOURCE + REINDEX_QUERIES_RESOURCE, + new Pair<>(true, true) + ); + } + } + + /** + * Test Hadoop Batch Indexing with non-zero value for awaitSegmentAvailabilityTimeoutMillis. The coordinator + * is paused when the task runs. This should result in a successful task with a flag in the task report indicating + * that we did not confirm segment availability. 
+ * + * @throws Exception + */ + public void testIndexDataAwaitSegmentAvailabilityFailsButTaskSucceeds() throws Exception + { + String indexDatasource = INDEX_DATASOURCE + "_" + UUID.randomUUID(); + + try ( + final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); + ) { + coordinatorClient.postDynamicConfig(DYNAMIC_CONFIG_PAUSED); + final Function specPathsTransform = spec -> { + try { + String path = "/batch_index/json"; + spec = StringUtils.replace( + spec, + "%%INPUT_PATHS%%", + path + ); + spec = StringUtils.replace( + spec, + "%%PARTITIONS_SPEC%%", + jsonMapper.writeValueAsString( + new HashedPartitionsSpec(3, null, null) + ) + ); + spec = StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString(1) + ); + + return spec; + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + indexDatasource, + INDEX_TASK, + specPathsTransform, + INDEX_QUERIES_RESOURCE, + false, + false, + false, + new Pair<>(true, false) + ); + coordinatorClient.postDynamicConfig(DYNAMIC_CONFIG_DEFAULT); + ITRetryUtil.retryUntilTrue( + () -> coordinator.areSegmentsLoaded(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()), "Segment Load" ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToHdfsHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToHdfsHadoopIndexTest.java index 01aa8e006d1b..d59d3d869187 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToHdfsHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToHdfsHadoopIndexTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.hadoop; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import org.testng.annotations.Guice; @@ -44,6 +45,6 @@ public class 
ITS3InputToHdfsHadoopIndexTest extends AbstractS3InputHadoopIndexTe @Test() public void testS3IndexData() throws Exception { - doTest(); + doTest(new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToS3HadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToS3HadoopIndexTest.java index 27cedcdc75dd..2e1667924f4e 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToS3HadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToS3HadoopIndexTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.hadoop; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import org.testng.annotations.Guice; @@ -44,6 +45,6 @@ public class ITS3InputToS3HadoopIndexTest extends AbstractS3InputHadoopIndexTest @Test() public void testS3IndexData() throws Exception { - doTest(); + doTest(new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractAzureInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractAzureInputSourceParallelIndexTest.java index 8ca08a0ef1e5..9ce161f6ccd9 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractAzureInputSourceParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractAzureInputSourceParallelIndexTest.java @@ -68,7 +68,10 @@ public static Object[][] resources() }; } - void doTest(Pair azureInputSource) throws Exception + void doTest( + Pair azureInputSource, + Pair segmentAvailabilityConfirmationPair + ) throws Exception { final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID(); try ( @@ -125,7 +128,8 @@ void doTest(Pair azureInputSource) throws Exception INDEX_QUERIES_RESOURCE, false, 
true, - true + true, + segmentAvailabilityConfirmationPair ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractGcsInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractGcsInputSourceParallelIndexTest.java index 805619a2d182..f720a5d5943e 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractGcsInputSourceParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractGcsInputSourceParallelIndexTest.java @@ -68,7 +68,10 @@ public static Object[][] resources() }; } - void doTest(Pair gcsInputSource) throws Exception + void doTest( + Pair gcsInputSource, + Pair segmentAvailabilityConfirmationPair + ) throws Exception { final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID(); try ( @@ -125,7 +128,8 @@ void doTest(Pair gcsInputSource) throws Exception INDEX_QUERIES_RESOURCE, false, true, - true + true, + segmentAvailabilityConfirmationPair ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractHdfsInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractHdfsInputSourceParallelIndexTest.java index 742238accb05..eaedde10bbfe 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractHdfsInputSourceParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractHdfsInputSourceParallelIndexTest.java @@ -58,7 +58,11 @@ public static Object[][] resources() }; } - void doTest(Pair hdfsInputSource, InputFormatDetails inputFormatDetails) throws Exception + void doTest( + Pair hdfsInputSource, + InputFormatDetails inputFormatDetails, + Pair segmentAvailabilityConfirmationPair + ) throws Exception { final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID(); try ( @@ -115,7 +119,8 @@ void doTest(Pair hdfsInputSource, InputFormatDetails 
inputFormatDe INDEX_QUERIES_RESOURCE, false, true, - true + true, + segmentAvailabilityConfirmationPair ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java index 0873417ad498..76d654df9796 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java @@ -23,6 +23,9 @@ import com.google.inject.Inject; import org.apache.commons.io.IOUtils; import org.apache.druid.indexer.partitions.SecondaryPartitionType; +import org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import org.apache.druid.indexing.common.TaskReport; import org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionCardinalityTask; import org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask; import org.apache.druid.indexing.common.task.batch.parallel.PartialGenericSegmentMergeTask; @@ -31,6 +34,7 @@ import org.apache.druid.indexing.common.task.batch.parallel.SinglePhaseSubTask; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.testing.IntegrationTestingConfig; @@ -103,10 +107,20 @@ protected void doIndexTest( String queryFilePath, boolean waitForNewVersion, boolean runTestQueries, - boolean waitForSegmentsToLoad + boolean waitForSegmentsToLoad, + Pair segmentAvailabilityConfirmationPair ) throws IOException { - doIndexTest(dataSource, indexTaskFilePath, Function.identity(), queryFilePath, waitForNewVersion, runTestQueries, waitForSegmentsToLoad); 
+ doIndexTest( + dataSource, + indexTaskFilePath, + Function.identity(), + queryFilePath, + waitForNewVersion, + runTestQueries, + waitForSegmentsToLoad, + segmentAvailabilityConfirmationPair + ); } protected void doIndexTest( @@ -116,7 +130,8 @@ protected void doIndexTest( String queryFilePath, boolean waitForNewVersion, boolean runTestQueries, - boolean waitForSegmentsToLoad + boolean waitForSegmentsToLoad, + Pair segmentAvailabilityConfirmationPair ) throws IOException { final String fullDatasourceName = dataSource + config.getExtraDatasourceNameSuffix(); @@ -128,7 +143,13 @@ protected void doIndexTest( ) ); - submitTaskAndWait(taskSpec, fullDatasourceName, waitForNewVersion, waitForSegmentsToLoad); + submitTaskAndWait( + taskSpec, + fullDatasourceName, + waitForNewVersion, + waitForSegmentsToLoad, + segmentAvailabilityConfirmationPair + ); if (runTestQueries) { doTestQuery(dataSource, queryFilePath); } @@ -164,10 +185,18 @@ protected void doReindexTest( String baseDataSource, String reindexDataSource, String reindexTaskFilePath, - String queryFilePath + String queryFilePath, + Pair segmentAvailabilityConfirmationPair ) throws IOException { - doReindexTest(baseDataSource, reindexDataSource, Function.identity(), reindexTaskFilePath, queryFilePath); + doReindexTest( + baseDataSource, + reindexDataSource, + Function.identity(), + reindexTaskFilePath, + queryFilePath, + segmentAvailabilityConfirmationPair + ); } void doReindexTest( @@ -175,7 +204,8 @@ void doReindexTest( String reindexDataSource, Function taskSpecTransform, String reindexTaskFilePath, - String queryFilePath + String queryFilePath, + Pair segmentAvailabilityConfirmationPair ) throws IOException { final String fullBaseDatasourceName = baseDataSource + config.getExtraDatasourceNameSuffix(); @@ -195,7 +225,13 @@ void doReindexTest( taskSpec = taskSpecTransform.apply(taskSpec); - submitTaskAndWait(taskSpec, fullReindexDatasourceName, false, true); + submitTaskAndWait( + taskSpec, + 
fullReindexDatasourceName, + false, + true, + segmentAvailabilityConfirmationPair + ); try { String queryResponseTemplate; try { @@ -239,7 +275,8 @@ void doIndexTestSqlTest( fullDatasourceName ); - submitTaskAndWait(taskSpec, fullDatasourceName, false, true); + Pair dummyPair = new Pair<>(false, false); + submitTaskAndWait(taskSpec, fullDatasourceName, false, true, dummyPair); try { sqlQueryHelper.testQueriesFromFile(queryFilePath); } @@ -253,7 +290,8 @@ private void submitTaskAndWait( String taskSpec, String dataSourceName, boolean waitForNewVersion, - boolean waitForSegmentsToLoad + boolean waitForSegmentsToLoad, + Pair segmentAvailabilityConfirmationPair ) { final List oldVersions = waitForNewVersion ? coordinator.getAvailableSegments(dataSourceName) : null; @@ -280,6 +318,18 @@ private void submitTaskAndWait( ); } + if (segmentAvailabilityConfirmationPair.lhs != null && segmentAvailabilityConfirmationPair.lhs) { + TaskReport reportRaw = indexer.getTaskReport(taskID); + IngestionStatsAndErrorsTaskReport report = (IngestionStatsAndErrorsTaskReport) reportRaw; + IngestionStatsAndErrorsTaskReportData reportData = (IngestionStatsAndErrorsTaskReportData) report.getPayload(); + if (segmentAvailabilityConfirmationPair.rhs != null) { + Assert.assertEquals( + Boolean.valueOf(reportData.isSegmentAvailabilityConfirmed()), + segmentAvailabilityConfirmationPair.rhs + ); + } + } + // IT*ParallelIndexTest do a second round of ingestion to replace segements in an existing // data source. 
For that second round we need to make sure the coordinator actually learned // about the new segments befor waiting for it to report that all segments are loaded; otherwise diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractLocalInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractLocalInputSourceParallelIndexTest.java index 82578d1e3625..52c19dd9ba92 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractLocalInputSourceParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractLocalInputSourceParallelIndexTest.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import javax.annotation.Nonnull; @@ -34,12 +35,19 @@ public abstract class AbstractLocalInputSourceParallelIndexTest extends Abstract private static final String INDEX_TASK = "/indexer/wikipedia_local_input_source_index_task.json"; private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; - public void doIndexTest(InputFormatDetails inputFormatDetails) throws Exception + public void doIndexTest( + InputFormatDetails inputFormatDetails, + Pair segmentAvailabilityConfirmationPair + ) throws Exception { - doIndexTest(inputFormatDetails, ImmutableMap.of()); + doIndexTest(inputFormatDetails, ImmutableMap.of(), segmentAvailabilityConfirmationPair); } - public void doIndexTest(InputFormatDetails inputFormatDetails, @Nonnull Map extraInputFormatMap) throws Exception + public void doIndexTest( + InputFormatDetails inputFormatDetails, + @Nonnull Map extraInputFormatMap, + Pair segmentAvailabilityConfirmationPair + ) throws Exception { final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID(); Map inputFormatMap = new 
ImmutableMap.Builder().putAll(extraInputFormatMap) @@ -94,7 +102,8 @@ public void doIndexTest(InputFormatDetails inputFormatDetails, @Nonnull Map inputSource) throws Exception + void doTest( + Pair inputSource, + Pair segmentAvailabilityConfirmationPair + ) throws Exception { final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID(); try ( @@ -125,7 +128,8 @@ void doTest(Pair inputSource) throws Exception INDEX_QUERIES_RESOURCE, false, true, - true + true, + segmentAvailabilityConfirmationPair ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractS3InputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractS3InputSourceParallelIndexTest.java index 3962c8d82b99..1d378d5dee67 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractS3InputSourceParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractS3InputSourceParallelIndexTest.java @@ -68,7 +68,10 @@ public static Object[][] resources() }; } - void doTest(Pair s3InputSource) throws Exception + void doTest( + Pair s3InputSource, + Pair segmentAvailabilityConfirmationPair + ) throws Exception { final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID(); try ( @@ -125,7 +128,8 @@ void doTest(Pair s3InputSource) throws Exception INDEX_QUERIES_RESOURCE, false, true, - true + true, + segmentAvailabilityConfirmationPair ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java index 8ba848ef8dc7..7f5e01b553ec 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java @@ -25,6 +25,7 @@ import org.apache.druid.indexer.partitions.HashedPartitionsSpec; 
import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.testing.guice.DruidTestModuleFactory; @@ -96,11 +97,11 @@ public void doIndexTest(List partitionsSpecList, List e final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); ) { // Submit initial ingestion task - submitIngestionTaskAndVerify(indexDatasource, partitionsSpecList.get(0), false); + submitIngestionTaskAndVerify(indexDatasource, partitionsSpecList.get(0), false, new Pair<>(false, false)); verifySegmentsCountAndLoaded(indexDatasource, expectedSegmentCountList.get(0)); doTestQuery(indexDatasource, INDEX_QUERIES_INITIAL_INGESTION_RESOURCE); // Submit append ingestion task - submitIngestionTaskAndVerify(indexDatasource, partitionsSpecList.get(1), true); + submitIngestionTaskAndVerify(indexDatasource, partitionsSpecList.get(1), true, new Pair<>(false, false)); verifySegmentsCountAndLoaded(indexDatasource, expectedSegmentCountList.get(1)); doTestQuery(indexDatasource, INDEX_QUERIES_POST_APPEND_PRE_COMPACT_RESOURCE); // Submit compaction task @@ -115,7 +116,8 @@ public void doIndexTest(List partitionsSpecList, List e private void submitIngestionTaskAndVerify( String indexDatasource, PartitionsSpec partitionsSpec, - boolean appendToExisting + boolean appendToExisting, + Pair segmentAvailabilityConfirmationPair ) throws Exception { InputFormatDetails inputFormatDetails = InputFormatDetails.JSON; @@ -175,7 +177,8 @@ private void submitIngestionTaskAndVerify( null, false, false, - true + true, + segmentAvailabilityConfirmationPair ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToAzureParallelIndexTest.java 
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToAzureParallelIndexTest.java index e8594ccf515e..cbb8bba877ae 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToAzureParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToAzureParallelIndexTest.java @@ -44,6 +44,6 @@ public class ITAzureToAzureParallelIndexTest extends AbstractAzureInputSourcePar @Test(dataProvider = "resources") public void testAzureIndexData(Pair azureInputSource) throws Exception { - doTest(azureInputSource); + doTest(azureInputSource, new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToHdfsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToHdfsParallelIndexTest.java index cd1d76a0e32f..ca7ee72e02f6 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToHdfsParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToHdfsParallelIndexTest.java @@ -46,6 +46,6 @@ public class ITAzureToHdfsParallelIndexTest extends AbstractAzureInputSourcePara @Test(dataProvider = "resources") public void testAzureIndexData(Pair azureInputSource) throws Exception { - doTest(azureInputSource); + doTest(azureInputSource, new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java index 373bef35f5f4..a463e389d79e 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java @@ -20,10 +20,15 @@ package org.apache.druid.tests.indexer; import 
com.fasterxml.jackson.core.JsonProcessingException; +import com.google.inject.Inject; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.server.coordinator.CoordinatorDynamicConfig; +import org.apache.druid.testing.clients.CoordinatorResourceTestClient; import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.testing.utils.ITRetryUtil; import org.apache.druid.tests.TestNGGroup; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -49,6 +54,14 @@ public class ITBestEffortRollupParallelIndexTest extends AbstractITBatchIndexTes private static final String INDEX_DRUID_INPUT_SOURCE_DATASOURCE = "wikipedia_parallel_druid_input_source_index_test"; private static final String INDEX_DRUID_INPUT_SOURCE_TASK = "/indexer/wikipedia_parallel_druid_input_source_index_task.json"; + private static final CoordinatorDynamicConfig DYNAMIC_CONFIG_PAUSED = + CoordinatorDynamicConfig.builder().withPauseCoordination(true).build(); + private static final CoordinatorDynamicConfig DYNAMIC_CONFIG_DEFAULT = + CoordinatorDynamicConfig.builder().build(); + + @Inject + CoordinatorResourceTestClient coordinatorClient; + @DataProvider public static Object[][] resources() { @@ -75,6 +88,11 @@ public void testIndexData(PartitionsSpec partitionsSpec) throws Exception "%%FORCE_GUARANTEED_ROLLUP%%", Boolean.toString(false) ); + spec = StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("0") + ); return StringUtils.replace( spec, "%%PARTITIONS_SPEC%%", @@ -93,7 +111,8 @@ public void testIndexData(PartitionsSpec partitionsSpec) throws Exception INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); // Index again, this time only choosing the second data file, and without explicit 
intervals chosen. @@ -105,7 +124,8 @@ public void testIndexData(PartitionsSpec partitionsSpec) throws Exception REINDEX_QUERIES_RESOURCE, true, true, - true + true, + new Pair<>(false, false) ); doReindexTest( @@ -113,7 +133,8 @@ public void testIndexData(PartitionsSpec partitionsSpec) throws Exception INDEX_INGEST_SEGMENT_DATASOURCE, rollupTransform, INDEX_INGEST_SEGMENT_TASK, - REINDEX_QUERIES_RESOURCE + REINDEX_QUERIES_RESOURCE, + new Pair<>(false, false) ); // with DruidInputSource instead of IngestSegmentFirehose @@ -122,7 +143,118 @@ public void testIndexData(PartitionsSpec partitionsSpec) throws Exception INDEX_DRUID_INPUT_SOURCE_DATASOURCE, rollupTransform, INDEX_DRUID_INPUT_SOURCE_TASK, - REINDEX_QUERIES_RESOURCE + REINDEX_QUERIES_RESOURCE, + new Pair<>(false, false) + ); + } + } + + /** + * Test a non zero value for awaitSegmentAvailabilityTimeoutMillis. This will confirm that the report for the task + * indicates segments were confirmed to be available on the cluster before finishing the ingestion job. 
+ * + * @param partitionsSpec + * @throws Exception + */ + @Test(dataProvider = "resources") + public void testIndexDataVerifySegmentAvailability(PartitionsSpec partitionsSpec) throws Exception + { + try ( + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + ) { + boolean forceGuaranteedRollup = partitionsSpec.isForceGuaranteedRollupCompatible(); + Assert.assertFalse(forceGuaranteedRollup, "partitionsSpec does not support best-effort rollup"); + + final Function rollupTransform = spec -> { + try { + spec = StringUtils.replace( + spec, + "%%FORCE_GUARANTEED_ROLLUP%%", + Boolean.toString(false) + ); + spec = StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("600000") + ); + return StringUtils.replace( + spec, + "%%PARTITIONS_SPEC%%", + jsonMapper.writeValueAsString(partitionsSpec) + ); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK, + rollupTransform, + INDEX_QUERIES_RESOURCE, + false, + true, + true, + new Pair<>(true, true) + ); + } + } + + /** + * Test a non-zero value for awaitSegmentAvailabilityTimeoutMillis. Setting the config value to 1 millisecond + * and pausing coordination to confirm that the task will still succeed even if the job was not able to confirm the + * segments were loaded by the time the timeout occurs.
+ * + * @param partitionsSpec + * @throws Exception + */ + @Test(dataProvider = "resources") + public void testIndexDataAwaitSegmentAvailabilityFailsButTaskSucceeds(PartitionsSpec partitionsSpec) throws Exception + { + try ( + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + ) { + coordinatorClient.postDynamicConfig(DYNAMIC_CONFIG_PAUSED); + boolean forceGuaranteedRollup = partitionsSpec.isForceGuaranteedRollupCompatible(); + Assert.assertFalse(forceGuaranteedRollup, "partitionsSpec does not support best-effort rollup"); + + final Function rollupTransform = spec -> { + try { + spec = StringUtils.replace( + spec, + "%%FORCE_GUARANTEED_ROLLUP%%", + Boolean.toString(false) + ); + spec = StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("1") + ); + return StringUtils.replace( + spec, + "%%PARTITIONS_SPEC%%", + jsonMapper.writeValueAsString(partitionsSpec) + ); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK, + rollupTransform, + INDEX_QUERIES_RESOURCE, + false, + false, + false, + new Pair<>(true, false) + ); + coordinatorClient.postDynamicConfig(DYNAMIC_CONFIG_DEFAULT); + ITRetryUtil.retryUntilTrue( + () -> coordinator.areSegmentsLoaded(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()), "Segment Load" ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java index 3a0583144bf5..bf123ac2f9fc 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.indexer; +import
org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; @@ -65,7 +66,8 @@ public void testIndexData() throws Exception INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); doIndexTest( COMBINING_INDEX_DATASOURCE, @@ -74,7 +76,8 @@ public void testIndexData() throws Exception COMBINING_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java index dd46d9b127bd..30dd77b90c7b 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; @@ -103,7 +104,8 @@ public void testIndexData() throws Exception INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); doIndexTest( COMBINING_INDEX_DATASOURCE, @@ -112,7 +114,8 @@ public void testIndexData() throws Exception COMBINING_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToGcsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToGcsParallelIndexTest.java index 75949d7bc095..927cb3ee2812 100644 --- 
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToGcsParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToGcsParallelIndexTest.java @@ -45,6 +45,6 @@ public class ITGcsToGcsParallelIndexTest extends AbstractGcsInputSourceParallelI @Test(dataProvider = "resources") public void testGcsIndexData(Pair gcsInputSource) throws Exception { - doTest(gcsInputSource); + doTest(gcsInputSource, new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToHdfsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToHdfsParallelIndexTest.java index ec2e09803264..2c9a42e1a6a2 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToHdfsParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToHdfsParallelIndexTest.java @@ -47,6 +47,6 @@ public class ITGcsToHdfsParallelIndexTest extends AbstractGcsInputSourceParallel @Test(dataProvider = "resources") public void testGcsIndexData(Pair gcsInputSource) throws Exception { - doTest(gcsInputSource); + doTest(gcsInputSource, new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToAzureParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToAzureParallelIndexTest.java index 0abd874d3e41..e0cee7f8a55a 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToAzureParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToAzureParallelIndexTest.java @@ -42,6 +42,6 @@ public class ITHdfsToAzureParallelIndexTest extends AbstractHdfsInputSourceParal @Test(dataProvider = "resources") public void testHdfsIndexData(Pair hdfsInputSource) throws Exception { - doTest(hdfsInputSource, InputFormatDetails.JSON); + doTest(hdfsInputSource, InputFormatDetails.JSON, new 
Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToGcsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToGcsParallelIndexTest.java index c30e1a545094..5e302a14f770 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToGcsParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToGcsParallelIndexTest.java @@ -43,6 +43,6 @@ public class ITHdfsToGcsParallelIndexTest extends AbstractHdfsInputSourceParalle @Test(dataProvider = "resources") public void testHdfsIndexData(Pair hdfsInputSource) throws Exception { - doTest(hdfsInputSource, InputFormatDetails.JSON); + doTest(hdfsInputSource, InputFormatDetails.JSON, new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToHdfsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToHdfsParallelIndexTest.java index 71f227c8dbed..42073544d148 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToHdfsParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToHdfsParallelIndexTest.java @@ -41,18 +41,18 @@ public class ITHdfsToHdfsParallelIndexTest extends AbstractHdfsInputSourceParall @Test(dataProvider = "resources") public void testHdfsIndexJsonData(Pair hdfsInputSource) throws Exception { - doTest(hdfsInputSource, InputFormatDetails.JSON); + doTest(hdfsInputSource, InputFormatDetails.JSON, new Pair<>(false, false)); } @Test(dataProvider = "resources") public void testHdfsIndexOrcData(Pair hdfsInputSource) throws Exception { - doTest(hdfsInputSource, InputFormatDetails.ORC); + doTest(hdfsInputSource, InputFormatDetails.ORC, new Pair<>(false, false)); } @Test(dataProvider = "resources") public void testHdfsIndexParquetData(Pair hdfsInputSource) throws Exception { - doTest(hdfsInputSource, 
InputFormatDetails.PARQUET); + doTest(hdfsInputSource, InputFormatDetails.PARQUET, new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToS3ParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToS3ParallelIndexTest.java index 564c0c19f447..5e674fda106d 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToS3ParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToS3ParallelIndexTest.java @@ -42,6 +42,6 @@ public class ITHdfsToS3ParallelIndexTest extends AbstractHdfsInputSourceParallel @Test(dataProvider = "resources") public void testHdfsIndexData(Pair hdfsInputSource) throws Exception { - doTest(hdfsInputSource, InputFormatDetails.JSON); + doTest(hdfsInputSource, InputFormatDetails.JSON, new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHttpInputSourceTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHttpInputSourceTest.java index 69d1eeaaa94f..3d63ea2172ae 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHttpInputSourceTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHttpInputSourceTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.indexer; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import org.testng.annotations.Guice; @@ -46,7 +47,8 @@ public void doTest() throws IOException INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java index a602d4a5aa05..05d0f7587cef 100644 --- 
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java @@ -19,12 +19,20 @@ package org.apache.druid.tests.indexer; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.google.inject.Inject; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.server.coordinator.CoordinatorDynamicConfig; +import org.apache.druid.testing.clients.CoordinatorResourceTestClient; import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.testing.utils.ITRetryUtil; import org.apache.druid.tests.TestNGGroup; import org.testng.annotations.Guice; import org.testng.annotations.Test; import java.io.Closeable; +import java.util.function.Function; @Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.QUICKSTART_COMPATIBLE}) @Guice(moduleFactory = DruidTestModuleFactory.class) @@ -54,6 +62,14 @@ public class ITIndexerTest extends AbstractITBatchIndexTest private static final String MERGE_REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_merge_index_queries.json"; private static final String MERGE_REINDEX_DATASOURCE = "wikipedia_merge_reindex_test"; + private static final CoordinatorDynamicConfig DYNAMIC_CONFIG_PAUSED = + CoordinatorDynamicConfig.builder().withPauseCoordination(true).build(); + private static final CoordinatorDynamicConfig DYNAMIC_CONFIG_DEFAULT = + CoordinatorDynamicConfig.builder().build(); + + @Inject + CoordinatorResourceTestClient coordinatorClient; + @Test public void testIndexData() throws Exception { @@ -64,25 +80,43 @@ public void testIndexData() throws Exception final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix()); final Closeable ignored3 = unloader(reindexDatasourceWithDruidInputSource + config.getExtraDatasourceNameSuffix()) ) { + + final Function transform = spec -> { + try { + return 
StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("0") + ); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }; + doIndexTest( INDEX_DATASOURCE, INDEX_TASK, + transform, INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); doReindexTest( INDEX_DATASOURCE, reindexDatasource, REINDEX_TASK, - REINDEX_QUERIES_RESOURCE + REINDEX_QUERIES_RESOURCE, + new Pair<>(false, false) ); doReindexTest( INDEX_DATASOURCE, reindexDatasourceWithDruidInputSource, REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, - REINDEX_QUERIES_RESOURCE + REINDEX_QUERIES_RESOURCE, + new Pair<>(false, false) ); } } @@ -103,19 +137,22 @@ public void testReIndexDataWithTimestamp() throws Exception INDEX_WITH_TIMESTAMP_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); doReindexTest( INDEX_WITH_TIMESTAMP_DATASOURCE, reindexDatasource, REINDEX_TASK, - REINDEX_QUERIES_RESOURCE + REINDEX_QUERIES_RESOURCE, + new Pair<>(false, false) ); doReindexTest( INDEX_WITH_TIMESTAMP_DATASOURCE, reindexDatasourceWithDruidInputSource, REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, - REINDEX_QUERIES_RESOURCE + REINDEX_QUERIES_RESOURCE, + new Pair<>(false, false) ); } } @@ -136,19 +173,102 @@ public void testMERGEIndexData() throws Exception MERGE_INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); doReindexTest( MERGE_INDEX_DATASOURCE, reindexDatasource, MERGE_REINDEX_TASK, - MERGE_REINDEX_QUERIES_RESOURCE + MERGE_REINDEX_QUERIES_RESOURCE, + new Pair<>(false, false) ); doReindexTest( MERGE_INDEX_DATASOURCE, reindexDatasourceWithDruidInputSource, MERGE_REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, - MERGE_INDEX_QUERIES_RESOURCE + MERGE_INDEX_QUERIES_RESOURCE, + new Pair<>(false, false) + ); + } + } + + /** + * Test that task reports indicate the ingested segments were loaded before the configured timeout expired. 
+ * + * @throws Exception + */ + @Test + public void testIndexDataAwaitSegmentAvailability() throws Exception + { + try ( + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + ) { + final Function transform = spec -> { + try { + return StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("600000") + ); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK, + transform, + INDEX_QUERIES_RESOURCE, + false, + true, + true, + new Pair<>(true, true) + ); + } + } + + /** + * Test that the task still succeeds if the segments do not become available before the configured wait timeout + * expires. + * + * @throws Exception + */ + @Test + public void testIndexDataAwaitSegmentAvailabilityFailsButTaskSucceeds() throws Exception + { + try ( + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + ) { + coordinatorClient.postDynamicConfig(DYNAMIC_CONFIG_PAUSED); + final Function transform = spec -> { + try { + return StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("1") + ); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK, + transform, + INDEX_QUERIES_RESOURCE, + false, + false, + false, + new Pair<>(true, false) + ); + coordinatorClient.postDynamicConfig(DYNAMIC_CONFIG_DEFAULT); + ITRetryUtil.retryUntilTrue( + () -> coordinator.areSegmentsLoaded(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()), "Segment Load" ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java index a0c1014105a9..34fa4d908188 100644 --- 
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import org.testng.annotations.Guice; @@ -58,42 +59,42 @@ public void testAvroInputFormatIndexDataIngestionSpecWithSchema() throws Excepti "type", "record", "name", "wikipedia", "fields", fieldList); - doIndexTest(InputFormatDetails.AVRO, ImmutableMap.of("schema", schema)); + doIndexTest(InputFormatDetails.AVRO, ImmutableMap.of("schema", schema), new Pair<>(false, false)); } @Test public void testAvroInputFormatIndexDataIngestionSpecWithoutSchema() throws Exception { - doIndexTest(InputFormatDetails.AVRO); + doIndexTest(InputFormatDetails.AVRO, new Pair<>(false, false)); } @Test public void testJsonInputFormatIndexDataIngestionSpecWithSchema() throws Exception { - doIndexTest(InputFormatDetails.JSON); + doIndexTest(InputFormatDetails.JSON, new Pair<>(false, false)); } @Test public void testTsvInputFormatIndexDataIngestionSpecWithSchema() throws Exception { - doIndexTest(InputFormatDetails.TSV, ImmutableMap.of("findColumnsFromHeader", true)); + doIndexTest(InputFormatDetails.TSV, ImmutableMap.of("findColumnsFromHeader", true), new Pair<>(false, false)); } @Test public void testParquetInputFormatIndexDataIngestionSpecWithSchema() throws Exception { - doIndexTest(InputFormatDetails.PARQUET); + doIndexTest(InputFormatDetails.PARQUET, new Pair<>(false, false)); } @Test public void testOrcInputFormatIndexDataIngestionSpecWithSchema() throws Exception { - doIndexTest(InputFormatDetails.ORC); + doIndexTest(InputFormatDetails.ORC, new Pair<>(false, false)); } @Test public void 
testCsvInputFormatIndexDataIngestionSpecWithSchema() throws Exception { - doIndexTest(InputFormatDetails.CSV, ImmutableMap.of("findColumnsFromHeader", true)); + doIndexTest(InputFormatDetails.CSV, ImmutableMap.of("findColumnsFromHeader", true), new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOssToOssParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOssToOssParallelIndexTest.java index ea989598b4bd..003b41452a57 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOssToOssParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOssToOssParallelIndexTest.java @@ -44,6 +44,6 @@ public class ITOssToOssParallelIndexTest extends AbstractOssInputSourceParallelI @Test(dataProvider = "resources") public void testAliyunOssIndexData(Pair ossInputSource) throws Exception { - doTest(ossInputSource); + doTest(ossInputSource, new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java index fef886dda2f7..7eba2309db98 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java @@ -23,6 +23,7 @@ import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; @@ -95,7 +96,8 @@ public void testIndexData(PartitionsSpec partitionsSpec) throws 
Exception INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); doReindexTest( @@ -103,7 +105,9 @@ public void testIndexData(PartitionsSpec partitionsSpec) throws Exception INDEX_INGEST_SEGMENT_DATASOURCE, rollupTransform, INDEX_INGEST_SEGMENT_TASK, - INDEX_QUERIES_RESOURCE + INDEX_QUERIES_RESOURCE, + new Pair<>(false, false) + ); // with DruidInputSource instead of IngestSegmentFirehose @@ -112,7 +116,8 @@ public void testIndexData(PartitionsSpec partitionsSpec) throws Exception INDEX_DRUID_INPUT_SOURCE_DATASOURCE, rollupTransform, INDEX_DRUID_INPUT_SOURCE_TASK, - INDEX_QUERIES_RESOURCE + INDEX_QUERIES_RESOURCE, + new Pair<>(false, false) ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3OverrideCredentialsIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3OverrideCredentialsIndexTest.java index ec949f17fed3..79912218ce9e 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3OverrideCredentialsIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3OverrideCredentialsIndexTest.java @@ -23,6 +23,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatusPlus; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; @@ -129,7 +130,8 @@ public void testS3WithValidOverrideCredentialsIndexDataShouldSucceed() throws Ex INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToHdfsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToHdfsParallelIndexTest.java index b57f7bc975e9..a5869447156e 100644 --- 
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToHdfsParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToHdfsParallelIndexTest.java @@ -46,6 +46,6 @@ public class ITS3ToHdfsParallelIndexTest extends AbstractS3InputSourceParallelIn @Test(dataProvider = "resources") public void testS3IndexData(Pair s3InputSource) throws Exception { - doTest(s3InputSource); + doTest(s3InputSource, new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToS3ParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToS3ParallelIndexTest.java index e8f8b361fd64..0a85d38fe16f 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToS3ParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToS3ParallelIndexTest.java @@ -44,6 +44,6 @@ public class ITS3ToS3ParallelIndexTest extends AbstractS3InputSourceParallelInde @Test(dataProvider = "resources") public void testS3IndexData(Pair s3InputSource) throws Exception { - doTest(s3InputSource); + doTest(s3InputSource, new Pair<>(false, false)); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSqlInputSourceTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSqlInputSourceTest.java index 92e9db809916..30c0aedd7688 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSqlInputSourceTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSqlInputSourceTest.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableList; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; @@ -95,7 +96,8 @@ public void 
testIndexData(List sqlQueries) throws Exception INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTestCoordinatorPausedTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTestCoordinatorPausedTest.java index 269c74d4cd19..b38a0c8b6213 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTestCoordinatorPausedTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTestCoordinatorPausedTest.java @@ -20,6 +20,7 @@ package org.apache.druid.tests.indexer; import com.google.inject.Inject; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.server.coordinator.CoordinatorDynamicConfig; import org.apache.druid.testing.clients.CoordinatorResourceTestClient; @@ -59,7 +60,8 @@ public void testCoordinatorPause() throws Exception INDEX_QUERIES_RESOURCE, false, false, - false + false, + new Pair<>(false, false) ); TimeUnit.MINUTES.sleep(3); if (coordinatorClient.areSegmentsLoaded(INDEX_DATASOURCE)) { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java index 4a5116402873..e98623dcd899 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.indexer; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import org.testng.annotations.Guice; @@ -56,13 +57,15 @@ public void testIndexAndReIndexWithTransformSpec() throws IOException INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); doReindexTest( 
INDEX_DATASOURCE, reindexDatasourceWithDruidInputSource, REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, - REINDEX_QUERIES_RESOURCE + REINDEX_QUERIES_RESOURCE, + new Pair<>(false, false) ); } } @@ -83,13 +86,15 @@ public void testIndexAndReIndexUsingIngestSegmentWithTransforms() throws IOExcep INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); doReindexTest( INDEX_DATASOURCE, reindexDatasource, REINDEX_TASK, - REINDEX_QUERIES_RESOURCE + REINDEX_QUERIES_RESOURCE, + new Pair<>(false, false) ); } } @@ -108,7 +113,8 @@ public void testIndexWithFirehoseAndTransforms() throws IOException INDEX_QUERIES_RESOURCE, false, true, - true + true, + new Pair<>(false, false) ); } } diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json index d30214b93141..76600f57cf87 100644 --- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json @@ -80,6 +80,7 @@ "tuningConfig": { "type": "hadoop", "partitionsSpec": %%PARTITIONS_SPEC%%, + "awaitSegmentAvailabilityTimeoutMillis": %%SEGMENT_AVAIL_TIMEOUT_MILLIS%%, "jobProperties": { "fs.permissions.umask-mode": "022", "fs.default.name" : "hdfs://druid-it-hadoop:9000", diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json index cf44540a6b49..66256b3d3184 100644 --- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json @@ -56,6 +56,7 @@ "targetPartitionSize": 75000, "type": "hashed" }, + "awaitSegmentAvailabilityTimeoutMillis": %%SEGMENT_AVAIL_TIMEOUT_MILLIS%%, "jobProperties": { "fs.permissions.umask-mode": "022", "fs.default.name" : "hdfs://druid-it-hadoop:9000", diff --git 
a/integration-tests/src/test/resources/indexer/wikipedia_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task.json index 1364567fbff2..1e6952f56f61 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task.json @@ -79,7 +79,8 @@ }, "tuningConfig": { "type": "index", - "maxRowsPerSegment": 3 + "maxRowsPerSegment": 3, + "awaitSegmentAvailabilityTimeoutMillis": %%SEGMENT_AVAIL_TIMEOUT_MILLIS%% } } } \ No newline at end of file diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json index 4781d39b0249..e83b1109da02 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json @@ -70,7 +70,8 @@ "type": "maxSize", "maxSplitSize": 1 }, - "partitionsSpec": %%PARTITIONS_SPEC%% + "partitionsSpec": %%PARTITIONS_SPEC%%, + "awaitSegmentAvailabilityTimeoutMillis": %%SEGMENT_AVAIL_TIMEOUT_MILLIS%% } } } \ No newline at end of file diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifier.java b/server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifier.java similarity index 99% rename from server/src/main/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifier.java rename to server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifier.java index 2e97258a52da..c53ad19548fb 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifier.java +++ b/server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifier.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.segment.handoff; import org.apache.druid.client.ImmutableSegmentLoadInfo; import org.apache.druid.client.coordinator.CoordinatorClient; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifierConfig.java b/server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifierConfig.java similarity index 95% rename from server/src/main/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifierConfig.java rename to server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifierConfig.java index 285a4929d0d3..2ee769b38570 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifierConfig.java +++ b/server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifierConfig.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.segment.handoff; import com.fasterxml.jackson.annotation.JsonProperty; import org.joda.time.Duration; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifierFactory.java b/server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifierFactory.java similarity index 96% rename from server/src/main/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifierFactory.java rename to server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifierFactory.java index d92bea767526..aecb8e8f35be 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifierFactory.java +++ b/server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifierFactory.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.segment.handoff; import com.google.inject.Inject; import org.apache.druid.client.coordinator.CoordinatorClient; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/SegmentHandoffNotifier.java b/server/src/main/java/org/apache/druid/segment/handoff/SegmentHandoffNotifier.java similarity index 97% rename from server/src/main/java/org/apache/druid/segment/realtime/plumber/SegmentHandoffNotifier.java rename to server/src/main/java/org/apache/druid/segment/handoff/SegmentHandoffNotifier.java index 9c77b1f07fcc..022e205727d3 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/SegmentHandoffNotifier.java +++ b/server/src/main/java/org/apache/druid/segment/handoff/SegmentHandoffNotifier.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.segment.handoff; import org.apache.druid.query.SegmentDescriptor; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/SegmentHandoffNotifierFactory.java b/server/src/main/java/org/apache/druid/segment/handoff/SegmentHandoffNotifierFactory.java similarity index 94% rename from server/src/main/java/org/apache/druid/segment/realtime/plumber/SegmentHandoffNotifierFactory.java rename to server/src/main/java/org/apache/druid/segment/handoff/SegmentHandoffNotifierFactory.java index 96aeb5fcd3a9..12cfde8d9084 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/SegmentHandoffNotifierFactory.java +++ b/server/src/main/java/org/apache/druid/segment/handoff/SegmentHandoffNotifierFactory.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.segment.handoff; public interface SegmentHandoffNotifierFactory diff --git a/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java b/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java index 760494e8b703..6eb06ff486cd 100644 --- a/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java +++ b/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java @@ -39,6 +39,7 @@ public interface TuningConfig int DEFAULT_MAX_PARSE_EXCEPTIONS = Integer.MAX_VALUE; int DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS = 0; int DEFAULT_MAX_ROWS_IN_MEMORY = 1_000_000; + int DEFAULT_AWAIT_SEGMENT_AVAILABILITY_TIMEOUT_MILLIS = 0; /** * The incremental index implementation to use diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumber.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumber.java index 2aa5d43a2eaa..bea2890fcf0f 100644 --- 
a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumber.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumber.java @@ -43,6 +43,7 @@ import org.apache.druid.query.QueryPlus; import org.apache.druid.query.QueryRunner; import org.apache.druid.query.context.ResponseContext; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; import org.apache.druid.segment.incremental.IncrementalIndexAddResult; import org.apache.druid.segment.incremental.IndexSizeExceededException; import org.apache.druid.segment.indexing.DataSchema; @@ -52,7 +53,6 @@ import org.apache.druid.segment.realtime.plumber.Committers; import org.apache.druid.segment.realtime.plumber.Plumber; import org.apache.druid.segment.realtime.plumber.RejectionPolicy; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifier; import org.apache.druid.segment.realtime.plumber.VersioningPolicy; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.timeline.DataSegment; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberSchool.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberSchool.java index 99adff0a1ba4..8d188111f64e 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberSchool.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberSchool.java @@ -22,13 +22,13 @@ import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.RealtimeTuningConfig; import org.apache.druid.segment.realtime.FireDepartmentMetrics; import 
org.apache.druid.segment.realtime.SegmentPublisher; import org.apache.druid.segment.realtime.plumber.Plumber; import org.apache.druid.segment.realtime.plumber.PlumberSchool; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.server.coordination.DataSegmentAnnouncer; public class AppenderatorPlumberSchool implements PlumberSchool diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriver.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriver.java index ea466f6de109..5847947ff57d 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriver.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriver.java @@ -36,11 +36,11 @@ import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.SegmentDescriptor; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.loading.DataSegmentKiller; import org.apache.druid.segment.realtime.FireDepartmentMetrics; import org.apache.druid.segment.realtime.appenderator.SegmentWithState.SegmentState; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifier; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.timeline.DataSegment; import javax.annotation.Nullable; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/NoopSegmentHandoffNotifierFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/NoopSegmentHandoffNotifierFactory.java index 536c1aef7188..9a51143d64b2 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/NoopSegmentHandoffNotifierFactory.java +++ 
b/server/src/main/java/org/apache/druid/segment/realtime/plumber/NoopSegmentHandoffNotifierFactory.java @@ -21,6 +21,8 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.SegmentDescriptor; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import java.util.concurrent.Executor; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumber.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumber.java index 2aec7587a1bc..1ed4598a6d0e 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumber.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumber.java @@ -57,6 +57,7 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.ReferenceCountingSegment; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; import org.apache.druid.segment.incremental.IncrementalIndexAddResult; import org.apache.druid.segment.incremental.IndexSizeExceededException; import org.apache.druid.segment.indexing.DataSchema; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchool.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchool.java index f4d5c43f17af..3f808f9817b7 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchool.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchool.java @@ -31,6 +31,7 @@ import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.indexing.DataSchema; import 
org.apache.druid.segment.indexing.RealtimeTuningConfig; import org.apache.druid.segment.join.JoinableFactory; diff --git a/server/src/test/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifierTest.java b/server/src/test/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifierTest.java similarity index 99% rename from server/src/test/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifierTest.java rename to server/src/test/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifierTest.java index f5534bd46a3b..31933aa4a824 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/plumber/CoordinatorBasedSegmentHandoffNotifierTest.java +++ b/server/src/test/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifierTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.segment.handoff; import com.google.common.collect.Sets; import org.apache.druid.client.ImmutableSegmentLoadInfo; diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberTest.java index 87df0e8d532c..4bd432b96bfd 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberTest.java @@ -20,12 +20,12 @@ package org.apache.druid.segment.realtime.appenderator; import org.apache.druid.data.input.InputRow; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.indexing.RealtimeTuningConfig; import org.apache.druid.segment.realtime.SegmentPublisher; import 
org.apache.druid.segment.realtime.plumber.IntervalStartVersioningPolicy; import org.apache.druid.segment.realtime.plumber.NoopRejectionPolicyFactory; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifier; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.easymock.EasyMock; import org.junit.Assert; diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java index 87fc54b42a3a..30917746f678 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java @@ -38,10 +38,10 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.query.SegmentDescriptor; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.loading.DataSegmentKiller; import org.apache.druid.segment.realtime.FireDepartmentMetrics; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifier; -import org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.easymock.EasyMock; diff --git a/server/src/test/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchoolTest.java b/server/src/test/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchoolTest.java index 9b73cbe12cd3..87ae270264f1 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchoolTest.java 
+++ b/server/src/test/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchoolTest.java @@ -46,6 +46,8 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.ReferenceCountingSegment; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.RealtimeTuningConfig; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; diff --git a/services/src/main/java/org/apache/druid/cli/CliPeon.java b/services/src/main/java/org/apache/druid/cli/CliPeon.java index 6f7cacda8b34..ea3bcf2fab10 100644 --- a/services/src/main/java/org/apache/druid/cli/CliPeon.java +++ b/services/src/main/java/org/apache/druid/cli/CliPeon.java @@ -93,6 +93,9 @@ import org.apache.druid.metadata.input.InputSourceModule; import org.apache.druid.query.QuerySegmentWalker; import org.apache.druid.query.lookup.LookupModule; +import org.apache.druid.segment.handoff.CoordinatorBasedSegmentHandoffNotifierConfig; +import org.apache.druid.segment.handoff.CoordinatorBasedSegmentHandoffNotifierFactory; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.loading.DataSegmentArchiver; import org.apache.druid.segment.loading.DataSegmentKiller; @@ -105,9 +108,6 @@ import org.apache.druid.segment.realtime.firehose.ChatHandlerProvider; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; import org.apache.druid.segment.realtime.firehose.ServiceAnnouncingChatHandlerProvider; -import org.apache.druid.segment.realtime.plumber.CoordinatorBasedSegmentHandoffNotifierConfig; -import org.apache.druid.segment.realtime.plumber.CoordinatorBasedSegmentHandoffNotifierFactory; -import 
org.apache.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory; import org.apache.druid.server.DruidNode; import org.apache.druid.server.ResponseContextConfig; import org.apache.druid.server.SegmentManager; diff --git a/website/.spelling b/website/.spelling index a1e5dfdf3956..26d859efab49 100644 --- a/website/.spelling +++ b/website/.spelling @@ -1038,6 +1038,7 @@ timeChunk totalNumMergeTasks StaticS3Firehose prefetchTriggerBytes +awaitSegmentAvailabilityTimeoutMillis - ../docs/ingestion/schema-design.md product_category product_id From 3d4de8ca8ddee96357c48955f74fb95d4d3d2cec Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Fri, 11 Dec 2020 15:14:24 -0600 Subject: [PATCH 02/28] IT updates --- .../apache/druid/testing/utils/ITRetryUtil.java | 2 +- .../druid/tests/hadoop/ITHadoopIndexTest.java | 15 +++------------ .../hadoop/wikipedia_hadoop_reindex_task.json | 1 - 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java b/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java index 4be7f4f3be81..0b8ac2891b43 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java @@ -30,7 +30,7 @@ public class ITRetryUtil private static final Logger LOG = new Logger(ITRetryUtil.class); - public static final int DEFAULT_RETRY_COUNT = 150; // 5 minutes + public static final int DEFAULT_RETRY_COUNT = 300; // 10 minutes. legacy batch hadoop takes long time on local mode. 
public static final long DEFAULT_RETRY_SLEEP = TimeUnit.SECONDS.toMillis(2); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java index f3b6d0d69e2e..a61fae4e0e73 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java @@ -196,14 +196,12 @@ public void testIndexData(DimensionBasedPartitionsSpec partitionsSpec) throws Ex * * @throws Exception */ + @Test public void testIndexDataAwaitSegmentAvailability() throws Exception { String indexDatasource = INDEX_DATASOURCE + "_" + UUID.randomUUID(); - String reindexDatasource = REINDEX_DATASOURCE + "_" + UUID.randomUUID(); - try ( final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); - final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix()); ) { final Function specPathsTransform = spec -> { try { @@ -243,14 +241,6 @@ public void testIndexDataAwaitSegmentAvailability() throws Exception true, new Pair<>(true, true) ); - - doReindexTest( - indexDatasource, - reindexDatasource, - REINDEX_TASK, - REINDEX_QUERIES_RESOURCE, - new Pair<>(true, true) - ); } } @@ -261,6 +251,7 @@ public void testIndexDataAwaitSegmentAvailability() throws Exception * * @throws Exception */ + @Test public void testIndexDataAwaitSegmentAvailabilityFailsButTaskSucceeds() throws Exception { String indexDatasource = INDEX_DATASOURCE + "_" + UUID.randomUUID(); @@ -309,7 +300,7 @@ public void testIndexDataAwaitSegmentAvailabilityFailsButTaskSucceeds() throws E ); coordinatorClient.postDynamicConfig(DYNAMIC_CONFIG_DEFAULT); ITRetryUtil.retryUntilTrue( - () -> coordinator.areSegmentsLoaded(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()), "Segment Load" + () -> coordinatorClient.areSegmentsLoaded(indexDatasource + 
config.getExtraDatasourceNameSuffix()), "Segment Load For: " + indexDatasource + config.getExtraDatasourceNameSuffix() ); } } diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json index 66256b3d3184..cf44540a6b49 100644 --- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json @@ -56,7 +56,6 @@ "targetPartitionSize": 75000, "type": "hashed" }, - "awaitSegmentAvailabilityTimeoutMillis": %%SEGMENT_AVAIL_TIMEOUT_MILLIS%%, "jobProperties": { "fs.permissions.umask-mode": "022", "fs.default.name" : "hdfs://druid-it-hadoop:9000", From 027938ea80e385136b32c405f8046b9cef675fc1 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Fri, 11 Dec 2020 17:39:57 -0600 Subject: [PATCH 03/28] fix queries in legacy hadoop IT --- .../src/test/resources/hadoop/batch_hadoop_queries.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json index d59e7ae0dc2a..2a390b2ec1f7 100644 --- a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json +++ b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json @@ -18,6 +18,7 @@ "type": "STRING", "size": 0, "hasMultipleValues": false, + "hasNulls": false, "minValue": "location_1", "maxValue": "location_5", "cardinality": 5, @@ -27,6 +28,7 @@ "type": "thetaSketch", "size": 0, "hasMultipleValues": false, + "hasNulls": true, "minValue": null, "maxValue": null, "cardinality": null, @@ -36,6 +38,7 @@ "type": "thetaSketch", "size": 0, "hasMultipleValues": false, + "hasNulls": true, "minValue": null, "maxValue": null, "cardinality": null, @@ -45,6 +48,7 @@ "type": "LONG", "size": 0, "hasMultipleValues": false, + "hasNulls": false, "minValue": null, 
"maxValue": null, "cardinality": null, @@ -54,6 +58,7 @@ "type": "STRING", "size": 0, "hasMultipleValues": false, + "hasNulls": false, "minValue": "product_1", "maxValue": "product_9", "cardinality": 15, From 8b9d26d07527b8b2b6b1a8ed09dc0ac36e353797 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Mon, 14 Dec 2020 17:09:39 -0600 Subject: [PATCH 04/28] Fix broken indexing integration tests --- .../duty/ITAutoCompactionTest.java | 5 ++++ .../indexer/AbstractITBatchIndexTest.java | 24 +++++++++++++++---- .../ITCombiningFirehoseFactoryIndexTest.java | 15 ++++++++++++ .../tests/indexer/ITCompactionTaskTest.java | 5 ++++ .../ITPerfectRollupParallelIndexTest.java | 5 ++++ .../ITSystemTableBatchIndexTaskTest.java | 20 +++++++++++++++- .../indexer/ITTestCoordinatorPausedTest.java | 18 ++++++++++++++ 7 files changed, 87 insertions(+), 5 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index 5d1d55ba7b63..9e83b13ca466 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -282,6 +282,11 @@ private void loadData(String indexTask) throws Exception { String taskSpec = getResourceAsString(indexTask); taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); + taskSpec = StringUtils.replace( + taskSpec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("0") + ); final String taskID = indexer.submitTask(taskSpec); LOG.info("TaskID for loading index task %s", taskID); indexer.waitUntilTaskCompletes(taskID); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java 
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java index 76d654df9796..d7fea8e30bea 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java @@ -267,12 +267,28 @@ void doIndexTestSqlTest( String indexTaskFilePath, String queryFilePath ) throws IOException + { + doIndexTestSqlTest( + dataSource, + indexTaskFilePath, + queryFilePath, + Function.identity() + ); + } + void doIndexTestSqlTest( + String dataSource, + String indexTaskFilePath, + String queryFilePath, + Function taskSpecTransform + ) throws IOException { final String fullDatasourceName = dataSource + config.getExtraDatasourceNameSuffix(); - final String taskSpec = StringUtils.replace( - getResourceAsString(indexTaskFilePath), - "%%DATASOURCE%%", - fullDatasourceName + final String taskSpec = taskSpecTransform.apply( + StringUtils.replace( + getResourceAsString(indexTaskFilePath), + "%%DATASOURCE%%", + fullDatasourceName + ) ); Pair dummyPair = new Pair<>(false, false); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java index bf123ac2f9fc..c1350f550684 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningFirehoseFactoryIndexTest.java @@ -19,6 +19,7 @@ package org.apache.druid.tests.indexer; +import com.fasterxml.jackson.core.JsonProcessingException; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.testing.guice.DruidTestModuleFactory; @@ -60,9 +61,23 @@ public void testIndexData() throws Exception throw new RuntimeException(e); } }; + 
final Function transform = spec -> { + try { + return StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("0") + ); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }; + doIndexTest( INDEX_DATASOURCE, INDEX_TASK, + transform, INDEX_QUERIES_RESOURCE, false, true, diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java index ed7b44e984ed..0d32f274a5b7 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java @@ -99,6 +99,11 @@ private void loadDataAndCompact(String indexTask, String queriesResource) throws "%%DATASOURCE%%", fullDatasourceName ); + queryResponseTemplate = StringUtils.replace( + queryResponseTemplate, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("0") + ); queryHelper.testQueriesFromString(queryResponseTemplate); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java index 7eba2309db98..1bb1a79bd49c 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java @@ -78,6 +78,11 @@ public void testIndexData(PartitionsSpec partitionsSpec) throws Exception "%%FORCE_GUARANTEED_ROLLUP%%", Boolean.toString(true) ); + spec = StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("0") + ); return StringUtils.replace( spec, "%%PARTITIONS_SPEC%%", diff --git 
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSystemTableBatchIndexTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSystemTableBatchIndexTaskTest.java index df742e4698a5..7582ae46bf14 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSystemTableBatchIndexTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSystemTableBatchIndexTaskTest.java @@ -19,6 +19,8 @@ package org.apache.druid.tests.indexer; +import com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; @@ -26,6 +28,7 @@ import org.testng.annotations.Test; import java.io.Closeable; +import java.util.function.Function; @Test(groups = TestNGGroup.BATCH_INDEX) @Guice(moduleFactory = DruidTestModuleFactory.class) @@ -43,10 +46,25 @@ public void testIndexData() throws Exception try ( final Closeable ignored = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()) ) { + + final Function transform = spec -> { + try { + return StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("0") + ); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }; + doIndexTestSqlTest( INDEX_DATASOURCE, INDEX_TASK, - SYSTEM_QUERIES_RESOURCE + SYSTEM_QUERIES_RESOURCE, + transform ); } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTestCoordinatorPausedTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTestCoordinatorPausedTest.java index b38a0c8b6213..a75f9a933c5a 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTestCoordinatorPausedTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTestCoordinatorPausedTest.java @@ 
-19,8 +19,10 @@ package org.apache.druid.tests.indexer; +import com.fasterxml.jackson.core.JsonProcessingException; import com.google.inject.Inject; import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.server.coordinator.CoordinatorDynamicConfig; import org.apache.druid.testing.clients.CoordinatorResourceTestClient; @@ -31,6 +33,7 @@ import java.io.Closeable; import java.util.concurrent.TimeUnit; +import java.util.function.Function; @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITTestCoordinatorPausedTest extends AbstractITBatchIndexTest @@ -54,9 +57,24 @@ public void testCoordinatorPause() throws Exception final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()) ) { coordinatorClient.postDynamicConfig(DYNAMIC_CONFIG_PAUSED); + + final Function transform = spec -> { + try { + return StringUtils.replace( + spec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("0") + ); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }; + doIndexTest( INDEX_DATASOURCE, INDEX_TASK, + transform, INDEX_QUERIES_RESOURCE, false, false, From 72cd38a438c16a4cadb4bdedc7b643ffde0b7230 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Mon, 14 Dec 2020 18:14:22 -0600 Subject: [PATCH 05/28] address an lgtm flag --- .../java/org/apache/druid/indexing/common/task/IndexTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 7361f88ac9a8..44403bb92b3d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -914,7 +914,7 @@ private 
TaskStatus generateAndPublishSegments( // Try to wait for segments to be loaded by the cluster if the tuning config specifies a non-zero value // for awaitSegmentAvailabilityTimeoutMillis - if (tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() > 0) { + if (tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() > 0 && published != null) { ingestionState = IngestionState.SEGMENT_AVAILABILITY_WAIT; ArrayList segmentsToWaitFor = new ArrayList<>(published.getSegments()); ExecutorService availabilityExec = From f47a8bdb1dfe9602fa64459c3cf8c8ec13808cae Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Tue, 15 Dec 2020 14:42:48 -0600 Subject: [PATCH 06/28] spell checker still flagging for hadoop doc. adding under that file header too --- website/.spelling | 1 + 1 file changed, 1 insertion(+) diff --git a/website/.spelling b/website/.spelling index 26d859efab49..969f35963010 100644 --- a/website/.spelling +++ b/website/.spelling @@ -950,6 +950,7 @@ InputSplit JobHistory a.example.com assumeGrouped +awaitSegmentAvailabilityTimeoutMillis cleanupOnFailure combineText connectURI From d5ed3c838e6939f4779bc34b65b9bcae321507b3 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Tue, 15 Dec 2020 14:45:47 -0600 Subject: [PATCH 07/28] fix compaction IT --- .../org/apache/druid/tests/indexer/ITCompactionTaskTest.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java index 0d32f274a5b7..5e3533dbb00c 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java @@ -119,6 +119,11 @@ private void loadData(String indexTask) throws Exception { String taskSpec = getResourceAsString(indexTask); taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", 
fullDatasourceName); + taskSpec = StringUtils.replace( + taskSpec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("0") + ); final String taskID = indexer.submitTask(taskSpec); LOG.info("TaskID for loading index task %s", taskID); indexer.waitUntilTaskCompletes(taskID); From 71d6f180e31bec39cf5a3917b44c6a9afd984cfc Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Tue, 15 Dec 2020 17:13:11 -0600 Subject: [PATCH 08/28] Updates to wait for availability method --- .../druid/indexing/common/task/AbstractBatchIndexTask.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index 2159fe0888c1..ecca97582692 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -597,6 +597,7 @@ protected boolean waitForSegmentAvailability(TaskToolbox toolbox, ExecutorServic { if (segmentsToWaitFor.isEmpty()) { log.info("Asked to wait for segments to be available, but I wasn't provided with any segments!?"); + return false; } log.info("Waiting for segments to be loaded by the cluster..."); @@ -613,10 +614,8 @@ protected boolean waitForSegmentAvailability(TaskToolbox toolbox, ExecutorServic "Confirmed availability for [%s]. 
Removing from list of segments to wait for", s.getId() ); - synchronized (segmentsToWaitFor) { - segmentsToWaitFor.remove(s); - } synchronized (availabilityCondition) { + segmentsToWaitFor.remove(s); availabilityCondition.notifyAll(); } } @@ -627,7 +626,7 @@ protected boolean waitForSegmentAvailability(TaskToolbox toolbox, ExecutorServic try { synchronized (availabilityCondition) { while (!segmentsToWaitFor.isEmpty()) { - log.info("[%d] segments stil unavailable.", segmentsToWaitFor.size()); + log.info("[%d] segments still unavailable.", segmentsToWaitFor.size()); long curr = System.currentTimeMillis(); if (forceEndWaitTime - curr > 0) { availabilityCondition.wait(forceEndWaitTime - curr); From 0170dcaa6b81ff72b117c84faeb9a755006544d3 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Tue, 15 Dec 2020 17:13:23 -0600 Subject: [PATCH 09/28] improve unit testing for patch --- .../indexing/common/task/IndexTaskTest.java | 165 ++++++++++++++++++ .../common/task/TaskReportSerdeTest.java | 2 +- 2 files changed, 166 insertions(+), 1 deletion(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index 2baece1be254..520649cb3d69 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -49,6 +49,7 @@ import org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskReport; +import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.actions.SegmentAllocateAction; import org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig; import org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec; @@ -56,6 +57,7 @@ import 
org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; @@ -70,6 +72,8 @@ import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndexStorageAdapter; import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.handoff.SegmentHandoffNotifier; +import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.indexing.DataSchema; @@ -83,14 +87,18 @@ import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory; import org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter; +import org.apache.druid.segment.realtime.plumber.NoopSegmentHandoffNotifierFactory; import org.apache.druid.segment.transform.ExpressionTransform; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec; import org.apache.druid.timeline.partition.HashPartitionFunction; import org.apache.druid.timeline.partition.NumberedOverwriteShardSpec; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.apache.druid.timeline.partition.PartitionIds; +import org.apache.druid.timeline.partition.ShardSpec; +import org.easymock.EasyMock; import org.hamcrest.CoreMatchers; import org.joda.time.Interval; import org.junit.Assert; @@ -115,6 +123,7 @@ import java.util.List; import java.util.Map; 
import java.util.Set; +import java.util.concurrent.ExecutorService; @RunWith(Parameterized.class) public class IndexTaskTest extends IngestionTestBase @@ -954,6 +963,162 @@ public void testBestEffortRollup() throws Exception } } + @Test + public void testWaitForSegmentAvailabilityNoSegments() throws IOException + { + final File tmpDir = temporaryFolder.newFolder(); + + TaskToolbox mockToolbox = EasyMock.createMock(TaskToolbox.class); + ExecutorService mockExec = EasyMock.createMock(ExecutorService.class); + List segmentsToWaitFor = new ArrayList<>(); + IndexTask indexTask = new IndexTask( + null, + null, + createDefaultIngestionSpec( + jsonMapper, + tmpDir, + new UniformGranularitySpec( + Granularities.HOUR, + Granularities.MINUTE, + null + ), + null, + createTuningConfigWithMaxRowsPerSegment(2, true), + false + ), + null + ); + + EasyMock.replay(mockToolbox); + EasyMock.replay(mockExec); + Assert.assertFalse(indexTask.waitForSegmentAvailability(mockToolbox, mockExec, segmentsToWaitFor, 1000)); + EasyMock.verify(mockToolbox); + EasyMock.verify(mockExec); + } + + @Test + public void testWaitForSegmentAvailabilityMultipleSegmentsTimeout() throws IOException + { + final File tmpDir = temporaryFolder.newFolder(); + + TaskToolbox mockToolbox = EasyMock.createMock(TaskToolbox.class); + ExecutorService mockExec = EasyMock.createMock(ExecutorService.class); + SegmentHandoffNotifierFactory mockFactory = EasyMock.createMock(SegmentHandoffNotifierFactory.class); + SegmentHandoffNotifier mockNotifier = EasyMock.createMock(SegmentHandoffNotifier.class); + + DataSegment mockDataSegment1 = EasyMock.createMock(DataSegment.class); + DataSegment mockDataSegment2 = EasyMock.createMock(DataSegment.class); + List segmentsToWaitFor = new ArrayList<>(); + segmentsToWaitFor.add(mockDataSegment1); + segmentsToWaitFor.add(mockDataSegment2); + + IndexTask indexTask = new IndexTask( + null, + null, + createDefaultIngestionSpec( + jsonMapper, + tmpDir, + new UniformGranularitySpec( + 
Granularities.HOUR, + Granularities.MINUTE, + null + ), + null, + createTuningConfigWithMaxRowsPerSegment(2, true), + false + ), + null + ); + + EasyMock.expect(mockDataSegment1.getInterval()).andReturn(new Interval(0L, 1L)).once(); + EasyMock.expect(mockDataSegment1.getVersion()).andReturn("dummyString").once(); + EasyMock.expect(mockDataSegment1.getShardSpec()).andReturn(EasyMock.createMock(ShardSpec.class)).once(); + EasyMock.expect(mockDataSegment2.getInterval()).andReturn(new Interval(0L, 1L)).once(); + EasyMock.expect(mockDataSegment2.getVersion()).andReturn("dummyString").once(); + EasyMock.expect(mockDataSegment2.getShardSpec()).andReturn(EasyMock.createMock(ShardSpec.class)).once(); + + EasyMock.expect(mockToolbox.getSegmentHandoffNotifierFactory()).andReturn(mockFactory).once(); + EasyMock.expect(mockDataSegment1.getDataSource()).andReturn("MockDataSource").once(); + EasyMock.expect(mockFactory.createSegmentHandoffNotifier("MockDataSource")).andReturn(mockNotifier).once(); + mockNotifier.start(); + EasyMock.expectLastCall().once(); + mockNotifier.registerSegmentHandoffCallback(EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject()); + EasyMock.expectLastCall().andReturn(true).times(2); + mockNotifier.close(); + EasyMock.expectLastCall().once(); + + + EasyMock.replay(mockToolbox); + EasyMock.replay(mockExec); + EasyMock.replay(mockDataSegment1, mockDataSegment2); + EasyMock.replay(mockFactory, mockNotifier); + + Assert.assertFalse(indexTask.waitForSegmentAvailability(mockToolbox, mockExec, segmentsToWaitFor, 1000)); + EasyMock.verify(mockToolbox); + EasyMock.verify(mockExec); + EasyMock.verify(mockDataSegment1, mockDataSegment2); + EasyMock.verify(mockFactory, mockNotifier); + } + + @Test + public void testWaitForSegmentAvailabilityMultipleSegmentsSuccess() throws IOException + { + ExecutorService exec = Execs.singleThreaded("HandoffTest"); + try { + final File tmpDir = temporaryFolder.newFolder(); + + TaskToolbox mockToolbox = 
EasyMock.createMock(TaskToolbox.class); + + DataSegment mockDataSegment1 = EasyMock.createMock(DataSegment.class); + DataSegment mockDataSegment2 = EasyMock.createMock(DataSegment.class); + List segmentsToWaitFor = new ArrayList<>(); + segmentsToWaitFor.add(mockDataSegment1); + segmentsToWaitFor.add(mockDataSegment2); + + IndexTask indexTask = new IndexTask( + null, + null, + createDefaultIngestionSpec( + jsonMapper, + tmpDir, + new UniformGranularitySpec( + Granularities.HOUR, + Granularities.MINUTE, + null + ), + null, + createTuningConfigWithMaxRowsPerSegment(2, true), + false + ), + null + ); + + EasyMock.expect(mockDataSegment1.getInterval()).andReturn(new Interval(0L, 1L)).once(); + EasyMock.expect(mockDataSegment1.getVersion()).andReturn("dummyString").once(); + EasyMock.expect(mockDataSegment1.getShardSpec()).andReturn(EasyMock.createMock(ShardSpec.class)).once(); + EasyMock.expect(mockDataSegment1.getId()).andReturn(SegmentId.dummy("MockDataSource")).once(); + EasyMock.expect(mockDataSegment2.getInterval()).andReturn(new Interval(0L, 1L)).once(); + EasyMock.expect(mockDataSegment2.getVersion()).andReturn("dummyString").once(); + EasyMock.expect(mockDataSegment2.getShardSpec()).andReturn(EasyMock.createMock(ShardSpec.class)).once(); + EasyMock.expect(mockDataSegment2.getId()).andReturn(SegmentId.dummy("MockDataSource")).once(); + + EasyMock.expect(mockToolbox.getSegmentHandoffNotifierFactory()) + .andReturn(new NoopSegmentHandoffNotifierFactory()) + .once(); + EasyMock.expect(mockDataSegment1.getDataSource()).andReturn("MockDataSource").once(); + + EasyMock.replay(mockToolbox); + EasyMock.replay(mockDataSegment1, mockDataSegment2); + + Assert.assertTrue(indexTask.waitForSegmentAvailability(mockToolbox, exec, segmentsToWaitFor, 30000)); + EasyMock.verify(mockToolbox); + EasyMock.verify(mockDataSegment1, mockDataSegment2); + } + finally { + exec.shutdownNow(); + } + } + private static void populateRollupTestData(File tmpFile) throws IOException { try 
(BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java index f00df38c40d7..c4d153cf9fe0 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java @@ -65,7 +65,7 @@ public void testSerde() throws Exception IngestionStatsAndErrorsTaskReport.class ); Assert.assertEquals(report1, report2); - + Assert.assertEquals(report1.hashCode(), report2.hashCode()); Map reportMap1 = TaskReport.buildTaskReports(report1); String reportMapSerialized = jsonMapper.writeValueAsString(reportMap1); From 237325f909f79c3dbb51f03b1d7c841cb20fe062 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Thu, 17 Dec 2020 10:32:46 -0600 Subject: [PATCH 10/28] fix bad indentation --- .../indexing/common/task/IndexTaskTest.java | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index 520649cb3d69..59c4fcdd76ce 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -972,21 +972,21 @@ public void testWaitForSegmentAvailabilityNoSegments() throws IOException ExecutorService mockExec = EasyMock.createMock(ExecutorService.class); List segmentsToWaitFor = new ArrayList<>(); IndexTask indexTask = new IndexTask( - null, - null, - createDefaultIngestionSpec( - jsonMapper, - tmpDir, - new UniformGranularitySpec( - Granularities.HOUR, - Granularities.MINUTE, - null - ), - null, - 
createTuningConfigWithMaxRowsPerSegment(2, true), - false - ), - null + null, + null, + createDefaultIngestionSpec( + jsonMapper, + tmpDir, + new UniformGranularitySpec( + Granularities.HOUR, + Granularities.MINUTE, + null + ), + null, + createTuningConfigWithMaxRowsPerSegment(2, true), + false + ), + null ); EasyMock.replay(mockToolbox); From 848805ebbf31ac6c1d6e4d85ac4c7d0de2e5ed88 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Thu, 17 Dec 2020 12:00:25 -0600 Subject: [PATCH 11/28] refactor waitForSegmentAvailability --- .../common/task/AbstractBatchIndexTask.java | 43 +++--- .../indexing/common/task/HadoopIndexTask.java | 20 +-- .../druid/indexing/common/task/IndexTask.java | 20 +-- .../parallel/ParallelIndexSupervisorTask.java | 29 ++-- .../indexing/common/task/IndexTaskTest.java | 141 ++++++++++-------- 5 files changed, 119 insertions(+), 134 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index ecca97582692..5bb1bc51bfa2 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -46,6 +46,7 @@ import org.apache.druid.indexing.overlord.Segments; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.JodaUtils; +import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.granularity.GranularityType; import org.apache.druid.java.util.common.logger.Logger; @@ -76,7 +77,9 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; import 
java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; @@ -92,7 +95,6 @@ public abstract class AbstractBatchIndexTask extends AbstractTask { private static final Logger log = new Logger(AbstractBatchIndexTask.class); - private final Object availabilityCondition = new Object(); protected boolean segmentAvailabilityConfirmationCompleted = false; @GuardedBy("this") @@ -593,16 +595,25 @@ protected static List findInputSegments( * @param waitTimeout Millis to wait before giving up * @return True if all segments became available, otherwise False. */ - protected boolean waitForSegmentAvailability(TaskToolbox toolbox, ExecutorService exec, List segmentsToWaitFor, long waitTimeout) + protected boolean waitForSegmentAvailability( + TaskToolbox toolbox, + List segmentsToWaitFor, + long waitTimeout + ) { if (segmentsToWaitFor.isEmpty()) { - log.info("Asked to wait for segments to be available, but I wasn't provided with any segments!?"); + log.warn("Asked to wait for segments to be available, but I wasn't provided with any segments!?"); + return false; + } else if (waitTimeout <= 0) { + log.warn("Asked to wait for availability for <= 0 seconds?! Requested waitTimeout: [%s]", waitTimeout); return false; } log.info("Waiting for segments to be loaded by the cluster..."); SegmentHandoffNotifier notifier = toolbox.getSegmentHandoffNotifierFactory() .createSegmentHandoffNotifier(segmentsToWaitFor.get(0).getDataSource()); + ExecutorService exec = Execs.directExecutor(); + CountDownLatch doneSignal = new CountDownLatch(segmentsToWaitFor.size()); notifier.start(); for (DataSegment s : segmentsToWaitFor) { @@ -614,39 +625,21 @@ protected boolean waitForSegmentAvailability(TaskToolbox toolbox, ExecutorServic "Confirmed availability for [%s]. 
Removing from list of segments to wait for", s.getId() ); - synchronized (availabilityCondition) { - segmentsToWaitFor.remove(s); - availabilityCondition.notifyAll(); - } + doneSignal.countDown(); } ); } - long forceEndWaitTime = System.currentTimeMillis() + waitTimeout; try { - synchronized (availabilityCondition) { - while (!segmentsToWaitFor.isEmpty()) { - log.info("[%d] segments still unavailable.", segmentsToWaitFor.size()); - long curr = System.currentTimeMillis(); - if (forceEndWaitTime - curr > 0) { - availabilityCondition.wait(forceEndWaitTime - curr); - } else { - log.warn("Segment Availabilty Wait Timeout. [%d] segments might not have become available for " - + "query", - segmentsToWaitFor.size() - ); - return false; - } - } - } + return doneSignal.await(waitTimeout, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { - throw new RuntimeException(e); + log.warn("Interrupted while waiting for segment availablity; Unable to confirm availability!"); + return false; } finally { notifier.close(); } - return true; } private static class LockGranularityDetermineResult diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java index 04a08c82a4d3..cc911abd7dc4 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java @@ -53,7 +53,6 @@ import org.apache.druid.indexing.hadoop.OverlordActionBasedUsedSegmentsRetriever; import org.apache.druid.java.util.common.JodaUtils; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.incremental.RowIngestionMeters; @@ -85,7 +84,6 @@ import 
java.util.List; import java.util.Map; import java.util.SortedSet; -import java.util.concurrent.ExecutorService; public class HadoopIndexTask extends HadoopTask implements ChatHandler { @@ -452,19 +450,11 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception if (spec.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() > 0) { ingestionState = IngestionState.SEGMENT_AVAILABILITY_WAIT; ArrayList segmentsToWaitFor = new ArrayList<>(buildSegmentsStatus.getDataSegments()); - ExecutorService availabilityExec = - Execs.singleThreaded("HadoopTaskAvailabilityWaitExec"); - try { - segmentAvailabilityConfirmationCompleted = waitForSegmentAvailability( - toolbox, - availabilityExec, - segmentsToWaitFor, - spec.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() - ); - } - finally { - availabilityExec.shutdownNow(); - } + segmentAvailabilityConfirmationCompleted = waitForSegmentAvailability( + toolbox, + segmentsToWaitFor, + spec.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() + ); } ingestionState = IngestionState.COMPLETED; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 44403bb92b3d..ec9e753a84f7 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -69,7 +69,6 @@ import org.apache.druid.java.util.common.JodaUtils; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.UOE; -import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.java.util.common.logger.Logger; @@ -126,7 +125,6 @@ import java.util.SortedSet; import java.util.TreeMap; import 
java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.function.Function; @@ -917,19 +915,11 @@ private TaskStatus generateAndPublishSegments( if (tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() > 0 && published != null) { ingestionState = IngestionState.SEGMENT_AVAILABILITY_WAIT; ArrayList segmentsToWaitFor = new ArrayList<>(published.getSegments()); - ExecutorService availabilityExec = - Execs.singleThreaded("IndexTaskAvailabilityWaitExec"); - try { - segmentAvailabilityConfirmationCompleted = waitForSegmentAvailability( - toolbox, - availabilityExec, - segmentsToWaitFor, - tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() - ); - } - finally { - availabilityExec.shutdownNow(); - } + segmentAvailabilityConfirmationCompleted = waitForSegmentAvailability( + toolbox, + segmentsToWaitFor, + tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() + ); } ingestionState = IngestionState.COMPLETED; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 0f346e91360e..e1387eebfaf4 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -70,7 +70,6 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Pair; -import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.indexing.TuningConfig; @@ -116,7 +115,6 
@@ import java.util.Set; import java.util.SortedSet; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiFunction; @@ -173,6 +171,8 @@ public class ParallelIndexSupervisorTask extends AbstractBatchIndexTask implemen @MonotonicNonNull private volatile TaskToolbox toolbox; + private long awaitSegmentAvailabilityTimeoutMillis; + @JsonCreator public ParallelIndexSupervisorTask( @JsonProperty("id") String id, @@ -207,6 +207,8 @@ public ParallelIndexSupervisorTask( if (missingIntervalsInOverwriteMode) { addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, true); } + + awaitSegmentAvailabilityTimeoutMillis = ingestionSchema.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis(); } private static void checkPartitionsSpecForForceGuaranteedRollup(PartitionsSpec partitionsSpec) @@ -508,18 +510,11 @@ private void waitForSegmentAvailability(Map report .forEach(report -> { segmentsToWaitFor.addAll(report.getNewSegments()); }); - ExecutorService availabilityExec = Execs.singleThreaded("ParallelTaskAvailabilityWaitExec"); - try { - segmentAvailabilityConfirmationCompleted = waitForSegmentAvailability( - toolbox, - availabilityExec, - segmentsToWaitFor, - ingestionSchema.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() - ); - } - finally { - availabilityExec.shutdownNow(); - } + segmentAvailabilityConfirmationCompleted = waitForSegmentAvailability( + toolbox, + segmentsToWaitFor, + awaitSegmentAvailabilityTimeoutMillis + ); } /** @@ -537,7 +532,7 @@ private TaskStatus runSinglePhaseParallel(TaskToolbox toolbox) throws Exception if (state.isSuccess()) { //noinspection ConstantConditions publishSegments(toolbox, runner.getReports()); - if (ingestionSchema.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() > 0) { + if (awaitSegmentAvailabilityTimeoutMillis > 0) { 
waitForSegmentAvailability(runner.getReports()); } } @@ -683,7 +678,7 @@ private TaskStatus runHashPartitionMultiPhaseParallel(TaskToolbox toolbox) throw if (state.isSuccess()) { //noinspection ConstantConditions publishSegments(toolbox, mergeRunner.getReports()); - if (ingestionSchema.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() > 0) { + if (awaitSegmentAvailabilityTimeoutMillis > 0) { waitForSegmentAvailability(mergeRunner.getReports()); } } @@ -753,7 +748,7 @@ private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) thro TaskState mergeState = runNextPhase(mergeRunner); if (mergeState.isSuccess()) { publishSegments(toolbox, mergeRunner.getReports()); - if (ingestionSchema.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() > 0) { + if (awaitSegmentAvailabilityTimeoutMillis > 0) { waitForSegmentAvailability(mergeRunner.getReports()); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index 59c4fcdd76ce..f19a2423d261 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -57,7 +57,6 @@ import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; @@ -123,7 +122,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.ExecutorService; @RunWith(Parameterized.class) public class IndexTaskTest extends IngestionTestBase @@ -969,7 +967,6 @@ public void 
testWaitForSegmentAvailabilityNoSegments() throws IOException final File tmpDir = temporaryFolder.newFolder(); TaskToolbox mockToolbox = EasyMock.createMock(TaskToolbox.class); - ExecutorService mockExec = EasyMock.createMock(ExecutorService.class); List segmentsToWaitFor = new ArrayList<>(); IndexTask indexTask = new IndexTask( null, @@ -990,10 +987,39 @@ public void testWaitForSegmentAvailabilityNoSegments() throws IOException ); EasyMock.replay(mockToolbox); - EasyMock.replay(mockExec); - Assert.assertFalse(indexTask.waitForSegmentAvailability(mockToolbox, mockExec, segmentsToWaitFor, 1000)); + Assert.assertFalse(indexTask.waitForSegmentAvailability(mockToolbox, segmentsToWaitFor, 1000)); + EasyMock.verify(mockToolbox); + } + + @Test + public void testWaitForSegmentAvailabilityInvalidWaitTimeout() throws IOException + { + final File tmpDir = temporaryFolder.newFolder(); + + TaskToolbox mockToolbox = EasyMock.createMock(TaskToolbox.class); + List segmentsToWaitFor = new ArrayList<>(); + segmentsToWaitFor.add(EasyMock.createMock(DataSegment.class)); + IndexTask indexTask = new IndexTask( + null, + null, + createDefaultIngestionSpec( + jsonMapper, + tmpDir, + new UniformGranularitySpec( + Granularities.HOUR, + Granularities.MINUTE, + null + ), + null, + createTuningConfigWithMaxRowsPerSegment(2, true), + false + ), + null + ); + + EasyMock.replay(mockToolbox); + Assert.assertFalse(indexTask.waitForSegmentAvailability(mockToolbox, segmentsToWaitFor, -1)); EasyMock.verify(mockToolbox); - EasyMock.verify(mockExec); } @Test @@ -1002,7 +1028,6 @@ public void testWaitForSegmentAvailabilityMultipleSegmentsTimeout() throws IOExc final File tmpDir = temporaryFolder.newFolder(); TaskToolbox mockToolbox = EasyMock.createMock(TaskToolbox.class); - ExecutorService mockExec = EasyMock.createMock(ExecutorService.class); SegmentHandoffNotifierFactory mockFactory = EasyMock.createMock(SegmentHandoffNotifierFactory.class); SegmentHandoffNotifier mockNotifier = 
EasyMock.createMock(SegmentHandoffNotifier.class); @@ -1030,10 +1055,10 @@ public void testWaitForSegmentAvailabilityMultipleSegmentsTimeout() throws IOExc null ); - EasyMock.expect(mockDataSegment1.getInterval()).andReturn(new Interval(0L, 1L)).once(); + EasyMock.expect(mockDataSegment1.getInterval()).andReturn(Intervals.of("1970-01-01/2100-01-01")).once(); EasyMock.expect(mockDataSegment1.getVersion()).andReturn("dummyString").once(); EasyMock.expect(mockDataSegment1.getShardSpec()).andReturn(EasyMock.createMock(ShardSpec.class)).once(); - EasyMock.expect(mockDataSegment2.getInterval()).andReturn(new Interval(0L, 1L)).once(); + EasyMock.expect(mockDataSegment2.getInterval()).andReturn(Intervals.of("1970-01-01/2100-01-01")).once(); EasyMock.expect(mockDataSegment2.getVersion()).andReturn("dummyString").once(); EasyMock.expect(mockDataSegment2.getShardSpec()).andReturn(EasyMock.createMock(ShardSpec.class)).once(); @@ -1049,13 +1074,11 @@ public void testWaitForSegmentAvailabilityMultipleSegmentsTimeout() throws IOExc EasyMock.replay(mockToolbox); - EasyMock.replay(mockExec); EasyMock.replay(mockDataSegment1, mockDataSegment2); EasyMock.replay(mockFactory, mockNotifier); - Assert.assertFalse(indexTask.waitForSegmentAvailability(mockToolbox, mockExec, segmentsToWaitFor, 1000)); + Assert.assertFalse(indexTask.waitForSegmentAvailability(mockToolbox, segmentsToWaitFor, 1000)); EasyMock.verify(mockToolbox); - EasyMock.verify(mockExec); EasyMock.verify(mockDataSegment1, mockDataSegment2); EasyMock.verify(mockFactory, mockNotifier); } @@ -1063,60 +1086,54 @@ public void testWaitForSegmentAvailabilityMultipleSegmentsTimeout() throws IOExc @Test public void testWaitForSegmentAvailabilityMultipleSegmentsSuccess() throws IOException { - ExecutorService exec = Execs.singleThreaded("HandoffTest"); - try { - final File tmpDir = temporaryFolder.newFolder(); + final File tmpDir = temporaryFolder.newFolder(); - TaskToolbox mockToolbox = EasyMock.createMock(TaskToolbox.class); + 
TaskToolbox mockToolbox = EasyMock.createMock(TaskToolbox.class); - DataSegment mockDataSegment1 = EasyMock.createMock(DataSegment.class); - DataSegment mockDataSegment2 = EasyMock.createMock(DataSegment.class); - List segmentsToWaitFor = new ArrayList<>(); - segmentsToWaitFor.add(mockDataSegment1); - segmentsToWaitFor.add(mockDataSegment2); + DataSegment mockDataSegment1 = EasyMock.createMock(DataSegment.class); + DataSegment mockDataSegment2 = EasyMock.createMock(DataSegment.class); + List segmentsToWaitFor = new ArrayList<>(); + segmentsToWaitFor.add(mockDataSegment1); + segmentsToWaitFor.add(mockDataSegment2); - IndexTask indexTask = new IndexTask( - null, - null, - createDefaultIngestionSpec( - jsonMapper, - tmpDir, - new UniformGranularitySpec( - Granularities.HOUR, - Granularities.MINUTE, - null - ), - null, - createTuningConfigWithMaxRowsPerSegment(2, true), - false - ), - null - ); + IndexTask indexTask = new IndexTask( + null, + null, + createDefaultIngestionSpec( + jsonMapper, + tmpDir, + new UniformGranularitySpec( + Granularities.HOUR, + Granularities.MINUTE, + null + ), + null, + createTuningConfigWithMaxRowsPerSegment(2, true), + false + ), + null + ); - EasyMock.expect(mockDataSegment1.getInterval()).andReturn(new Interval(0L, 1L)).once(); - EasyMock.expect(mockDataSegment1.getVersion()).andReturn("dummyString").once(); - EasyMock.expect(mockDataSegment1.getShardSpec()).andReturn(EasyMock.createMock(ShardSpec.class)).once(); - EasyMock.expect(mockDataSegment1.getId()).andReturn(SegmentId.dummy("MockDataSource")).once(); - EasyMock.expect(mockDataSegment2.getInterval()).andReturn(new Interval(0L, 1L)).once(); - EasyMock.expect(mockDataSegment2.getVersion()).andReturn("dummyString").once(); - EasyMock.expect(mockDataSegment2.getShardSpec()).andReturn(EasyMock.createMock(ShardSpec.class)).once(); - EasyMock.expect(mockDataSegment2.getId()).andReturn(SegmentId.dummy("MockDataSource")).once(); - - 
EasyMock.expect(mockToolbox.getSegmentHandoffNotifierFactory()) - .andReturn(new NoopSegmentHandoffNotifierFactory()) - .once(); - EasyMock.expect(mockDataSegment1.getDataSource()).andReturn("MockDataSource").once(); - - EasyMock.replay(mockToolbox); - EasyMock.replay(mockDataSegment1, mockDataSegment2); - - Assert.assertTrue(indexTask.waitForSegmentAvailability(mockToolbox, exec, segmentsToWaitFor, 30000)); - EasyMock.verify(mockToolbox); - EasyMock.verify(mockDataSegment1, mockDataSegment2); - } - finally { - exec.shutdownNow(); - } + EasyMock.expect(mockDataSegment1.getInterval()).andReturn(Intervals.of("1970-01-01/1971-01-01")).once(); + EasyMock.expect(mockDataSegment1.getVersion()).andReturn("dummyString").once(); + EasyMock.expect(mockDataSegment1.getShardSpec()).andReturn(EasyMock.createMock(ShardSpec.class)).once(); + EasyMock.expect(mockDataSegment1.getId()).andReturn(SegmentId.dummy("MockDataSource")).once(); + EasyMock.expect(mockDataSegment2.getInterval()).andReturn(Intervals.of("1971-01-01/1972-01-01")).once(); + EasyMock.expect(mockDataSegment2.getVersion()).andReturn("dummyString").once(); + EasyMock.expect(mockDataSegment2.getShardSpec()).andReturn(EasyMock.createMock(ShardSpec.class)).once(); + EasyMock.expect(mockDataSegment2.getId()).andReturn(SegmentId.dummy("MockDataSource")).once(); + + EasyMock.expect(mockToolbox.getSegmentHandoffNotifierFactory()) + .andReturn(new NoopSegmentHandoffNotifierFactory()) + .once(); + EasyMock.expect(mockDataSegment1.getDataSource()).andReturn("MockDataSource").once(); + + EasyMock.replay(mockToolbox); + EasyMock.replay(mockDataSegment1, mockDataSegment2); + + Assert.assertTrue(indexTask.waitForSegmentAvailability(mockToolbox, segmentsToWaitFor, 30000)); + EasyMock.verify(mockToolbox); + EasyMock.verify(mockDataSegment1, mockDataSegment2); } private static void populateRollupTestData(File tmpFile) throws IOException From 32fdff51d315f71fa44bcf5522a9dc823124593b Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" 
Date: Tue, 5 Jan 2021 16:28:13 -0600 Subject: [PATCH 12/28] Fixes based off of review comments --- .../indexing/common/task/AbstractBatchIndexTask.java | 4 ++-- .../seekablestream/SeekableStreamIndexTaskRunner.java | 11 ++++++++++- .../indexing/common/task/TaskReportSerdeTest.java | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index 5bb1bc51bfa2..60568fdb6e20 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -602,8 +602,8 @@ protected boolean waitForSegmentAvailability( ) { if (segmentsToWaitFor.isEmpty()) { - log.warn("Asked to wait for segments to be available, but I wasn't provided with any segments!?"); - return false; + log.info("Asked to wait for segments to be available, but I wasn't provided with any segments!?"); + return true; } else if (waitTimeout <= 0) { log.warn("Asked to wait for availability for <= 0 seconds?! Requested waitTimeout: [%s]", waitTimeout); return false; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java index a283c5ae9061..dad6a48a32cf 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java @@ -1049,6 +1049,15 @@ private synchronized void persistSequences() throws IOException log.info("Saved sequence metadata to disk: %s", sequences); } + /** + * Return a map of reports for the task. 
+ * + * A successful task should always have a null errorMsg. A failed task should always have a non-null + * errorMsg. + * + * @param errorMsg Nullable error message for the task. null if task succeeded. + * @return Map of reports for the task. + */ private Map getTaskCompletionReports(@Nullable String errorMsg) { return TaskReport.buildTaskReports( @@ -1059,7 +1068,7 @@ private Map getTaskCompletionReports(@Nullable String errorM getTaskCompletionUnparseableEvents(), getTaskCompletionRowStats(), errorMsg, - false + errorMsg == null ) ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java index c4d153cf9fe0..65071b2ac194 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskReportSerdeTest.java @@ -56,7 +56,7 @@ public void testSerde() throws Exception "number", 1234 ), "an error message", - false + true ) ); String report1serialized = jsonMapper.writeValueAsString(report1); From 9295df14e44d970cd1e71c893e0e5572279db905 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Wed, 6 Jan 2021 11:20:24 -0600 Subject: [PATCH 13/28] cleanup to get compile after merging with master --- .../common/task/ClientCompactionTaskQuerySerdeTest.java | 1 + .../druid/indexing/common/task/CompactionTaskRunTest.java | 1 + .../druid/indexing/common/task/CompactionTaskTest.java | 6 ++++++ .../apache/druid/indexing/common/task/IndexTaskTest.java | 4 ++++ .../parallel/AbstractParallelIndexSupervisorTaskTest.java | 2 ++ .../parallel/ParallelIndexSupervisorTaskKillTest.java | 1 + .../parallel/ParallelIndexSupervisorTaskResourceTest.java | 1 + .../parallel/ParallelIndexSupervisorTaskSerdeTest.java | 1 + .../batch/parallel/ParallelIndexSupervisorTaskTest.java | 1 +
.../task/batch/parallel/ParallelIndexTestingFactory.java | 1 + .../task/batch/parallel/ParallelIndexTuningConfigTest.java | 7 +++++++ .../batch/parallel/SinglePhaseParallelIndexingTest.java | 1 + .../apache/druid/indexing/overlord/TaskLifecycleTest.java | 4 ++++ 13 files changed, 31 insertions(+) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java index a25ae37648ae..d0b38c964a16 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java @@ -239,6 +239,7 @@ public void testCompactionTaskToClientCompactionTaskQuery() throws IOException null, null, null, + null, null ) ) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java index b466f3546c47..add7f00fa193 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java @@ -299,6 +299,7 @@ public void testRunWithHashPartitioning() throws Exception null, null, null, + null, null ) ) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java index 6eddb65d0550..15d8f9bd99f5 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java @@ -335,6 +335,7 @@ private static ParallelIndexTuningConfig 
createTuningConfig() null, null, null, + null, null ); } @@ -464,6 +465,7 @@ public void testSerdeWithOldTuningConfigSuccessfullyDeserializeToNewOne() throws null, null, null, + null, null ), null, @@ -632,6 +634,7 @@ public void testCreateIngestionSchemaWithTargetPartitionSize() throws IOExceptio null, null, null, + null, null ); final List ingestionSpecs = CompactionTask.createIngestionSchema( @@ -701,6 +704,7 @@ public void testCreateIngestionSchemaWithMaxTotalRows() throws IOException, Segm null, null, null, + null, null ); final List ingestionSpecs = CompactionTask.createIngestionSchema( @@ -770,6 +774,7 @@ public void testCreateIngestionSchemaWithNumShards() throws IOException, Segment null, null, null, + null, null ); final List ingestionSpecs = CompactionTask.createIngestionSchema( @@ -1160,6 +1165,7 @@ private void assertIngestionSchema( null, null, null, + null, null ), expectedSegmentGranularity diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index f19a2423d261..243be41530b9 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -1318,6 +1318,7 @@ public void testMultipleParseExceptionsSuccess() throws Exception true, 7, 7, + null, null ); @@ -1448,6 +1449,7 @@ public void testMultipleParseExceptionsFailure() throws Exception true, 2, 5, + null, null ); @@ -1570,6 +1572,7 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc true, 2, 5, + null, null ); @@ -2016,6 +2019,7 @@ static IndexTuningConfig createTuningConfig( null, null, 1, + null, null ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index 6d0585f547f9..7a5420f494fb 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -168,6 +168,7 @@ public class AbstractParallelIndexSupervisorTaskTest extends IngestionTestBase null, null, null, + null, null ); @@ -250,6 +251,7 @@ protected ParallelIndexTuningConfig newTuningConfig( null, null, null, + null, null ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java index ee231b50d780..bd96912637e5 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java @@ -189,6 +189,7 @@ private ParallelIndexSupervisorTask newTask( null, null, null, + null, null ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java index a4937774eebf..a6aa9b02710b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java @@ -438,6 +438,7 @@ private TestSupervisorTask newTask( null, 
null, null, + null, null ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java index 70c34f9b94c8..6ccf1ed6f932 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java @@ -272,6 +272,7 @@ ParallelIndexIngestionSpec build() null, null, null, + null, null ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java index 46663c6fba12..7f90c66b5833 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskTest.java @@ -225,6 +225,7 @@ public void testFailToConstructWhenBothAppendToExistingAndForceGuaranteedRollupA false, null, null, + null, null ); final ParallelIndexIngestionSpec indexIngestionSpec = new ParallelIndexIngestionSpec( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java index aacb6b0111fb..c031fb8ab285 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java 
@@ -183,6 +183,7 @@ ParallelIndexTuningConfig build() logParseExceptions, maxParseExceptions, 25, + null, null ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfigTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfigTest.java index cf862ff6a9e7..c6694d784c6a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfigTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfigTest.java @@ -100,6 +100,7 @@ public void testSerdeWithMaxRowsPerSegment() false, null, null, + null, null ); final byte[] json = mapper.writeValueAsBytes(tuningConfig); @@ -144,6 +145,7 @@ public void testSerdeWithMaxNumConcurrentSubTasks() throws IOException false, null, null, + null, null ); final byte[] json = mapper.writeValueAsBytes(tuningConfig); @@ -188,6 +190,7 @@ public void testSerdeWithMaxNumSubTasks() throws IOException false, null, null, + null, null ); final byte[] json = mapper.writeValueAsBytes(tuningConfig); @@ -234,6 +237,7 @@ public void testSerdeWithMaxNumSubTasksAndMaxNumConcurrentSubTasks() false, null, null, + null, null ); } @@ -277,6 +281,7 @@ public void testConstructorWithHashedPartitionsSpecAndNonForceGuaranteedRollupFa false, null, null, + null, null ); } @@ -320,6 +325,7 @@ public void testConstructorWithSingleDimensionPartitionsSpecAndNonForceGuarantee false, null, null, + null, null ); } @@ -363,6 +369,7 @@ public void testConstructorWithDynamicPartitionsSpecAndForceGuaranteedRollupFail false, null, null, + null, null ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java index 
d27c19bc1e19..a2fd21c72050 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java @@ -363,6 +363,7 @@ public void testWith1MaxNumConcurrentSubTasks() null, null, null, + null, null ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java index 4ef816b3e748..767eb05e8d45 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java @@ -765,6 +765,7 @@ public void testIndexTask() throws Exception null, null, null, + null, null ) ), @@ -847,6 +848,7 @@ public void testIndexTaskFailure() throws Exception null, null, null, + null, null ) ), @@ -1274,6 +1276,7 @@ public void testResumeTasks() throws Exception null, null, null, + null, null ) ), @@ -1383,6 +1386,7 @@ public void testUnifiedAppenderatorsManagerCleanup() throws Exception null, null, null, + null, null ) ), From abf7f99f6412975b83d7c83e171ef4cc35d54321 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Wed, 6 Jan 2021 11:25:38 -0600 Subject: [PATCH 14/28] fix failing test after previous logic update --- .../org/apache/druid/indexing/common/task/IndexTaskTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index 243be41530b9..7803593513e0 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ 
-987,7 +987,7 @@ public void testWaitForSegmentAvailabilityNoSegments() throws IOException ); EasyMock.replay(mockToolbox); - Assert.assertFalse(indexTask.waitForSegmentAvailability(mockToolbox, segmentsToWaitFor, 1000)); + Assert.assertTrue(indexTask.waitForSegmentAvailability(mockToolbox, segmentsToWaitFor, 1000)); EasyMock.verify(mockToolbox); } From d2e9918d3ba2e9e388645de44bd7138866a14e52 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Wed, 6 Jan 2021 14:11:54 -0600 Subject: [PATCH 15/28] add back code that must have gotten deleted during conflict resolution --- .../org/apache/druid/indexing/common/task/CompactionTask.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java index ad98a4b53df7..8fc5932da150 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java @@ -834,6 +834,9 @@ List findSegments(TaskActionClient actionClient) throws IOException void checkSegments(LockGranularity lockGranularityInUse, List latestSegments) { + if (latestSegments.isEmpty()) { + throw new ISE("No segments found for compaction. Please check that datasource name and interval are correct."); + } if (!inputSpec.validateSegments(lockGranularityInUse, latestSegments)) { throw new ISE( "Specified segments in the spec are different from the current used segments. 
" From 222d3c2bfa9257a7a6c765597cd87d5bfb3587e4 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Thu, 28 Jan 2021 10:53:31 -0600 Subject: [PATCH 16/28] update some logging code --- .../druid/indexing/common/task/AbstractBatchIndexTask.java | 4 ++-- .../handoff/CoordinatorBasedSegmentHandoffNotifier.java | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index 60568fdb6e20..45aeaaeb0ff4 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -608,7 +608,7 @@ protected boolean waitForSegmentAvailability( log.warn("Asked to wait for availability for <= 0 seconds?! Requested waitTimeout: [%s]", waitTimeout); return false; } - log.info("Waiting for segments to be loaded by the cluster..."); + log.info("Waiting for [%d] segments to be loaded by the cluster...", segmentsToWaitFor.size()); SegmentHandoffNotifier notifier = toolbox.getSegmentHandoffNotifierFactory() .createSegmentHandoffNotifier(segmentsToWaitFor.get(0).getDataSource()); @@ -621,7 +621,7 @@ protected boolean waitForSegmentAvailability( new SegmentDescriptor(s.getInterval(), s.getVersion(), s.getShardSpec().getPartitionNum()), exec, () -> { - log.info( + log.debug( "Confirmed availability for [%s]. 
Removing from list of segments to wait for", s.getId() ); diff --git a/server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifier.java b/server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifier.java index c53ad19548fb..fa884022a3a3 100644 --- a/server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifier.java +++ b/server/src/main/java/org/apache/druid/segment/handoff/CoordinatorBasedSegmentHandoffNotifier.java @@ -60,7 +60,7 @@ public CoordinatorBasedSegmentHandoffNotifier( @Override public boolean registerSegmentHandoffCallback(SegmentDescriptor descriptor, Executor exec, Runnable handOffRunnable) { - log.info("Adding SegmentHandoffCallback for dataSource[%s] Segment[%s]", dataSource, descriptor); + log.debug("Adding SegmentHandoffCallback for dataSource[%s] Segment[%s]", dataSource, descriptor); Pair prev = handOffCallbacks.putIfAbsent( descriptor, new Pair<>(exec, handOffRunnable) @@ -106,7 +106,7 @@ void checkForSegmentHandoffs() handOffComplete = isHandOffComplete(loadedSegments, descriptor); } if (handOffComplete) { - log.info("Segment Handoff complete for dataSource[%s] Segment[%s]", dataSource, descriptor); + log.debug("Segment Handoff complete for dataSource[%s] Segment[%s]", dataSource, descriptor); entry.getValue().lhs.execute(entry.getValue().rhs); itr.remove(); } @@ -122,7 +122,7 @@ void checkForSegmentHandoffs() } } if (!handOffCallbacks.isEmpty()) { - log.info("Still waiting for Handoff for Segments : [%s]", handOffCallbacks.keySet()); + log.info("Still waiting for Handoff for [%d] Segments", handOffCallbacks.size()); } } catch (Throwable t) { From 452b649088340b757cc3656bd2c241da73c8fb60 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Thu, 28 Jan 2021 11:49:32 -0600 Subject: [PATCH 17/28] fixes to get compilation working after merge with master --- .../java/org/apache/druid/indexing/common/task/IndexTask.java | 2 +- 
.../common/task/batch/parallel/ParallelIndexTuningConfig.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 493dbfa19a21..8b8e9a694bad 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -1269,7 +1269,7 @@ public IndexTuningConfig( private IndexTuningConfig() { - this(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null); + this(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null); } private IndexTuningConfig( diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfig.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfig.java index c68045e077db..f474c1efca9b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfig.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTuningConfig.java @@ -101,6 +101,7 @@ public static ParallelIndexTuningConfig defaultConfig() null, null, null, + null, null ); } From 376cfa097cea19a86141b3dc63ce091d80c156f5 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Thu, 28 Jan 2021 14:45:34 -0600 Subject: [PATCH 18/28] reset interrupt flag in catch block after code review pointed it out --- .../druid/indexing/common/task/AbstractBatchIndexTask.java | 1 + 1 file changed, 1 insertion(+) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index d158898ae19a..e14020a375f4 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -639,6 +639,7 @@ protected boolean waitForSegmentAvailability( } catch (InterruptedException e) { log.warn("Interrupted while waiting for segment availablity; Unable to confirm availability!"); + Thread.currentThread().interrupt(); return false; } finally { From c63611a33fe7114014432830b0d69227340c6ad6 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Thu, 28 Jan 2021 14:51:36 -0600 Subject: [PATCH 19/28] small changes following self-review --- .../task/AppenderatorDriverRealtimeIndexTask.java | 10 +++++++++- .../druid/indexing/common/task/TaskSerdeTest.java | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index 9729d37eec38..0b2cdccc5db4 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -575,6 +575,14 @@ && isFirehoseDrainableByClosing(((TimedShutoffFirehoseFactory) firehoseFactory). && isFirehoseDrainableByClosing(((ClippedFirehoseFactory) firehoseFactory).getDelegate())); } + /** + * Return a map of reports for the task. + * + * A successfull task should always have a null errorMsg. A falied task should always have a non-null + * errorMsg. + * + * @return Map of reports for the task. 
+ */ private Map getTaskCompletionReports() { return TaskReport.buildTaskReports( @@ -585,7 +593,7 @@ private Map getTaskCompletionReports() getTaskCompletionUnparseableEvents(), getTaskCompletionRowStats(), errorMsg, - false + errorMsg == null ) ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java index cdbdc49477d3..fbd531bea097 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskSerdeTest.java @@ -308,6 +308,7 @@ public void testIndexTaskSerde() throws Exception Assert.assertEquals(taskTuningConfig.getNumShards(), task2TuningConfig.getNumShards()); Assert.assertEquals(taskTuningConfig.getMaxRowsPerSegment(), task2TuningConfig.getMaxRowsPerSegment()); Assert.assertEquals(taskTuningConfig.isReportParseExceptions(), task2TuningConfig.isReportParseExceptions()); + Assert.assertEquals(taskTuningConfig.getAwaitSegmentAvailabilityTimeoutMillis(), task2TuningConfig.getAwaitSegmentAvailabilityTimeoutMillis()); } @Test From 503bdb448c52c7e94efb136162ee907e3f90a13f Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Wed, 17 Mar 2021 15:06:26 -0500 Subject: [PATCH 20/28] fixup some issues brought on by merge with master --- .../clients/OverlordResourceTestClient.java | 27 +------------------ .../indexer/AbstractITBatchIndexTest.java | 2 +- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/integration-tests/src/main/java/org/apache/druid/testing/clients/OverlordResourceTestClient.java b/integration-tests/src/main/java/org/apache/druid/testing/clients/OverlordResourceTestClient.java index 023548d4c57a..6e40ee2346a3 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/clients/OverlordResourceTestClient.java +++ 
b/integration-tests/src/main/java/org/apache/druid/testing/clients/OverlordResourceTestClient.java @@ -28,7 +28,6 @@ import org.apache.druid.indexer.TaskStatusPlus; import org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport; import org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; -import org.apache.druid.indexing.common.TaskReport; import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManager; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.RetryUtils; @@ -224,7 +223,7 @@ public RowIngestionMetersTotals getTaskStats(String taskId) } } - private Map getTaskReport(String taskId) + public Map getTaskReport(String taskId) { try { StatusResponseHolder response = makeRequest( @@ -482,30 +481,6 @@ public void resumeSupervisor(String id) } } - public TaskReport getTaskReport(String taskID) - { - try { - StatusResponseHolder response = makeRequest( - HttpMethod.GET, - StringUtils.format( - "%stask/%s/reports", - getIndexerURL(), - StringUtils.urlEncode(taskID) - ) - ); - Map taskReportMap = jsonMapper.readValue( - response.getContent(), - new TypeReference>() - { - } - ); - return taskReportMap.get("ingestionStatsAndErrors"); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - private StatusResponseHolder makeRequest(HttpMethod method, String url) { try { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java index 0833296b1d27..acab50df3d35 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java @@ -335,7 +335,7 @@ private void submitTaskAndWait( } if (segmentAvailabilityConfirmationPair.lhs != null && segmentAvailabilityConfirmationPair.lhs) { - TaskReport reportRaw = 
indexer.getTaskReport(taskID); + TaskReport reportRaw = indexer.getTaskReport(taskID).get("ingestionStatsAndErrors"); IngestionStatsAndErrorsTaskReport report = (IngestionStatsAndErrorsTaskReport) reportRaw; IngestionStatsAndErrorsTaskReportData reportData = (IngestionStatsAndErrorsTaskReportData) report.getPayload(); if (segmentAvailabilityConfirmationPair.rhs != null) { From 0fa4f410cedb5a413d03ebb84bf6a618043bed0b Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Thu, 25 Mar 2021 17:54:07 -0500 Subject: [PATCH 21/28] small changes after review --- .../druid/indexing/common/task/AbstractBatchIndexTask.java | 6 +++--- .../seekablestream/SeekableStreamIndexTaskRunner.java | 7 +++++-- .../druid/indexing/common/task/IndexTaskSerdeTest.java | 2 +- .../org/apache/druid/segment/indexing/TuningConfig.java | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index ad2273cbf641..ec417fd07624 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -592,10 +592,10 @@ protected boolean waitForSegmentAvailability( ) { if (segmentsToWaitFor.isEmpty()) { - log.info("Asked to wait for segments to be available, but I wasn't provided with any segments!?"); + log.info("Asked to wait for segments to be available, but I wasn't provided with any segments."); return true; - } else if (waitTimeout <= 0) { - log.warn("Asked to wait for availability for <= 0 seconds?! Requested waitTimeout: [%s]", waitTimeout); + } else if (waitTimeout < 0) { + log.warn("Asked to wait for availability for < 0 seconds?! 
Requested waitTimeout: [%s]", waitTimeout); return false; } log.info("Waiting for [%d] segments to be loaded by the cluster...", segmentsToWaitFor.size()); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java index 8788e56d3605..1174f2b9f3db 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java @@ -1060,8 +1060,11 @@ private synchronized void persistSequences() throws IOException /** * Return a map of reports for the task. * - * A successfull task should always have a null errorMsg. A falied task should always have a non-null - * errorMsg. + * A successfull task should always have a null errorMsg. Segments availability is inherently confirmed + * if the task was succesful. + * + * A falied task should always have a non-null errorMsg. Segment availability is never confirmed if the task + * was not successful. * * @param errorMsg Nullable error message for the task. null if task succeeded. * @return Map of reports for the task. 
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskSerdeTest.java index edebfd29a187..5d3a164ce66d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskSerdeTest.java @@ -85,7 +85,7 @@ public void testSerdeTuningConfigWithDynamicPartitionsSpec() throws IOException 10, 100, 1234, - null + 0L ); assertSerdeTuningConfig(tuningConfig); } diff --git a/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java b/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java index d48c9843245d..266204520ce5 100644 --- a/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java +++ b/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java @@ -40,7 +40,7 @@ public interface TuningConfig int DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS = 0; int DEFAULT_MAX_ROWS_IN_MEMORY = 1_000_000; boolean DEFAULT_SKIP_BYTES_IN_MEMORY_OVERHEAD_CHECK = false; - int DEFAULT_AWAIT_SEGMENT_AVAILABILITY_TIMEOUT_MILLIS = 0; + long DEFAULT_AWAIT_SEGMENT_AVAILABILITY_TIMEOUT_MILLIS = 0L; /** * The incremental index implementation to use From 8c3a49968413fa433d68b60cab25bf61435dd809 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Thu, 25 Mar 2021 18:10:43 -0500 Subject: [PATCH 22/28] cleanup a little bit after merge with master --- .../tests/indexer/AbstractIndexerTest.java | 5 +++++ .../tests/indexer/ITCompactionTaskTest.java | 19 ------------------- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java index e97f52472a58..6c5067b70c39 100644 --- 
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractIndexerTest.java @@ -105,6 +105,11 @@ protected void loadData(String indexTask, final String fullDatasourceName) throw { String taskSpec = getResourceAsString(indexTask); taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); + taskSpec = StringUtils.replace( + taskSpec, + "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", + jsonMapper.writeValueAsString("0") + ); final String taskID = indexer.submitTask(taskSpec); LOG.info("TaskID for loading index task %s", taskID); indexer.waitUntilTaskCompletes(taskID); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java index 1973a39c59c6..957c8a5522c4 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java @@ -218,25 +218,6 @@ private void loadDataAndCompact( } } - private void loadData(String indexTask) throws Exception - { - String taskSpec = getResourceAsString(indexTask); - taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", fullDatasourceName); - taskSpec = StringUtils.replace( - taskSpec, - "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", - jsonMapper.writeValueAsString("0") - ); - final String taskID = indexer.submitTask(taskSpec); - LOG.info("TaskID for loading index task %s", taskID); - indexer.waitUntilTaskCompletes(taskID); - - ITRetryUtil.retryUntilTrue( - () -> coordinator.areSegmentsLoaded(fullDatasourceName), - "Segment Load" - ); - } - private void compactData(String compactionResource, GranularityType newSegmentGranularity, GranularityType newQueryGranularity) throws Exception { String template = getResourceAsString(compactionResource); From 
f6c91680a7fd4b751754e302862d3ad6eb57e1f5 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Tue, 30 Mar 2021 13:17:17 -0500 Subject: [PATCH 23/28] Fix potential resource leak in AbstractBatchIndexTask --- .../common/task/AbstractBatchIndexTask.java | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index 1d9228a56892..d74dea3711c9 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -591,27 +591,28 @@ protected boolean waitForSegmentAvailability( } log.info("Waiting for [%d] segments to be loaded by the cluster...", segmentsToWaitFor.size()); - SegmentHandoffNotifier notifier = toolbox.getSegmentHandoffNotifierFactory() - .createSegmentHandoffNotifier(segmentsToWaitFor.get(0).getDataSource()); - ExecutorService exec = Execs.directExecutor(); - CountDownLatch doneSignal = new CountDownLatch(segmentsToWaitFor.size()); - - notifier.start(); - for (DataSegment s : segmentsToWaitFor) { - notifier.registerSegmentHandoffCallback( - new SegmentDescriptor(s.getInterval(), s.getVersion(), s.getShardSpec().getPartitionNum()), - exec, - () -> { - log.debug( - "Confirmed availability for [%s]. 
Removing from list of segments to wait for", - s.getId() - ); - doneSignal.countDown(); - } - ); - } - - try { + try( + SegmentHandoffNotifier notifier = toolbox.getSegmentHandoffNotifierFactory() + .createSegmentHandoffNotifier(segmentsToWaitFor.get(0).getDataSource()) + ) { + + ExecutorService exec = Execs.directExecutor(); + CountDownLatch doneSignal = new CountDownLatch(segmentsToWaitFor.size()); + + notifier.start(); + for (DataSegment s : segmentsToWaitFor) { + notifier.registerSegmentHandoffCallback( + new SegmentDescriptor(s.getInterval(), s.getVersion(), s.getShardSpec().getPartitionNum()), + exec, + () -> { + log.debug( + "Confirmed availability for [%s]. Removing from list of segments to wait for", + s.getId() + ); + doneSignal.countDown(); + } + ); + } return doneSignal.await(waitTimeout, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { @@ -619,9 +620,6 @@ protected boolean waitForSegmentAvailability( Thread.currentThread().interrupt(); return false; } - finally { - notifier.close(); - } } private static class LockGranularityDetermineResult From edc26dddaa0ac2bebd3580041136673c1cca7724 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Tue, 30 Mar 2021 13:55:33 -0500 Subject: [PATCH 24/28] syntax fix --- .../druid/indexing/common/task/AbstractBatchIndexTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index d74dea3711c9..a77f6030311c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -591,7 +591,7 @@ protected boolean waitForSegmentAvailability( } log.info("Waiting for [%d] segments to be loaded by the cluster...", segmentsToWaitFor.size()); - try( + try ( 
SegmentHandoffNotifier notifier = toolbox.getSegmentHandoffNotifierFactory() .createSegmentHandoffNotifier(segmentsToWaitFor.get(0).getDataSource()) ) { From 7b25321dd2f5eb0f92d3fbb379a57abbc8792f30 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Wed, 31 Mar 2021 12:23:15 -0500 Subject: [PATCH 25/28] Add a Compcation TuningConfig type --- .../IndexingServiceTuningConfigModule.java | 4 +- .../indexing/common/task/CompactionTask.java | 225 +++++++++++++++++- .../task/CompactionTaskParallelRunTest.java | 1 + .../common/task/CompactionTaskTest.java | 175 +++++++++++++- .../task/CompactionTuningConfigTest.java | 202 ++++++++++++++++ ...stractParallelIndexSupervisorTaskTest.java | 1 + .../tests/indexer/ITAppendBatchIndexTest.java | 1 + 7 files changed, 587 insertions(+), 22 deletions(-) create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTuningConfigTest.java diff --git a/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceTuningConfigModule.java b/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceTuningConfigModule.java index 4799f2ad2805..56e502259a6b 100644 --- a/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceTuningConfigModule.java +++ b/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceTuningConfigModule.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; +import org.apache.druid.indexing.common.task.CompactionTask; import org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig; import org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig; import org.apache.druid.initialization.DruidModule; @@ -39,7 +40,8 @@ public List getJacksonModules() new SimpleModule(IndexingServiceTuningConfigModule.class.getSimpleName()) .registerSubtypes( new NamedType(IndexTuningConfig.class, "index"), - new 
NamedType(ParallelIndexTuningConfig.class, "index_parallel") + new NamedType(ParallelIndexTuningConfig.class, "index_parallel"), + new NamedType(CompactionTask.CompactionTuningConfig.class, "compaction") ) ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java index b085360babe7..ad49ecad9792 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java @@ -37,6 +37,7 @@ import org.apache.druid.client.indexing.ClientCompactionTaskQuery; import org.apache.druid.common.guava.SettableSupplier; import org.apache.druid.data.input.InputSource; +import org.apache.druid.data.input.SplitHintSpec; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling; import org.apache.druid.data.input.impl.DimensionsSpec; @@ -77,21 +78,25 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.segment.DimensionHandler; import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.incremental.AppendableIndexSpec; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.TuningConfig; import org.apache.druid.segment.indexing.granularity.GranularitySpec; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.loading.SegmentLoadingException; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; +import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import 
org.apache.druid.server.coordinator.duty.CompactSegments; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.TimelineObjectHolder; import org.apache.druid.timeline.VersionedIntervalTimeline; import org.apache.druid.timeline.partition.PartitionChunk; import org.apache.druid.timeline.partition.PartitionHolder; +import org.joda.time.Duration; import org.joda.time.Interval; import javax.annotation.Nonnull; @@ -146,7 +151,7 @@ public class CompactionTask extends AbstractBatchIndexTask @Nullable private final ClientCompactionTaskGranularitySpec granularitySpec; @Nullable - private final ParallelIndexTuningConfig tuningConfig; + private final CompactionTuningConfig tuningConfig; @JsonIgnore private final SegmentProvider segmentProvider; @JsonIgnore @@ -234,17 +239,51 @@ public CompactionTask( } @VisibleForTesting - static ParallelIndexTuningConfig getTuningConfig(TuningConfig tuningConfig) + static CompactionTuningConfig getTuningConfig(TuningConfig tuningConfig) { - if (tuningConfig instanceof ParallelIndexTuningConfig) { - return (ParallelIndexTuningConfig) tuningConfig; + if (tuningConfig instanceof CompactionTuningConfig) { + return (CompactionTuningConfig) tuningConfig; + } else if (tuningConfig instanceof ParallelIndexTuningConfig) { + final ParallelIndexTuningConfig parallelIndexTuningConfig = (ParallelIndexTuningConfig) tuningConfig; + return new CompactionTuningConfig( + null, + parallelIndexTuningConfig.getMaxRowsPerSegment(), + parallelIndexTuningConfig.getAppendableIndexSpec(), + parallelIndexTuningConfig.getMaxRowsInMemory(), + parallelIndexTuningConfig.getMaxBytesInMemory(), + parallelIndexTuningConfig.isSkipBytesInMemoryOverheadCheck(), + parallelIndexTuningConfig.getMaxTotalRows(), + parallelIndexTuningConfig.getNumShards(), + parallelIndexTuningConfig.getSplitHintSpec(), + parallelIndexTuningConfig.getPartitionsSpec(), + parallelIndexTuningConfig.getIndexSpec(), + parallelIndexTuningConfig.getIndexSpecForIntermediatePersists(), 
+ parallelIndexTuningConfig.getMaxPendingPersists(), + parallelIndexTuningConfig.isForceGuaranteedRollup(), + parallelIndexTuningConfig.isReportParseExceptions(), + parallelIndexTuningConfig.getPushTimeout(), + parallelIndexTuningConfig.getSegmentWriteOutMediumFactory(), + null, + parallelIndexTuningConfig.getMaxNumConcurrentSubTasks(), + parallelIndexTuningConfig.getMaxRetry(), + parallelIndexTuningConfig.getTaskStatusCheckPeriodMs(), + parallelIndexTuningConfig.getChatHandlerTimeout(), + parallelIndexTuningConfig.getChatHandlerNumRetries(), + parallelIndexTuningConfig.getMaxNumSegmentsToMerge(), + parallelIndexTuningConfig.getTotalNumMergeTasks(), + parallelIndexTuningConfig.isLogParseExceptions(), + parallelIndexTuningConfig.getMaxParseExceptions(), + parallelIndexTuningConfig.getMaxSavedParseExceptions(), + parallelIndexTuningConfig.getMaxColumnsToMerge(), + parallelIndexTuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() + ); } else if (tuningConfig instanceof IndexTuningConfig) { final IndexTuningConfig indexTuningConfig = (IndexTuningConfig) tuningConfig; - return new ParallelIndexTuningConfig( + return new CompactionTuningConfig( null, indexTuningConfig.getMaxRowsPerSegment(), indexTuningConfig.getAppendableIndexSpec(), - indexTuningConfig.getMaxRowsPerSegment(), + indexTuningConfig.getMaxRowsInMemory(), indexTuningConfig.getMaxBytesInMemory(), indexTuningConfig.isSkipBytesInMemoryOverheadCheck(), indexTuningConfig.getMaxTotalRows(), @@ -274,8 +313,9 @@ static ParallelIndexTuningConfig getTuningConfig(TuningConfig tuningConfig) ); } else { throw new ISE( - "Unknown tuningConfig type: [%s], Must be either [%s] or [%s]", + "Unknown tuningConfig type: [%s], Must be in [%s, %s, %s]", tuningConfig.getClass().getName(), + CompactionTuningConfig.class.getName(), ParallelIndexTuningConfig.class.getName(), IndexTuningConfig.class.getName() ); @@ -515,7 +555,7 @@ static List createIngestionSchema( toolbox.getIndexIO() ); - final ParallelIndexTuningConfig 
compactionTuningConfig = partitionConfigurationManager.computeTuningConfig(); + final CompactionTuningConfig compactionTuningConfig = partitionConfigurationManager.computeTuningConfig(); if (granularitySpec == null || granularitySpec.getSegmentGranularity() == null) { // original granularity @@ -950,18 +990,18 @@ void checkSegments(LockGranularity lockGranularityInUse, List lates static class PartitionConfigurationManager { @Nullable - private final ParallelIndexTuningConfig tuningConfig; + private final CompactionTuningConfig tuningConfig; - PartitionConfigurationManager(@Nullable ParallelIndexTuningConfig tuningConfig) + PartitionConfigurationManager(@Nullable CompactionTuningConfig tuningConfig) { this.tuningConfig = tuningConfig; } @Nullable - ParallelIndexTuningConfig computeTuningConfig() + CompactionTuningConfig computeTuningConfig() { - ParallelIndexTuningConfig newTuningConfig = tuningConfig == null - ? ParallelIndexTuningConfig.defaultConfig() + CompactionTuningConfig newTuningConfig = tuningConfig == null + ? CompactionTuningConfig.defaultConfig() : tuningConfig; PartitionsSpec partitionsSpec = newTuningConfig.getGivenOrDefaultPartitionsSpec(); if (partitionsSpec instanceof DynamicPartitionsSpec) { @@ -1082,4 +1122,163 @@ public CompactionTask build() ); } } + + /** + * Compcation Task Tuning Config. + * + * An extension of ParallelIndexTuningConfig. As of now, all this TuningConfig + * does is fail if the TuningConfig contains + * `awaitSegmentAvailabilityTimeoutMillis` that is != 0 since it is not + * supported for Compcation Tasks. 
+ */ + public static class CompactionTuningConfig extends ParallelIndexTuningConfig + { + public static final String TYPE = "compaction"; + + public static CompactionTuningConfig defaultConfig() + { + return new CompactionTuningConfig( + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + 0L + ); + } + + @JsonCreator + public CompactionTuningConfig( + @JsonProperty("targetPartitionSize") @Deprecated @Nullable Integer targetPartitionSize, + @JsonProperty("maxRowsPerSegment") @Deprecated @Nullable Integer maxRowsPerSegment, + @JsonProperty("appendableIndexSpec") @Nullable AppendableIndexSpec appendableIndexSpec, + @JsonProperty("maxRowsInMemory") @Nullable Integer maxRowsInMemory, + @JsonProperty("maxBytesInMemory") @Nullable Long maxBytesInMemory, + @JsonProperty("skipBytesInMemoryOverheadCheck") @Nullable Boolean skipBytesInMemoryOverheadCheck, + @JsonProperty("maxTotalRows") @Deprecated @Nullable Long maxTotalRows, + @JsonProperty("numShards") @Deprecated @Nullable Integer numShards, + @JsonProperty("splitHintSpec") @Nullable SplitHintSpec splitHintSpec, + @JsonProperty("partitionsSpec") @Nullable PartitionsSpec partitionsSpec, + @JsonProperty("indexSpec") @Nullable IndexSpec indexSpec, + @JsonProperty("indexSpecForIntermediatePersists") @Nullable IndexSpec indexSpecForIntermediatePersists, + @JsonProperty("maxPendingPersists") @Nullable Integer maxPendingPersists, + @JsonProperty("forceGuaranteedRollup") @Nullable Boolean forceGuaranteedRollup, + @JsonProperty("reportParseExceptions") @Nullable Boolean reportParseExceptions, + @JsonProperty("pushTimeout") @Nullable Long pushTimeout, + @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, + @JsonProperty("maxNumSubTasks") @Deprecated @Nullable Integer maxNumSubTasks, + 
@JsonProperty("maxNumConcurrentSubTasks") @Nullable Integer maxNumConcurrentSubTasks, + @JsonProperty("maxRetry") @Nullable Integer maxRetry, + @JsonProperty("taskStatusCheckPeriodMs") @Nullable Long taskStatusCheckPeriodMs, + @JsonProperty("chatHandlerTimeout") @Nullable Duration chatHandlerTimeout, + @JsonProperty("chatHandlerNumRetries") @Nullable Integer chatHandlerNumRetries, + @JsonProperty("maxNumSegmentsToMerge") @Nullable Integer maxNumSegmentsToMerge, + @JsonProperty("totalNumMergeTasks") @Nullable Integer totalNumMergeTasks, + @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions, + @JsonProperty("maxColumnsToMerge") @Nullable Integer maxColumnsToMerge, + @JsonProperty("awaitSegmentAvailabilityTimeoutMillis") @Nullable Long awaitSegmentAvailabilityTimeoutMillis + ) + { + super( + targetPartitionSize, + maxRowsPerSegment, + appendableIndexSpec, + maxRowsInMemory, + maxBytesInMemory, + skipBytesInMemoryOverheadCheck, + maxTotalRows, + numShards, + splitHintSpec, + partitionsSpec, + indexSpec, + indexSpecForIntermediatePersists, + maxPendingPersists, + forceGuaranteedRollup, + reportParseExceptions, + pushTimeout, + segmentWriteOutMediumFactory, + maxNumSubTasks, + maxNumConcurrentSubTasks, + maxRetry, + taskStatusCheckPeriodMs, + chatHandlerTimeout, + chatHandlerNumRetries, + maxNumSegmentsToMerge, + totalNumMergeTasks, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions, + maxColumnsToMerge, + awaitSegmentAvailabilityTimeoutMillis + ); + + Preconditions.checkArgument( + awaitSegmentAvailabilityTimeoutMillis == null || awaitSegmentAvailabilityTimeoutMillis == 0, + "awaitSegmentAvailabilityTimeoutMillis is not supported for Compcation Task" + ); + } + + @Override + public CompactionTuningConfig withPartitionsSpec(PartitionsSpec partitionsSpec) + { + return new 
CompactionTuningConfig( + null, + null, + getAppendableIndexSpec(), + getMaxRowsInMemory(), + getMaxBytesInMemory(), + isSkipBytesInMemoryOverheadCheck(), + null, + null, + getSplitHintSpec(), + partitionsSpec, + getIndexSpec(), + getIndexSpecForIntermediatePersists(), + getMaxPendingPersists(), + isForceGuaranteedRollup(), + isReportParseExceptions(), + getPushTimeout(), + getSegmentWriteOutMediumFactory(), + null, + getMaxNumConcurrentSubTasks(), + getMaxRetry(), + getTaskStatusCheckPeriodMs(), + getChatHandlerTimeout(), + getChatHandlerNumRetries(), + getMaxNumSegmentsToMerge(), + getTotalNumMergeTasks(), + isLogParseExceptions(), + getMaxParseExceptions(), + getMaxSavedParseExceptions(), + getMaxColumnsToMerge(), + getAwaitSegmentAvailabilityTimeoutMillis() + ); + } + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java index 2e23fcaef8ef..bab7799544af 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java @@ -111,6 +111,7 @@ public CompactionTaskParallelRunTest(LockGranularity lockGranularity) public void setup() throws IOException { getObjectMapper().registerSubtypes(ParallelIndexTuningConfig.class, DruidInputSource.class); + getObjectMapper().registerSubtypes(CompactionTask.CompactionTuningConfig.class, DruidInputSource.class); inputDir = temporaryFolder.newFolder(); final File tmpFile = File.createTempFile("druid", "index", inputDir); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java index da128e96f38a..f9f3a66cab3a 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java @@ -170,7 +170,7 @@ public class CompactionTaskTest Intervals.of("2017-06-30/2017-07-01") ); private static final Map MIXED_TYPE_COLUMN_MAP = new HashMap<>(); - private static final ParallelIndexTuningConfig TUNING_CONFIG = createTuningConfig(); + private static final CompactionTask.CompactionTuningConfig TUNING_CONFIG = createTuningConfig(); private static final TestUtils TEST_UTILS = new TestUtils(); private static final Map SEGMENT_MAP = new HashMap<>(); @@ -307,9 +307,9 @@ private static List findDimensions(int startIndex, Interval segmentInter return dimensions; } - private static ParallelIndexTuningConfig createTuningConfig() + private static CompactionTask.CompactionTuningConfig createTuningConfig() { - return new ParallelIndexTuningConfig( + return new CompactionTask.CompactionTuningConfig( null, null, // null to compute maxRowsPerSegment automatically null, @@ -608,6 +608,165 @@ public void testSerdeWithOldTuningConfigSuccessfullyDeserializeToNewOne() throws assertEquals(expectedFromJson, fromJson); } + @Test + public void testGetTuningConfigWithIndexTuningConfig() + { + IndexTuningConfig indexTuningConfig = new IndexTuningConfig( + null, + null, // null to compute maxRowsPerSegment automatically + null, + 500000, + 1000000L, + null, + null, + null, + null, + null, + null, + new IndexSpec( + new RoaringBitmapSerdeFactory(true), + CompressionStrategy.LZ4, + CompressionStrategy.LZF, + LongEncodingStrategy.LONGS + ), + null, + null, + true, + false, + null, + null, + null, + null, + null, + null, + null, + null + ); + + CompactionTask.CompactionTuningConfig compactionTuningConfig = new CompactionTask.CompactionTuningConfig( + null, + null, + null, + 500000, + 1000000L, + null, + null, + null, + null, + null, + new IndexSpec( + new RoaringBitmapSerdeFactory(true), + 
CompressionStrategy.LZ4, + CompressionStrategy.LZF, + LongEncodingStrategy.LONGS + ), + null, + null, + true, + false, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ); + + Assert.assertEquals(compactionTuningConfig, CompactionTask.getTuningConfig(indexTuningConfig)); + + } + + @Test + public void testGetTuningConfigWithParallelIndexTuningConfig() + { + ParallelIndexTuningConfig parallelIndexTuningConfig = new ParallelIndexTuningConfig( + null, + null, // null to compute maxRowsPerSegment automatically + null, + 500000, + 1000000L, + null, + null, + null, + null, + null, + new IndexSpec( + new RoaringBitmapSerdeFactory(true), + CompressionStrategy.LZ4, + CompressionStrategy.LZF, + LongEncodingStrategy.LONGS + ), + null, + null, + true, + false, + 5000L, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ); + + CompactionTask.CompactionTuningConfig compactionTuningConfig = new CompactionTask.CompactionTuningConfig( + null, + null, // null to compute maxRowsPerSegment automatically + null, + 500000, + 1000000L, + null, + null, + null, + null, + null, + new IndexSpec( + new RoaringBitmapSerdeFactory(true), + CompressionStrategy.LZ4, + CompressionStrategy.LZF, + LongEncodingStrategy.LONGS + ), + null, + null, + true, + false, + 5000L, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ); + + Assert.assertEquals(compactionTuningConfig, CompactionTask.getTuningConfig(parallelIndexTuningConfig)); + } + @Test public void testSerdeWithUnknownTuningConfigThrowingError() throws IOException { @@ -713,7 +872,7 @@ public void testCreateIngestionSchema() throws IOException, SegmentLoadingExcept @Test public void testCreateIngestionSchemaWithTargetPartitionSize() throws IOException, SegmentLoadingException { - final ParallelIndexTuningConfig tuningConfig = new ParallelIndexTuningConfig( 
+ final CompactionTask.CompactionTuningConfig tuningConfig = new CompactionTask.CompactionTuningConfig( 100000, null, null, @@ -785,7 +944,7 @@ public void testCreateIngestionSchemaWithTargetPartitionSize() throws IOExceptio @Test public void testCreateIngestionSchemaWithMaxTotalRows() throws IOException, SegmentLoadingException { - final ParallelIndexTuningConfig tuningConfig = new ParallelIndexTuningConfig( + final CompactionTask.CompactionTuningConfig tuningConfig = new CompactionTask.CompactionTuningConfig( null, null, null, @@ -857,7 +1016,7 @@ public void testCreateIngestionSchemaWithMaxTotalRows() throws IOException, Segm @Test public void testCreateIngestionSchemaWithNumShards() throws IOException, SegmentLoadingException { - final ParallelIndexTuningConfig tuningConfig = new ParallelIndexTuningConfig( + final CompactionTask.CompactionTuningConfig tuningConfig = new CompactionTask.CompactionTuningConfig( null, null, null, @@ -1418,7 +1577,7 @@ private void assertIngestionSchema( expectedDimensionsSpecs, expectedMetricsSpec, expectedSegmentIntervals, - new ParallelIndexTuningConfig( + new CompactionTask.CompactionTuningConfig( null, null, null, @@ -1465,7 +1624,7 @@ private void assertIngestionSchema( List expectedDimensionsSpecs, List expectedMetricsSpec, List expectedSegmentIntervals, - ParallelIndexTuningConfig expectedTuningConfig, + CompactionTask.CompactionTuningConfig expectedTuningConfig, Granularity expectedSegmentGranularity, Granularity expectedQueryGranularity ) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTuningConfigTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTuningConfigTest.java new file mode 100644 index 000000000000..2d496db89fe4 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTuningConfigTest.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.jsontype.NamedType; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.data.CompressionFactory.LongEncodingStrategy; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.indexing.TuningConfig; +import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import org.joda.time.Duration; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.io.IOException; + +public class CompactionTuningConfigTest +{ + private final ObjectMapper mapper = new DefaultObjectMapper(); + + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + @Before + public void setup() + { + 
mapper.registerSubtypes(new NamedType(CompactionTask.CompactionTuningConfig.class, "compcation")); + } + + @Test + public void testSerdeDefault() throws IOException + { + final CompactionTask.CompactionTuningConfig tuningConfig = CompactionTask.CompactionTuningConfig.defaultConfig(); + final byte[] json = mapper.writeValueAsBytes(tuningConfig); + final ParallelIndexTuningConfig fromJson = (CompactionTask.CompactionTuningConfig) mapper.readValue(json, TuningConfig.class); + Assert.assertEquals(fromJson, tuningConfig); + } + + @Test + public void testSerdeWithNonZeroAwaitSegmentAvailabilityTimeoutMillis() + { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("awaitSegmentAvailabilityTimeoutMillis is not supported for Compcation Task"); + final CompactionTask.CompactionTuningConfig tuningConfig = new CompactionTask.CompactionTuningConfig( + null, + null, + null, + 10, + 1000L, + null, + null, + null, + null, + new DynamicPartitionsSpec(100, 100L), + new IndexSpec( + new RoaringBitmapSerdeFactory(true), + CompressionStrategy.UNCOMPRESSED, + CompressionStrategy.LZF, + LongEncodingStrategy.LONGS + ), + new IndexSpec(), + 1, + false, + true, + 10000L, + OffHeapMemorySegmentWriteOutMediumFactory.instance(), + null, + 250, + 100, + 20L, + new Duration(3600), + 128, + null, + null, + false, + null, + null, + null, + 5L + ); + } + + @Test + public void testSerdeWithZeroAwaitSegmentAvailabilityTimeoutMillis() + { + final CompactionTask.CompactionTuningConfig tuningConfig = new CompactionTask.CompactionTuningConfig( + null, + null, + null, + 10, + 1000L, + null, + null, + null, + null, + new DynamicPartitionsSpec(100, 100L), + new IndexSpec( + new RoaringBitmapSerdeFactory(true), + CompressionStrategy.UNCOMPRESSED, + CompressionStrategy.LZF, + LongEncodingStrategy.LONGS + ), + new IndexSpec(), + 1, + false, + true, + 10000L, + OffHeapMemorySegmentWriteOutMediumFactory.instance(), + null, + 250, + 100, + 20L, + new Duration(3600), + 
128, + null, + null, + false, + null, + null, + null, + 0L + ); + Assert.assertEquals(0L, tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis()); + } + + @Test + public void testSerdeWithNullAwaitSegmentAvailabilityTimeoutMillis() + { + final CompactionTask.CompactionTuningConfig tuningConfig = new CompactionTask.CompactionTuningConfig( + null, + null, + null, + 10, + 1000L, + null, + null, + null, + null, + new DynamicPartitionsSpec(100, 100L), + new IndexSpec( + new RoaringBitmapSerdeFactory(true), + CompressionStrategy.UNCOMPRESSED, + CompressionStrategy.LZF, + LongEncodingStrategy.LONGS + ), + new IndexSpec(), + 1, + false, + true, + 10000L, + OffHeapMemorySegmentWriteOutMediumFactory.instance(), + null, + 250, + 100, + 20L, + new Duration(3600), + 128, + null, + null, + false, + null, + null, + null, + null + ); + Assert.assertEquals(0L, tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis()); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(CompactionTask.CompactionTuningConfig.class) + .usingGetClass() + .verify(); + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index 73ae7baab5e4..ef7c668b2df1 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -544,6 +544,7 @@ public void prepareObjectMapper(ObjectMapper objectMapper, IndexIO indexIO) ); objectMapper.registerSubtypes( new NamedType(ParallelIndexSupervisorTask.class, ParallelIndexSupervisorTask.TYPE), + new NamedType(CompactionTask.CompactionTuningConfig.class, CompactionTask.CompactionTuningConfig.TYPE), new 
NamedType(SinglePhaseSubTask.class, SinglePhaseSubTask.TYPE), new NamedType(PartialHashSegmentGenerateTask.class, PartialHashSegmentGenerateTask.TYPE), new NamedType(PartialRangeSegmentGenerateTask.class, PartialRangeSegmentGenerateTask.TYPE), diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java index 7f5e01b553ec..240157604f8e 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java @@ -116,6 +116,7 @@ public void doIndexTest(List partitionsSpecList, List e private void submitIngestionTaskAndVerify( String indexDatasource, PartitionsSpec partitionsSpec, + boolean appendToExisting, Pair segmentAvailabilityConfirmationPair ) throws Exception From 9498cb07ba6c83a1a36f57580d42c337dd7fc75f Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Wed, 31 Mar 2021 16:04:33 -0500 Subject: [PATCH 26/28] add docs stipulating the lack of support by Compaction tasks for the new config --- docs/ingestion/compaction.md | 2 +- docs/ingestion/native-batch.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ingestion/compaction.md b/docs/ingestion/compaction.md index e50d2625e536..61c3b69f30ab 100644 --- a/docs/ingestion/compaction.md +++ b/docs/ingestion/compaction.md @@ -109,7 +109,7 @@ To perform a manual compaction, you submit a compaction task. Compaction tasks m |`dimensionsSpec`|Custom dimensions spec. The compaction task uses the specified dimensions spec if it exists instead of generating one.|No| |`metricsSpec`|Custom metrics spec. The compaction task uses the specified metrics spec rather than generating one.|No| |`segmentGranularity`|When set, the compaction task changes the segment granularity for the given interval. Deprecated. Use `granularitySpec`. 
|No.| -|`tuningConfig`|[Parallel indexing task tuningConfig](native-batch.md#tuningconfig)|No| +|`tuningConfig`|[Parallel indexing task tuningConfig](native-batch.md#tuningconfig). Note that your tuning config cannot contain a non-zero value for `awaitSegmentAvailabilityTimeoutMillis` because it is not supported by compaction tasks at this time.|No| |`context`|[Task context](./tasks.md#context)|No| |`granularitySpec`|Custom `granularitySpec` to describe the `segmentGranularity` and `queryGranularity` for the compacted segments. See [Compaction granularitySpec](#compaction-granularity-spec).|No| diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index 87eee59bac82..169b22cd3992 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -171,7 +171,7 @@ A sample task is shown below: |id|The task ID. If this is not explicitly specified, Druid generates the task ID using task type, data source name, interval, and date-time stamp. |no| |spec|The ingestion spec including the data schema, IOConfig, and TuningConfig. See below for more details. |yes| |context|Context containing various task configuration parameters. See below for more details.|no| -|awaitSegmentAvailabilityTimeoutMillis|Long|Milliseconds to wait for the newly indexed segments to become available for query after ingestion completes. If `<= 0`, no wait will occur. If `> 0`, the task will wait for the Coordinator to indicate that the new segments are available for querying. If the timeout expires, the task will exit as successful, but the segments were not confirmed to have become available for query.|no (default = 0)| +|awaitSegmentAvailabilityTimeoutMillis|Long|Milliseconds to wait for the newly indexed segments to become available for query after ingestion completes. If `<= 0`, no wait will occur. If `> 0`, the task will wait for the Coordinator to indicate that the new segments are available for querying. 
If the timeout expires, the task will exit as successful, but the segments were not confirmed to have become available for query. Note for compaction tasks: you should not set this to a non-zero value because it is not supported by the compaction task type at this time.|no (default = 0)| ### `dataSchema` From 747ee337f27efb2f36cad1c2bbe3058646e826b3 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Tue, 6 Apr 2021 15:41:10 -0500 Subject: [PATCH 27/28] Fixup compilation errors after merge with master --- .../org/apache/druid/indexing/common/task/IndexTaskTest.java | 4 ++++ .../apache/druid/tests/indexer/ITOverwriteBatchIndexTest.java | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index ecc307a4c500..55c1f150c316 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -1000,6 +1000,7 @@ public void testWaitForSegmentAvailabilityNoSegments() throws IOException ), null, createTuningConfigWithMaxRowsPerSegment(2, true), + false, false ), null @@ -1031,6 +1032,7 @@ public void testWaitForSegmentAvailabilityInvalidWaitTimeout() throws IOExceptio ), null, createTuningConfigWithMaxRowsPerSegment(2, true), + false, false ), null @@ -1069,6 +1071,7 @@ public void testWaitForSegmentAvailabilityMultipleSegmentsTimeout() throws IOExc ), null, createTuningConfigWithMaxRowsPerSegment(2, true), + false, false ), null @@ -1128,6 +1131,7 @@ public void testWaitForSegmentAvailabilityMultipleSegmentsSuccess() throws IOExc ), null, createTuningConfigWithMaxRowsPerSegment(2, true), + false, false ), null diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOverwriteBatchIndexTest.java 
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOverwriteBatchIndexTest.java index f47a9663c79b..1ff7b18b6767 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOverwriteBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOverwriteBatchIndexTest.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; @@ -150,7 +151,8 @@ private void submitIngestionTaskAndVerify( null, false, false, - true + true, + new Pair<>(false, false) ); } } From 381cdfe8d6404eedbc1df01f58235d7f4a9bf515 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Tue, 6 Apr 2021 20:17:56 -0500 Subject: [PATCH 28/28] Remove erreneous newline --- .../org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java index f5b967fc5455..5e9071e45507 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java @@ -116,7 +116,6 @@ public void doIndexTest(List partitionsSpecList, List e private void submitIngestionTaskAndVerify( String indexDatasource, PartitionsSpec partitionsSpec, - boolean appendToExisting, Pair segmentAvailabilityConfirmationPair ) throws Exception