Allow client to configure batch ingestion task to wait to complete until segments are confirmed to be available by other #10676
Changes from all commits
```diff
@@ -481,6 +481,7 @@ private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig(
         null,
         null,
         null,
+        null,
         null
     )
 )
```

```diff
@@ -235,6 +235,7 @@ public DetermineHashedPartitionsJobTest(
         null,
         null,
         null,
+        null,
         null
     )
 );
```

```diff
@@ -344,6 +344,7 @@ public DeterminePartitionsJobTest(
         null,
         null,
         null,
+        null,
         null
     )
 )
```

```diff
@@ -257,6 +257,7 @@ HadoopIngestionSpec build()
         null,
         null,
         null,
+        null,
         null
     );
```

```diff
@@ -547,6 +547,7 @@ public void setUp() throws Exception
         null,
         null,
         null,
+        null,
         null
     )
 )
```

```diff
@@ -187,6 +187,7 @@ public void setup() throws Exception
         null,
         null,
         null,
+        null,
         null
     )
 )
```

```diff
@@ -79,6 +79,7 @@ public class GranularityPathSpecTest
         null,
         null,
         null,
+        null,
         null
     );
```
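Every hunk above is the same mechanical change: the tuning config used by these Hadoop indexing tests gained one constructor parameter for this feature, so each call site passes one more `null` to keep its previous behavior. Below is a rough, self-contained sketch of that pattern; the class is illustrative, not Druid's real `HadoopTuningConfig`, and the parameter name `awaitSegmentAvailabilityTimeoutMillis` is assumed from the feature this PR adds.

```java
// Illustrative sketch only: a config class grows a nullable setting, and
// existing call sites pass null to opt out, preserving pre-PR behavior.
public class TuningConfigSketch
{
  private final Long awaitSegmentAvailabilityTimeoutMillis; // assumed name

  public TuningConfigSketch(Long awaitSegmentAvailabilityTimeoutMillis)
  {
    this.awaitSegmentAvailabilityTimeoutMillis = awaitSegmentAvailabilityTimeoutMillis;
  }

  public long getAwaitSegmentAvailabilityTimeoutMillis()
  {
    // Treat null as 0, i.e. "do not wait for segment availability".
    return awaitSegmentAvailabilityTimeoutMillis == null ? 0L : awaitSegmentAvailabilityTimeoutMillis;
  }

  public static void main(String[] args)
  {
    System.out.println(new TuningConfigSketch(null).getAwaitSegmentAvailabilityTimeoutMillis());     // 0
    System.out.println(new TuningConfigSketch(600_000L).getAwaitSegmentAvailabilityTimeoutMillis()); // 600000
  }
}
```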
```diff
@@ -41,17 +41,22 @@ public class IngestionStatsAndErrorsTaskReportData
   @Nullable
   private String errorMsg;
 
+  @JsonProperty
+  private boolean segmentAvailabilityConfirmed;
+
   public IngestionStatsAndErrorsTaskReportData(
       @JsonProperty("ingestionState") IngestionState ingestionState,
       @JsonProperty("unparseableEvents") Map<String, Object> unparseableEvents,
       @JsonProperty("rowStats") Map<String, Object> rowStats,
-      @JsonProperty("errorMsg") @Nullable String errorMsg
+      @JsonProperty("errorMsg") @Nullable String errorMsg,
+      @JsonProperty("segmentAvailabilityConfirmed") boolean segmentAvailabilityConfirmed
```
(Inline review comment on the new constructor parameter:)

**Reviewer:** Naive question: should this be a nullable `Boolean` instead of a primitive `boolean`? Seeing the JSON properties automatically makes me think about version mismatches, but I don't know exactly how this is used, so I'm just asking in the hope you can save me some time from digging :)

**Author:** These will be created and written by individual indexing tasks, then stored wherever the cluster stores task logs. I believe the only way they are ever accessed by Druid is to be streamed from their location directly to an API caller, without ever being deserialized, so I don't think there is any possibility of issues during an upgrade here.
```diff
   )
   {
     this.ingestionState = ingestionState;
     this.unparseableEvents = unparseableEvents;
     this.rowStats = rowStats;
     this.errorMsg = errorMsg;
+    this.segmentAvailabilityConfirmed = segmentAvailabilityConfirmed;
   }
 
   @JsonProperty
```

```diff
@@ -79,6 +84,12 @@ public String getErrorMsg()
     return errorMsg;
   }
 
+  @JsonProperty
+  public boolean isSegmentAvailabilityConfirmed()
+  {
+    return segmentAvailabilityConfirmed;
+  }
+
   public static IngestionStatsAndErrorsTaskReportData getPayloadFromTaskReports(
       Map<String, TaskReport> taskReports
   )
```

```diff
@@ -100,13 +111,20 @@ public boolean equals(Object o)
     return getIngestionState() == that.getIngestionState() &&
            Objects.equals(getUnparseableEvents(), that.getUnparseableEvents()) &&
            Objects.equals(getRowStats(), that.getRowStats()) &&
-           Objects.equals(getErrorMsg(), that.getErrorMsg());
+           Objects.equals(getErrorMsg(), that.getErrorMsg()) &&
+           Objects.equals(isSegmentAvailabilityConfirmed(), that.isSegmentAvailabilityConfirmed());
   }
 
   @Override
   public int hashCode()
   {
-    return Objects.hash(getIngestionState(), getUnparseableEvents(), getRowStats(), getErrorMsg());
+    return Objects.hash(
+        getIngestionState(),
+        getUnparseableEvents(),
+        getRowStats(),
+        getErrorMsg(),
+        isSegmentAvailabilityConfirmed()
+    );
   }
 
   @Override
```

```diff
@@ -117,6 +135,7 @@ public String toString()
            ", unparseableEvents=" + unparseableEvents +
            ", rowStats=" + rowStats +
            ", errorMsg='" + errorMsg + '\'' +
+           ", segmentAvailabilityConfirmed=" + segmentAvailabilityConfirmed +
            '}';
   }
 }
```
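Since these reports are written once by the task and streamed back to API callers as-is (per the inline discussion above), the new field simply rides along in the JSON payload. Here is a minimal sketch of what the serialized payload looks like, assuming Jackson and the constructor exactly as shown in this diff; imports for the Druid classes themselves are omitted.

```java
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Collections;

public class ReportPayloadSketch
{
  public static void main(String[] args) throws Exception
  {
    // Build a payload for a completed task whose segments were confirmed available.
    IngestionStatsAndErrorsTaskReportData payload = new IngestionStatsAndErrorsTaskReportData(
        IngestionState.COMPLETED,
        Collections.emptyMap(), // unparseableEvents
        Collections.emptyMap(), // rowStats
        null,                   // errorMsg
        true                    // segmentAvailabilityConfirmed (new in this PR)
    );
    // Prints roughly (field order may vary):
    // {"ingestionState":"COMPLETED","unparseableEvents":{},"rowStats":{},
    //  "errorMsg":null,"segmentAvailabilityConfirmed":true}
    System.out.println(new ObjectMapper().writeValueAsString(payload));
  }
}
```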
**Reviewer:** Can you give some more details on how this will be used in your application? Do you want to track handoff failures for each task? I'm wondering if handoff time is also important.
**Author:** My company deploys a large multi-tenant cluster with a services layer for ingestion that our tenants use. These tenants don't just want to know when their task succeeds; they also want to know when the data from a batch ingest is available for querying. This solution allows us to keep the ingestion services layer and/or individual tenants from banging on Druid APIs trying to see whether their data is available after ingestion.
**Reviewer:** I understand this, but my question is more about what people expect when segment handoff fails. In streaming ingestion, a handoff failure causes task failure (this behavior seems arguable, but that's what it does now), and thus people's expectation is that they could see some data dropped after handoff failures, until new tasks read the same data and publish the same segments again. However, since there is no realtime querying in batch ingestion, I don't think tasks should fail on handoff failures (which is what this PR does! 🙂), but then what will people expect? Are they going to be OK with handoff failures and wait indefinitely until Historicals load the new segments (the current behavior)? Do they want to know why the handoff failed? Do they want to know how long it took before the handoff failed? These questions are not clear to me.
**Author:** Good question. For my specific case, the service that end users interact with really wanted to be able to answer one question for the end user: is the data from my ingestion job available for querying yet?

For us a simple yes/no will suffice. The cluster operators would have the goal of having 100% of jobs successfully hand off data before the timeout, but when that doesn't happen, our users simply want to know that they may need to wait longer. We are simply trying to be transparent and report the point-in-time status. The onus of finding out when the data is fully loaded, if the timeout expired before loading, would fall on a different solution (TBD).

You're right, we intentionally did not fail these tasks, because Historical nodes loading the segments is detached from whether or not the data was written to deep storage and the metadata store (if that failed, the task should, and likely would, fail through existing code paths). We don't want our end users thinking they need to re-run their jobs when this is much more likely to be an issue of the Coordinator not having assigned servers to load the segments by the time the timeout expired.

Why the handoff failed is something I, as an operator, am more interested in than a user would be (unless that user is also an operator). I think that would be very difficult to communicate in these reports, since the indexing task doesn't know much about what the rest of the cluster is doing.

How long the task waited before the timeout could be found in the spec, but I guess it could be useful to add that value to the report as well, if you think users would want a quick reference. Rather than that static value, though, it could be cool to report the dynamic time waited for handoff. It equals the static value when we hit the timeout, but as an operator I would enjoy seeing how long each successful job waited for handoff. What do you think about that? (A sketch of this idea follows below.)
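A hedged sketch of that dynamic-wait idea: measure how long the task actually waited before availability was confirmed or the timeout hit, and surface the duration alongside the boolean. The helper and variable names below are hypothetical, not code from this PR.

```java
// Hypothetical fragment; waitForSegmentAvailability(...) stands in for
// whatever mechanism the task uses to poll for segment availability.
long startMillis = System.currentTimeMillis();
boolean confirmed = waitForSegmentAvailability(publishedSegments, awaitTimeoutMillis);
long waitedMillis = System.currentTimeMillis() - startMillis;
// Reported together, the pair is more informative than the boolean alone:
// confirmed=false with waitedMillis ~= awaitTimeoutMillis means a genuine
// timeout, while confirmed=false with a tiny waitedMillis would hint that
// the wait was never really attempted (the "false alarm" case raised below).
```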
**Reviewer:** Cool, are you working on "the different solution"? That would be interesting too.

I agree. I think we need more visibility into Coordinator behavior.

The dynamic wait time seems useful to me too 👍

For the time to fail handoff: given the above lack of ability to know the cause of handoff failures, I was wondering whether the report can be a false alarm. For example, the report could say it failed to confirm the segment handoff when the handoff was in fact never even triggered for some reason. I don't think this can happen for now, but it is possible in the future if someone modifies this area for some good reason. `segmentAvailabilityConfirmationCompleted` plus the time to fail handoff could be an indicator of such unexpected failures. I would say this is not a blocker for this PR, but it seems useful to me.