Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e552869
Update reset endpoint to return a map of skipped offsets for each par…
aho135 Mar 17, 2026
0c13ba8
Checkstyle fixes
aho135 Mar 17, 2026
d5da02c
Formatting fix for response
aho135 Mar 17, 2026
4034a98
Support unsupervised task for SeekableStreamIndexTask
aho135 Mar 18, 2026
89fc7a9
Minor refactor
aho135 Mar 18, 2026
84866f4
Use addSequence helper method
aho135 Mar 19, 2026
85ce022
Automate backfill task submission for KafkaSupervisor
aho135 Mar 19, 2026
543b9c7
Switch from java.util.Optional to com.google.common.base.Optional
aho135 Mar 19, 2026
fa4b7a0
Support for configurable number of backfill tasks
aho135 Mar 20, 2026
ca99b94
Rename skippedOffsetRange to backfillRange
aho135 Mar 20, 2026
1f88656
Null check for startOffset
aho135 Mar 20, 2026
20e3cae
Validate the main supervisor has useConcurrentLocks set to true
aho135 Mar 20, 2026
f94f804
Merge branch 'master' into reset-offsets-and-backfill
aho135 Mar 20, 2026
ef17672
javadoc update
aho135 Mar 20, 2026
7d114e2
Merge branch 'reset-offsets-and-backfill' of https://github.com/aho13…
aho135 Mar 20, 2026
e3375d8
Tweak logging
aho135 Mar 25, 2026
87d75d3
Address deprecation notices
aho135 Mar 26, 2026
fb0b041
Use existing useTransaction to disable checkpointing instead of new s…
aho135 Mar 26, 2026
2343b1b
Test coverage for calculateBackfillRange
aho135 Mar 26, 2026
ee2e619
Unit test fixes
aho135 Mar 26, 2026
8a5a61e
Refactoring and Unit tests for calculateBackfillRange
aho135 Mar 27, 2026
d46b5c8
Add test cases for resetSupervisorAndBackfill
aho135 Mar 27, 2026
fbb3516
Increase code coverage for SupervisorResourceTest
aho135 Mar 27, 2026
90f9d07
Unit tests for KafkaSupervisor submitBackfillTask
aho135 Mar 27, 2026
c368e6a
Fail early and don't reset if earliest/latest offsets are empty
aho135 Mar 27, 2026
66705c0
Update SupervisorManager.java
aho135 Mar 27, 2026
fe7e6bb
Refactor into separate resetOffsetsAndBackfill endpoint
aho135 Apr 10, 2026
4e9b68c
Set backfillTaskCount through API param instead of in IOConfig
aho135 Apr 10, 2026
7c4cef9
Unit test fix
aho135 Apr 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ protected Map<String, Long> getTimeLagPerPartition(Map<String, Long> currentOffs
}

@Override
protected RabbitStreamDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map<String, Long> map)
public RabbitStreamDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map<String, Long> map)
{
return new RabbitStreamDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, map));
}
Expand Down Expand Up @@ -374,7 +374,7 @@ public LagStats computeLagStats()
}

@Override
protected void updatePartitionLagFromStream()
public void updatePartitionLagFromStream()
{
getRecordSupplierLock().lock();

Expand All @@ -401,7 +401,7 @@ protected void updatePartitionLagFromStream()
}

@Override
protected Map<String, Long> getLatestSequencesFromStream()
public Map<String, Long> getLatestSequencesFromStream()
{
return latestSequenceFromStream != null ? latestSequenceFromStream : new HashMap<>();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Objects;
import com.google.common.base.Optional;
import com.google.common.collect.Sets;
import org.apache.druid.common.utils.IdUtils;
import org.apache.druid.data.input.kafka.KafkaRecordEntity;
Expand All @@ -41,6 +42,7 @@
import org.apache.druid.indexing.overlord.DataSourceMetadata;
import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator;
import org.apache.druid.indexing.overlord.TaskMaster;
import org.apache.druid.indexing.overlord.TaskQueue;
import org.apache.druid.indexing.overlord.TaskStorage;
import org.apache.druid.indexing.overlord.supervisor.autoscaler.LagStats;
import org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers;
Expand Down Expand Up @@ -265,6 +267,132 @@ protected List<SeekableStreamIndexTask<KafkaTopicPartition, Long, KafkaRecordEnt
return taskList;
}

@Override
public void submitBackfillTask(
Map<KafkaTopicPartition, Long> startOffsets,
Map<KafkaTopicPartition, Long> endOffsets,
@Nullable Integer backfillTaskCount
)
{
if (startOffsets == null || startOffsets.isEmpty() || endOffsets == null || endOffsets.isEmpty()) {
log.info("No offsets to backfill, skipping backfill task submission");
return;
}

try {
String backfillSupervisorId = spec.getSpec().getDataSchema().getDataSource() + "_backfill";

// If backfillTaskCount is not provided, default to taskCount / 2
int taskCount = spec.getSpec().getIOConfig().getTaskCount();
int numBackfillTasks = backfillTaskCount != null ? backfillTaskCount : Math.max(1, taskCount / 2);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P1] Reject non-positive backfill task counts before resetting offsets

backfillTaskCount comes directly from the new query parameter and can be 0 or negative. In that case numTasks becomes non-positive, the code later divides by numTasks, catches the exception, and silently skips backfill submission; because resetSupervisorAndBackfill has already reset the supervisor metadata to latest, this can acknowledge a reset while leaving the skipped range un-backfilled. Validate backfillTaskCount > 0 before performing the reset.

List<KafkaTopicPartition> partitions = new ArrayList<>(endOffsets.keySet());

// Determine actual number of tasks (can't have more tasks than partitions)
int numTasks = Math.min(numBackfillTasks, partitions.size());

log.info(
"Submitting %d backfill task(s) with supervisorId[%s] for %d partition(s)",
numTasks,
backfillSupervisorId,
partitions.size()
);

// Split partitions into groups for each task
int partitionsPerTask = partitions.size() / numTasks;
int remainder = partitions.size() % numTasks;

int startIdx = 0;
for (int taskNum = 0; taskNum < numTasks; taskNum++) {
// Distribute remainder across first few tasks
int taskPartitionCount = partitionsPerTask + (taskNum < remainder ? 1 : 0);
int endIdx = startIdx + taskPartitionCount;

List<KafkaTopicPartition> taskPartitions = partitions.subList(startIdx, endIdx);

// Create offset maps for this task's partitions only
Map<KafkaTopicPartition, Long> taskStartOffsets = new HashMap<>();
Map<KafkaTopicPartition, Long> taskEndOffsets = new HashMap<>();
for (KafkaTopicPartition partition : taskPartitions) {
Long startOffset = startOffsets.get(partition);
if (startOffset == null) {
log.info("No checkpoint has occurred before for partition [%s], setting startOffset equal to endOffset to skip data consumption", partition);
startOffset = endOffsets.get(partition);
}
taskStartOffsets.put(partition, startOffset);
taskEndOffsets.put(partition, endOffsets.get(partition));
}

String baseSequenceName = generateSequenceName(
taskStartOffsets,
null, // minimumMessageTime - process all data in range
null, // maximumMessageTime - process all data in range
spec.getSpec().getDataSchema(),
spec.getSpec().getTuningConfig()
);

KafkaSupervisorIOConfig kafkaIoConfig = spec.getSpec().getIOConfig();
KafkaIndexTaskIOConfig backfillIoConfig = new KafkaIndexTaskIOConfig(
taskNum, // taskGroupId
baseSequenceName,
null,
null,
new SeekableStreamStartSequenceNumbers<>(kafkaIoConfig.getStream(), taskStartOffsets, Collections.emptySet()),
new SeekableStreamEndSequenceNumbers<>(kafkaIoConfig.getStream(), taskEndOffsets),
kafkaIoConfig.getConsumerProperties(),
kafkaIoConfig.getPollTimeout(),
false, // useTransaction = false for backfill (no supervisor coordination)
null, // minimumMessageTime - no time filtering for backfill
null, // maximumMessageTime - no time filtering for backfill
kafkaIoConfig.getInputFormat(),
kafkaIoConfig.getConfigOverrides(),
kafkaIoConfig.isMultiTopic(),
null // refreshRejectionPeriodsInMinutes - don't refresh rejection periods for backfill
);

// Create backfill task with different supervisorId
String taskId = IdUtils.getRandomIdWithPrefix(baseSequenceName);
Map<String, Object> context = createBaseTaskContexts();
// Use APPEND locks to allow writing to intervals that may overlap with main supervisor
context.put("useConcurrentLocks", true);

KafkaIndexTask backfillTask = new KafkaIndexTask(
taskId,
backfillSupervisorId, // Use backfill supervisorId instead of spec.getId()
new TaskResource(baseSequenceName, 1),
spec.getSpec().getDataSchema(),
spec.getSpec().getTuningConfig(),
backfillIoConfig,
context,
sortingMapper,
null // no server priority for backfill tasks
);

Optional<TaskQueue> taskQueue = getTaskMaster().getTaskQueue();
if (taskQueue.isPresent()) {
log.info(
"Submitting backfill task[%s] (task %d of %d) with supervisorId[%s] for partitions %s, offsets from [%s] to [%s]",
taskId,
taskNum + 1,
numTasks,
backfillSupervisorId,
taskPartitions,
taskStartOffsets,
taskEndOffsets
);
taskQueue.get().add(backfillTask);
} else {
log.error("Failed to submit backfill task because I'm not the leader!");
break;
}

startIdx = endIdx;
}
}
catch (Exception e) {
log.error(e, "Failed to submit backfill task, skipping backfill");
}
}

@Override
protected Map<KafkaTopicPartition, Long> getPartitionRecordLag()
{
Expand Down Expand Up @@ -355,7 +483,7 @@ protected Map<KafkaTopicPartition, Long> getTimeLagPerPartition(Map<KafkaTopicPa
}

@Override
protected KafkaDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map<KafkaTopicPartition, Long> map)
public KafkaDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map<KafkaTopicPartition, Long> map)
{
return new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, map));
}
Expand Down Expand Up @@ -516,7 +644,7 @@ private Map<KafkaTopicPartition, Long> getTimestampPerPartitionAtCurrentOffset(S
* </p>
*/
@Override
protected void updatePartitionLagFromStream()
public void updatePartitionLagFromStream()
{
if (getIoConfig().isEmitTimeLagMetrics()) {
updatePartitionTimeAndRecordLagFromStream();
Expand Down Expand Up @@ -565,7 +693,7 @@ private void updateOffsetSnapshot(
}

@Override
protected Map<KafkaTopicPartition, Long> getLatestSequencesFromStream()
public Map<KafkaTopicPartition, Long> getLatestSequencesFromStream()
{
return offsetSnapshotRef.get().getLatestOffsetsFromStream();
}
Expand Down Expand Up @@ -598,17 +726,17 @@ protected boolean isMultiTopic()
* Gets the offsets as stored in the metadata store. The map returned will only contain
* offsets from topic partitions that match the current supervisor config stream. This
* override is needed because in the case of multi-topic, a user could have updated the supervisor
* config from single topic to mult-topic, where the new multi-topic pattern regex matches the
* config from single topic to multi-topic, where the new multi-topic pattern regex matches the
* old config single topic. Without this override, the previously stored metadata for the single
topic would be deemed as different from the currently configured stream, and not be included in
* the offset map returned. This implementation handles these cases appropriately.
*
* @return the previoulsy stored offsets from metadata storage, possibly updated with offsets removed
* @return the previously stored offsets from metadata storage, possibly updated with offsets removed
* for topics that do not match the currently configured supervisor topic. Topic partition keys may also be
* updated to single topic or multi-topic depending on the supervisor config, as needed.
*/
@Override
protected Map<KafkaTopicPartition, Long> getOffsetsFromMetadataStorage()
public Map<KafkaTopicPartition, Long> getOffsetsFromMetadataStorage()
{
final DataSourceMetadata dataSourceMetadata = retrieveDataSourceMetadata();
if (checkSourceMetadataMatch(dataSourceMetadata)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6203,6 +6203,144 @@ public int getPendingCompletionTaskGroupsCount(int groupId)
}
}

/**
 * Verifies the happy path of {@code submitBackfillTask}: with a null
 * backfillTaskCount and a supervisor created with taskCount=4, the default of
 * taskCount / 2 = 2 tasks is used, so the 3 partitions are split across exactly
 * two submitted KafkaIndexTasks, each carrying only its own partitions' offsets
 * and useTransaction=false.
 */
@Test
public void testSubmitBackfillTask()
{
// Stub leader/task infrastructure; no pre-existing tasks or stored metadata.
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(null).anyTimes();

// Capture every task submitted to the queue; exactly 2 submissions are expected.
Capture<Task> capturedTasks = Capture.newInstance(CaptureType.ALL);
EasyMock.expect(taskQueue.add(EasyMock.capture(capturedTasks))).andReturn(true).times(2);

replayAll();

// taskCount=4 => default backfill task count is 4 / 2 = 2.
supervisor = getTestableSupervisor(2, 4, true, false, null, null, null);

// Create start and end offsets for 3 partitions
Map<KafkaTopicPartition, Long> startOffsets = ImmutableMap.of(
new KafkaTopicPartition(false, topic, 0), 100L,
new KafkaTopicPartition(false, topic, 1), 200L,
new KafkaTopicPartition(false, topic, 2), 300L
);

Map<KafkaTopicPartition, Long> endOffsets = ImmutableMap.of(
new KafkaTopicPartition(false, topic, 0), 150L,
new KafkaTopicPartition(false, topic, 1), 250L,
new KafkaTopicPartition(false, topic, 2), 350L
);

// null backfillTaskCount exercises the taskCount / 2 default.
supervisor.submitBackfillTask(startOffsets, endOffsets, null);

List<Task> tasks = capturedTasks.getValues();
Assert.assertEquals(2, tasks.size());

// Verify both tasks are KafkaIndexTask
for (Task task : tasks) {
Assert.assertTrue(task instanceof KafkaIndexTask);
KafkaIndexTask kafkaTask = (KafkaIndexTask) task;

// Verify useTransaction=false for backfill tasks
Assert.assertFalse(
"Backfill tasks should have useTransaction=false",
kafkaTask.getIOConfig().isUseTransaction()
);

// Verify task has correct datasource
Assert.assertEquals(DATASOURCE, kafkaTask.getDataSource());

// Verify offsets are within the expected range
Map<KafkaTopicPartition, Long> taskStartOffsets =
kafkaTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap();
Map<KafkaTopicPartition, Long> taskEndOffsets =
kafkaTask.getIOConfig().getEndSequenceNumbers().getPartitionSequenceNumberMap();

// Each task's per-partition offsets must match the originals exactly.
for (Map.Entry<KafkaTopicPartition, Long> entry : taskStartOffsets.entrySet()) {
KafkaTopicPartition partition = entry.getKey();
Long startOffset = entry.getValue();
Long endOffset = taskEndOffsets.get(partition);

// Verify offsets are from our original maps
Assert.assertTrue(startOffsets.containsKey(partition));
Assert.assertEquals(startOffsets.get(partition), startOffset);
Assert.assertEquals(endOffsets.get(partition), endOffset);
}
}

verifyAll();
}

/**
 * Verifies that a partition absent from the start-offsets map (i.e. with no prior
 * checkpoint) has its start offset set equal to its end offset, so the backfill
 * task consumes no data for that partition while other partitions keep their
 * original ranges.
 */
@Test
public void testSubmitBackfillTaskWithNullStartOffset()
{
// Stub leader/task infrastructure; no pre-existing tasks or stored metadata.
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(null).anyTimes();

// Exactly one task is expected: taskCount=2 => default 1 backfill task.
Capture<Task> capturedTask = Capture.newInstance();
EasyMock.expect(taskQueue.add(EasyMock.capture(capturedTask))).andReturn(true).once();

replayAll();

supervisor = getTestableSupervisor(2, 2, true, false, null, null, null);

KafkaTopicPartition partition0 = new KafkaTopicPartition(false, topic, 0);
KafkaTopicPartition partition1 = new KafkaTopicPartition(false, topic, 1);

// partition0 has a start offset, partition1 does not (null in startOffsets map)
Map<KafkaTopicPartition, Long> startOffsets = ImmutableMap.of(
partition0, 100L
// partition1 intentionally missing - simulates no checkpoint for this partition
);

Map<KafkaTopicPartition, Long> endOffsets = ImmutableMap.of(
partition0, 150L,
partition1, 250L
);

supervisor.submitBackfillTask(startOffsets, endOffsets, null);

// Verify task was submitted
Task task = capturedTask.getValue();
Assert.assertTrue(task instanceof KafkaIndexTask);
KafkaIndexTask kafkaTask = (KafkaIndexTask) task;

Map<KafkaTopicPartition, Long> taskStartOffsets =
kafkaTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap();
Map<KafkaTopicPartition, Long> taskEndOffsets =
kafkaTask.getIOConfig().getEndSequenceNumbers().getPartitionSequenceNumberMap();

// partition0 should use its start offset
Assert.assertEquals(Long.valueOf(100L), taskStartOffsets.get(partition0));
Assert.assertEquals(Long.valueOf(150L), taskEndOffsets.get(partition0));

// partition1 should have startOffset set equal to endOffset (since start was null)
Assert.assertEquals(Long.valueOf(250L), taskStartOffsets.get(partition1));
Assert.assertEquals(Long.valueOf(250L), taskEndOffsets.get(partition1));

verifyAll();
}

/**
 * Verifies that submitBackfillTask returns early when both offset maps are empty:
 * no expectation is registered on taskQueue.add, so verifyAll() fails if any task
 * submission is attempted.
 */
@Test
public void testSubmitBackfillTaskWithEmptyOffsets()
{
// Only leader/storage stubs; deliberately NO taskQueue.add expectation.
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();

replayAll();

supervisor = getTestableSupervisor(2, 2, true, false, null, null, null);

// Submit with empty offsets - should return early without submitting any tasks
supervisor.submitBackfillTask(ImmutableMap.of(), ImmutableMap.of(), null);

// Verify no tasks were submitted (taskQueue.add should never be called)
verifyAll();
}

private static class TestableKafkaSupervisorWithCustomIsTaskCurrent extends TestableKafkaSupervisor
{
private final boolean isTaskCurrentReturn;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ protected Map<String, Long> getTimeLagPerPartition(Map<String, String> currentOf
}

@Override
protected SeekableStreamDataSourceMetadata<String, String> createDataSourceMetaDataForReset(
public SeekableStreamDataSourceMetadata<String, String> createDataSourceMetaDataForReset(
String stream,
Map<String, String> map
)
Expand All @@ -332,7 +332,7 @@ protected OrderedSequenceNumber<String> makeSequenceNumber(String seq, boolean i
}

@Override
protected void updatePartitionLagFromStream()
public void updatePartitionLagFromStream()
{
KinesisRecordSupplier supplier = (KinesisRecordSupplier) recordSupplier;
// this recordSupplier method is thread safe, so does not need to acquire the recordSupplierLock
Expand Down
Loading
Loading