-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Associate pending segments with the tasks that requested them #16144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
dd63724
743696b
5f77ea6
21ca620
d2aa575
04c2593
0386c2a
cf7815b
e7e86b9
3805410
2a32dc2
9906afc
dfd3cad
21b8ee5
8bb0feb
7ffe0ec
3e17b19
422411b
f91b1a3
de6b40a
4653e60
4581133
b49b8d8
277596f
a21adbc
a6f958d
c31837a
477abce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,10 +22,12 @@ | |
| import com.fasterxml.jackson.annotation.JsonCreator; | ||
| import com.fasterxml.jackson.annotation.JsonProperty; | ||
| import com.fasterxml.jackson.core.type.TypeReference; | ||
| import org.apache.druid.error.DruidException; | ||
| import org.apache.druid.error.InvalidInput; | ||
| import org.apache.druid.indexing.common.TaskLock; | ||
| import org.apache.druid.indexing.common.TaskLockType; | ||
| import org.apache.druid.indexing.common.task.IndexTaskUtils; | ||
| import org.apache.druid.indexing.common.task.PendingSegmentAllocatingTask; | ||
| import org.apache.druid.indexing.common.task.Task; | ||
| import org.apache.druid.indexing.overlord.CriticalAction; | ||
| import org.apache.druid.indexing.overlord.DataSourceMetadata; | ||
|
|
@@ -41,8 +43,20 @@ | |
| import java.util.stream.Collectors; | ||
|
|
||
| /** | ||
| * | ||
| * Append segments to metadata storage. The segment versions must all be less than or equal to a lock held by | ||
| * your task for the segment intervals. | ||
| * | ||
| * <pre> | ||
| * Pseudo code (for a single interval): | ||
| * For an append lock held over an interval: | ||
| * transaction { | ||
| * commit input segments contained within interval | ||
| * if there is an active replace lock over the interval: | ||
| * add an entry for the inputSegment corresponding to the replace lock's task in the upgradeSegments table | ||
| * fetch pending segments with parent contained within the input segments, and commit them | ||
| * } | ||
|
Comment on lines
+51
to
+58
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It doesn't seem appropriate to have the implementation described as pseudo-code. Someone might as well read the code. It is better to briefly describe the key points of the implementation in a list fashion. (This is not a blocker for the PR). |
||
| * </pre> | ||
| */ | ||
| public class SegmentTransactionalAppendAction implements TaskAction<SegmentPublishResult> | ||
| { | ||
|
|
@@ -114,6 +128,13 @@ public TypeReference<SegmentPublishResult> getReturnTypeReference() | |
| @Override | ||
| public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) | ||
| { | ||
| if (!(task instanceof PendingSegmentAllocatingTask)) { | ||
| throw DruidException.defensive( | ||
| "Task[%s] of type[%s] cannot append segments as it does not implement PendingSegmentAllocatingTask.", | ||
| task.getId(), | ||
| task.getType() | ||
| ); | ||
| } | ||
| // Verify that all the locks are of expected type | ||
| final List<TaskLock> locks = toolbox.getTaskLockbox().findLocksForTask(task); | ||
| for (TaskLock lock : locks) { | ||
|
|
@@ -132,17 +153,20 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) | |
| = TaskLocks.findReplaceLocksCoveringSegments(datasource, toolbox.getTaskLockbox(), segments); | ||
|
|
||
| final CriticalAction.Action<SegmentPublishResult> publishAction; | ||
| final String taskAllocatorId = ((PendingSegmentAllocatingTask) task).getTaskAllocatorId(); | ||
| if (startMetadata == null) { | ||
| publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegments( | ||
| segments, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need this branch at all? Can't we just simply call
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. At some point, the plan was to have one action for just committing segments and another action for committing segments and metadata both. So we decided to keep a method that would just commit segments. But we eventually decided against having the two actions as it didn't really serve a lot of purpose. So now we could simplify the |
||
| segmentToReplaceLock | ||
| segmentToReplaceLock, | ||
| taskAllocatorId | ||
| ); | ||
| } else { | ||
| publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegmentsAndMetadata( | ||
| segments, | ||
| segmentToReplaceLock, | ||
| startMetadata, | ||
| endMetadata | ||
| endMetadata, | ||
| taskAllocatorId | ||
| ); | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,18 +30,35 @@ | |
| import org.apache.druid.indexing.overlord.SegmentPublishResult; | ||
| import org.apache.druid.indexing.overlord.supervisor.SupervisorManager; | ||
| import org.apache.druid.java.util.common.logger.Logger; | ||
| import org.apache.druid.metadata.PendingSegmentRecord; | ||
| import org.apache.druid.metadata.ReplaceTaskLock; | ||
| import org.apache.druid.segment.SegmentUtils; | ||
| import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; | ||
| import org.apache.druid.timeline.DataSegment; | ||
|
|
||
| import java.util.HashMap; | ||
| import java.util.HashSet; | ||
| import java.util.Map; | ||
| import java.util.Set; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| /** | ||
| * Replace segments in metadata storage. The segment versions must all be less than or equal to a lock held by | ||
| * your task for the segment intervals. | ||
| * | ||
| * <pre> | ||
| * Pseudo code (for a single interval) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same comment regarding pseudo-code. |
||
| *- For a replace lock held over an interval: | ||
| * transaction { | ||
| * commit input segments contained within interval | ||
| * upgrade ids in the upgradeSegments table corresponding to this task to the replace lock's version and commit them | ||
| * fetch payload, task_allocator_id for pending segments | ||
| * upgrade each such pending segment to the replace lock's version with the corresponding root segment | ||
| * } | ||
| * For every pending segment with version == replace lock version: | ||
| * Fetch payload, group_id of the pending segment and relay them to the supervisor | ||
| * The supervisor relays the payloads to all the tasks with the corresponding group_id to serve realtime queries | ||
| * </pre> | ||
| */ | ||
| public class SegmentTransactionalReplaceAction implements TaskAction<SegmentPublishResult> | ||
| { | ||
|
|
@@ -123,7 +140,7 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) | |
| // failure to upgrade pending segments does not affect success of the commit | ||
| if (publishResult.isSuccess() && toolbox.getSupervisorManager() != null) { | ||
| try { | ||
| tryUpgradeOverlappingPendingSegments(task, toolbox); | ||
| registerUpgradedPendingSegmentsOnSupervisor(task, toolbox); | ||
| } | ||
| catch (Exception e) { | ||
| log.error(e, "Error while upgrading pending segments for task[%s]", task.getId()); | ||
|
|
@@ -134,34 +151,55 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) | |
| } | ||
|
|
||
| /** | ||
| * Tries to upgrade any pending segments that overlap with the committed segments. | ||
| * Registers upgraded pending segments on the active supervisor, if any | ||
| */ | ||
| private void tryUpgradeOverlappingPendingSegments(Task task, TaskActionToolbox toolbox) | ||
| private void registerUpgradedPendingSegmentsOnSupervisor(Task task, TaskActionToolbox toolbox) | ||
| { | ||
| final SupervisorManager supervisorManager = toolbox.getSupervisorManager(); | ||
|
AmatyaAvadhanula marked this conversation as resolved.
|
||
| final Optional<String> activeSupervisorIdWithAppendLock = | ||
| supervisorManager.getActiveSupervisorIdForDatasourceWithAppendLock(task.getDataSource()); | ||
|
|
||
| if (!activeSupervisorIdWithAppendLock.isPresent()) { | ||
| return; | ||
| } | ||
|
|
||
| final Set<String> activeRealtimeSequencePrefixes | ||
| = supervisorManager.getActiveRealtimeSequencePrefixes(activeSupervisorIdWithAppendLock.get()); | ||
| Map<SegmentIdWithShardSpec, SegmentIdWithShardSpec> upgradedPendingSegments = | ||
| toolbox.getIndexerMetadataStorageCoordinator() | ||
| .upgradePendingSegmentsOverlappingWith(segments, activeRealtimeSequencePrefixes); | ||
| log.info( | ||
| "Upgraded [%d] pending segments for REPLACE task[%s]: [%s]", | ||
| upgradedPendingSegments.size(), task.getId(), upgradedPendingSegments | ||
| ); | ||
| final Set<ReplaceTaskLock> replaceLocksForTask = toolbox | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some comments here would be helpful. |
||
| .getTaskLockbox() | ||
| .getAllReplaceLocksForDatasource(task.getDataSource()) | ||
| .stream() | ||
| .filter(lock -> task.getId().equals(lock.getSupervisorTaskId())) | ||
| .collect(Collectors.toSet()); | ||
|
|
||
|
|
||
| upgradedPendingSegments.forEach( | ||
| (oldId, newId) -> toolbox.getSupervisorManager() | ||
| .registerNewVersionOfPendingSegmentOnSupervisor( | ||
| activeSupervisorIdWithAppendLock.get(), | ||
| oldId, | ||
| newId | ||
| ) | ||
| Set<PendingSegmentRecord> pendingSegments = new HashSet<>(); | ||
| for (ReplaceTaskLock replaceLock : replaceLocksForTask) { | ||
| pendingSegments.addAll( | ||
| toolbox.getIndexerMetadataStorageCoordinator() | ||
| .getPendingSegments(task.getDataSource(), replaceLock.getInterval()) | ||
| ); | ||
| } | ||
| Map<String, SegmentIdWithShardSpec> idToPendingSegment = new HashMap<>(); | ||
| pendingSegments.forEach(pendingSegment -> idToPendingSegment.put( | ||
| pendingSegment.getId().asSegmentId().toString(), | ||
| pendingSegment.getId() | ||
| )); | ||
| Map<SegmentIdWithShardSpec, SegmentIdWithShardSpec> segmentToParent = new HashMap<>(); | ||
| pendingSegments.forEach(pendingSegment -> { | ||
| if (pendingSegment.getUpgradedFromSegmentId() != null | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we only look at pending segments that were upgraded by this task rather than all upgraded pending segments? |
||
| && !pendingSegment.getUpgradedFromSegmentId().equals(pendingSegment.getId().asSegmentId().toString())) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can the |
||
| segmentToParent.put( | ||
| pendingSegment.getId(), | ||
| idToPendingSegment.get(pendingSegment.getUpgradedFromSegmentId()) | ||
| ); | ||
| } | ||
| }); | ||
|
|
||
| segmentToParent.forEach( | ||
| (newId, oldId) -> supervisorManager.registerNewVersionOfPendingSegmentOnSupervisor( | ||
| activeSupervisorIdWithAppendLock.get(), | ||
| oldId, | ||
| newId | ||
| ) | ||
| ); | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This class need not implement
PendingSegmentAllocatingTask, as it never actually does any allocation. The allocation is always done by the controller task. This can be addressed in a follow-up PR.