From b49d7f98feebfa492c1be6b629ecda106061c86c Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Tue, 27 Feb 2024 17:47:59 +0530 Subject: [PATCH 01/15] Kill task version support. Kill tasks by default kill all versions of unused segments in the specified interval. Users wanting to delete specific versions (for example, data compliance reasons) and keep rest of the versions can specify the optional version in the kill task payload. --- docs/data-management/delete.md | 2 + ...tadataStoreBasedUsedSegmentsRetriever.java | 2 +- .../actions/RetrieveUnusedSegmentsAction.java | 15 +- .../actions/RetrieveUsedSegmentsAction.java | 33 +- .../indexing/common/task/ArchiveTask.java | 2 +- .../common/task/KillUnusedSegmentsTask.java | 33 +- .../druid/indexing/common/task/MoveTask.java | 2 +- .../indexing/common/task/RestoreTask.java | 2 +- ...rlordActionBasedUsedSegmentsRetriever.java | 2 +- .../actions/RetrieveSegmentsActionsTest.java | 67 ++- .../RetrieveUsedSegmentsActionSerdeTest.java | 22 +- ...tKillUnusedSegmentsTaskQuerySerdeTest.java | 12 +- .../common/task/CompactionTaskRunTest.java | 24 +- .../task/KillUnusedSegmentsTaskTest.java | 414 ++++++++++++++++-- ...stractParallelIndexSupervisorTaskTest.java | 2 +- .../ConcurrentReplaceAndAppendTest.java | 1 + .../indexing/overlord/TaskLifecycleTest.java | 8 +- .../overlord/http/OverlordResourceTest.java | 2 +- ...TestIndexerMetadataStorageCoordinator.java | 4 +- .../ClientKillUnusedSegmentsTaskQuery.java | 13 +- .../IndexerMetadataStorageCoordinator.java | 24 +- .../IndexerSQLMetadataStorageCoordinator.java | 32 +- .../metadata/SqlSegmentsMetadataManager.java | 8 +- .../metadata/SqlSegmentsMetadataQuery.java | 59 ++- .../druid/rpc/indexing/OverlordClient.java | 2 + .../coordinator/duty/KillUnusedSegments.java | 1 + .../server/http/DataSourcesResource.java | 2 +- .../druid/server/http/MetadataResource.java | 2 +- ...ClientKillUnusedSegmentsTaskQueryTest.java | 1 + ...exerSQLMetadataStorageCoordinatorTest.java | 60 +-- .../rpc/indexing/OverlordClientImplTest.java | 1 + .../duty/KillUnusedSegmentsTest.java | 1 + .../server/http/DataSourcesResourceTest.java | 2 +- 33 files changed, 678 insertions(+), 179 deletions(-) diff --git a/docs/data-management/delete.md b/docs/data-management/delete.md index fccb14007b94..d319acc77b6b 100644 --- a/docs/data-management/delete.md +++ b/docs/data-management/delete.md @@ -95,6 +95,7 @@ The available grammar is: "id": , "dataSource": , "interval" : , + "version" : , "context": , "batchSize": , "limit": , @@ -106,6 +107,7 @@ Some of the parameters used in the task payload are further explained below: | Parameter | Default | Explanation | |-------------|-----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `version` | null (all versions) | Version of segments in the interval for the kill task to delete. The default behavior is to delete all unused segment versions in the specified `interval`.| | `batchSize` |100 | Maximum number of segments that are deleted in one kill batch. Some operations on the Overlord may get stuck while a `kill` task is in progress due to concurrency constraints (such as in `TaskLockbox`). Thus, a `kill` task splits the list of unused segments to be deleted into smaller batches to yield the Overlord resources intermittently to other task operations.| | `limit` | null (no limit) | Maximum number of segments for the kill task to delete.| | `maxUsedStatusLastUpdatedTime` | null (no cutoff) | Maximum timestamp used as a cutoff to include unused segments. The kill task only considers segments which lie in the specified `interval` and were marked as unused no later than this time. The default behavior is to kill all unused segments in the `interval` regardless of when they where marked as unused.| diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/path/MetadataStoreBasedUsedSegmentsRetriever.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/path/MetadataStoreBasedUsedSegmentsRetriever.java index e1afb4cb6b62..df9e3297ee57 100644 --- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/path/MetadataStoreBasedUsedSegmentsRetriever.java +++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/path/MetadataStoreBasedUsedSegmentsRetriever.java @@ -51,6 +51,6 @@ public Collection retrieveUsedSegmentsForIntervals( Segments visibility ) { - return indexerMetadataStorageCoordinator.retrieveUsedSegmentsForIntervals(dataSource, intervals, visibility); + return indexerMetadataStorageCoordinator.retrieveUsedSegmentsForIntervals(dataSource, intervals, null, visibility); } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java index bb188952966a..ab1a0bef4a8b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java @@ -40,6 +40,9 @@ public class RetrieveUnusedSegmentsAction implements TaskAction> getReturnTypeReference() public List perform(Task task, TaskActionToolbox toolbox) { return toolbox.getIndexerMetadataStorageCoordinator() - .retrieveUnusedSegmentsForInterval(dataSource, interval, limit, maxUsedStatusLastUpdatedTime); + .retrieveUnusedSegmentsForInterval(dataSource, interval, version, limit, maxUsedStatusLastUpdatedTime); } @Override @@ -111,6 +123,7 @@ public String toString() return getClass().getSimpleName() + "{" + "dataSource='" + dataSource + '\'' + ", interval=" + interval + + ", version=" + version + ", limit=" + limit + ", maxUsedStatusLastUpdatedTime=" + maxUsedStatusLastUpdatedTime + '}'; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java index 29986eeba555..5b6016c20165 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java @@ -73,6 +73,9 @@ public class RetrieveUsedSegmentsAction implements TaskAction intervals; + @JsonIgnore + private final String version; + @JsonIgnore private final Segments visibility; @@ -81,6 +84,7 @@ public RetrieveUsedSegmentsAction( @JsonProperty("dataSource") String dataSource, @Deprecated @JsonProperty("interval") Interval interval, @JsonProperty("intervals") Collection intervals, + @JsonProperty("version") @Nullable String version, // When JSON object is deserialized, this parameter is optional for backward compatibility. // Otherwise, it shouldn't be considered optional. @JsonProperty("visibility") @Nullable Segments visibility @@ -100,14 +104,14 @@ public RetrieveUsedSegmentsAction( theIntervals = JodaUtils.condenseIntervals(intervals); } this.intervals = Preconditions.checkNotNull(theIntervals, "no intervals found"); - + this.version = version; // Defaulting to the former behaviour when visibility wasn't explicitly specified for backward compatibility this.visibility = visibility != null ? visibility : Segments.ONLY_VISIBLE; } public RetrieveUsedSegmentsAction(String dataSource, Collection intervals) { - this(dataSource, null, intervals, Segments.ONLY_VISIBLE); + this(dataSource, null, intervals, null, Segments.ONLY_VISIBLE); } @JsonProperty @@ -122,6 +126,13 @@ public List getIntervals() return intervals; } + @Nullable + @JsonProperty + public String getVersion() + { + return version; + } + @JsonProperty public Segments getVisibility() { @@ -207,7 +218,7 @@ public Collection perform(Task task, TaskActionToolbox toolbox) private Collection retrieveUsedSegments(TaskActionToolbox toolbox) { return toolbox.getIndexerMetadataStorageCoordinator() - .retrieveUsedSegmentsForIntervals(dataSource, intervals, visibility); + .retrieveUsedSegmentsForIntervals(dataSource, intervals, version, visibility); } @Override @@ -225,22 +236,17 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } - RetrieveUsedSegmentsAction that = (RetrieveUsedSegmentsAction) o; - - if (!dataSource.equals(that.dataSource)) { - return false; - } - if (!intervals.equals(that.intervals)) { - return false; - } - return visibility.equals(that.visibility); + return Objects.equals(dataSource, that.dataSource) + && Objects.equals(intervals, that.intervals) + && Objects.equals(version, that.version) + && visibility == that.visibility; } @Override public int hashCode() { - return Objects.hash(dataSource, intervals, visibility); + return Objects.hash(dataSource, intervals, version, visibility); } @Override @@ -249,6 +255,7 @@ public String toString() return getClass().getSimpleName() + "{" + "dataSource='" + dataSource + '\'' + ", intervals=" + intervals + + ", version=" + version + ", visibility=" + visibility + '}'; } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/ArchiveTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/ArchiveTask.java index 957317ad93cd..9104e2cfb05b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/ArchiveTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/ArchiveTask.java @@ -79,7 +79,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // List unused segments final List unusedSegments = toolbox .getTaskActionClient() - .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval(), null, null)); + .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval(), null, null, null)); // Verify none of these segments have versions > lock version for (final DataSegment unusedSegment : unusedSegments) { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index cbf4a84ba790..9a997bac808f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -102,11 +102,17 @@ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask */ @Nullable private final DateTime maxUsedStatusLastUpdatedTime; + /** + * The version of segments to delete in this {@link #getInterval()}. + */ + @Nullable private final String version; + @JsonCreator public KillUnusedSegmentsTask( @JsonProperty("id") String id, @JsonProperty("dataSource") String dataSource, @JsonProperty("interval") Interval interval, + @JsonProperty("version") @Nullable String version, @JsonProperty("context") Map context, @JsonProperty("markAsUnused") @Deprecated Boolean markAsUnused, @JsonProperty("batchSize") Integer batchSize, @@ -131,6 +137,10 @@ public KillUnusedSegmentsTask( if (limit != null && Boolean.TRUE.equals(markAsUnused)) { throw InvalidInput.exception("limit[%d] cannot be provided when markAsUnused is enabled.", limit); } + if (version != null && Boolean.TRUE.equals(markAsUnused)) { + throw InvalidInput.exception("version[%s] cannot be provided when markAsUnused is enabled.", version); + } + this.version = version; this.limit = limit; this.maxUsedStatusLastUpdatedTime = maxUsedStatusLastUpdatedTime; } @@ -157,6 +167,13 @@ public int getBatchSize() return batchSize; } + @Nullable + @JsonProperty + public String getVersion() + { + return version; + } + @Nullable @JsonProperty public Integer getLimit() @@ -207,13 +224,9 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception @Nullable Integer numTotalBatches = getNumTotalBatches(); List unusedSegments; LOG.info( - "Starting kill for datasource[%s] in interval[%s] with batchSize[%d], up to limit[%d] segments " - + "before maxUsedStatusLastUpdatedTime[%s] will be deleted%s", - getDataSource(), - getInterval(), - batchSize, - limit, - maxUsedStatusLastUpdatedTime, + "Starting kill for datasource[%s] in interval[%s] and version[%s] with batchSize[%d], up to limit[%d]" + + " segments before maxUsedStatusLastUpdatedTime[%s] will be deleted%s", + getDataSource(), getInterval(), getVersion(), batchSize, limit, maxUsedStatusLastUpdatedTime, numTotalBatches != null ? StringUtils.format(" in [%d] batches.", numTotalBatches) : "." ); @@ -221,6 +234,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception getDataSource(), null, ImmutableList.of(getInterval()), + getVersion(), Segments.INCLUDING_OVERSHADOWED ); // Fetch the load specs of all segments overlapping with the unused segment intervals @@ -239,8 +253,8 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception unusedSegments = toolbox .getTaskActionClient() .submit( - new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), nextBatchSize, maxUsedStatusLastUpdatedTime - )); + new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), getVersion(), nextBatchSize, maxUsedStatusLastUpdatedTime + )); // Fetch locks each time as a revokal could have occurred in between batches final NavigableMap> taskLockMap @@ -263,7 +277,6 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception toolbox.getTaskActionClient().submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); - // Kill segments from the deep storage only if their load specs are not being used by any used segments final List segmentsToBeKilled = unusedSegments .stream() diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/MoveTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/MoveTask.java index bea3685ca244..e02af66e0465 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/MoveTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/MoveTask.java @@ -87,7 +87,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // List unused segments final List unusedSegments = toolbox .getTaskActionClient() - .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval(), null, null)); + .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval(), null, null, null)); // Verify none of these segments have versions > lock version for (final DataSegment unusedSegment : unusedSegments) { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RestoreTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RestoreTask.java index cc635383fed5..e3a0ecf633ee 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RestoreTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RestoreTask.java @@ -80,7 +80,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // List unused segments final List unusedSegments = toolbox .getTaskActionClient() - .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval(), null, null)); + .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval(), null, null, null)); // Verify none of these segments have versions > lock version for (final DataSegment unusedSegment : unusedSegments) { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/hadoop/OverlordActionBasedUsedSegmentsRetriever.java b/indexing-service/src/main/java/org/apache/druid/indexing/hadoop/OverlordActionBasedUsedSegmentsRetriever.java index 73bc411fb01d..3846eb10a19b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/hadoop/OverlordActionBasedUsedSegmentsRetriever.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/hadoop/OverlordActionBasedUsedSegmentsRetriever.java @@ -53,6 +53,6 @@ public Collection retrieveUsedSegmentsForIntervals( { return toolbox .getTaskActionClient() - .submit(new RetrieveUsedSegmentsAction(dataSource, null, intervals, visibility)); + .submit(new RetrieveUsedSegmentsAction(dataSource, null, intervals, null, visibility)); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java index 2dee594aad68..5d80a4746445 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java @@ -23,6 +23,7 @@ import com.google.common.collect.ImmutableSet; import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.common.task.Task; +import org.apache.druid.indexing.overlord.Segments; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.timeline.DataSegment; @@ -40,6 +41,8 @@ public class RetrieveSegmentsActionsTest { private static final Interval INTERVAL = Intervals.of("2017-10-01/2017-10-15"); + private static final String VERSION_1 = "1"; + private static final String VERSION_2 = "2"; @ClassRule public static TaskActionTestKit actionTestKit = new TaskActionTestKit(); @@ -56,9 +59,9 @@ public static void setup() throws IOException actionTestKit.getTaskLockbox().add(task); expectedUnusedSegments = new HashSet<>(); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), "1")); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), "1")); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), "1")); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), VERSION_1)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), VERSION_1)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), VERSION_1)); actionTestKit.getMetadataStorageCoordinator() .commitSegments(expectedUnusedSegments); @@ -66,9 +69,9 @@ public static void setup() throws IOException expectedUnusedSegments.forEach(s -> actionTestKit.getTaskLockbox().unlock(task, s.getInterval())); expectedUsedSegments = new HashSet<>(); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), "2")); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), "2")); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), "2")); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), VERSION_2)); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), VERSION_2)); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), VERSION_2)); actionTestKit.getMetadataStorageCoordinator() .commitSegments(expectedUsedSegments); @@ -103,9 +106,45 @@ public void testRetrieveUsedSegmentsAction() } @Test - public void testRetrieveUnusedSegmentsAction() + public void testRetrieveUsedSegmentsActionWithValidVersionIncludingOvershadowed() { - final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, null); + final RetrieveUsedSegmentsAction action = + new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_2, Segments.INCLUDING_OVERSHADOWED); + final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(expectedUsedSegments, resultSegments); + } + + @Test + public void testRetrieveUsedSegmentsActionWithNonExistentVersionVisibleOnly() + { + final RetrieveUsedSegmentsAction action = + new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_1, Segments.ONLY_VISIBLE); + final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(ImmutableSet.of(), resultSegments); + } + + @Test + public void testRetrieveUsedSegmentsActionWithValidVersionVisibleOnly() + { + final RetrieveUsedSegmentsAction action = + new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_2, Segments.ONLY_VISIBLE); + final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(expectedUsedSegments, resultSegments); + } + + @Test + public void testRetrieveUsedSegmentsActionWithNonExistentVersionIncludingOvershadowed() + { + final RetrieveUsedSegmentsAction action = + new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_1, Segments.INCLUDING_OVERSHADOWED); + final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(ImmutableSet.of(), resultSegments); + } + + @Test + public void testRetrieveUnusedSegmentsActionWithValidVersion() + { + final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, null, null); final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); Assert.assertEquals(expectedUnusedSegments, resultSegments); } @@ -113,15 +152,23 @@ public void testRetrieveUnusedSegmentsAction() @Test public void testRetrieveUnusedSegmentsActionWithMinUsedLastUpdatedTime() { - final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, DateTimes.MIN); + final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, null, DateTimes.MIN); final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); Assert.assertEquals(ImmutableSet.of(), resultSegments); } + @Test + public void testRetrieveUnusedSegmentsActionWithVersion() + { + final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, VERSION_1, null, null); + final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(expectedUnusedSegments, resultSegments); + } + @Test public void testRetrieveUnusedSegmentsActionWithNowUsedLastUpdatedTime() { - final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, DateTimes.nowUtc()); + final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, null, DateTimes.nowUtc()); final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); Assert.assertEquals(expectedUnusedSegments, resultSegments); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java index 99675fd57bb1..04ce80f3d9ec 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import org.apache.druid.indexing.overlord.Segments; +import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.segment.TestHelper; import org.joda.time.Interval; @@ -42,7 +43,7 @@ public void testSingleIntervalSerde() throws Exception Interval interval = Intervals.of("2014/2015"); RetrieveUsedSegmentsAction expected = - new RetrieveUsedSegmentsAction("dataSource", interval, null, Segments.ONLY_VISIBLE); + new RetrieveUsedSegmentsAction("dataSource", interval, null, null, Segments.ONLY_VISIBLE); RetrieveUsedSegmentsAction actual = MAPPER.readValue(MAPPER.writeValueAsString(expected), RetrieveUsedSegmentsAction.class); @@ -65,6 +66,23 @@ public void testMultiIntervalSerde() throws Exception Assert.assertEquals(expected, actual); } + @Test + public void testMultiIntervalWithVersionSerde() throws Exception + { + List intervals = ImmutableList.of(Intervals.of("2014/2015"), Intervals.of("2016/2017")); + RetrieveUsedSegmentsAction expected = new RetrieveUsedSegmentsAction( + "dataSource", + null, + intervals, + DateTimes.nowUtc().toString(), + Segments.ONLY_VISIBLE + ); + + RetrieveUsedSegmentsAction actual = + MAPPER.readValue(MAPPER.writeValueAsString(expected), RetrieveUsedSegmentsAction.class); + Assert.assertEquals(expected, actual); + } + @Test public void testOldJsonDeserialization() throws Exception { @@ -72,7 +90,7 @@ public void testOldJsonDeserialization() throws Exception RetrieveUsedSegmentsAction actual = (RetrieveUsedSegmentsAction) MAPPER.readValue(jsonStr, TaskAction.class); Assert.assertEquals( - new RetrieveUsedSegmentsAction("test", Intervals.of("2014/2015"), null, Segments.ONLY_VISIBLE), + new RetrieveUsedSegmentsAction("test", Intervals.of("2014/2015"), null, null, Segments.ONLY_VISIBLE), actual ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java index 7e7c9088d61f..9ffe6c8d11a3 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java @@ -52,6 +52,7 @@ public void testClientKillUnusedSegmentsTaskQueryToKillUnusedSegmentsTask() thro "killTaskId", "datasource", Intervals.of("2020-01-01/P1D"), + null, false, 99, 5, @@ -62,6 +63,7 @@ public void testClientKillUnusedSegmentsTaskQueryToKillUnusedSegmentsTask() thro Assert.assertEquals(taskQuery.getId(), fromJson.getId()); Assert.assertEquals(taskQuery.getDataSource(), fromJson.getDataSource()); Assert.assertEquals(taskQuery.getInterval(), fromJson.getInterval()); + Assert.assertNull(taskQuery.getVersion()); Assert.assertEquals(taskQuery.getMarkAsUnused(), fromJson.isMarkAsUnused()); Assert.assertEquals(taskQuery.getBatchSize(), Integer.valueOf(fromJson.getBatchSize())); Assert.assertEquals(taskQuery.getLimit(), fromJson.getLimit()); @@ -75,6 +77,7 @@ public void testClientKillUnusedSegmentsTaskQueryToKillUnusedSegmentsTaskDefault "killTaskId", "datasource", Intervals.of("2020-01-01/P1D"), + null, true, null, null, @@ -85,6 +88,7 @@ public void testClientKillUnusedSegmentsTaskQueryToKillUnusedSegmentsTaskDefault Assert.assertEquals(taskQuery.getId(), fromJson.getId()); Assert.assertEquals(taskQuery.getDataSource(), fromJson.getDataSource()); Assert.assertEquals(taskQuery.getInterval(), fromJson.getInterval()); + Assert.assertNull(taskQuery.getVersion()); Assert.assertEquals(taskQuery.getMarkAsUnused(), fromJson.isMarkAsUnused()); Assert.assertEquals(100, fromJson.getBatchSize()); Assert.assertNull(taskQuery.getLimit()); @@ -99,6 +103,7 @@ public void testKillUnusedSegmentsTaskToClientKillUnusedSegmentsTaskQuery() thro "datasource", Intervals.of("2020-01-01/P1D"), null, + null, true, 99, null, @@ -112,10 +117,11 @@ public void testKillUnusedSegmentsTaskToClientKillUnusedSegmentsTaskQuery() thro Assert.assertEquals(task.getId(), taskQuery.getId()); Assert.assertEquals(task.getDataSource(), taskQuery.getDataSource()); Assert.assertEquals(task.getInterval(), taskQuery.getInterval()); + Assert.assertNull(taskQuery.getVersion()); Assert.assertEquals(task.isMarkAsUnused(), taskQuery.getMarkAsUnused()); Assert.assertEquals(Integer.valueOf(task.getBatchSize()), taskQuery.getBatchSize()); - Assert.assertNull(task.getLimit()); - Assert.assertNull(task.getMaxUsedStatusLastUpdatedTime()); + Assert.assertNull(taskQuery.getLimit()); + Assert.assertNull(taskQuery.getMaxUsedStatusLastUpdatedTime()); } @Test @@ -125,6 +131,7 @@ public void testKillUnusedSegmentsTaskWithNonNullValuesToClientKillUnusedSegment null, "datasource", Intervals.of("2020-01-01/P1D"), + DateTimes.nowUtc().toString(), null, null, 99, @@ -139,6 +146,7 @@ public void testKillUnusedSegmentsTaskWithNonNullValuesToClientKillUnusedSegment Assert.assertEquals(task.getId(), taskQuery.getId()); Assert.assertEquals(task.getDataSource(), taskQuery.getDataSource()); Assert.assertEquals(task.getInterval(), taskQuery.getInterval()); + Assert.assertEquals(task.getVersion(), taskQuery.getVersion()); Assert.assertNull(taskQuery.getMarkAsUnused()); Assert.assertEquals(Integer.valueOf(task.getBatchSize()), taskQuery.getBatchSize()); Assert.assertEquals(task.getLimit(), taskQuery.getLimit()); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java index 03994e35e518..0bda569ac0d0 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java @@ -196,7 +196,7 @@ public ListenableFuture> fetchUsedSegments( { return Futures.immediateFuture( ImmutableList.copyOf( - getStorageCoordinator().retrieveUsedSegmentsForIntervals(dataSource, intervals, Segments.ONLY_VISIBLE) + getStorageCoordinator().retrieveUsedSegmentsForIntervals(dataSource, intervals, null, Segments.ONLY_VISIBLE) ) ); } @@ -1119,7 +1119,7 @@ public void testCompactThenAppend() throws Exception getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ); @@ -1185,7 +1185,7 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T00:00:00/2014-01-01T01:00:00")), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ); // add 2 unchanged segments for hour 02: @@ -1193,7 +1193,7 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T02:00:00/2014-01-01T03:00:00")), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ); expectedSegments.addAll(partialCompactionResult.rhs); @@ -1205,7 +1205,7 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ); Assert.assertEquals(expectedSegments, segmentsAfterPartialCompaction); @@ -1246,7 +1246,7 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ); segmentsAfterFullCompaction.sort( @@ -1341,7 +1341,7 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T00:00:00/2014-01-01T01:00:00")), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ); // add 2 unchanged segments for hour 02: @@ -1349,7 +1349,7 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T02:00:00/2014-01-01T03:00:00")), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ); expectedSegments.addAll(partialCompactionResult.rhs); @@ -1361,7 +1361,7 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ); Assert.assertEquals(expectedSegments, segmentsAfterPartialCompaction); @@ -1410,7 +1410,7 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ); @@ -1436,7 +1436,7 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ); @@ -1455,7 +1455,7 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ); segmentsAfterFullCompaction.sort( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java index e2c433536a2b..0dcac0480bbc 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java @@ -41,7 +41,9 @@ import org.junit.Before; import org.junit.Test; +import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -89,7 +91,7 @@ public void testKill() throws Exception null, DATA_SOURCE, Intervals.of("2019-03-01/2019-04-01"), - null, + null, null, false, null, null, @@ -99,10 +101,10 @@ public void testKill() throws Exception Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); final List unusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( - DATA_SOURCE, - Intervals.of("2019/2020"), - null, - null + DATA_SOURCE, + Intervals.of("2019/2020"), + null, null, + null ); Assert.assertEquals(ImmutableList.of(newSegment(Intervals.of("2019-02-01/2019-03-01"), version)), unusedSegments); @@ -144,7 +146,7 @@ public void testKillWithMarkUnused() throws Exception null, DATA_SOURCE, Intervals.of("2019-03-01/2019-04-01"), - null, + null, null, true, null, null, @@ -157,7 +159,7 @@ public void testKillWithMarkUnused() throws Exception getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, Intervals.of("2019/2020"), - null, + null, null, null ); @@ -181,7 +183,7 @@ public void testGetInputSourceResources() null, DATA_SOURCE, Intervals.of("2019-03-01/2019-04-01"), - null, + null, null, true, null, null, @@ -217,7 +219,7 @@ public void testKillBatchSizeOneAndLimit4() throws Exception null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, + null, null, false, 1, 4, @@ -232,7 +234,7 @@ public void testKillBatchSizeOneAndLimit4() throws Exception getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, Intervals.of("2019/2020"), - null, + null, null, null ); @@ -295,7 +297,7 @@ public void testKillMultipleUnusedSegmentsWithNullMaxUsedStatusLastUpdatedTime() null, DATA_SOURCE, umbrellaInterval, - null, + null, null, false, 1, 10, @@ -309,6 +311,7 @@ public void testKillMultipleUnusedSegmentsWithNullMaxUsedStatusLastUpdatedTime() DATA_SOURCE, umbrellaInterval, null, + null, null ); @@ -387,7 +390,7 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT null, DATA_SOURCE, umbrellaInterval, - null, + null, null, false, 1, 10, @@ -401,6 +404,7 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT DATA_SOURCE, umbrellaInterval, null, + null, null ); @@ -412,7 +416,7 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT null, DATA_SOURCE, umbrellaInterval, - null, + null, null, false, 1, 10, @@ -425,7 +429,7 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, umbrellaInterval, - null, + null, null, null ); @@ -500,7 +504,7 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT null, DATA_SOURCE, umbrellaInterval, - null, + null, null, false, 1, 10, @@ -511,10 +515,10 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT final List unusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( - DATA_SOURCE, - umbrellaInterval, - null, - null + DATA_SOURCE, + umbrellaInterval, + null, null, + null ); Assert.assertEquals(ImmutableList.of(segment2, segment3), unusedSegments); @@ -525,7 +529,7 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT null, DATA_SOURCE, umbrellaInterval, - null, + null, null, false, 1, 10, @@ -535,14 +539,115 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task2).get().getStatusCode()); final List unusedSegments2 = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + umbrellaInterval, + null, null, + null + ); + + Assert.assertEquals(ImmutableList.of(), unusedSegments2); + Assert.assertEquals(new KillTaskReport.Stats(2, 3, 0), getReportedStats()); + } + + @Test + public void testKillMultipleUnusedSegmentsWithVersionAndDifferentMaxUsedStatusLastUpdatedTime() throws Exception + { + final DateTime version = DateTimes.nowUtc(); + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); + final DataSegment segment4 = newSegment(Intervals.of("2019-04-01/2019-05-01"), version.minusHours(2).toString()); + final DataSegment segment5 = newSegment(Intervals.of("2019-04-01/2019-05-01"), version.minusHours(3).toString()); + + final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); + Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments)); + + Assert.assertEquals( + 3, + getSegmentsMetadataManager().markSegmentsAsUnused( + ImmutableSet.of(segment1.getId(), segment2.getId(), segment4.getId()) + ) + ); + + // Capture the last updated time cutoff + final DateTime maxUsedStatusLastUpdatedTime1 = DateTimes.nowUtc(); + + // Delay for 1s, mark the segments as unused and then capture the last updated time cutoff again + Thread.sleep(1000); + + Assert.assertEquals( + 2, + getSegmentsMetadataManager().markSegmentsAsUnused( + ImmutableSet.of(segment3.getId(), segment5.getId()) + ) + ); + final DateTime maxUsedStatusLastUpdatedTime2 = DateTimes.nowUtc(); + + final List segmentIntervals = segments.stream() + .map(DataSegment::getInterval) + .collect(Collectors.toList()); + + final Interval umbrellaInterval = JodaUtils.umbrellaInterval(segmentIntervals); + + final KillUnusedSegmentsTask task1 = + new KillUnusedSegmentsTask( + null, DATA_SOURCE, umbrellaInterval, + version.toString(), + null, + false, + 1, + 10, + maxUsedStatusLastUpdatedTime1 + ); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task1).get().getStatusCode()); + Assert.assertEquals( + new KillTaskReport.Stats(2, 3, 0), + getReportedStats() + ); + + final List actualUnusedSegments = + getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + umbrellaInterval, + null, null, null ); - Assert.assertEquals(ImmutableList.of(), unusedSegments2); - Assert.assertEquals(new KillTaskReport.Stats(2, 3, 0), getReportedStats()); + Assert.assertEquals(ImmutableSet.of(segment3, segment4, segment5), new HashSet<>(actualUnusedSegments)); + + final KillUnusedSegmentsTask task2 = + new KillUnusedSegmentsTask( + null, + DATA_SOURCE, + umbrellaInterval, + version.toString(), + null, + false, + 1, + 10, + maxUsedStatusLastUpdatedTime2 + ); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task2).get().getStatusCode()); + Assert.assertEquals( + new KillTaskReport.Stats(1, 2, 0), + getReportedStats() + ); + + final List actualUnusedSegments2 = + getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + umbrellaInterval, + null, + null, + null + ); + + Assert.assertEquals(ImmutableSet.of(segment4, segment5), new HashSet<>(actualUnusedSegments2)); } @Test @@ -565,6 +670,7 @@ public void testKillBatchSizeThree() throws Exception DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), null, + null, true, 3, null, @@ -576,17 +682,225 @@ public void testKillBatchSizeThree() throws Exception // we expect ALL tasks to be deleted final List unusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( - DATA_SOURCE, - Intervals.of("2019/2020"), - null, - null - ); + DATA_SOURCE, + Intervals.of("2019/2020"), + null, + null, + null + ); Assert.assertEquals(Collections.emptyList(), unusedSegments); Assert.assertEquals(new KillTaskReport.Stats(4, 3, 4), getReportedStats()); } + @Test + public void testKillVersion() throws Exception + { + final DateTime version = DateTimes.nowUtc(); + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.minusHours(1).toString()); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.minusHours(2).toString()); + final DataSegment segment4 = newSegment(Intervals.of("2019-04-01/2019-05-01"), version.minusHours(3).toString()); + + final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4); + + Assert.assertEquals( + segments, + getMetadataStorageCoordinator().commitSegments(segments) + ); + Assert.assertEquals( + segments.size(), + getSegmentsMetadataManager().markSegmentsAsUnused( + segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) + ) + ); + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( + null, + DATA_SOURCE, + Intervals.of("2018/2020"), + segment3.getVersion(), + null, + false, + 3, + null, + null + ); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + Assert.assertEquals( + new KillTaskReport.Stats(1, 2, 0), + getReportedStats() + ); + + final List unusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.of("2018/2020"), + null, + null, + null + ); + + Assert.assertEquals(Arrays.asList(segment1, segment2, segment4), unusedSegments); + } + + @Test + public void testKillMultipleSegmentsWithVersion() throws Exception + { + final DateTime version = DateTimes.nowUtc(); + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); + final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); + final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); + + final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); + + Assert.assertEquals( + segments, + getMetadataStorageCoordinator().commitSegments(segments) + ); + Assert.assertEquals( + segments.size(), + getSegmentsMetadataManager().markSegmentsAsUnused( + segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) + ) + ); + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( + null, + DATA_SOURCE, + Intervals.of("2018/2020"), + version.toString(), + null, + false, + 3, + null, + null + ); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + Assert.assertEquals( + new KillTaskReport.Stats(3, 2, 0), + getReportedStats() + ); + + final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.of("2018/2020"), + null, + null, + null + ); + + Assert.assertEquals(ImmutableSet.of(segment4, segment5), new HashSet<>(actualUnusedSegments)); + } + + @Test + public void testKillMultipleSegmentsWithVersionAndLimit() throws Exception + { + final DateTime version = DateTimes.nowUtc(); + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); + final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); + final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); + + final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); + + Assert.assertEquals( + segments, + getMetadataStorageCoordinator().commitSegments(segments) + ); + Assert.assertEquals( + segments.size(), + getSegmentsMetadataManager().markSegmentsAsUnused( + segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) + ) + ); + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( + null, + DATA_SOURCE, + Intervals.of("2018/2020"), + version.toString(), + null, + false, + 3, + 2, + null + ); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + Assert.assertEquals( + new KillTaskReport.Stats(2, 1, 0), + getReportedStats() + ); + + final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.of("2018/2020"), + null, + null, + null + ); + + Assert.assertEquals(ImmutableSet.of(segment3, segment4, segment5), new HashSet<>(actualUnusedSegments)); + } + + @Test + public void testKillWithNonExistentVersion() throws Exception + { + final DateTime version = DateTimes.nowUtc(); + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); + final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); + final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); + + final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); + + Assert.assertEquals( + segments, + getMetadataStorageCoordinator().commitSegments(segments) + ); + Assert.assertEquals( + segments.size(), + getSegmentsMetadataManager().markSegmentsAsUnused( + segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) + ) + ); + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( + null, + DATA_SOURCE, + Intervals.of("2018/2020"), + version.plusDays(100).toString(), + null, + false, + 3, + 2, + null + ); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + Assert.assertEquals( + new KillTaskReport.Stats(0, 1, 0), + getReportedStats() + ); + + final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.of("2018/2020"), + null, + null, + null + ); + + Assert.assertEquals(segments, new HashSet<>(actualUnusedSegments)); + } + @Test public void testComputeNextBatchSizeDefault() { @@ -595,7 +909,7 @@ public void testComputeNextBatchSizeDefault() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, + null, null, false, null, null, @@ -612,7 +926,7 @@ public void testComputeNextBatchSizeWithBatchSizeLargerThanLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, + null, null, false, 10, 5, @@ -629,7 +943,7 @@ public void testComputeNextBatchSizeWithBatchSizeSmallerThanLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, + null, null, false, 5, 10, @@ -646,7 +960,7 @@ public void testComputeNextBatchSizeWithRemainingLessThanLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, + null, null, false, 5, 10, @@ -663,7 +977,7 @@ public void testGetNumTotalBatchesDefault() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, + null, null, false, null, null, @@ -680,7 +994,7 @@ public void testGetNumTotalBatchesWithBatchSizeLargerThanLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, + null, null, false, 10, 5, @@ -699,7 +1013,7 @@ public void testInvalidLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, + null, null, false, 10, 0, @@ -722,7 +1036,7 @@ public void testInvalidBatchSize() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, + null, null, false, 0, 10, @@ -736,7 +1050,7 @@ public void testInvalidBatchSize() } @Test - public void testInvalidMarkAsUnusedWithLimit() + public void testInvalidLimitWithMarkAsUnused() { MatcherAssert.assertThat( Assert.assertThrows( @@ -745,7 +1059,7 @@ public void testInvalidMarkAsUnusedWithLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, + null, null, true, 10, 10, @@ -758,6 +1072,30 @@ public void testInvalidMarkAsUnusedWithLimit() ); } + @Test + public void testInvalidVersionWithMarkAsUnused() + { + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> new KillUnusedSegmentsTask( + null, + DATA_SOURCE, + Intervals.of("2018-01-01/2020-01-01"), + "foo", + null, + true, + 10, + null, + null + ) + ), + DruidExceptionMatcher.invalidInput().expectMessageIs( + "version[foo] cannot be provided when markAsUnused is enabled." + ) + ); + } + @Test public void testGetNumTotalBatchesWithBatchSizeSmallerThanLimit() { @@ -766,7 +1104,7 @@ public void testGetNumTotalBatchesWithBatchSizeSmallerThanLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, + null, null, false, 5, 10, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index 4a0707e3586f..38fd531cc5e1 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -1034,7 +1034,7 @@ public ListenableFuture> fetchUsedSegments( { return exec.submit( () -> ImmutableList.copyOf( - getStorageCoordinator().retrieveUsedSegmentsForIntervals(dataSource, intervals, Segments.ONLY_VISIBLE) + getStorageCoordinator().retrieveUsedSegmentsForIntervals(dataSource, intervals, null, Segments.ONLY_VISIBLE) ) ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java index 91c7d6a71755..a24c7245aa81 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java @@ -977,6 +977,7 @@ private void verifySegments(Interval interval, Segments visibility, DataSegment. WIKI, null, ImmutableList.of(interval), + null, visibility ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java index 6c467bb7ac7c..aa334b6f467d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java @@ -908,7 +908,7 @@ public DataSegment apply(String input) final List unusedSegments = mdc.retrieveUnusedSegmentsForInterval( "test_kill_task", Intervals.of("2011-04-01/P4D"), - null, + null, null, null ); for (DataSegment segment : unusedSegments) { @@ -923,7 +923,7 @@ public DataSegment apply(String input) null, "test_kill_task", Intervals.of("2011-04-01/P4D"), - null, + null, null, false, null, null, @@ -1005,7 +1005,7 @@ public DataSegment apply(String input) final List unusedSegments = mdc.retrieveUnusedSegmentsForInterval( "test_kill_task", Intervals.of("2011-04-01/P4D"), - null, + null, null, null ); for (DataSegment segment : unusedSegments) { @@ -1021,7 +1021,7 @@ public DataSegment apply(String input) null, "test_kill_task", Intervals.of("2011-04-01/P4D"), - null, + null, null, false, null, maxSegmentsToKill, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordResourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordResourceTest.java index 19eb1f8e7a2e..41626341bae0 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordResourceTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordResourceTest.java @@ -926,7 +926,7 @@ public void testKillTaskIsAudited() auditManager ); - Task task = new KillUnusedSegmentsTask("kill_all", "allow", Intervals.ETERNITY, null, false, 10, null, null); + Task task = new KillUnusedSegmentsTask("kill_all", "allow", Intervals.ETERNITY, null, null, false, 10, null, null); overlordResource.taskPost(task, req); Assert.assertTrue(auditEntryCapture.hasCaptured()); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index f42a300de5f3..0fa7b2df275f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -98,7 +98,7 @@ public List> retrieveUsedSegmentsAndCreatedDates(Strin public List retrieveUsedSegmentsForIntervals( String dataSource, List intervals, - Segments visibility + String version, Segments visibility ) { return ImmutableList.of(); @@ -108,7 +108,7 @@ public List retrieveUsedSegmentsForIntervals( public List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, - @Nullable Integer limit, + String version, @Nullable Integer limit, @Nullable DateTime maxUsedStatusLastUpdatedTime ) { diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java index e5656ff39752..d22b505cf350 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java +++ b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java @@ -40,6 +40,7 @@ public class ClientKillUnusedSegmentsTaskQuery implements ClientTaskQuery private final String id; private final String dataSource; private final Interval interval; + @Nullable private final String version; private final Boolean markAsUnused; private final Integer batchSize; @Nullable private final Integer limit; @@ -50,6 +51,7 @@ public ClientKillUnusedSegmentsTaskQuery( @JsonProperty("id") String id, @JsonProperty("dataSource") String dataSource, @JsonProperty("interval") Interval interval, + @JsonProperty("version") String version, @JsonProperty("markAsUnused") @Deprecated Boolean markAsUnused, @JsonProperty("batchSize") Integer batchSize, @JsonProperty("limit") @Nullable Integer limit, @@ -65,6 +67,7 @@ public ClientKillUnusedSegmentsTaskQuery( this.id = id; this.dataSource = dataSource; this.interval = interval; + this.version = version; this.markAsUnused = markAsUnused; this.batchSize = batchSize; this.limit = limit; @@ -98,6 +101,13 @@ public Interval getInterval() return interval; } + @JsonProperty + @Nullable + public String getVersion() + { + return version; + } + /** * This field has been deprecated as "kill" tasks should not be responsible for * marking segments as unused. Instead, users should call the Coordinator API @@ -146,6 +156,7 @@ public boolean equals(Object o) return Objects.equals(id, that.id) && Objects.equals(dataSource, that.dataSource) && Objects.equals(interval, that.interval) + && Objects.equals(version, that.version) && Objects.equals(markAsUnused, that.markAsUnused) && Objects.equals(batchSize, that.batchSize) && Objects.equals(limit, that.limit) @@ -155,6 +166,6 @@ public boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(id, dataSource, interval, markAsUnused, batchSize, limit, maxUsedStatusLastUpdatedTime); + return Objects.hash(id, dataSource, interval, version, markAsUnused, batchSize, limit, maxUsedStatusLastUpdatedTime); } } diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 31c975339007..13dd944f7031 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -68,7 +68,7 @@ default Collection retrieveUsedSegmentsForInterval( Segments visibility ) { - return retrieveUsedSegmentsForIntervals(dataSource, Collections.singletonList(interval), visibility); + return retrieveUsedSegmentsForIntervals(dataSource, Collections.singletonList(interval), null, visibility); } /** @@ -102,20 +102,20 @@ default Collection retrieveUsedSegmentsForInterval( /** * Retrieve all published segments which may include any data in the given intervals and are marked as used from the * metadata store. - * + *

* The order of segments within the returned collection is unspecified, but each segment is guaranteed to appear in * the collection only once. * * @param dataSource The data source to query * @param intervals The intervals for which all applicable and used segments are requested. + * @param version The segment version to retrieve in the given {@code intervals} * @param visibility Whether only visible or visible as well as overshadowed segments should be returned. The * visibility is considered within the specified intervals: that is, a segment which is visible * outside of the specified intervals, but overshadowed on the specified intervals will not be * returned if {@link Segments#ONLY_VISIBLE} is passed. See more precise description in the doc for * {@link Segments}. * @return The DataSegments which include data in the requested intervals. These segments may contain data outside the - * requested intervals. - * + * requested intervals. * @implNote This method doesn't return a {@link Set} because there may be an expectation that {@code Set.contains()} * is O(1) operation, while it's not the case for the returned collection unless it copies all segments into a new * {@link java.util.HashSet} or {@link com.google.common.collect.ImmutableSet} which may in turn be unnecessary in @@ -124,6 +124,7 @@ default Collection retrieveUsedSegmentsForInterval( Collection retrieveUsedSegmentsForIntervals( String dataSource, List intervals, + @Nullable String version, Segments visibility ); @@ -131,20 +132,21 @@ Collection retrieveUsedSegmentsForIntervals( * Retrieve all published segments which include ONLY data within the given interval and are marked as unused from the * metadata store. * - * @param dataSource The data source the segments belong to - * @param interval Filter the data segments to ones that include data in this interval exclusively. - * @param limit The maximum number of unused segments to retreive. If null, no limit is applied. + * @param dataSource The data source the segments belong to + * @param interval Filter the data segments to ones that include data in this interval exclusively. + * @param version The segment version to retrieve in the given {@code interval}. + * @param limit The maximum number of unused segments to retreive. If null, no limit is applied. * @param maxUsedStatusLastUpdatedTime The maximum {@code used_status_last_updated} time. Any unused segment in {@code interval} - * with {@code used_status_last_updated} no later than this time will be included in the - * kill task. Segments without {@code used_status_last_updated} time (due to an upgrade - * from legacy Druid) will have {@code maxUsedStatusLastUpdatedTime} ignored - * + * with {@code used_status_last_updated} no later than this time will be included in the + * kill task. Segments without {@code used_status_last_updated} time (due to an upgrade + * from legacy Druid) will have {@code maxUsedStatusLastUpdatedTime} ignored * @return DataSegments which include ONLY data within the requested interval and are marked as unused. Segments NOT * returned here may include data in the interval */ List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, + String version, @Nullable Integer limit, @Nullable DateTime maxUsedStatusLastUpdatedTime ); diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 3d8939c3e52e..72a3da73d671 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -136,19 +136,20 @@ public void start() public Collection retrieveUsedSegmentsForIntervals( final String dataSource, final List intervals, + @Nullable final String version, final Segments visibility ) { if (intervals == null || intervals.isEmpty()) { throw new IAE("null/empty intervals"); } - return doRetrieveUsedSegments(dataSource, intervals, visibility); + return doRetrieveUsedSegments(dataSource, intervals, version, visibility); } @Override public Collection retrieveAllUsedSegments(String dataSource, Segments visibility) { - return doRetrieveUsedSegments(dataSource, Collections.emptyList(), visibility); + return doRetrieveUsedSegments(dataSource, Collections.emptyList(), null, visibility); } /** @@ -157,6 +158,7 @@ public Collection retrieveAllUsedSegments(String dataSource, Segmen private Collection doRetrieveUsedSegments( final String dataSource, final List intervals, + @Nullable final String version, final Segments visibility ) { @@ -164,10 +166,10 @@ private Collection doRetrieveUsedSegments( handle -> { if (visibility == Segments.ONLY_VISIBLE) { final SegmentTimeline timeline = - getTimelineForIntervalsWithHandle(handle, dataSource, intervals); + getTimelineForIntervalsWithHandle(handle, dataSource, intervals, version); return timeline.findNonOvershadowedObjectsInInterval(Intervals.ETERNITY, Partitions.ONLY_COMPLETE); } else { - return retrieveAllUsedSegmentsForIntervalsWithHandle(handle, dataSource, intervals); + return retrieveAllUsedSegmentsForIntervalsWithHandle(handle, dataSource, intervals, version); } } ); @@ -233,6 +235,7 @@ public List> retrieveUsedSegmentsAndCreatedDates(Strin public List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, + @Nullable String version, @Nullable Integer limit, @Nullable DateTime maxUsedStatusLastUpdatedTime ) @@ -244,6 +247,7 @@ public List retrieveUnusedSegmentsForInterval( .retrieveUnusedSegments( dataSource, Collections.singletonList(interval), + version, limit, null, null, @@ -255,8 +259,8 @@ public List retrieveUnusedSegmentsForInterval( } ); - log.info("Found [%,d] unused segments for datasource[%s] in interval[%s] with maxUsedStatusLastUpdatedTime[%s].", - matchingSegments.size(), dataSource, interval, maxUsedStatusLastUpdatedTime); + log.info("Found [%,d] unused segments for datasource[%s] in interval[%s] and version[%s] with maxUsedStatusLastUpdatedTime[%s].", + matchingSegments.size(), dataSource, interval, version, maxUsedStatusLastUpdatedTime); return matchingSegments; } @@ -374,12 +378,13 @@ private Map getPendingSegmentsForIntervalWithHan private SegmentTimeline getTimelineForIntervalsWithHandle( final Handle handle, final String dataSource, - final List intervals + final List intervals, + @Nullable final String version ) throws IOException { try (final CloseableIterator iterator = SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUsedSegments(dataSource, intervals)) { + .retrieveUsedSegments(dataSource, intervals, version)) { return SegmentTimeline.forSegments(iterator); } } @@ -387,12 +392,13 @@ private SegmentTimeline getTimelineForIntervalsWithHandle( private Collection retrieveAllUsedSegmentsForIntervalsWithHandle( final Handle handle, final String dataSource, - final List intervals + final List intervals, + @Nullable final String version ) throws IOException { try (final CloseableIterator iterator = SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUsedSegments(dataSource, intervals)) { + .retrieveUsedSegments(dataSource, intervals, version)) { final List retVal = new ArrayList<>(); iterator.forEachRemaining(retVal::add); return retVal; @@ -668,7 +674,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( handle -> { // Get the time chunk and associated data segments for the given interval, if any final List> existingChunks = - getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval)) + getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval), null) .lookup(interval); if (existingChunks.size() > 1) { // Not possible to expand more than one chunk with a single segment. @@ -921,7 +927,7 @@ private Map allocatePendingSegment { // Get the time chunk and associated data segments for the given interval, if any final List> existingChunks = - getTimelineForIntervalsWithHandle(handle, dataSource, Collections.singletonList(interval)) + getTimelineForIntervalsWithHandle(handle, dataSource, Collections.singletonList(interval), null) .lookup(interval); if (existingChunks.size() > 1) { log.warn( @@ -1456,7 +1462,7 @@ private Set createNewIdsForAppendSegments( final Collection overlappingSegments = retrieveUsedSegmentsForIntervals( dataSource, new ArrayList<>(appendIntervals), - Segments.INCLUDING_OVERSHADOWED + null, Segments.INCLUDING_OVERSHADOWED ); final Map> overlappingVersionToIntervals = new HashMap<>(); diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java index 1bf8ec534a96..274e5a9dd908 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java @@ -683,12 +683,12 @@ private int doMarkAsUsedNonOvershadowedSegments(String dataSourceName, @Nullable interval == null ? Intervals.ONLY_ETERNITY : Collections.singletonList(interval); try (final CloseableIterator iterator = - queryTool.retrieveUsedSegments(dataSourceName, intervals)) { + queryTool.retrieveUsedSegments(dataSourceName, intervals, null)) { timeline.addSegments(iterator); } try (final CloseableIterator iterator = - queryTool.retrieveUnusedSegments(dataSourceName, intervals, null, null, null, null)) { + queryTool.retrieveUnusedSegments(dataSourceName, intervals, null, null, null, null, null)) { while (iterator.hasNext()) { final DataSegment dataSegment = iterator.next(); timeline.addSegments(Iterators.singletonIterator(dataSegment)); @@ -820,7 +820,7 @@ private CloseableIterator retrieveUsedSegmentsOverlappingIntervals( ) { return SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper) - .retrieveUsedSegments(dataSource, intervals); + .retrieveUsedSegments(dataSource, intervals, null); } private int markSegmentsAsUsed(final List segmentIds) @@ -996,7 +996,7 @@ public Iterable iterateAllUnusedSegmentsForDatasource( ? Intervals.ONLY_ETERNITY : Collections.singletonList(interval); try (final CloseableIterator iterator = - queryTool.retrieveUnusedSegmentsPlus(datasource, intervals, limit, lastSegmentId, sortOrder, null)) { + queryTool.retrieveUnusedSegmentsPlus(datasource, intervals, null, limit, lastSegmentId, sortOrder, null)) { return ImmutableList.copyOf(iterator); } } diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index adbfb0fda237..d2ea22f906f8 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -116,12 +116,14 @@ public static SqlSegmentsMetadataQuery forHandle( */ public CloseableIterator retrieveUsedSegments( final String dataSource, - final Collection intervals + final Collection intervals, + @Nullable final String version ) { return retrieveSegments( dataSource, intervals, + version, IntervalMode.OVERLAPS, true, null, @@ -135,27 +137,29 @@ public CloseableIterator retrieveUsedSegments( * Retrieves segments for a given datasource that are marked unused and that are *fully contained by* any interval * in a particular collection of intervals. If the collection of intervals is empty, this method will retrieve all * unused segments. - * + *

* This call does not return any information about realtime segments. * - * @param dataSource The name of the datasource - * @param intervals The intervals to search over - * @param limit The limit of segments to return - * @param lastSegmentId the last segment id from which to search for results. All segments returned are > - * this segment lexigraphically if sortOrder is null or ASC, or < this segment - * lexigraphically if sortOrder is DESC. - * @param sortOrder Specifies the order with which to return the matching segments by start time, end time. - * A null value indicates that order does not matter. + * @param dataSource The name of the datasource + * @param intervals The intervals to search over + * @param version + * @param limit The limit of segments to return + * @param lastSegmentId the last segment id from which to search for results. All segments returned are > + * this segment lexigraphically if sortOrder is null or ASC, or < this segment + * lexigraphically if sortOrder is DESC. + * @param sortOrder Specifies the order with which to return the matching segments by start time, end time. + * A null value indicates that order does not matter. * @param maxUsedStatusLastUpdatedTime The maximum {@code used_status_last_updated} time. Any unused segment in {@code intervals} - * with {@code used_status_last_updated} no later than this time will be included in the - * iterator. Segments without {@code used_status_last_updated} time (due to an upgrade - * from legacy Druid) will have {@code maxUsedStatusLastUpdatedTime} ignored - - * Returns a closeable iterator. You should close it when you are done. + * with {@code used_status_last_updated} no later than this time will be included in the + * iterator. Segments without {@code used_status_last_updated} time (due to an upgrade + * from legacy Druid) will have {@code maxUsedStatusLastUpdatedTime} ignored + *

+ * Returns a closeable iterator. You should close it when you are done. */ public CloseableIterator retrieveUnusedSegments( final String dataSource, final Collection intervals, + @Nullable final String version, @Nullable final Integer limit, @Nullable final String lastSegmentId, @Nullable final SortOrder sortOrder, @@ -165,6 +169,7 @@ public CloseableIterator retrieveUnusedSegments( return retrieveSegments( dataSource, intervals, + version, IntervalMode.CONTAINS, false, limit, @@ -199,6 +204,7 @@ public CloseableIterator retrieveUnusedSegments( public CloseableIterator retrieveUnusedSegmentsPlus( final String dataSource, final Collection intervals, + @Nullable final String version, @Nullable final Integer limit, @Nullable final String lastSegmentId, @Nullable final SortOrder sortOrder, @@ -208,6 +214,7 @@ public CloseableIterator retrieveUnusedSegmentsPlus( return retrieveSegmentsPlus( dataSource, intervals, + version, IntervalMode.CONTAINS, false, limit, @@ -305,6 +312,7 @@ public int markSegmentsUnused(final String dataSource, final Interval interval) retrieveSegments( dataSource, Collections.singletonList(interval), + null, IntervalMode.CONTAINS, true, null, @@ -444,6 +452,7 @@ public static void bindQueryIntervals(final Query> query, fi private CloseableIterator retrieveSegments( final String dataSource, final Collection intervals, + @Nullable final String version, final IntervalMode matchMode, final boolean used, @Nullable final Integer limit, @@ -454,7 +463,7 @@ private CloseableIterator retrieveSegments( { if (intervals.isEmpty() || intervals.size() <= MAX_INTERVALS_PER_BATCH) { return CloseableIterators.withEmptyBaggage( - retrieveSegmentsInIntervalsBatch(dataSource, intervals, matchMode, used, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime) + retrieveSegmentsInIntervalsBatch(dataSource, intervals, version, matchMode, used, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime) ); } else { final List> intervalsLists = Lists.partition(new ArrayList<>(intervals), MAX_INTERVALS_PER_BATCH); @@ -465,6 +474,7 @@ private CloseableIterator retrieveSegments( final UnmodifiableIterator iterator = retrieveSegmentsInIntervalsBatch( dataSource, intervalList, + version, matchMode, used, limitPerBatch, @@ -492,6 +502,7 @@ private CloseableIterator retrieveSegments( private CloseableIterator retrieveSegmentsPlus( final String dataSource, final Collection intervals, + @Nullable final String version, final IntervalMode matchMode, final boolean used, @Nullable final Integer limit, @@ -502,7 +513,7 @@ private CloseableIterator retrieveSegmentsPlus( { if (intervals.isEmpty() || intervals.size() <= MAX_INTERVALS_PER_BATCH) { return CloseableIterators.withEmptyBaggage( - retrieveSegmentsPlusInIntervalsBatch(dataSource, intervals, matchMode, used, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime) + retrieveSegmentsPlusInIntervalsBatch(dataSource, intervals, version, matchMode, used, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime) ); } else { final List> intervalsLists = Lists.partition(new ArrayList<>(intervals), MAX_INTERVALS_PER_BATCH); @@ -513,6 +524,7 @@ private CloseableIterator retrieveSegmentsPlus( final UnmodifiableIterator iterator = retrieveSegmentsPlusInIntervalsBatch( dataSource, intervalList, + version, matchMode, used, limitPerBatch, @@ -540,6 +552,7 @@ private CloseableIterator retrieveSegmentsPlus( private UnmodifiableIterator retrieveSegmentsInIntervalsBatch( final String dataSource, final Collection intervals, + @Nullable final String version, final IntervalMode matchMode, final boolean used, @Nullable final Integer limit, @@ -551,6 +564,7 @@ private UnmodifiableIterator retrieveSegmentsInIntervalsBatch( final Query> sql = buildSegmentsTableQuery( dataSource, intervals, + version, matchMode, used, limit, @@ -568,6 +582,7 @@ private UnmodifiableIterator retrieveSegmentsInIntervalsBatch( private UnmodifiableIterator retrieveSegmentsPlusInIntervalsBatch( final String dataSource, final Collection intervals, + @Nullable final String version, final IntervalMode matchMode, final boolean used, @Nullable final Integer limit, @@ -580,6 +595,7 @@ private UnmodifiableIterator retrieveSegmentsPlusInIntervalsBat final Query> sql = buildSegmentsTableQuery( dataSource, intervals, + version, matchMode, used, limit, @@ -597,6 +613,7 @@ private UnmodifiableIterator retrieveSegmentsPlusInIntervalsBat private Query> buildSegmentsTableQuery( final String dataSource, final Collection intervals, + @Nullable final String version, final IntervalMode matchMode, final boolean used, @Nullable final Integer limit, @@ -619,6 +636,10 @@ private Query> buildSegmentsTableQuery( appendConditionForIntervalsAndMatchMode(sb, intervals, matchMode, connector); } + if (version != null) { + sb.append(" AND version = :version"); + } + // Add the used_status_last_updated time filter only for unused segments when maxUsedStatusLastUpdatedTime is non-null. final boolean addMaxUsedLastUpdatedTimeFilter = !used && maxUsedStatusLastUpdatedTime != null; if (addMaxUsedLastUpdatedTimeFilter) { @@ -650,6 +671,10 @@ private Query> buildSegmentsTableQuery( .bind("used", used) .bind("dataSource", dataSource); + if (version != null) { + sql.bind("version", version); + } + if (addMaxUsedLastUpdatedTimeFilter) { sql.bind("used_status_last_updated", maxUsedStatusLastUpdatedTime.toString()); } diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java index 422803492d8e..671b9a197b89 100644 --- a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java +++ b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java @@ -95,6 +95,7 @@ default ListenableFuture runKillTask( String idPrefix, String dataSource, Interval interval, + @Nullable String version, @Nullable Integer maxSegmentsToKill, @Nullable DateTime maxUsedStatusLastUpdatedTime ) @@ -104,6 +105,7 @@ default ListenableFuture runKillTask( taskId, dataSource, interval, + version, false, null, maxSegmentsToKill, diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java index 10d6e077c536..b61ab4878aae 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java @@ -218,6 +218,7 @@ private void killUnusedSegments( TASK_ID_PREFIX, dataSource, intervalToKill, + null, maxSegmentsToKill, maxUsedStatusLastUpdatedTime ), diff --git a/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java b/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java index 1546544029e8..bfc6ce03b07e 100644 --- a/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java +++ b/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java @@ -380,7 +380,7 @@ public Response killUnusedSegmentsInInterval( final Interval theInterval = Intervals.of(interval.replace('_', '/')); try { final String killTaskId = FutureUtils.getUnchecked( - overlordClient.runKillTask("api-issued", dataSourceName, theInterval, null, null), + overlordClient.runKillTask("api-issued", dataSourceName, theInterval, null, null, null), true ); auditManager.doAudit( diff --git a/server/src/main/java/org/apache/druid/server/http/MetadataResource.java b/server/src/main/java/org/apache/druid/server/http/MetadataResource.java index c53a998e238d..b5adbf3424b7 100644 --- a/server/src/main/java/org/apache/druid/server/http/MetadataResource.java +++ b/server/src/main/java/org/apache/druid/server/http/MetadataResource.java @@ -327,7 +327,7 @@ public Response getUsedSegmentsInDataSourceForIntervals( ) { Collection segments = metadataStorageCoordinator - .retrieveUsedSegmentsForIntervals(dataSourceName, intervals, Segments.INCLUDING_OVERSHADOWED); + .retrieveUsedSegmentsForIntervals(dataSourceName, intervals, null, Segments.INCLUDING_OVERSHADOWED); Response.ResponseBuilder builder = Response.status(Response.Status.OK); if (full != null) { diff --git a/server/src/test/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQueryTest.java b/server/src/test/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQueryTest.java index 0059d048e063..d683d59ac6e7 100644 --- a/server/src/test/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQueryTest.java +++ b/server/src/test/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQueryTest.java @@ -46,6 +46,7 @@ public void setUp() "killTaskId", DATA_SOURCE, INTERVAL, + null, true, BATCH_SIZE, LIMIT, diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index da7bf4226921..086eb1a69e02 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -1068,7 +1068,7 @@ public void testMultiIntervalUsedList() throws IOException coordinator.retrieveUsedSegmentsForIntervals( defaultSegment.getDataSource(), ImmutableList.of(defaultSegment.getInterval()), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ).containsOnlyOnce(SEGMENTS.toArray(new DataSegment[0])); @@ -1076,7 +1076,7 @@ public void testMultiIntervalUsedList() throws IOException coordinator.retrieveUsedSegmentsForIntervals( defaultSegment.getDataSource(), ImmutableList.of(defaultSegment3.getInterval()), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ).containsOnlyOnce(defaultSegment3); @@ -1084,7 +1084,7 @@ public void testMultiIntervalUsedList() throws IOException coordinator.retrieveUsedSegmentsForIntervals( defaultSegment.getDataSource(), ImmutableList.of(defaultSegment.getInterval(), defaultSegment3.getInterval()), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ).containsOnlyOnce(defaultSegment, defaultSegment2, defaultSegment3); @@ -1096,7 +1096,7 @@ public void testMultiIntervalUsedList() throws IOException Intervals.of("2015-01-03T00Z/2015-01-03T05Z"), Intervals.of("2015-01-03T09Z/2015-01-04T00Z") ), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ).containsOnlyOnce(defaultSegment3); } @@ -1110,7 +1110,7 @@ public void testRetrieveUsedSegmentsUsingMultipleIntervals() throws IOException final Collection actualUsedSegments = coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, intervals, - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ); Assert.assertEquals(segments.size(), actualUsedSegments.size()); @@ -1129,7 +1129,7 @@ public void testRetrieveAllUsedSegmentsUsingIntervalsOutOfRange() throws IOExcep final Collection actualUsedSegments = coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, ImmutableList.of(outOfRangeInterval), - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ); Assert.assertEquals(0, actualUsedSegments.size()); @@ -1158,7 +1158,7 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndNoLimit() throws IOE final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - null, + null, null, null ); @@ -1176,7 +1176,7 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitAtRange() throw final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - requestedLimit, + null, requestedLimit, null ); @@ -1194,7 +1194,7 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitInRange() throw final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - requestedLimit, + null, requestedLimit, null ); @@ -1212,7 +1212,7 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitOutOfRange() th final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - limit, + null, limit, null ); Assert.assertEquals(segments.size(), actualUnusedSegments.size()); @@ -1233,7 +1233,7 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalOutOfRange() throws IOE final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, outOfRangeInterval, - limit, + null, limit, null ); Assert.assertEquals(0, actualUnusedSegments.size()); @@ -1673,7 +1673,7 @@ public void testSimpleUnusedList() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval(), - null, + null, null, null ) ) @@ -1690,7 +1690,7 @@ public void testSimpleUnusedListWithLimit() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval(), - limit, + null, limit, null ) ); @@ -1804,7 +1804,7 @@ public void testUnusedOverlapLow() throws IOException defaultSegment.getInterval().getStart().minus(1), defaultSegment.getInterval().getStart().plus(1) ), - null, + null, null, null ).isEmpty() ); @@ -1819,7 +1819,7 @@ public void testUnusedUnderlapLow() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), new Interval(defaultSegment.getInterval().getStart().plus(1), defaultSegment.getInterval().getEnd()), - null, + null, null, null ).isEmpty() ); @@ -1835,7 +1835,7 @@ public void testUnusedUnderlapHigh() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), new Interval(defaultSegment.getInterval().getStart(), defaultSegment.getInterval().getEnd().minus(1)), - null, + null, null, null ).isEmpty() ); @@ -1850,7 +1850,7 @@ public void testUnusedOverlapHigh() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval().withStart(defaultSegment.getInterval().getEnd().minus(1)), - null, + null, null, null ).isEmpty() ); @@ -1867,7 +1867,7 @@ public void testUnusedBigOverlap() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), Intervals.of("2000/2999"), - null, + null, null, null ) ) @@ -1885,7 +1885,7 @@ public void testUnusedLowRange() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval().withStart(defaultSegment.getInterval().getStart().minus(1)), - null, + null, null, null ) ) @@ -1896,7 +1896,7 @@ public void testUnusedLowRange() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval().withStart(defaultSegment.getInterval().getStart().minusYears(1)), - null, + null, null, null ) ) @@ -1914,7 +1914,7 @@ public void testUnusedHighRange() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval().withEnd(defaultSegment.getInterval().getEnd().plus(1)), - null, + null, null, null ) ) @@ -1925,7 +1925,7 @@ public void testUnusedHighRange() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval().withEnd(defaultSegment.getInterval().getEnd().plusYears(1)), - null, + null, null, null ) ) @@ -1949,7 +1949,7 @@ public void testUsedHugeTimeRangeEternityFilter() throws IOException coordinator.retrieveUsedSegmentsForIntervals( hugeTimeRangeSegment1.getDataSource(), Intervals.ONLY_ETERNITY, - Segments.ONLY_VISIBLE + null, Segments.ONLY_VISIBLE ) ) ); @@ -3167,7 +3167,7 @@ public void testMarkSegmentsAsUnusedWithinIntervalOneYear() throws IOException coordinator.retrieveUnusedSegmentsForInterval( existingSegment1.getDataSource(), existingSegment1.getInterval().withEnd(existingSegment1.getInterval().getEnd().plus(1)), - null, + null, null, null ) ) @@ -3178,7 +3178,7 @@ public void testMarkSegmentsAsUnusedWithinIntervalOneYear() throws IOException coordinator.retrieveUnusedSegmentsForInterval( existingSegment2.getDataSource(), existingSegment2.getInterval().withEnd(existingSegment2.getInterval().getEnd().plusYears(1)), - null, + null, null, null ) ) @@ -3203,7 +3203,7 @@ public void testMarkSegmentsAsUnusedWithinIntervalTwoYears() throws IOException coordinator.retrieveUnusedSegmentsForInterval( existingSegment1.getDataSource(), existingSegment1.getInterval().withEnd(existingSegment1.getInterval().getEnd().plus(1)), - null, + null, null, null ) ) @@ -3214,7 +3214,7 @@ public void testMarkSegmentsAsUnusedWithinIntervalTwoYears() throws IOException coordinator.retrieveUnusedSegmentsForInterval( existingSegment2.getDataSource(), existingSegment2.getInterval().withEnd(existingSegment2.getInterval().getEnd().plusYears(1)), - null, + null, null, null ) ) @@ -3496,7 +3496,9 @@ private ImmutableList retrieveUnusedSegments( derbyConnectorRule.metadataTablesConfigSupplier().get(), mapper ) - .retrieveUnusedSegments(DS.WIKI, intervals, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime)) { + .retrieveUnusedSegments(DS.WIKI, intervals, + null, + limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime)) { return ImmutableList.copyOf(iterator); } } @@ -3520,7 +3522,7 @@ private ImmutableList retrieveUnusedSegmentsPlus( derbyConnectorRule.metadataTablesConfigSupplier().get(), mapper ) - .retrieveUnusedSegmentsPlus(DS.WIKI, intervals, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime)) { + .retrieveUnusedSegmentsPlus(DS.WIKI, intervals, null, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime)) { return ImmutableList.copyOf(iterator); } } diff --git a/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java b/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java index cad3f21f06a2..047f94b125ca 100644 --- a/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java +++ b/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java @@ -434,6 +434,7 @@ public void test_taskPayload() throws ExecutionException, InterruptedException, null, null, null, + null, null ); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index 3c28b3aa4a92..7be4be97971e 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ -843,6 +843,7 @@ public ListenableFuture runKillTask( String idPrefix, String dataSource, Interval interval, + @Nullable String version, @Nullable Integer maxSegmentsToKill, @Nullable DateTime maxUsedStatusLastUpdatedTime ) diff --git a/server/src/test/java/org/apache/druid/server/http/DataSourcesResourceTest.java b/server/src/test/java/org/apache/druid/server/http/DataSourcesResourceTest.java index 089cd2e1d29c..79184fdf3270 100644 --- a/server/src/test/java/org/apache/druid/server/http/DataSourcesResourceTest.java +++ b/server/src/test/java/org/apache/druid/server/http/DataSourcesResourceTest.java @@ -591,7 +591,7 @@ public void testKillSegmentsInIntervalInDataSource() Interval theInterval = Intervals.of(interval.replace('_', '/')); OverlordClient overlordClient = EasyMock.createStrictMock(OverlordClient.class); - EasyMock.expect(overlordClient.runKillTask("api-issued", "datasource1", theInterval, null, null)) + EasyMock.expect(overlordClient.runKillTask("api-issued", "datasource1", theInterval, null, null, null)) .andReturn(Futures.immediateFuture("kill_task_1")); EasyMock.replay(overlordClient, server); From 3cecbd8e9f54feed94eb897b21a64cd2b7c98252 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Wed, 28 Feb 2024 17:26:33 +0800 Subject: [PATCH 02/15] Formatting changes. --- .../common/task/KillUnusedSegmentsTask.java | 29 +- .../actions/RetrieveSegmentsActionsTest.java | 48 +- .../common/task/CompactionTaskRunTest.java | 33 +- .../task/KillUnusedSegmentsTaskTest.java | 494 +++++++++--------- .../indexing/overlord/TaskLifecycleTest.java | 12 +- ...TestIndexerMetadataStorageCoordinator.java | 3 +- .../IndexerMetadataStorageCoordinator.java | 18 +- .../metadata/SqlSegmentsMetadataQuery.java | 33 +- ...exerSQLMetadataStorageCoordinatorTest.java | 85 +-- 9 files changed, 416 insertions(+), 339 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 9a997bac808f..6ac6c3d485c3 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -83,6 +83,11 @@ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask */ private static final int DEFAULT_SEGMENT_NUKE_BATCH_SIZE = 100; + /** + * The version of segments to delete in this {@link #getInterval()}. + */ + @Nullable private final String version; + @Deprecated private final boolean markAsUnused; /** @@ -102,11 +107,6 @@ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask */ @Nullable private final DateTime maxUsedStatusLastUpdatedTime; - /** - * The version of segments to delete in this {@link #getInterval()}. - */ - @Nullable private final String version; - @JsonCreator public KillUnusedSegmentsTask( @JsonProperty("id") String id, @@ -145,6 +145,14 @@ public KillUnusedSegmentsTask( this.maxUsedStatusLastUpdatedTime = maxUsedStatusLastUpdatedTime; } + + @Nullable + @JsonProperty + public String getVersion() + { + return version; + } + /** * This field has been deprecated as "kill" tasks should not be responsible for * marking segments as unused. Instead, users should call the Coordinator API @@ -167,13 +175,6 @@ public int getBatchSize() return batchSize; } - @Nullable - @JsonProperty - public String getVersion() - { - return version; - } - @Nullable @JsonProperty public Integer getLimit() @@ -253,8 +254,8 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception unusedSegments = toolbox .getTaskActionClient() .submit( - new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), getVersion(), nextBatchSize, maxUsedStatusLastUpdatedTime - )); + new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), getVersion(), nextBatchSize, maxUsedStatusLastUpdatedTime) + ); // Fetch locks each time as a revokal could have occurred in between batches final NavigableMap> taskLockMap diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java index 5d80a4746445..b14b70df4110 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java @@ -41,8 +41,8 @@ public class RetrieveSegmentsActionsTest { private static final Interval INTERVAL = Intervals.of("2017-10-01/2017-10-15"); - private static final String VERSION_1 = "1"; - private static final String VERSION_2 = "2"; + private static final String VERSION_UNUSED = "1"; + private static final String VERSION_USED = "2"; @ClassRule public static TaskActionTestKit actionTestKit = new TaskActionTestKit(); @@ -59,9 +59,9 @@ public static void setup() throws IOException actionTestKit.getTaskLockbox().add(task); expectedUnusedSegments = new HashSet<>(); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), VERSION_1)); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), VERSION_1)); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), VERSION_1)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), VERSION_UNUSED)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), VERSION_UNUSED)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), VERSION_UNUSED)); actionTestKit.getMetadataStorageCoordinator() .commitSegments(expectedUnusedSegments); @@ -69,9 +69,9 @@ public static void setup() throws IOException expectedUnusedSegments.forEach(s -> actionTestKit.getTaskLockbox().unlock(task, s.getInterval())); expectedUsedSegments = new HashSet<>(); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), VERSION_2)); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), VERSION_2)); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), VERSION_2)); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), VERSION_USED)); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), VERSION_USED)); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), VERSION_USED)); actionTestKit.getMetadataStorageCoordinator() .commitSegments(expectedUsedSegments); @@ -106,37 +106,37 @@ public void testRetrieveUsedSegmentsAction() } @Test - public void testRetrieveUsedSegmentsActionWithValidVersionIncludingOvershadowed() + public void testRetrieveUsedSegmentsActionWithValidVersionVisibleOnly() { final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_2, Segments.INCLUDING_OVERSHADOWED); + new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_USED, Segments.ONLY_VISIBLE); final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); Assert.assertEquals(expectedUsedSegments, resultSegments); } @Test - public void testRetrieveUsedSegmentsActionWithNonExistentVersionVisibleOnly() + public void testRetrieveUsedSegmentsActionWithValidVersionIncludingOvershadowed() { final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_1, Segments.ONLY_VISIBLE); + new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_USED, Segments.INCLUDING_OVERSHADOWED); final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(ImmutableSet.of(), resultSegments); + Assert.assertEquals(expectedUsedSegments, resultSegments); } @Test - public void testRetrieveUsedSegmentsActionWithValidVersionVisibleOnly() + public void testRetrieveUsedSegmentsActionWithNonExistentVersionVisibleOnly() { final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_2, Segments.ONLY_VISIBLE); + new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_UNUSED, Segments.ONLY_VISIBLE); final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUsedSegments, resultSegments); + Assert.assertEquals(ImmutableSet.of(), resultSegments); } @Test public void testRetrieveUsedSegmentsActionWithNonExistentVersionIncludingOvershadowed() { final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_1, Segments.INCLUDING_OVERSHADOWED); + new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_UNUSED, Segments.INCLUDING_OVERSHADOWED); final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); Assert.assertEquals(ImmutableSet.of(), resultSegments); } @@ -144,25 +144,27 @@ public void testRetrieveUsedSegmentsActionWithNonExistentVersionIncludingOversha @Test public void testRetrieveUnusedSegmentsActionWithValidVersion() { - final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, null, null); + final RetrieveUnusedSegmentsAction action = + new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, VERSION_UNUSED, null, null); final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); Assert.assertEquals(expectedUnusedSegments, resultSegments); } @Test - public void testRetrieveUnusedSegmentsActionWithMinUsedLastUpdatedTime() + public void testRetrieveUnusedSegmentsActionWithNonExistentVersion() { - final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, null, DateTimes.MIN); + final RetrieveUnusedSegmentsAction action = + new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, VERSION_USED, null, null); final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); Assert.assertEquals(ImmutableSet.of(), resultSegments); } @Test - public void testRetrieveUnusedSegmentsActionWithVersion() + public void testRetrieveUnusedSegmentsActionWithMinUsedLastUpdatedTime() { - final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, VERSION_1, null, null); + final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, null, DateTimes.MIN); final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUnusedSegments, resultSegments); + Assert.assertEquals(ImmutableSet.of(), resultSegments); } @Test diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java index 0bda569ac0d0..32e260d918d2 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java @@ -1119,7 +1119,8 @@ public void testCompactThenAppend() throws Exception getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ); @@ -1185,7 +1186,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T00:00:00/2014-01-01T01:00:00")), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ); // add 2 unchanged segments for hour 02: @@ -1193,7 +1195,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T02:00:00/2014-01-01T03:00:00")), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ); expectedSegments.addAll(partialCompactionResult.rhs); @@ -1205,7 +1208,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ); Assert.assertEquals(expectedSegments, segmentsAfterPartialCompaction); @@ -1246,7 +1250,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ); segmentsAfterFullCompaction.sort( @@ -1341,7 +1346,8 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T00:00:00/2014-01-01T01:00:00")), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ); // add 2 unchanged segments for hour 02: @@ -1349,7 +1355,8 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T02:00:00/2014-01-01T03:00:00")), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ); expectedSegments.addAll(partialCompactionResult.rhs); @@ -1361,7 +1368,8 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ); Assert.assertEquals(expectedSegments, segmentsAfterPartialCompaction); @@ -1410,7 +1418,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ); @@ -1436,7 +1445,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ); @@ -1455,7 +1465,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ); segmentsAfterFullCompaction.sort( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java index 0dcac0480bbc..bb02b3702155 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java @@ -91,7 +91,8 @@ public void testKill() throws Exception null, DATA_SOURCE, Intervals.of("2019-03-01/2019-04-01"), - null, null, + null, + null, false, null, null, @@ -103,7 +104,8 @@ public void testKill() throws Exception final List unusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, Intervals.of("2019/2020"), - null, null, + null, + null, null ); @@ -146,7 +148,8 @@ public void testKillWithMarkUnused() throws Exception null, DATA_SOURCE, Intervals.of("2019-03-01/2019-04-01"), - null, null, + null, + null, true, null, null, @@ -159,7 +162,8 @@ public void testKillWithMarkUnused() throws Exception getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, Intervals.of("2019/2020"), - null, null, + null, + null, null ); @@ -175,6 +179,214 @@ public void testKillWithMarkUnused() throws Exception Assert.assertEquals(new KillTaskReport.Stats(1, 2, 1), getReportedStats()); } + + @Test + public void testKillVersion() throws Exception + { + final DateTime version = DateTimes.nowUtc(); + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.minusHours(1).toString()); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.minusHours(2).toString()); + final DataSegment segment4 = newSegment(Intervals.of("2019-04-01/2019-05-01"), version.minusHours(3).toString()); + + final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4); + + Assert.assertEquals( + segments, + getMetadataStorageCoordinator().commitSegments(segments) + ); + Assert.assertEquals( + segments.size(), + getSegmentsMetadataManager().markSegmentsAsUnused( + segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) + ) + ); + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( + null, + DATA_SOURCE, + Intervals.of("2018/2020"), + segment3.getVersion(), + null, + false, + 3, + null, + null + ); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + Assert.assertEquals( + new KillTaskReport.Stats(1, 2, 0), + getReportedStats() + ); + + final List unusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.of("2018/2020"), + null, + null, + null + ); + + Assert.assertEquals(Arrays.asList(segment1, segment2, segment4), unusedSegments); + } + + @Test + public void testKillMultipleSegmentsWithVersion() throws Exception + { + final DateTime version = DateTimes.nowUtc(); + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); + final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); + final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); + + final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); + + Assert.assertEquals( + segments, + getMetadataStorageCoordinator().commitSegments(segments) + ); + Assert.assertEquals( + segments.size(), + getSegmentsMetadataManager().markSegmentsAsUnused( + segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) + ) + ); + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( + null, + DATA_SOURCE, + Intervals.of("2018/2020"), + version.toString(), + null, + false, + 3, + null, + null + ); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + Assert.assertEquals( + new KillTaskReport.Stats(3, 2, 0), + getReportedStats() + ); + + final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.of("2018/2020"), + null, + null, + null + ); + + Assert.assertEquals(ImmutableSet.of(segment4, segment5), new HashSet<>(actualUnusedSegments)); + } + + @Test + public void testKillMultipleSegmentsWithVersionAndLimit() throws Exception + { + final DateTime version = DateTimes.nowUtc(); + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); + final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); + final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); + + final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); + + Assert.assertEquals( + segments, + getMetadataStorageCoordinator().commitSegments(segments) + ); + Assert.assertEquals( + segments.size(), + getSegmentsMetadataManager().markSegmentsAsUnused( + segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) + ) + ); + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( + null, + DATA_SOURCE, + Intervals.of("2018/2020"), + version.toString(), + null, + false, + 3, + 2, + null + ); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + Assert.assertEquals( + new KillTaskReport.Stats(2, 1, 0), + getReportedStats() + ); + + final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.of("2018/2020"), + null, + null, + null + ); + + Assert.assertEquals(ImmutableSet.of(segment3, segment4, segment5), new HashSet<>(actualUnusedSegments)); + } + + @Test + public void testKillWithNonExistentVersion() throws Exception + { + final DateTime version = DateTimes.nowUtc(); + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); + final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); + final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); + + final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); + + Assert.assertEquals( + segments, + getMetadataStorageCoordinator().commitSegments(segments) + ); + Assert.assertEquals( + segments.size(), + getSegmentsMetadataManager().markSegmentsAsUnused( + segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) + ) + ); + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( + null, + DATA_SOURCE, + Intervals.of("2018/2020"), + version.plusDays(100).toString(), + null, + false, + 3, + 2, + null + ); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + Assert.assertEquals( + new KillTaskReport.Stats(0, 1, 0), + getReportedStats() + ); + + final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.of("2018/2020"), + null, + null, + null + ); + + Assert.assertEquals(segments, new HashSet<>(actualUnusedSegments)); + } + @Test public void testGetInputSourceResources() { @@ -183,7 +395,8 @@ public void testGetInputSourceResources() null, DATA_SOURCE, Intervals.of("2019-03-01/2019-04-01"), - null, null, + null, + null, true, null, null, @@ -219,7 +432,8 @@ public void testKillBatchSizeOneAndLimit4() throws Exception null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, null, + null, + null, false, 1, 4, @@ -234,7 +448,8 @@ public void testKillBatchSizeOneAndLimit4() throws Exception getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, Intervals.of("2019/2020"), - null, null, + null, + null, null ); @@ -297,7 +512,8 @@ public void testKillMultipleUnusedSegmentsWithNullMaxUsedStatusLastUpdatedTime() null, DATA_SOURCE, umbrellaInterval, - null, null, + null, + null, false, 1, 10, @@ -390,7 +606,8 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT null, DATA_SOURCE, umbrellaInterval, - null, null, + null, + null, false, 1, 10, @@ -416,7 +633,8 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT null, DATA_SOURCE, umbrellaInterval, - null, null, + null, + null, false, 1, 10, @@ -429,7 +647,8 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, umbrellaInterval, - null, null, + null, + null, null ); @@ -504,7 +723,8 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT null, DATA_SOURCE, umbrellaInterval, - null, null, + null, + null, false, 1, 10, @@ -517,7 +737,8 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, umbrellaInterval, - null, null, + null, + null, null ); @@ -529,7 +750,8 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT null, DATA_SOURCE, umbrellaInterval, - null, null, + null, + null, false, 1, 10, @@ -541,16 +763,17 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT final List unusedSegments2 = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, umbrellaInterval, - null, null, + null, + null, null - ); + ); Assert.assertEquals(ImmutableList.of(), unusedSegments2); Assert.assertEquals(new KillTaskReport.Stats(2, 3, 0), getReportedStats()); } @Test - public void testKillMultipleUnusedSegmentsWithVersionAndDifferentMaxUsedStatusLastUpdatedTime() throws Exception + public void testKillMultipleUnusedSegmentsWithVersionAndDifferentLastUpdatedTime() throws Exception { final DateTime version = DateTimes.nowUtc(); final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); @@ -694,213 +917,6 @@ public void testKillBatchSizeThree() throws Exception Assert.assertEquals(new KillTaskReport.Stats(4, 3, 4), getReportedStats()); } - @Test - public void testKillVersion() throws Exception - { - final DateTime version = DateTimes.nowUtc(); - final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); - final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.minusHours(1).toString()); - final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.minusHours(2).toString()); - final DataSegment segment4 = newSegment(Intervals.of("2019-04-01/2019-05-01"), version.minusHours(3).toString()); - - final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4); - - Assert.assertEquals( - segments, - getMetadataStorageCoordinator().commitSegments(segments) - ); - Assert.assertEquals( - segments.size(), - getSegmentsMetadataManager().markSegmentsAsUnused( - segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) - ) - ); - - final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( - null, - DATA_SOURCE, - Intervals.of("2018/2020"), - segment3.getVersion(), - null, - false, - 3, - null, - null - ); - - Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); - Assert.assertEquals( - new KillTaskReport.Stats(1, 2, 0), - getReportedStats() - ); - - final List unusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( - DATA_SOURCE, - Intervals.of("2018/2020"), - null, - null, - null - ); - - Assert.assertEquals(Arrays.asList(segment1, segment2, segment4), unusedSegments); - } - - @Test - public void testKillMultipleSegmentsWithVersion() throws Exception - { - final DateTime version = DateTimes.nowUtc(); - final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); - final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); - final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); - final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); - final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); - - final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); - - Assert.assertEquals( - segments, - getMetadataStorageCoordinator().commitSegments(segments) - ); - Assert.assertEquals( - segments.size(), - getSegmentsMetadataManager().markSegmentsAsUnused( - segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) - ) - ); - - final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( - null, - DATA_SOURCE, - Intervals.of("2018/2020"), - version.toString(), - null, - false, - 3, - null, - null - ); - - Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); - Assert.assertEquals( - new KillTaskReport.Stats(3, 2, 0), - getReportedStats() - ); - - final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( - DATA_SOURCE, - Intervals.of("2018/2020"), - null, - null, - null - ); - - Assert.assertEquals(ImmutableSet.of(segment4, segment5), new HashSet<>(actualUnusedSegments)); - } - - @Test - public void testKillMultipleSegmentsWithVersionAndLimit() throws Exception - { - final DateTime version = DateTimes.nowUtc(); - final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); - final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); - final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); - final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); - final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); - - final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); - - Assert.assertEquals( - segments, - getMetadataStorageCoordinator().commitSegments(segments) - ); - Assert.assertEquals( - segments.size(), - getSegmentsMetadataManager().markSegmentsAsUnused( - segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) - ) - ); - - final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( - null, - DATA_SOURCE, - Intervals.of("2018/2020"), - version.toString(), - null, - false, - 3, - 2, - null - ); - - Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); - Assert.assertEquals( - new KillTaskReport.Stats(2, 1, 0), - getReportedStats() - ); - - final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( - DATA_SOURCE, - Intervals.of("2018/2020"), - null, - null, - null - ); - - Assert.assertEquals(ImmutableSet.of(segment3, segment4, segment5), new HashSet<>(actualUnusedSegments)); - } - - @Test - public void testKillWithNonExistentVersion() throws Exception - { - final DateTime version = DateTimes.nowUtc(); - final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); - final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); - final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); - final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); - final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); - - final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); - - Assert.assertEquals( - segments, - getMetadataStorageCoordinator().commitSegments(segments) - ); - Assert.assertEquals( - segments.size(), - getSegmentsMetadataManager().markSegmentsAsUnused( - segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) - ) - ); - - final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( - null, - DATA_SOURCE, - Intervals.of("2018/2020"), - version.plusDays(100).toString(), - null, - false, - 3, - 2, - null - ); - - Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); - Assert.assertEquals( - new KillTaskReport.Stats(0, 1, 0), - getReportedStats() - ); - - final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( - DATA_SOURCE, - Intervals.of("2018/2020"), - null, - null, - null - ); - - Assert.assertEquals(segments, new HashSet<>(actualUnusedSegments)); - } - @Test public void testComputeNextBatchSizeDefault() { @@ -909,7 +925,8 @@ public void testComputeNextBatchSizeDefault() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, null, + null, + null, false, null, null, @@ -926,7 +943,8 @@ public void testComputeNextBatchSizeWithBatchSizeLargerThanLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, null, + null, + null, false, 10, 5, @@ -943,7 +961,8 @@ public void testComputeNextBatchSizeWithBatchSizeSmallerThanLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, null, + null, + null, false, 5, 10, @@ -960,7 +979,8 @@ public void testComputeNextBatchSizeWithRemainingLessThanLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, null, + null, + null, false, 5, 10, @@ -977,7 +997,8 @@ public void testGetNumTotalBatchesDefault() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, null, + null, + null, false, null, null, @@ -994,7 +1015,8 @@ public void testGetNumTotalBatchesWithBatchSizeLargerThanLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, null, + null, + null, false, 10, 5, @@ -1013,7 +1035,8 @@ public void testInvalidLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, null, + null, + null, false, 10, 0, @@ -1036,7 +1059,8 @@ public void testInvalidBatchSize() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, null, + null, + null, false, 0, 10, @@ -1059,7 +1083,8 @@ public void testInvalidLimitWithMarkAsUnused() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, null, + null, + null, true, 10, 10, @@ -1104,7 +1129,8 @@ public void testGetNumTotalBatchesWithBatchSizeSmallerThanLimit() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - null, null, + null, + null, false, 5, 10, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java index aa334b6f467d..f7ba8052211c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java @@ -908,7 +908,8 @@ public DataSegment apply(String input) final List unusedSegments = mdc.retrieveUnusedSegmentsForInterval( "test_kill_task", Intervals.of("2011-04-01/P4D"), - null, null, + null, + null, null ); for (DataSegment segment : unusedSegments) { @@ -923,7 +924,8 @@ public DataSegment apply(String input) null, "test_kill_task", Intervals.of("2011-04-01/P4D"), - null, null, + null, + null, false, null, null, @@ -1005,7 +1007,8 @@ public DataSegment apply(String input) final List unusedSegments = mdc.retrieveUnusedSegmentsForInterval( "test_kill_task", Intervals.of("2011-04-01/P4D"), - null, null, + null, + null, null ); for (DataSegment segment : unusedSegments) { @@ -1021,7 +1024,8 @@ public DataSegment apply(String input) null, "test_kill_task", Intervals.of("2011-04-01/P4D"), - null, null, + null, + null, false, null, maxSegmentsToKill, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 0fa7b2df275f..dcd76cb3cc08 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -108,7 +108,8 @@ public List retrieveUsedSegmentsForIntervals( public List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, - String version, @Nullable Integer limit, + @Nullable String version, + @Nullable Integer limit, @Nullable DateTime maxUsedStatusLastUpdatedTime ) { diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 13dd944f7031..a01243b5944e 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -105,17 +105,20 @@ default Collection retrieveUsedSegmentsForInterval( *

* The order of segments within the returned collection is unspecified, but each segment is guaranteed to appear in * the collection only once. + *

* * @param dataSource The data source to query * @param intervals The intervals for which all applicable and used segments are requested. - * @param version The segment version to retrieve in the given {@code intervals} + * @param version An optional version of segments to retrieve in the given {@code intervals}. If unspecified, + * all versions of used segments in the {@code intervals} must be retrieved. * @param visibility Whether only visible or visible as well as overshadowed segments should be returned. The * visibility is considered within the specified intervals: that is, a segment which is visible * outside of the specified intervals, but overshadowed on the specified intervals will not be * returned if {@link Segments#ONLY_VISIBLE} is passed. See more precise description in the doc for * {@link Segments}. * @return The DataSegments which include data in the requested intervals. These segments may contain data outside the - * requested intervals. + * requested intervals. + * * @implNote This method doesn't return a {@link Set} because there may be an expectation that {@code Set.contains()} * is O(1) operation, while it's not the case for the returned collection unless it copies all segments into a new * {@link java.util.HashSet} or {@link com.google.common.collect.ImmutableSet} which may in turn be unnecessary in @@ -132,10 +135,11 @@ Collection retrieveUsedSegmentsForIntervals( * Retrieve all published segments which include ONLY data within the given interval and are marked as unused from the * metadata store. * - * @param dataSource The data source the segments belong to - * @param interval Filter the data segments to ones that include data in this interval exclusively. - * @param version The segment version to retrieve in the given {@code interval}. - * @param limit The maximum number of unused segments to retreive. If null, no limit is applied. + * @param dataSource The data source the segments belong to + * @param interval Filter the data segments to ones that include data in this interval exclusively. + * @param version An optional version of segments to retrieve in the given {@code interval}. If unspecified, all + * versions of unused segments in the {@code interval} must be retrieved. + * @param limit The maximum number of unused segments to retreive. If null, no limit is applied. * @param maxUsedStatusLastUpdatedTime The maximum {@code used_status_last_updated} time. Any unused segment in {@code interval} * with {@code used_status_last_updated} no later than this time will be included in the * kill task. Segments without {@code used_status_last_updated} time (due to an upgrade @@ -146,7 +150,7 @@ Collection retrieveUsedSegmentsForIntervals( List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, - String version, + @Nullable String version, @Nullable Integer limit, @Nullable DateTime maxUsedStatusLastUpdatedTime ); diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index d2ea22f906f8..09273ffa8cc3 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -112,7 +112,7 @@ public static SqlSegmentsMetadataQuery forHandle( * * This call does not return any information about realtime segments. * - * Returns a closeable iterator. You should close it when you are done. + * @return a closeable iterator. You should close it when you are done. */ public CloseableIterator retrieveUsedSegments( final String dataSource, @@ -139,22 +139,25 @@ public CloseableIterator retrieveUsedSegments( * unused segments. *

* This call does not return any information about realtime segments. + *

* - * @param dataSource The name of the datasource - * @param intervals The intervals to search over - * @param version - * @param limit The limit of segments to return - * @param lastSegmentId the last segment id from which to search for results. All segments returned are > - * this segment lexigraphically if sortOrder is null or ASC, or < this segment - * lexigraphically if sortOrder is DESC. - * @param sortOrder Specifies the order with which to return the matching segments by start time, end time. - * A null value indicates that order does not matter. + * @param dataSource The name of the datasource + * @param intervals The intervals to search over + * @param version An optional version of unused segments to retrieve in the given {@code intervals}. + * If unspecified, all versions of unused segments in the {@code intervals} must be retrieved. + * @param limit The limit of segments to return + * @param lastSegmentId the last segment id from which to search for results. All segments returned are > + * this segment lexigraphically if sortOrder is null or ASC, or < this segment + * lexigraphically if sortOrder is DESC. + * @param sortOrder Specifies the order with which to return the matching segments by start time, end time. + * A null value indicates that order does not matter. * @param maxUsedStatusLastUpdatedTime The maximum {@code used_status_last_updated} time. Any unused segment in {@code intervals} * with {@code used_status_last_updated} no later than this time will be included in the * iterator. Segments without {@code used_status_last_updated} time (due to an upgrade * from legacy Druid) will have {@code maxUsedStatusLastUpdatedTime} ignored - *

- * Returns a closeable iterator. You should close it when you are done. + * + * @return a closeable iterator. You should close it when you are done. + * */ public CloseableIterator retrieveUnusedSegments( final String dataSource, @@ -199,7 +202,7 @@ public CloseableIterator retrieveUnusedSegments( * iterator. Segments without {@code used_status_last_updated} time (due to an upgrade * from legacy Druid) will have {@code maxUsedStatusLastUpdatedTime} ignored - * Returns a closeable iterator. You should close it when you are done. + * @return a closeable iterator. You should close it when you are done. */ public CloseableIterator retrieveUnusedSegmentsPlus( final String dataSource, @@ -231,7 +234,7 @@ public CloseableIterator retrieveUnusedSegmentsPlus( * 1) ensure that the caller passes only used segments to this method when marking them as unused. * 2) Similarly, please try to call this method only on unused segments when marking segments as used with this method. * - * Returns the number of segments actually modified. + * @return the number of segments actually modified. */ public int markSegments(final Collection segmentIds, final boolean used) { @@ -268,7 +271,7 @@ public int markSegments(final Collection segmentIds, final boolean us /** * Marks all used segments that are *fully contained by* a particular interval as unused. * - * Returns the number of segments actually modified. + * @return the number of segments actually modified. */ public int markSegmentsUnused(final String dataSource, final Interval interval) { diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 086eb1a69e02..ca17580361bc 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -1068,7 +1068,8 @@ public void testMultiIntervalUsedList() throws IOException coordinator.retrieveUsedSegmentsForIntervals( defaultSegment.getDataSource(), ImmutableList.of(defaultSegment.getInterval()), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ).containsOnlyOnce(SEGMENTS.toArray(new DataSegment[0])); @@ -1076,7 +1077,8 @@ public void testMultiIntervalUsedList() throws IOException coordinator.retrieveUsedSegmentsForIntervals( defaultSegment.getDataSource(), ImmutableList.of(defaultSegment3.getInterval()), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ).containsOnlyOnce(defaultSegment3); @@ -1084,7 +1086,8 @@ public void testMultiIntervalUsedList() throws IOException coordinator.retrieveUsedSegmentsForIntervals( defaultSegment.getDataSource(), ImmutableList.of(defaultSegment.getInterval(), defaultSegment3.getInterval()), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ).containsOnlyOnce(defaultSegment, defaultSegment2, defaultSegment3); @@ -1096,7 +1099,8 @@ public void testMultiIntervalUsedList() throws IOException Intervals.of("2015-01-03T00Z/2015-01-03T05Z"), Intervals.of("2015-01-03T09Z/2015-01-04T00Z") ), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ).containsOnlyOnce(defaultSegment3); } @@ -1110,7 +1114,8 @@ public void testRetrieveUsedSegmentsUsingMultipleIntervals() throws IOException final Collection actualUsedSegments = coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, intervals, - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ); Assert.assertEquals(segments.size(), actualUsedSegments.size()); @@ -1129,7 +1134,8 @@ public void testRetrieveAllUsedSegmentsUsingIntervalsOutOfRange() throws IOExcep final Collection actualUsedSegments = coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, ImmutableList.of(outOfRangeInterval), - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ); Assert.assertEquals(0, actualUsedSegments.size()); @@ -1158,7 +1164,8 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndNoLimit() throws IOE final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - null, null, + null, + null, null ); @@ -1176,7 +1183,8 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitAtRange() throw final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - null, requestedLimit, + null, + requestedLimit, null ); @@ -1194,7 +1202,8 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitInRange() throw final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - null, requestedLimit, + null, + requestedLimit, null ); @@ -1212,7 +1221,8 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitOutOfRange() th final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - null, limit, + null, + limit, null ); Assert.assertEquals(segments.size(), actualUnusedSegments.size()); @@ -1233,7 +1243,8 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalOutOfRange() throws IOE final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, outOfRangeInterval, - null, limit, + null, + limit, null ); Assert.assertEquals(0, actualUnusedSegments.size()); @@ -1673,7 +1684,8 @@ public void testSimpleUnusedList() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval(), - null, null, + null, + null, null ) ) @@ -1690,7 +1702,8 @@ public void testSimpleUnusedListWithLimit() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval(), - null, limit, + null, + limit, null ) ); @@ -1804,7 +1817,8 @@ public void testUnusedOverlapLow() throws IOException defaultSegment.getInterval().getStart().minus(1), defaultSegment.getInterval().getStart().plus(1) ), - null, null, + null, + null, null ).isEmpty() ); @@ -1819,7 +1833,8 @@ public void testUnusedUnderlapLow() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), new Interval(defaultSegment.getInterval().getStart().plus(1), defaultSegment.getInterval().getEnd()), - null, null, + null, + null, null ).isEmpty() ); @@ -1835,7 +1850,8 @@ public void testUnusedUnderlapHigh() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), new Interval(defaultSegment.getInterval().getStart(), defaultSegment.getInterval().getEnd().minus(1)), - null, null, + null, + null, null ).isEmpty() ); @@ -1850,7 +1866,8 @@ public void testUnusedOverlapHigh() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval().withStart(defaultSegment.getInterval().getEnd().minus(1)), - null, null, + null, + null, null ).isEmpty() ); @@ -1867,7 +1884,8 @@ public void testUnusedBigOverlap() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), Intervals.of("2000/2999"), - null, null, + null, + null, null ) ) @@ -1885,7 +1903,8 @@ public void testUnusedLowRange() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval().withStart(defaultSegment.getInterval().getStart().minus(1)), - null, null, + null, + null, null ) ) @@ -1896,7 +1915,8 @@ public void testUnusedLowRange() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval().withStart(defaultSegment.getInterval().getStart().minusYears(1)), - null, null, + null, + null, null ) ) @@ -1914,7 +1934,8 @@ public void testUnusedHighRange() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval().withEnd(defaultSegment.getInterval().getEnd().plus(1)), - null, null, + null, + null, null ) ) @@ -1925,7 +1946,8 @@ public void testUnusedHighRange() throws IOException coordinator.retrieveUnusedSegmentsForInterval( defaultSegment.getDataSource(), defaultSegment.getInterval().withEnd(defaultSegment.getInterval().getEnd().plusYears(1)), - null, null, + null, + null, null ) ) @@ -1949,7 +1971,8 @@ public void testUsedHugeTimeRangeEternityFilter() throws IOException coordinator.retrieveUsedSegmentsForIntervals( hugeTimeRangeSegment1.getDataSource(), Intervals.ONLY_ETERNITY, - null, Segments.ONLY_VISIBLE + null, + Segments.ONLY_VISIBLE ) ) ); @@ -3167,7 +3190,8 @@ public void testMarkSegmentsAsUnusedWithinIntervalOneYear() throws IOException coordinator.retrieveUnusedSegmentsForInterval( existingSegment1.getDataSource(), existingSegment1.getInterval().withEnd(existingSegment1.getInterval().getEnd().plus(1)), - null, null, + null, + null, null ) ) @@ -3178,7 +3202,8 @@ public void testMarkSegmentsAsUnusedWithinIntervalOneYear() throws IOException coordinator.retrieveUnusedSegmentsForInterval( existingSegment2.getDataSource(), existingSegment2.getInterval().withEnd(existingSegment2.getInterval().getEnd().plusYears(1)), - null, null, + null, + null, null ) ) @@ -3203,7 +3228,8 @@ public void testMarkSegmentsAsUnusedWithinIntervalTwoYears() throws IOException coordinator.retrieveUnusedSegmentsForInterval( existingSegment1.getDataSource(), existingSegment1.getInterval().withEnd(existingSegment1.getInterval().getEnd().plus(1)), - null, null, + null, + null, null ) ) @@ -3214,7 +3240,8 @@ public void testMarkSegmentsAsUnusedWithinIntervalTwoYears() throws IOException coordinator.retrieveUnusedSegmentsForInterval( existingSegment2.getDataSource(), existingSegment2.getInterval().withEnd(existingSegment2.getInterval().getEnd().plusYears(1)), - null, null, + null, + null, null ) ) @@ -3496,9 +3523,7 @@ private ImmutableList retrieveUnusedSegments( derbyConnectorRule.metadataTablesConfigSupplier().get(), mapper ) - .retrieveUnusedSegments(DS.WIKI, intervals, - null, - limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime)) { + .retrieveUnusedSegments(DS.WIKI, intervals, null, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime)) { return ImmutableList.copyOf(iterator); } } From 1970e529608f82e43a4fa1d1e0a1d7144ea6159b Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Wed, 28 Feb 2024 22:14:40 +0800 Subject: [PATCH 03/15] Multi version tests in RetrieveSegmentsActionsTest Sort of like method-level parameterized tests. --- .../actions/RetrieveSegmentsActionsTest.java | 112 +++++++++--------- 1 file changed, 55 insertions(+), 57 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java index b14b70df4110..2dd439183afe 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java @@ -35,14 +35,22 @@ import org.junit.Test; import java.io.IOException; +import java.util.Arrays; import java.util.HashSet; +import java.util.List; import java.util.Set; +import java.util.stream.Collectors; public class RetrieveSegmentsActionsTest { private static final Interval INTERVAL = Intervals.of("2017-10-01/2017-10-15"); - private static final String VERSION_UNUSED = "1"; - private static final String VERSION_USED = "2"; + private static final String UNUSED_V1 = "unused_v1"; + private static final String UNUSED_V2 = "unused_v2"; + + private static final String USED_V1 = "used_v1"; + private static final String USED_V2 = "used_v2"; + + private static final List VERSIONS = Arrays.asList(USED_V1, USED_V2, UNUSED_V1, UNUSED_V2); @ClassRule public static TaskActionTestKit actionTestKit = new TaskActionTestKit(); @@ -59,9 +67,11 @@ public static void setup() throws IOException actionTestKit.getTaskLockbox().add(task); expectedUnusedSegments = new HashSet<>(); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), VERSION_UNUSED)); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), VERSION_UNUSED)); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), VERSION_UNUSED)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), UNUSED_V1)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), UNUSED_V1)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), UNUSED_V1)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), UNUSED_V2)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), UNUSED_V2)); actionTestKit.getMetadataStorageCoordinator() .commitSegments(expectedUnusedSegments); @@ -69,9 +79,9 @@ public static void setup() throws IOException expectedUnusedSegments.forEach(s -> actionTestKit.getTaskLockbox().unlock(task, s.getInterval())); expectedUsedSegments = new HashSet<>(); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), VERSION_USED)); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), VERSION_USED)); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), VERSION_USED)); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), USED_V1)); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), USED_V2)); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), USED_V2)); actionTestKit.getMetadataStorageCoordinator() .commitSegments(expectedUsedSegments); @@ -91,7 +101,7 @@ private static DataSegment createSegment(Interval interval, String version) ImmutableList.of("dim1", "dim2"), ImmutableList.of("met1", "met2"), NoneShardSpec.instance(), - Integer.valueOf(version), + 9, 1 ); } @@ -101,77 +111,65 @@ public void testRetrieveUsedSegmentsAction() { final RetrieveUsedSegmentsAction action = new RetrieveUsedSegmentsAction(task.getDataSource(), ImmutableList.of(INTERVAL)); - final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUsedSegments, resultSegments); - } - - @Test - public void testRetrieveUsedSegmentsActionWithValidVersionVisibleOnly() - { - final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_USED, Segments.ONLY_VISIBLE); - final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUsedSegments, resultSegments); - } - - @Test - public void testRetrieveUsedSegmentsActionWithValidVersionIncludingOvershadowed() - { - final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_USED, Segments.INCLUDING_OVERSHADOWED); - final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUsedSegments, resultSegments); + final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(expectedUsedSegments, actualSegments); } @Test - public void testRetrieveUsedSegmentsActionWithNonExistentVersionVisibleOnly() + public void testRetrieveUsedSegmentsActionWithVersionVisibleOnly() { - final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_UNUSED, Segments.ONLY_VISIBLE); - final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(ImmutableSet.of(), resultSegments); - } - - @Test - public void testRetrieveUsedSegmentsActionWithNonExistentVersionIncludingOvershadowed() - { - final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), VERSION_UNUSED, Segments.INCLUDING_OVERSHADOWED); - final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(ImmutableSet.of(), resultSegments); + for (final String version : VERSIONS) { + final RetrieveUsedSegmentsAction action = + new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), version, Segments.ONLY_VISIBLE); + final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + final Set expectedSegments = expectedUsedSegments.stream() + .filter(d -> d.getVersion().equals(version)) + .collect(Collectors.toSet()); + Assert.assertEquals(expectedSegments, actualSegments); + } } @Test - public void testRetrieveUnusedSegmentsActionWithValidVersion() + public void testRetrieveUsedSegmentsActionWithVersionIncludingOvershadowed() { - final RetrieveUnusedSegmentsAction action = - new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, VERSION_UNUSED, null, null); - final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUnusedSegments, resultSegments); + for (final String version : VERSIONS) { + final RetrieveUsedSegmentsAction action = + new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), version, Segments.INCLUDING_OVERSHADOWED); + final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + final Set expectedSegments = expectedUsedSegments.stream() + .filter(d -> d.getVersion().equals(version)) + .collect(Collectors.toSet()); + Assert.assertEquals(expectedSegments, actualSegments); + } } @Test - public void testRetrieveUnusedSegmentsActionWithNonExistentVersion() + public void testRetrieveUnusedSegmentsActionWithVersion() { - final RetrieveUnusedSegmentsAction action = - new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, VERSION_USED, null, null); - final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(ImmutableSet.of(), resultSegments); + for (final String version : VERSIONS) { + final RetrieveUnusedSegmentsAction action = + new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, version, null, null); + final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + final Set expectedSegments = expectedUnusedSegments.stream() + .filter(d -> d.getVersion().equals(version)) + .collect(Collectors.toSet()); + Assert.assertEquals(expectedSegments, actualSegments); + } } @Test public void testRetrieveUnusedSegmentsActionWithMinUsedLastUpdatedTime() { final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, null, DateTimes.MIN); - final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(ImmutableSet.of(), resultSegments); + final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(ImmutableSet.of(), actualSegments); } @Test public void testRetrieveUnusedSegmentsActionWithNowUsedLastUpdatedTime() { final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, null, DateTimes.nowUtc()); - final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUnusedSegments, resultSegments); + final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(expectedUnusedSegments, actualSegments); } } From ea6068a9c428a7cfbae917942455e0a0dd9fa3a5 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Thu, 7 Mar 2024 10:05:06 +0530 Subject: [PATCH 04/15] Address review feedback --- ...TestIndexerMetadataStorageCoordinator.java | 3 +- .../IndexerSQLMetadataStorageCoordinator.java | 3 +- ...exerSQLMetadataStorageCoordinatorTest.java | 205 ++++++++++++++++++ 3 files changed, 209 insertions(+), 2 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index dcd76cb3cc08..03ef72bf4f83 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -98,7 +98,8 @@ public List> retrieveUsedSegmentsAndCreatedDates(Strin public List retrieveUsedSegmentsForIntervals( String dataSource, List intervals, - String version, Segments visibility + @Nullable String version, + Segments visibility ) { return ImmutableList.of(); diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 72a3da73d671..ff8f0b491e27 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -1462,7 +1462,8 @@ private Set createNewIdsForAppendSegments( final Collection overlappingSegments = retrieveUsedSegmentsForIntervals( dataSource, new ArrayList<>(appendIntervals), - null, Segments.INCLUDING_OVERSHADOWED + null, + Segments.INCLUDING_OVERSHADOWED ); final Map> overlappingVersionToIntervals = new HashMap<>(); diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index ca17580361bc..d598829782e9 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -1105,6 +1105,130 @@ public void testMultiIntervalUsedList() throws IOException ).containsOnlyOnce(defaultSegment3); } + @Test + public void testRetrieveUsedSegmentsOnlyVisibleWithVersion() throws IOException + { + final DateTime now = DateTimes.nowUtc(); + final DataSegment segment1 = createSegment( + Intervals.of("2023-01-01/2023-01-02"), + now.toString(), + new LinearShardSpec(0) + ); + final DataSegment segment2 = createSegment( + Intervals.of("2023-01-02/2023-01-03"), + now.plusDays(2).toString(), + new LinearShardSpec(0) + ); + final DataSegment segment3 = createSegment( + Intervals.of("2023-01-03/2023-01-04"), + now.plusDays(3).toString(), + new LinearShardSpec(0) + ); + + final ImmutableSet usedSegments = ImmutableSet.of(segment1, segment2, segment3); + Assert.assertEquals(usedSegments, coordinator.commitSegments(usedSegments)); + + for (DataSegment usedSegment : usedSegments) { + Assertions.assertThat( + coordinator.retrieveUsedSegmentsForIntervals( + DS.WIKI, + ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), + usedSegment.getVersion(), + Segments.ONLY_VISIBLE + ) + ).contains(usedSegment); + } + + Assertions.assertThat( + coordinator.retrieveUsedSegmentsForIntervals( + DS.WIKI, + ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), + null, + Segments.ONLY_VISIBLE + ) + ).containsAll(usedSegments); + + Assertions.assertThat( + coordinator.retrieveUsedSegmentsForIntervals( + DS.WIKI, + ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), + "some-non-existent-version", + Segments.ONLY_VISIBLE + ) + ).containsAll(ImmutableList.of()); + } + + @Test + public void testRetrieveUsedSegmentsIncludingOvershadowedWithVersion() throws IOException + { + final DateTime now = DateTimes.nowUtc(); + final DataSegment segment1 = createSegment( + Intervals.of("2023-01-01/2023-01-02"), + now.toString(), + new LinearShardSpec(0) + ); + final DataSegment segment2 = createSegment( + Intervals.of("2023-01-02/2023-01-03"), + now.plusDays(2).toString(), + new LinearShardSpec(0) + ); + + // segment3 and segment4 are in the same interval. However, segment4 should overshadow segment3 + // as segment4 contains a higher version than segment3. + final DataSegment segment3 = createSegment( + Intervals.of("2023-01-03/2023-01-04"), + now.plusDays(3).toString(), + new LinearShardSpec(0) + ); + final DataSegment segment4 = createSegment( + Intervals.of("2023-01-03/2023-01-04"), + now.plusDays(4).toString(), + new LinearShardSpec(0) + ); + + final ImmutableSet allSegments = ImmutableSet.of(segment1, segment2, segment3, segment4); + Assert.assertEquals(allSegments, coordinator.commitSegments(ImmutableSet.of(segment1, segment2, segment3, segment4))); + final ImmutableSet usedSegments = ImmutableSet.of(segment1, segment2, segment4); + + for (DataSegment usedSegment : usedSegments) { + Assertions.assertThat( + coordinator.retrieveUsedSegmentsForIntervals( + DS.WIKI, + ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), + usedSegment.getVersion(), + Segments.INCLUDING_OVERSHADOWED + ) + ).contains(usedSegment); + } + + Assertions.assertThat( + coordinator.retrieveUsedSegmentsForIntervals( + DS.WIKI, + ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), + segment3.getVersion(), + Segments.INCLUDING_OVERSHADOWED + ) + ).containsAll(ImmutableList.of()); + + Assertions.assertThat( + coordinator.retrieveUsedSegmentsForIntervals( + DS.WIKI, + ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), + null, + Segments.INCLUDING_OVERSHADOWED + ) + ).containsAll(usedSegments); + + Assertions.assertThat( + coordinator.retrieveUsedSegmentsForIntervals( + DS.WIKI, + ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), + "some-non-existent-version", + Segments.INCLUDING_OVERSHADOWED + ) + ).containsAll(ImmutableList.of()); + } + @Test public void testRetrieveUsedSegmentsUsingMultipleIntervals() throws IOException { @@ -1211,6 +1335,25 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitInRange() throw Assert.assertTrue(actualUnusedSegments.containsAll(segments.stream().limit(requestedLimit).collect(Collectors.toList()))); } + @Test + public void testRetrieveUnusedSegmentsUsingSingleIntervalVersionAndLimitInRange() throws IOException + { + final List segments = createAndGetUsedYearSegments(1900, 2133); + markAllSegmentsUnused(new HashSet<>(segments), DateTimes.nowUtc()); + + final int requestedLimit = 10; + final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( + DS.WIKI, + Intervals.of("1900/3000"), + "version", + requestedLimit, + null + ); + + Assert.assertEquals(requestedLimit, actualUnusedSegments.size()); + Assert.assertTrue(actualUnusedSegments.containsAll(segments.stream().limit(requestedLimit).collect(Collectors.toList()))); + } + @Test public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitOutOfRange() throws IOException { @@ -1692,6 +1835,68 @@ public void testSimpleUnusedList() throws IOException ); } + @Test + public void testRetrieveUnusedSegmentsWithVersion() throws IOException + { + final DateTime now = DateTimes.nowUtc(); + final DataSegment segment1 = createSegment( + Intervals.of("2023-01-01/2023-01-02"), + now.toString(), + new LinearShardSpec(0) + ); + final DataSegment segment2 = createSegment( + Intervals.of("2023-01-02/2023-01-03"), + now.plusDays(2).toString(), + new LinearShardSpec(0) + ); + final DataSegment segment3 = createSegment( + Intervals.of("2023-01-03/2023-01-04"), + now.plusDays(3).toString(), + new LinearShardSpec(0) + ); + final DataSegment segment4 = createSegment( + Intervals.of("2023-01-03/2023-01-04"), + now.plusDays(4).toString(), + new LinearShardSpec(0) + ); + + final ImmutableSet unusedSegments = ImmutableSet.of(segment1, segment2, segment3, segment4); + Assert.assertEquals(unusedSegments, coordinator.commitSegments(unusedSegments)); + markAllSegmentsUnused(unusedSegments, DateTimes.nowUtc()); + + for (DataSegment unusedSegment : unusedSegments) { + Assertions.assertThat( + coordinator.retrieveUnusedSegmentsForInterval( + DS.WIKI, + Intervals.of("2023-01-01/2023-01-04"), + unusedSegment.getVersion(), + null, + null + ) + ).contains(unusedSegment); + } + + Assertions.assertThat( + coordinator.retrieveUnusedSegmentsForInterval( + DS.WIKI, + Intervals.of("2023-01-01/2023-01-04"), + null, + null, + null + ) + ).containsAll(unusedSegments); + + Assertions.assertThat( + coordinator.retrieveUnusedSegmentsForInterval( + DS.WIKI, + Intervals.of("2023-01-01/2023-01-04"), + "some-non-existent-version", + null, + null + ) + ).containsAll(ImmutableSet.of()); + } + @Test public void testSimpleUnusedListWithLimit() throws IOException { From 3e33d42ebbc847d10ac6e3e0846f38d1e2012bea Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 11 Mar 2024 07:26:54 +0530 Subject: [PATCH 05/15] Accept a list of versions instead of a single version. Support multiple versions. --- .../actions/RetrieveUnusedSegmentsAction.java | 14 +++--- .../actions/RetrieveUsedSegmentsAction.java | 18 +++---- .../common/task/KillUnusedSegmentsTask.java | 23 ++++----- .../actions/RetrieveSegmentsActionsTest.java | 25 ++++++++-- .../RetrieveUsedSegmentsActionSerdeTest.java | 2 +- ...tKillUnusedSegmentsTaskQuerySerdeTest.java | 11 +++-- .../task/KillUnusedSegmentsTaskTest.java | 16 +++---- ...TestIndexerMetadataStorageCoordinator.java | 4 +- .../ClientKillUnusedSegmentsTaskQuery.java | 15 +++--- .../IndexerMetadataStorageCoordinator.java | 8 ++-- .../IndexerSQLMetadataStorageCoordinator.java | 26 +++++----- .../metadata/SqlSegmentsMetadataQuery.java | 48 +++++++++---------- .../druid/rpc/indexing/OverlordClient.java | 6 ++- ...exerSQLMetadataStorageCoordinatorTest.java | 16 +++---- .../duty/KillUnusedSegmentsTest.java | 2 +- 15 files changed, 128 insertions(+), 106 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java index ab1a0bef4a8b..b6b4e2942cc8 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java @@ -41,7 +41,7 @@ public class RetrieveUnusedSegmentsAction implements TaskAction versions; @JsonIgnore private final Integer limit; @@ -53,14 +53,14 @@ public class RetrieveUnusedSegmentsAction implements TaskAction versions, @JsonProperty("limit") @Nullable Integer limit, @JsonProperty("maxUsedStatusLastUpdatedTime") @Nullable DateTime maxUsedStatusLastUpdatedTime ) { this.dataSource = dataSource; this.interval = interval; - this.version = version; + this.versions = versions; this.limit = limit; this.maxUsedStatusLastUpdatedTime = maxUsedStatusLastUpdatedTime; } @@ -79,9 +79,9 @@ public Interval getInterval() @Nullable @JsonProperty - public String getVersion() + public List getVersions() { - return version; + return versions; } @Nullable @@ -108,7 +108,7 @@ public TypeReference> getReturnTypeReference() public List perform(Task task, TaskActionToolbox toolbox) { return toolbox.getIndexerMetadataStorageCoordinator() - .retrieveUnusedSegmentsForInterval(dataSource, interval, version, limit, maxUsedStatusLastUpdatedTime); + .retrieveUnusedSegmentsForInterval(dataSource, interval, versions, limit, maxUsedStatusLastUpdatedTime); } @Override @@ -123,7 +123,7 @@ public String toString() return getClass().getSimpleName() + "{" + "dataSource='" + dataSource + '\'' + ", interval=" + interval + - ", version=" + version + + ", versions=" + versions + ", limit=" + limit + ", maxUsedStatusLastUpdatedTime=" + maxUsedStatusLastUpdatedTime + '}'; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java index 5b6016c20165..3269d317fe0a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java @@ -74,7 +74,7 @@ public class RetrieveUsedSegmentsAction implements TaskAction intervals; @JsonIgnore - private final String version; + private final List versions; @JsonIgnore private final Segments visibility; @@ -84,7 +84,7 @@ public RetrieveUsedSegmentsAction( @JsonProperty("dataSource") String dataSource, @Deprecated @JsonProperty("interval") Interval interval, @JsonProperty("intervals") Collection intervals, - @JsonProperty("version") @Nullable String version, + @JsonProperty("versions") @Nullable List versions, // When JSON object is deserialized, this parameter is optional for backward compatibility. // Otherwise, it shouldn't be considered optional. @JsonProperty("visibility") @Nullable Segments visibility @@ -104,7 +104,7 @@ public RetrieveUsedSegmentsAction( theIntervals = JodaUtils.condenseIntervals(intervals); } this.intervals = Preconditions.checkNotNull(theIntervals, "no intervals found"); - this.version = version; + this.versions = versions; // Defaulting to the former behaviour when visibility wasn't explicitly specified for backward compatibility this.visibility = visibility != null ? visibility : Segments.ONLY_VISIBLE; } @@ -128,9 +128,9 @@ public List getIntervals() @Nullable @JsonProperty - public String getVersion() + public List getVersions() { - return version; + return versions; } @JsonProperty @@ -218,7 +218,7 @@ public Collection perform(Task task, TaskActionToolbox toolbox) private Collection retrieveUsedSegments(TaskActionToolbox toolbox) { return toolbox.getIndexerMetadataStorageCoordinator() - .retrieveUsedSegmentsForIntervals(dataSource, intervals, version, visibility); + .retrieveUsedSegmentsForIntervals(dataSource, intervals, versions, visibility); } @Override @@ -239,14 +239,14 @@ public boolean equals(Object o) RetrieveUsedSegmentsAction that = (RetrieveUsedSegmentsAction) o; return Objects.equals(dataSource, that.dataSource) && Objects.equals(intervals, that.intervals) - && Objects.equals(version, that.version) + && Objects.equals(versions, that.versions) && visibility == that.visibility; } @Override public int hashCode() { - return Objects.hash(dataSource, intervals, version, visibility); + return Objects.hash(dataSource, intervals, versions, visibility); } @Override @@ -255,7 +255,7 @@ public String toString() return getClass().getSimpleName() + "{" + "dataSource='" + dataSource + '\'' + ", intervals=" + intervals + - ", version=" + version + + ", versions=" + versions + ", visibility=" + visibility + '}'; } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 6ac6c3d485c3..4eec2f19fa3d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -45,6 +45,7 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.server.security.ResourceAction; import org.apache.druid.timeline.DataSegment; +import org.apache.druid.utils.CollectionUtils; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -86,7 +87,7 @@ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask /** * The version of segments to delete in this {@link #getInterval()}. */ - @Nullable private final String version; + @Nullable private final List versions; @Deprecated private final boolean markAsUnused; @@ -112,7 +113,7 @@ public KillUnusedSegmentsTask( @JsonProperty("id") String id, @JsonProperty("dataSource") String dataSource, @JsonProperty("interval") Interval interval, - @JsonProperty("version") @Nullable String version, + @JsonProperty("versions") @Nullable List versions, @JsonProperty("context") Map context, @JsonProperty("markAsUnused") @Deprecated Boolean markAsUnused, @JsonProperty("batchSize") Integer batchSize, @@ -137,10 +138,10 @@ public KillUnusedSegmentsTask( if (limit != null && Boolean.TRUE.equals(markAsUnused)) { throw InvalidInput.exception("limit[%d] cannot be provided when markAsUnused is enabled.", limit); } - if (version != null && Boolean.TRUE.equals(markAsUnused)) { - throw InvalidInput.exception("version[%s] cannot be provided when markAsUnused is enabled.", version); + if (!CollectionUtils.isNullOrEmpty(versions) && Boolean.TRUE.equals(markAsUnused)) { + throw InvalidInput.exception("versions[%s] cannot be provided when markAsUnused is enabled.", versions); } - this.version = version; + this.versions = versions; this.limit = limit; this.maxUsedStatusLastUpdatedTime = maxUsedStatusLastUpdatedTime; } @@ -148,9 +149,9 @@ public KillUnusedSegmentsTask( @Nullable @JsonProperty - public String getVersion() + public List getVersions() { - return version; + return versions; } /** @@ -225,9 +226,9 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception @Nullable Integer numTotalBatches = getNumTotalBatches(); List unusedSegments; LOG.info( - "Starting kill for datasource[%s] in interval[%s] and version[%s] with batchSize[%d], up to limit[%d]" + "Starting kill for datasource[%s] in interval[%s] and versions[%s] with batchSize[%d], up to limit[%d]" + " segments before maxUsedStatusLastUpdatedTime[%s] will be deleted%s", - getDataSource(), getInterval(), getVersion(), batchSize, limit, maxUsedStatusLastUpdatedTime, + getDataSource(), getInterval(), getVersions(), batchSize, limit, maxUsedStatusLastUpdatedTime, numTotalBatches != null ? StringUtils.format(" in [%d] batches.", numTotalBatches) : "." ); @@ -235,7 +236,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception getDataSource(), null, ImmutableList.of(getInterval()), - getVersion(), + getVersions(), Segments.INCLUDING_OVERSHADOWED ); // Fetch the load specs of all segments overlapping with the unused segment intervals @@ -254,7 +255,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception unusedSegments = toolbox .getTaskActionClient() .submit( - new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), getVersion(), nextBatchSize, maxUsedStatusLastUpdatedTime) + new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), getVersions(), nextBatchSize, maxUsedStatusLastUpdatedTime) ); // Fetch locks each time as a revokal could have occurred in between batches diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java index 2dd439183afe..9a3170f50328 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java @@ -120,7 +120,13 @@ public void testRetrieveUsedSegmentsActionWithVersionVisibleOnly() { for (final String version : VERSIONS) { final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), version, Segments.ONLY_VISIBLE); + new RetrieveUsedSegmentsAction( + task.getDataSource(), + null, + ImmutableList.of(INTERVAL), + ImmutableList.of(version), + Segments.ONLY_VISIBLE + ); final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); final Set expectedSegments = expectedUsedSegments.stream() .filter(d -> d.getVersion().equals(version)) @@ -134,7 +140,13 @@ public void testRetrieveUsedSegmentsActionWithVersionIncludingOvershadowed() { for (final String version : VERSIONS) { final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction(task.getDataSource(), null, ImmutableList.of(INTERVAL), version, Segments.INCLUDING_OVERSHADOWED); + new RetrieveUsedSegmentsAction( + task.getDataSource(), + null, + ImmutableList.of(INTERVAL), + ImmutableList.of(version), + Segments.INCLUDING_OVERSHADOWED + ); final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); final Set expectedSegments = expectedUsedSegments.stream() .filter(d -> d.getVersion().equals(version)) @@ -147,8 +159,13 @@ public void testRetrieveUsedSegmentsActionWithVersionIncludingOvershadowed() public void testRetrieveUnusedSegmentsActionWithVersion() { for (final String version : VERSIONS) { - final RetrieveUnusedSegmentsAction action = - new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, version, null, null); + final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction( + task.getDataSource(), + INTERVAL, + ImmutableList.of(version), + null, + null + ); final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); final Set expectedSegments = expectedUnusedSegments.stream() .filter(d -> d.getVersion().equals(version)) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java index 04ce80f3d9ec..f6aea3bf25d9 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java @@ -74,7 +74,7 @@ public void testMultiIntervalWithVersionSerde() throws Exception "dataSource", null, intervals, - DateTimes.nowUtc().toString(), + ImmutableList.of(DateTimes.nowUtc().toString()), Segments.ONLY_VISIBLE ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java index 9ffe6c8d11a3..59f80d4b565f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.jsontype.NamedType; +import com.google.common.collect.ImmutableList; import org.apache.druid.client.indexing.ClientKillUnusedSegmentsTaskQuery; import org.apache.druid.client.indexing.ClientTaskQuery; import org.apache.druid.jackson.DefaultObjectMapper; @@ -63,7 +64,7 @@ public void testClientKillUnusedSegmentsTaskQueryToKillUnusedSegmentsTask() thro Assert.assertEquals(taskQuery.getId(), fromJson.getId()); Assert.assertEquals(taskQuery.getDataSource(), fromJson.getDataSource()); Assert.assertEquals(taskQuery.getInterval(), fromJson.getInterval()); - Assert.assertNull(taskQuery.getVersion()); + Assert.assertNull(taskQuery.getVersions()); Assert.assertEquals(taskQuery.getMarkAsUnused(), fromJson.isMarkAsUnused()); Assert.assertEquals(taskQuery.getBatchSize(), Integer.valueOf(fromJson.getBatchSize())); Assert.assertEquals(taskQuery.getLimit(), fromJson.getLimit()); @@ -88,7 +89,7 @@ public void testClientKillUnusedSegmentsTaskQueryToKillUnusedSegmentsTaskDefault Assert.assertEquals(taskQuery.getId(), fromJson.getId()); Assert.assertEquals(taskQuery.getDataSource(), fromJson.getDataSource()); Assert.assertEquals(taskQuery.getInterval(), fromJson.getInterval()); - Assert.assertNull(taskQuery.getVersion()); + Assert.assertNull(taskQuery.getVersions()); Assert.assertEquals(taskQuery.getMarkAsUnused(), fromJson.isMarkAsUnused()); Assert.assertEquals(100, fromJson.getBatchSize()); Assert.assertNull(taskQuery.getLimit()); @@ -117,7 +118,7 @@ public void testKillUnusedSegmentsTaskToClientKillUnusedSegmentsTaskQuery() thro Assert.assertEquals(task.getId(), taskQuery.getId()); Assert.assertEquals(task.getDataSource(), taskQuery.getDataSource()); Assert.assertEquals(task.getInterval(), taskQuery.getInterval()); - Assert.assertNull(taskQuery.getVersion()); + Assert.assertNull(taskQuery.getVersions()); Assert.assertEquals(task.isMarkAsUnused(), taskQuery.getMarkAsUnused()); Assert.assertEquals(Integer.valueOf(task.getBatchSize()), taskQuery.getBatchSize()); Assert.assertNull(taskQuery.getLimit()); @@ -131,7 +132,7 @@ public void testKillUnusedSegmentsTaskWithNonNullValuesToClientKillUnusedSegment null, "datasource", Intervals.of("2020-01-01/P1D"), - DateTimes.nowUtc().toString(), + ImmutableList.of(DateTimes.nowUtc().toString()), null, null, 99, @@ -146,7 +147,7 @@ public void testKillUnusedSegmentsTaskWithNonNullValuesToClientKillUnusedSegment Assert.assertEquals(task.getId(), taskQuery.getId()); Assert.assertEquals(task.getDataSource(), taskQuery.getDataSource()); Assert.assertEquals(task.getInterval(), taskQuery.getInterval()); - Assert.assertEquals(task.getVersion(), taskQuery.getVersion()); + Assert.assertEquals(task.getVersions(), taskQuery.getVersions()); Assert.assertNull(taskQuery.getMarkAsUnused()); Assert.assertEquals(Integer.valueOf(task.getBatchSize()), taskQuery.getBatchSize()); Assert.assertEquals(task.getLimit(), taskQuery.getLimit()); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java index bb02b3702155..2663d57085ae 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java @@ -206,7 +206,7 @@ public void testKillVersion() throws Exception null, DATA_SOURCE, Intervals.of("2018/2020"), - segment3.getVersion(), + ImmutableList.of(segment3.getVersion()), null, false, 3, @@ -258,7 +258,7 @@ public void testKillMultipleSegmentsWithVersion() throws Exception null, DATA_SOURCE, Intervals.of("2018/2020"), - version.toString(), + ImmutableList.of(version.toString()), null, false, 3, @@ -310,7 +310,7 @@ public void testKillMultipleSegmentsWithVersionAndLimit() throws Exception null, DATA_SOURCE, Intervals.of("2018/2020"), - version.toString(), + ImmutableList.of(version.toString()), null, false, 3, @@ -362,7 +362,7 @@ public void testKillWithNonExistentVersion() throws Exception null, DATA_SOURCE, Intervals.of("2018/2020"), - version.plusDays(100).toString(), + ImmutableList.of(version.plusDays(100).toString()), null, false, 3, @@ -817,7 +817,7 @@ public void testKillMultipleUnusedSegmentsWithVersionAndDifferentLastUpdatedTime null, DATA_SOURCE, umbrellaInterval, - version.toString(), + ImmutableList.of(version.toString()), null, false, 1, @@ -847,7 +847,7 @@ public void testKillMultipleUnusedSegmentsWithVersionAndDifferentLastUpdatedTime null, DATA_SOURCE, umbrellaInterval, - version.toString(), + ImmutableList.of(version.toString()), null, false, 1, @@ -1107,7 +1107,7 @@ public void testInvalidVersionWithMarkAsUnused() null, DATA_SOURCE, Intervals.of("2018-01-01/2020-01-01"), - "foo", + ImmutableList.of("foo"), null, true, 10, @@ -1116,7 +1116,7 @@ public void testInvalidVersionWithMarkAsUnused() ) ), DruidExceptionMatcher.invalidInput().expectMessageIs( - "version[foo] cannot be provided when markAsUnused is enabled." + "versions[[foo]] cannot be provided when markAsUnused is enabled." ) ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 03ef72bf4f83..af2637943154 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -98,7 +98,7 @@ public List> retrieveUsedSegmentsAndCreatedDates(Strin public List retrieveUsedSegmentsForIntervals( String dataSource, List intervals, - @Nullable String version, + @Nullable List versions, Segments visibility ) { @@ -109,7 +109,7 @@ public List retrieveUsedSegmentsForIntervals( public List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, - @Nullable String version, + @Nullable List versions, @Nullable Integer limit, @Nullable DateTime maxUsedStatusLastUpdatedTime ) diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java index d22b505cf350..099ab2ada838 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java +++ b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java @@ -26,6 +26,7 @@ import org.joda.time.Interval; import javax.annotation.Nullable; +import java.util.List; import java.util.Objects; /** @@ -40,7 +41,7 @@ public class ClientKillUnusedSegmentsTaskQuery implements ClientTaskQuery private final String id; private final String dataSource; private final Interval interval; - @Nullable private final String version; + @Nullable private final List versions; private final Boolean markAsUnused; private final Integer batchSize; @Nullable private final Integer limit; @@ -51,7 +52,7 @@ public ClientKillUnusedSegmentsTaskQuery( @JsonProperty("id") String id, @JsonProperty("dataSource") String dataSource, @JsonProperty("interval") Interval interval, - @JsonProperty("version") String version, + @JsonProperty("versions") List versions, @JsonProperty("markAsUnused") @Deprecated Boolean markAsUnused, @JsonProperty("batchSize") Integer batchSize, @JsonProperty("limit") @Nullable Integer limit, @@ -67,7 +68,7 @@ public ClientKillUnusedSegmentsTaskQuery( this.id = id; this.dataSource = dataSource; this.interval = interval; - this.version = version; + this.versions = versions; this.markAsUnused = markAsUnused; this.batchSize = batchSize; this.limit = limit; @@ -103,9 +104,9 @@ public Interval getInterval() @JsonProperty @Nullable - public String getVersion() + public List getVersions() { - return version; + return versions; } /** @@ -156,7 +157,7 @@ public boolean equals(Object o) return Objects.equals(id, that.id) && Objects.equals(dataSource, that.dataSource) && Objects.equals(interval, that.interval) - && Objects.equals(version, that.version) + && Objects.equals(versions, that.versions) && Objects.equals(markAsUnused, that.markAsUnused) && Objects.equals(batchSize, that.batchSize) && Objects.equals(limit, that.limit) @@ -166,6 +167,6 @@ public boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(id, dataSource, interval, version, markAsUnused, batchSize, limit, maxUsedStatusLastUpdatedTime); + return Objects.hash(id, dataSource, interval, versions, markAsUnused, batchSize, limit, maxUsedStatusLastUpdatedTime); } } diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index a01243b5944e..7e0cdcbf0605 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -109,7 +109,7 @@ default Collection retrieveUsedSegmentsForInterval( * * @param dataSource The data source to query * @param intervals The intervals for which all applicable and used segments are requested. - * @param version An optional version of segments to retrieve in the given {@code intervals}. If unspecified, + * @param versions An optional list of segment versions to retrieve in the given {@code intervals}. If unspecified, * all versions of used segments in the {@code intervals} must be retrieved. * @param visibility Whether only visible or visible as well as overshadowed segments should be returned. The * visibility is considered within the specified intervals: that is, a segment which is visible @@ -127,7 +127,7 @@ default Collection retrieveUsedSegmentsForInterval( Collection retrieveUsedSegmentsForIntervals( String dataSource, List intervals, - @Nullable String version, + @Nullable List versions, Segments visibility ); @@ -137,7 +137,7 @@ Collection retrieveUsedSegmentsForIntervals( * * @param dataSource The data source the segments belong to * @param interval Filter the data segments to ones that include data in this interval exclusively. - * @param version An optional version of segments to retrieve in the given {@code interval}. If unspecified, all + * @param versions An optional list of segment versions to retrieve in the given {@code interval}. If unspecified, all * versions of unused segments in the {@code interval} must be retrieved. * @param limit The maximum number of unused segments to retreive. If null, no limit is applied. * @param maxUsedStatusLastUpdatedTime The maximum {@code used_status_last_updated} time. Any unused segment in {@code interval} @@ -150,7 +150,7 @@ Collection retrieveUsedSegmentsForIntervals( List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, - @Nullable String version, + @Nullable List versions, @Nullable Integer limit, @Nullable DateTime maxUsedStatusLastUpdatedTime ); diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index ff8f0b491e27..e9140a7db085 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -136,14 +136,14 @@ public void start() public Collection retrieveUsedSegmentsForIntervals( final String dataSource, final List intervals, - @Nullable final String version, + @Nullable final List versions, final Segments visibility ) { if (intervals == null || intervals.isEmpty()) { throw new IAE("null/empty intervals"); } - return doRetrieveUsedSegments(dataSource, intervals, version, visibility); + return doRetrieveUsedSegments(dataSource, intervals, versions, visibility); } @Override @@ -158,7 +158,7 @@ public Collection retrieveAllUsedSegments(String dataSource, Segmen private Collection doRetrieveUsedSegments( final String dataSource, final List intervals, - @Nullable final String version, + @Nullable final List versions, final Segments visibility ) { @@ -166,10 +166,10 @@ private Collection doRetrieveUsedSegments( handle -> { if (visibility == Segments.ONLY_VISIBLE) { final SegmentTimeline timeline = - getTimelineForIntervalsWithHandle(handle, dataSource, intervals, version); + getTimelineForIntervalsWithHandle(handle, dataSource, intervals, versions); return timeline.findNonOvershadowedObjectsInInterval(Intervals.ETERNITY, Partitions.ONLY_COMPLETE); } else { - return retrieveAllUsedSegmentsForIntervalsWithHandle(handle, dataSource, intervals, version); + return retrieveAllUsedSegmentsForIntervalsWithHandle(handle, dataSource, intervals, versions); } } ); @@ -235,7 +235,7 @@ public List> retrieveUsedSegmentsAndCreatedDates(Strin public List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, - @Nullable String version, + @Nullable List versions, @Nullable Integer limit, @Nullable DateTime maxUsedStatusLastUpdatedTime ) @@ -247,7 +247,7 @@ public List retrieveUnusedSegmentsForInterval( .retrieveUnusedSegments( dataSource, Collections.singletonList(interval), - version, + versions, limit, null, null, @@ -259,8 +259,8 @@ public List retrieveUnusedSegmentsForInterval( } ); - log.info("Found [%,d] unused segments for datasource[%s] in interval[%s] and version[%s] with maxUsedStatusLastUpdatedTime[%s].", - matchingSegments.size(), dataSource, interval, version, maxUsedStatusLastUpdatedTime); + log.info("Found [%,d] unused segments for datasource[%s] in interval[%s] and versions[%s] with maxUsedStatusLastUpdatedTime[%s].", + matchingSegments.size(), dataSource, interval, versions, maxUsedStatusLastUpdatedTime); return matchingSegments; } @@ -379,12 +379,12 @@ private SegmentTimeline getTimelineForIntervalsWithHandle( final Handle handle, final String dataSource, final List intervals, - @Nullable final String version + @Nullable final List versions ) throws IOException { try (final CloseableIterator iterator = SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUsedSegments(dataSource, intervals, version)) { + .retrieveUsedSegments(dataSource, intervals, versions)) { return SegmentTimeline.forSegments(iterator); } } @@ -393,12 +393,12 @@ private Collection retrieveAllUsedSegmentsForIntervalsWithHandle( final Handle handle, final String dataSource, final List intervals, - @Nullable final String version + @Nullable final List versions ) throws IOException { try (final CloseableIterator iterator = SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUsedSegments(dataSource, intervals, version)) { + .retrieveUsedSegments(dataSource, intervals, versions)) { final List retVal = new ArrayList<>(); iterator.forEachRemaining(retVal::add); return retVal; diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index 09273ffa8cc3..aa57866532aa 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -35,6 +35,7 @@ import org.apache.druid.server.http.DataSegmentPlus; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; +import org.apache.druid.utils.CollectionUtils; import org.joda.time.DateTime; import org.joda.time.Interval; import org.skife.jdbi.v2.Handle; @@ -117,13 +118,13 @@ public static SqlSegmentsMetadataQuery forHandle( public CloseableIterator retrieveUsedSegments( final String dataSource, final Collection intervals, - @Nullable final String version + @Nullable final List versions ) { return retrieveSegments( dataSource, intervals, - version, + versions, IntervalMode.OVERLAPS, true, null, @@ -143,7 +144,7 @@ public CloseableIterator retrieveUsedSegments( * * @param dataSource The name of the datasource * @param intervals The intervals to search over - * @param version An optional version of unused segments to retrieve in the given {@code intervals}. + * @param versions An optional list of unused segment versions to retrieve in the given {@code intervals}. * If unspecified, all versions of unused segments in the {@code intervals} must be retrieved. * @param limit The limit of segments to return * @param lastSegmentId the last segment id from which to search for results. All segments returned are > @@ -162,7 +163,7 @@ public CloseableIterator retrieveUsedSegments( public CloseableIterator retrieveUnusedSegments( final String dataSource, final Collection intervals, - @Nullable final String version, + @Nullable final List versions, @Nullable final Integer limit, @Nullable final String lastSegmentId, @Nullable final SortOrder sortOrder, @@ -172,7 +173,7 @@ public CloseableIterator retrieveUnusedSegments( return retrieveSegments( dataSource, intervals, - version, + versions, IntervalMode.CONTAINS, false, limit, @@ -207,7 +208,7 @@ public CloseableIterator retrieveUnusedSegments( public CloseableIterator retrieveUnusedSegmentsPlus( final String dataSource, final Collection intervals, - @Nullable final String version, + @Nullable final List versions, @Nullable final Integer limit, @Nullable final String lastSegmentId, @Nullable final SortOrder sortOrder, @@ -217,7 +218,7 @@ public CloseableIterator retrieveUnusedSegmentsPlus( return retrieveSegmentsPlus( dataSource, intervals, - version, + versions, IntervalMode.CONTAINS, false, limit, @@ -455,7 +456,7 @@ public static void bindQueryIntervals(final Query> query, fi private CloseableIterator retrieveSegments( final String dataSource, final Collection intervals, - @Nullable final String version, + @Nullable final List versions, final IntervalMode matchMode, final boolean used, @Nullable final Integer limit, @@ -466,7 +467,7 @@ private CloseableIterator retrieveSegments( { if (intervals.isEmpty() || intervals.size() <= MAX_INTERVALS_PER_BATCH) { return CloseableIterators.withEmptyBaggage( - retrieveSegmentsInIntervalsBatch(dataSource, intervals, version, matchMode, used, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime) + retrieveSegmentsInIntervalsBatch(dataSource, intervals, versions, matchMode, used, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime) ); } else { final List> intervalsLists = Lists.partition(new ArrayList<>(intervals), MAX_INTERVALS_PER_BATCH); @@ -477,7 +478,7 @@ private CloseableIterator retrieveSegments( final UnmodifiableIterator iterator = retrieveSegmentsInIntervalsBatch( dataSource, intervalList, - version, + versions, matchMode, used, limitPerBatch, @@ -505,7 +506,7 @@ private CloseableIterator retrieveSegments( private CloseableIterator retrieveSegmentsPlus( final String dataSource, final Collection intervals, - @Nullable final String version, + @Nullable final List versions, final IntervalMode matchMode, final boolean used, @Nullable final Integer limit, @@ -516,7 +517,7 @@ private CloseableIterator retrieveSegmentsPlus( { if (intervals.isEmpty() || intervals.size() <= MAX_INTERVALS_PER_BATCH) { return CloseableIterators.withEmptyBaggage( - retrieveSegmentsPlusInIntervalsBatch(dataSource, intervals, version, matchMode, used, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime) + retrieveSegmentsPlusInIntervalsBatch(dataSource, intervals, versions, matchMode, used, limit, lastSegmentId, sortOrder, maxUsedStatusLastUpdatedTime) ); } else { final List> intervalsLists = Lists.partition(new ArrayList<>(intervals), MAX_INTERVALS_PER_BATCH); @@ -527,7 +528,7 @@ private CloseableIterator retrieveSegmentsPlus( final UnmodifiableIterator iterator = retrieveSegmentsPlusInIntervalsBatch( dataSource, intervalList, - version, + versions, matchMode, used, limitPerBatch, @@ -555,7 +556,7 @@ private CloseableIterator retrieveSegmentsPlus( private UnmodifiableIterator retrieveSegmentsInIntervalsBatch( final String dataSource, final Collection intervals, - @Nullable final String version, + @Nullable final List versions, final IntervalMode matchMode, final boolean used, @Nullable final Integer limit, @@ -567,7 +568,7 @@ private UnmodifiableIterator retrieveSegmentsInIntervalsBatch( final Query> sql = buildSegmentsTableQuery( dataSource, intervals, - version, + versions, matchMode, used, limit, @@ -585,7 +586,7 @@ private UnmodifiableIterator retrieveSegmentsInIntervalsBatch( private UnmodifiableIterator retrieveSegmentsPlusInIntervalsBatch( final String dataSource, final Collection intervals, - @Nullable final String version, + @Nullable final List versions, final IntervalMode matchMode, final boolean used, @Nullable final Integer limit, @@ -598,7 +599,7 @@ private UnmodifiableIterator retrieveSegmentsPlusInIntervalsBat final Query> sql = buildSegmentsTableQuery( dataSource, intervals, - version, + versions, matchMode, used, limit, @@ -616,7 +617,7 @@ private UnmodifiableIterator retrieveSegmentsPlusInIntervalsBat private Query> buildSegmentsTableQuery( final String dataSource, final Collection intervals, - @Nullable final String version, + @Nullable final List versions, final IntervalMode matchMode, final boolean used, @Nullable final Integer limit, @@ -639,8 +640,11 @@ private Query> buildSegmentsTableQuery( appendConditionForIntervalsAndMatchMode(sb, intervals, matchMode, connector); } - if (version != null) { - sb.append(" AND version = :version"); + if (!CollectionUtils.isNullOrEmpty(versions)) { + final String versionsStr = versions.stream() + .map(version -> "'" + version + "'") + .collect(Collectors.joining(",")); + sb.append(StringUtils.format(" AND version IN (%s)", versionsStr)); } // Add the used_status_last_updated time filter only for unused segments when maxUsedStatusLastUpdatedTime is non-null. @@ -674,10 +678,6 @@ private Query> buildSegmentsTableQuery( .bind("used", used) .bind("dataSource", dataSource); - if (version != null) { - sql.bind("version", version); - } - if (addMaxUsedLastUpdatedTimeFilter) { sql.bind("used_status_last_updated", maxUsedStatusLastUpdatedTime.toString()); } diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java index 671b9a197b89..57fab2fff4a8 100644 --- a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java +++ b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java @@ -83,6 +83,8 @@ public interface OverlordClient * @param interval Umbrella interval to be considered by the kill task. Note that unused segments falling in this * widened umbrella interval may have different {@code used_status_last_updated} time, so the kill task * should also filter by {@code maxUsedStatusLastUpdatedTime} + * @param versions An optional list of segment versions to kill in the given {@code interval}. If unspecified, all + * versions of segments in the {@code interval} must be killed. * @param maxSegmentsToKill The maximum number of segments to kill * @param maxUsedStatusLastUpdatedTime The maximum {@code used_status_last_updated} time. Any unused segment in {@code interval} * with {@code used_status_last_updated} no later than this time will be included in the @@ -95,7 +97,7 @@ default ListenableFuture runKillTask( String idPrefix, String dataSource, Interval interval, - @Nullable String version, + @Nullable List versions, @Nullable Integer maxSegmentsToKill, @Nullable DateTime maxUsedStatusLastUpdatedTime ) @@ -105,7 +107,7 @@ default ListenableFuture runKillTask( taskId, dataSource, interval, - version, + versions, false, null, maxSegmentsToKill, diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index d598829782e9..750e90f9436b 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -1133,7 +1133,7 @@ public void testRetrieveUsedSegmentsOnlyVisibleWithVersion() throws IOException coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - usedSegment.getVersion(), + ImmutableList.of(usedSegment.getVersion()), Segments.ONLY_VISIBLE ) ).contains(usedSegment); @@ -1152,7 +1152,7 @@ public void testRetrieveUsedSegmentsOnlyVisibleWithVersion() throws IOException coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - "some-non-existent-version", + ImmutableList.of("some-non-existent-version"), Segments.ONLY_VISIBLE ) ).containsAll(ImmutableList.of()); @@ -1195,7 +1195,7 @@ public void testRetrieveUsedSegmentsIncludingOvershadowedWithVersion() throws IO coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - usedSegment.getVersion(), + ImmutableList.of(usedSegment.getVersion()), Segments.INCLUDING_OVERSHADOWED ) ).contains(usedSegment); @@ -1205,7 +1205,7 @@ public void testRetrieveUsedSegmentsIncludingOvershadowedWithVersion() throws IO coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - segment3.getVersion(), + ImmutableList.of(segment3.getVersion()), Segments.INCLUDING_OVERSHADOWED ) ).containsAll(ImmutableList.of()); @@ -1223,7 +1223,7 @@ public void testRetrieveUsedSegmentsIncludingOvershadowedWithVersion() throws IO coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - "some-non-existent-version", + ImmutableList.of("some-non-existent-version"), Segments.INCLUDING_OVERSHADOWED ) ).containsAll(ImmutableList.of()); @@ -1345,7 +1345,7 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalVersionAndLimitInRange( final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - "version", + ImmutableList.of("version"), requestedLimit, null ); @@ -1869,7 +1869,7 @@ public void testRetrieveUnusedSegmentsWithVersion() throws IOException coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("2023-01-01/2023-01-04"), - unusedSegment.getVersion(), + ImmutableList.of(unusedSegment.getVersion()), null, null ) @@ -1890,7 +1890,7 @@ public void testRetrieveUnusedSegmentsWithVersion() throws IOException coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("2023-01-01/2023-01-04"), - "some-non-existent-version", + ImmutableList.of("some-non-existent-version"), null, null ) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index 7be4be97971e..dfac4beeb94b 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ -843,7 +843,7 @@ public ListenableFuture runKillTask( String idPrefix, String dataSource, Interval interval, - @Nullable String version, + @Nullable List versions, @Nullable Integer maxSegmentsToKill, @Nullable DateTime maxUsedStatusLastUpdatedTime ) From ee5bf83cd2ad016c3bdfd825cc66abd23bc856cc Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 11 Mar 2024 08:41:35 +0530 Subject: [PATCH 06/15] Tests for multiple versions. --- .../actions/RetrieveSegmentsActionsTest.java | 74 ++++++++----------- ...tKillUnusedSegmentsTaskQuerySerdeTest.java | 2 +- .../task/KillUnusedSegmentsTaskTest.java | 8 +- ...exerSQLMetadataStorageCoordinatorTest.java | 43 +++++++++-- 4 files changed, 69 insertions(+), 58 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java index 9a3170f50328..b49621ac834a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java @@ -39,7 +39,6 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; public class RetrieveSegmentsActionsTest { @@ -118,60 +117,45 @@ public void testRetrieveUsedSegmentsAction() @Test public void testRetrieveUsedSegmentsActionWithVersionVisibleOnly() { - for (final String version : VERSIONS) { - final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction( - task.getDataSource(), - null, - ImmutableList.of(INTERVAL), - ImmutableList.of(version), - Segments.ONLY_VISIBLE - ); - final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - final Set expectedSegments = expectedUsedSegments.stream() - .filter(d -> d.getVersion().equals(version)) - .collect(Collectors.toSet()); - Assert.assertEquals(expectedSegments, actualSegments); - } + final RetrieveUsedSegmentsAction action = + new RetrieveUsedSegmentsAction( + task.getDataSource(), + null, + ImmutableList.of(INTERVAL), + VERSIONS, + Segments.ONLY_VISIBLE + ); + final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(expectedUsedSegments, actualSegments); } @Test public void testRetrieveUsedSegmentsActionWithVersionIncludingOvershadowed() { - for (final String version : VERSIONS) { - final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction( - task.getDataSource(), - null, - ImmutableList.of(INTERVAL), - ImmutableList.of(version), - Segments.INCLUDING_OVERSHADOWED - ); - final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - final Set expectedSegments = expectedUsedSegments.stream() - .filter(d -> d.getVersion().equals(version)) - .collect(Collectors.toSet()); - Assert.assertEquals(expectedSegments, actualSegments); - } + final RetrieveUsedSegmentsAction action = + new RetrieveUsedSegmentsAction( + task.getDataSource(), + null, + ImmutableList.of(INTERVAL), + VERSIONS, + Segments.INCLUDING_OVERSHADOWED + ); + final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(expectedUsedSegments, actualSegments); } @Test public void testRetrieveUnusedSegmentsActionWithVersion() { - for (final String version : VERSIONS) { - final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction( - task.getDataSource(), - INTERVAL, - ImmutableList.of(version), - null, - null - ); - final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - final Set expectedSegments = expectedUnusedSegments.stream() - .filter(d -> d.getVersion().equals(version)) - .collect(Collectors.toSet()); - Assert.assertEquals(expectedSegments, actualSegments); - } + final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction( + task.getDataSource(), + INTERVAL, + VERSIONS, + null, + null + ); + final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(expectedUnusedSegments, actualSegments); } @Test diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java index 59f80d4b565f..e6dbd0bad13a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java @@ -132,7 +132,7 @@ public void testKillUnusedSegmentsTaskWithNonNullValuesToClientKillUnusedSegment null, "datasource", Intervals.of("2020-01-01/P1D"), - ImmutableList.of(DateTimes.nowUtc().toString()), + ImmutableList.of("v1", "v2"), null, null, 99, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java index 2663d57085ae..ad9a9a580966 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java @@ -232,7 +232,7 @@ public void testKillVersion() throws Exception } @Test - public void testKillMultipleSegmentsWithVersion() throws Exception + public void testKillMultipleSegmentsWithVersions() throws Exception { final DateTime version = DateTimes.nowUtc(); final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); @@ -258,7 +258,7 @@ public void testKillMultipleSegmentsWithVersion() throws Exception null, DATA_SOURCE, Intervals.of("2018/2020"), - ImmutableList.of(version.toString()), + ImmutableList.of(version.toString(), version.minusHours(2).toString()), null, false, 3, @@ -268,7 +268,7 @@ public void testKillMultipleSegmentsWithVersion() throws Exception Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); Assert.assertEquals( - new KillTaskReport.Stats(3, 2, 0), + new KillTaskReport.Stats(4, 3, 0), getReportedStats() ); @@ -280,7 +280,7 @@ public void testKillMultipleSegmentsWithVersion() throws Exception null ); - Assert.assertEquals(ImmutableSet.of(segment4, segment5), new HashSet<>(actualUnusedSegments)); + Assert.assertEquals(ImmutableSet.of(segment5), new HashSet<>(actualUnusedSegments)); } @Test diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 750e90f9436b..a3163a2d535a 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -1109,19 +1109,23 @@ public void testMultiIntervalUsedList() throws IOException public void testRetrieveUsedSegmentsOnlyVisibleWithVersion() throws IOException { final DateTime now = DateTimes.nowUtc(); + final String v1 = now.toString(); + final String v2 = now.plusDays(2).toString(); + final String v3 = now.plusDays(3).toString(); + final DataSegment segment1 = createSegment( Intervals.of("2023-01-01/2023-01-02"), - now.toString(), + v1, new LinearShardSpec(0) ); final DataSegment segment2 = createSegment( Intervals.of("2023-01-02/2023-01-03"), - now.plusDays(2).toString(), + v2, new LinearShardSpec(0) ); final DataSegment segment3 = createSegment( Intervals.of("2023-01-03/2023-01-04"), - now.plusDays(3).toString(), + v3, new LinearShardSpec(0) ); @@ -1139,6 +1143,15 @@ public void testRetrieveUsedSegmentsOnlyVisibleWithVersion() throws IOException ).contains(usedSegment); } + Assertions.assertThat( + coordinator.retrieveUsedSegmentsForIntervals( + DS.WIKI, + ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), + ImmutableList.of(v1, v2), + Segments.ONLY_VISIBLE + ) + ).contains(segment1, segment2); + Assertions.assertThat( coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, @@ -1162,14 +1175,19 @@ public void testRetrieveUsedSegmentsOnlyVisibleWithVersion() throws IOException public void testRetrieveUsedSegmentsIncludingOvershadowedWithVersion() throws IOException { final DateTime now = DateTimes.nowUtc(); + final String v1 = now.toString(); + final String v2 = now.plusDays(2).toString(); + final String v3 = now.plusDays(3).toString(); + final String v4 = now.plusDays(4).toString(); + final DataSegment segment1 = createSegment( Intervals.of("2023-01-01/2023-01-02"), - now.toString(), + v1, new LinearShardSpec(0) ); final DataSegment segment2 = createSegment( Intervals.of("2023-01-02/2023-01-03"), - now.plusDays(2).toString(), + v2, new LinearShardSpec(0) ); @@ -1177,12 +1195,12 @@ public void testRetrieveUsedSegmentsIncludingOvershadowedWithVersion() throws IO // as segment4 contains a higher version than segment3. final DataSegment segment3 = createSegment( Intervals.of("2023-01-03/2023-01-04"), - now.plusDays(3).toString(), + v3, new LinearShardSpec(0) ); final DataSegment segment4 = createSegment( Intervals.of("2023-01-03/2023-01-04"), - now.plusDays(4).toString(), + v4, new LinearShardSpec(0) ); @@ -1205,7 +1223,16 @@ public void testRetrieveUsedSegmentsIncludingOvershadowedWithVersion() throws IO coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - ImmutableList.of(segment3.getVersion()), + ImmutableList.of(v1, v2), + Segments.INCLUDING_OVERSHADOWED + ) + ).contains(segment1, segment2); + + Assertions.assertThat( + coordinator.retrieveUsedSegmentsForIntervals( + DS.WIKI, + ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), + ImmutableList.of(v3), Segments.INCLUDING_OVERSHADOWED ) ).containsAll(ImmutableList.of()); From 8fc45f5abcc2f6ee1e8a8ff420fa22a2a495f649 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 11 Mar 2024 08:47:18 +0530 Subject: [PATCH 07/15] Update docs --- docs/data-management/delete.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/data-management/delete.md b/docs/data-management/delete.md index d319acc77b6b..c444a2507333 100644 --- a/docs/data-management/delete.md +++ b/docs/data-management/delete.md @@ -95,7 +95,7 @@ The available grammar is: "id": , "dataSource": , "interval" : , - "version" : , + "versions" : , "context": , "batchSize": , "limit": , @@ -107,7 +107,7 @@ Some of the parameters used in the task payload are further explained below: | Parameter | Default | Explanation | |-------------|-----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `version` | null (all versions) | Version of segments in the interval for the kill task to delete. The default behavior is to delete all unused segment versions in the specified `interval`.| +| `versions` | null (all versions) | List of segment versions within the specified `interval` for the kill task to delete. The default behavior is to delete all unused segment versions in the specified `interval`.| | `batchSize` |100 | Maximum number of segments that are deleted in one kill batch. Some operations on the Overlord may get stuck while a `kill` task is in progress due to concurrency constraints (such as in `TaskLockbox`). Thus, a `kill` task splits the list of unused segments to be deleted into smaller batches to yield the Overlord resources intermittently to other task operations.| | `limit` | null (no limit) | Maximum number of segments for the kill task to delete.| | `maxUsedStatusLastUpdatedTime` | null (no cutoff) | Maximum timestamp used as a cutoff to include unused segments. The kill task only considers segments which lie in the specified `interval` and were marked as unused no later than this time. The default behavior is to kill all unused segments in the `interval` regardless of when they where marked as unused.| From 7438099b6e332c053d5f0aa8f8b5d6ec42087380 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 11 Mar 2024 09:00:46 +0530 Subject: [PATCH 08/15] Cleanup --- .../actions/RetrieveSegmentsActionsTest.java | 34 +++++++++---------- ...exerSQLMetadataStorageCoordinatorTest.java | 29 ++++++++++++---- 2 files changed, 38 insertions(+), 25 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java index b49621ac834a..9420caf9d220 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java @@ -115,31 +115,29 @@ public void testRetrieveUsedSegmentsAction() } @Test - public void testRetrieveUsedSegmentsActionWithVersionVisibleOnly() + public void testRetrieveVisibleOnlyUsedSegmentsWithVersions() { - final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction( - task.getDataSource(), - null, - ImmutableList.of(INTERVAL), - VERSIONS, - Segments.ONLY_VISIBLE - ); + final RetrieveUsedSegmentsAction action = new RetrieveUsedSegmentsAction( + task.getDataSource(), + null, + ImmutableList.of(INTERVAL), + VERSIONS, + Segments.ONLY_VISIBLE + ); final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); Assert.assertEquals(expectedUsedSegments, actualSegments); } @Test - public void testRetrieveUsedSegmentsActionWithVersionIncludingOvershadowed() + public void testRetrieveIncludingOvershadowedUsedSegmentsWithVersions() { - final RetrieveUsedSegmentsAction action = - new RetrieveUsedSegmentsAction( - task.getDataSource(), - null, - ImmutableList.of(INTERVAL), - VERSIONS, - Segments.INCLUDING_OVERSHADOWED - ); + final RetrieveUsedSegmentsAction action = new RetrieveUsedSegmentsAction( + task.getDataSource(), + null, + ImmutableList.of(INTERVAL), + VERSIONS, + Segments.INCLUDING_OVERSHADOWED + ); final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); Assert.assertEquals(expectedUsedSegments, actualSegments); } diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index a3163a2d535a..53e90bbae88f 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -1106,7 +1106,7 @@ public void testMultiIntervalUsedList() throws IOException } @Test - public void testRetrieveUsedSegmentsOnlyVisibleWithVersion() throws IOException + public void testRetrieveVisibleOnlyUsedSegmentsWithVersions() throws IOException { final DateTime now = DateTimes.nowUtc(); final String v1 = now.toString(); @@ -1172,7 +1172,7 @@ public void testRetrieveUsedSegmentsOnlyVisibleWithVersion() throws IOException } @Test - public void testRetrieveUsedSegmentsIncludingOvershadowedWithVersion() throws IOException + public void testRetrieveIncludingOvershadowedUsedSegmentsWithVersions() throws IOException { final DateTime now = DateTimes.nowUtc(); final String v1 = now.toString(); @@ -1863,27 +1863,32 @@ public void testSimpleUnusedList() throws IOException } @Test - public void testRetrieveUnusedSegmentsWithVersion() throws IOException + public void testRetrieveUnusedSegmentsWithVersions() throws IOException { final DateTime now = DateTimes.nowUtc(); + final String v1 = now.toString(); + final String v2 = now.plusDays(2).toString(); + final String v3 = now.plusDays(3).toString(); + final String v4 = now.plusDays(4).toString(); + final DataSegment segment1 = createSegment( Intervals.of("2023-01-01/2023-01-02"), - now.toString(), + v1, new LinearShardSpec(0) ); final DataSegment segment2 = createSegment( Intervals.of("2023-01-02/2023-01-03"), - now.plusDays(2).toString(), + v2, new LinearShardSpec(0) ); final DataSegment segment3 = createSegment( Intervals.of("2023-01-03/2023-01-04"), - now.plusDays(3).toString(), + v3, new LinearShardSpec(0) ); final DataSegment segment4 = createSegment( Intervals.of("2023-01-03/2023-01-04"), - now.plusDays(4).toString(), + v4, new LinearShardSpec(0) ); @@ -1903,6 +1908,16 @@ public void testRetrieveUnusedSegmentsWithVersion() throws IOException ).contains(unusedSegment); } + Assertions.assertThat( + coordinator.retrieveUnusedSegmentsForInterval( + DS.WIKI, + Intervals.of("2023-01-01/2023-01-04"), + ImmutableList.of(v1, v2), + null, + null + ) + ).contains(segment1, segment2); + Assertions.assertThat( coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, From f5f10856eb71c7bb87d07231656ee9a9ef972446 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 11 Mar 2024 13:57:20 +0530 Subject: [PATCH 09/15] Address review comments. Retain the old interface method and make it default and route it to the method with nullable versions variant. Update usages to use the default method where versions doesn't matter. --- .../common/task/KillUnusedSegmentsTask.java | 15 +++++++----- .../indexing/overlord/TaskLifecycleTest.java | 2 -- ...TestIndexerMetadataStorageCoordinator.java | 13 ++++++++++- .../IndexerMetadataStorageCoordinator.java | 23 +++++++++++++++++++ ...exerSQLMetadataStorageCoordinatorTest.java | 18 --------------- 5 files changed, 44 insertions(+), 27 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 4eec2f19fa3d..b9207bccd24c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -87,7 +87,8 @@ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask /** * The version of segments to delete in this {@link #getInterval()}. */ - @Nullable private final List versions; + @Nullable + private final List versions; @Deprecated private final boolean markAsUnused; @@ -135,11 +136,13 @@ public KillUnusedSegmentsTask( if (limit != null && limit <= 0) { throw InvalidInput.exception("limit[%d] must be a positive integer.", limit); } - if (limit != null && Boolean.TRUE.equals(markAsUnused)) { - throw InvalidInput.exception("limit[%d] cannot be provided when markAsUnused is enabled.", limit); - } - if (!CollectionUtils.isNullOrEmpty(versions) && Boolean.TRUE.equals(markAsUnused)) { - throw InvalidInput.exception("versions[%s] cannot be provided when markAsUnused is enabled.", versions); + if (Boolean.TRUE.equals(markAsUnused)) { + if (limit != null) { + throw InvalidInput.exception("limit[%d] cannot be provided when markAsUnused is enabled.", limit); + } + if (!CollectionUtils.isNullOrEmpty(versions)) { + throw InvalidInput.exception("versions[%s] cannot be provided when markAsUnused is enabled.", versions); + } } this.versions = versions; this.limit = limit; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java index f7ba8052211c..852ea0df02dc 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java @@ -909,7 +909,6 @@ public DataSegment apply(String input) "test_kill_task", Intervals.of("2011-04-01/P4D"), null, - null, null ); for (DataSegment segment : unusedSegments) { @@ -1008,7 +1007,6 @@ public DataSegment apply(String input) "test_kill_task", Intervals.of("2011-04-01/P4D"), null, - null, null ); for (DataSegment segment : unusedSegments) { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index af2637943154..827f798eb7f4 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -109,7 +109,6 @@ public List retrieveUsedSegmentsForIntervals( public List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, - @Nullable List versions, @Nullable Integer limit, @Nullable DateTime maxUsedStatusLastUpdatedTime ) @@ -124,6 +123,18 @@ public List retrieveUnusedSegmentsForInterval( } } + @Override + public List retrieveUnusedSegmentsForInterval( + String dataSource, + Interval interval, + @Nullable List versions, + @Nullable Integer limit, + @Nullable DateTime maxUsedStatusLastUpdatedTime + ) + { + return ImmutableList.of(); + } + @Override public int markSegmentsAsUnusedWithinInterval(String dataSource, Interval interval) { diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 7e0cdcbf0605..6ef8eea2af0b 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -131,6 +131,29 @@ Collection retrieveUsedSegmentsForIntervals( Segments visibility ); + /** + * Retrieve all published segments which include ONLY data within the given interval and are marked as unused from the + * metadata store. + * + * @param dataSource The data source the segments belong to + * @param interval Filter the data segments to ones that include data in this interval exclusively. + * @param limit The maximum number of unused segments to retreive. If null, no limit is applied. + * @param maxUsedStatusLastUpdatedTime The maximum {@code used_status_last_updated} time. Any unused segment in {@code interval} + * with {@code used_status_last_updated} no later than this time will be included in the + * kill task. Segments without {@code used_status_last_updated} time (due to an upgrade + * from legacy Druid) will have {@code maxUsedStatusLastUpdatedTime} ignored + * @return DataSegments which include ONLY data within the requested interval and are marked as unused. Segments NOT + * returned here may include data in the interval + */ + default List retrieveUnusedSegmentsForInterval( + String dataSource, + Interval interval, + @Nullable Integer limit, + @Nullable DateTime maxUsedStatusLastUpdatedTime + ) { + return retrieveUnusedSegmentsForInterval(dataSource, interval, null, limit, maxUsedStatusLastUpdatedTime); + } + /** * Retrieve all published segments which include ONLY data within the given interval and are marked as unused from the * metadata store. diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 53e90bbae88f..66965406ff5c 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -1316,7 +1316,6 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndNoLimit() throws IOE DS.WIKI, Intervals.of("1900/3000"), null, - null, null ); @@ -1334,7 +1333,6 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitAtRange() throw final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - null, requestedLimit, null ); @@ -1353,7 +1351,6 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitInRange() throw final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - null, requestedLimit, null ); @@ -1391,7 +1388,6 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitOutOfRange() th final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, Intervals.of("1900/3000"), - null, limit, null ); @@ -1413,7 +1409,6 @@ public void testRetrieveUnusedSegmentsUsingSingleIntervalOutOfRange() throws IOE final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( DS.WIKI, outOfRangeInterval, - null, limit, null ); @@ -1855,7 +1850,6 @@ public void testSimpleUnusedList() throws IOException defaultSegment.getDataSource(), defaultSegment.getInterval(), null, - null, null ) ) @@ -2065,7 +2059,6 @@ public void testUnusedOverlapLow() throws IOException defaultSegment.getInterval().getStart().plus(1) ), null, - null, null ).isEmpty() ); @@ -2081,7 +2074,6 @@ public void testUnusedUnderlapLow() throws IOException defaultSegment.getDataSource(), new Interval(defaultSegment.getInterval().getStart().plus(1), defaultSegment.getInterval().getEnd()), null, - null, null ).isEmpty() ); @@ -2098,7 +2090,6 @@ public void testUnusedUnderlapHigh() throws IOException defaultSegment.getDataSource(), new Interval(defaultSegment.getInterval().getStart(), defaultSegment.getInterval().getEnd().minus(1)), null, - null, null ).isEmpty() ); @@ -2114,7 +2105,6 @@ public void testUnusedOverlapHigh() throws IOException defaultSegment.getDataSource(), defaultSegment.getInterval().withStart(defaultSegment.getInterval().getEnd().minus(1)), null, - null, null ).isEmpty() ); @@ -2132,7 +2122,6 @@ public void testUnusedBigOverlap() throws IOException defaultSegment.getDataSource(), Intervals.of("2000/2999"), null, - null, null ) ) @@ -2151,7 +2140,6 @@ public void testUnusedLowRange() throws IOException defaultSegment.getDataSource(), defaultSegment.getInterval().withStart(defaultSegment.getInterval().getStart().minus(1)), null, - null, null ) ) @@ -2163,7 +2151,6 @@ public void testUnusedLowRange() throws IOException defaultSegment.getDataSource(), defaultSegment.getInterval().withStart(defaultSegment.getInterval().getStart().minusYears(1)), null, - null, null ) ) @@ -2182,7 +2169,6 @@ public void testUnusedHighRange() throws IOException defaultSegment.getDataSource(), defaultSegment.getInterval().withEnd(defaultSegment.getInterval().getEnd().plus(1)), null, - null, null ) ) @@ -2194,7 +2180,6 @@ public void testUnusedHighRange() throws IOException defaultSegment.getDataSource(), defaultSegment.getInterval().withEnd(defaultSegment.getInterval().getEnd().plusYears(1)), null, - null, null ) ) @@ -3450,7 +3435,6 @@ public void testMarkSegmentsAsUnusedWithinIntervalOneYear() throws IOException existingSegment2.getDataSource(), existingSegment2.getInterval().withEnd(existingSegment2.getInterval().getEnd().plusYears(1)), null, - null, null ) ) @@ -3476,7 +3460,6 @@ public void testMarkSegmentsAsUnusedWithinIntervalTwoYears() throws IOException existingSegment1.getDataSource(), existingSegment1.getInterval().withEnd(existingSegment1.getInterval().getEnd().plus(1)), null, - null, null ) ) @@ -3488,7 +3471,6 @@ public void testMarkSegmentsAsUnusedWithinIntervalTwoYears() throws IOException existingSegment2.getDataSource(), existingSegment2.getInterval().withEnd(existingSegment2.getInterval().getEnd().plusYears(1)), null, - null, null ) ) From 9d227a87201c6d55dbcf527b8139de08b8771b1e Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 11 Mar 2024 15:07:41 +0530 Subject: [PATCH 10/15] Remove versions from retreive used segments action. --- ...tadataStoreBasedUsedSegmentsRetriever.java | 2 +- .../actions/RetrieveUsedSegmentsAction.java | 35 ++-- .../common/task/KillUnusedSegmentsTask.java | 1 - ...rlordActionBasedUsedSegmentsRetriever.java | 2 +- .../actions/RetrieveSegmentsActionsTest.java | 28 ---- .../RetrieveUsedSegmentsActionSerdeTest.java | 21 +-- .../common/task/CompactionTaskRunTest.java | 13 +- ...stractParallelIndexSupervisorTaskTest.java | 2 +- .../ConcurrentReplaceAndAppendTest.java | 1 - ...TestIndexerMetadataStorageCoordinator.java | 1 - .../IndexerMetadataStorageCoordinator.java | 8 +- .../IndexerSQLMetadataStorageCoordinator.java | 25 ++- .../metadata/SqlSegmentsMetadataManager.java | 4 +- .../metadata/SqlSegmentsMetadataQuery.java | 5 +- .../druid/server/http/MetadataResource.java | 2 +- ...exerSQLMetadataStorageCoordinatorTest.java | 158 ------------------ 16 files changed, 38 insertions(+), 270 deletions(-) diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/path/MetadataStoreBasedUsedSegmentsRetriever.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/path/MetadataStoreBasedUsedSegmentsRetriever.java index df9e3297ee57..e1afb4cb6b62 100644 --- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/path/MetadataStoreBasedUsedSegmentsRetriever.java +++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/path/MetadataStoreBasedUsedSegmentsRetriever.java @@ -51,6 +51,6 @@ public Collection retrieveUsedSegmentsForIntervals( Segments visibility ) { - return indexerMetadataStorageCoordinator.retrieveUsedSegmentsForIntervals(dataSource, intervals, null, visibility); + return indexerMetadataStorageCoordinator.retrieveUsedSegmentsForIntervals(dataSource, intervals, visibility); } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java index 3269d317fe0a..a3dd3ea5c3ab 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java @@ -73,9 +73,6 @@ public class RetrieveUsedSegmentsAction implements TaskAction intervals; - @JsonIgnore - private final List versions; - @JsonIgnore private final Segments visibility; @@ -84,7 +81,6 @@ public RetrieveUsedSegmentsAction( @JsonProperty("dataSource") String dataSource, @Deprecated @JsonProperty("interval") Interval interval, @JsonProperty("intervals") Collection intervals, - @JsonProperty("versions") @Nullable List versions, // When JSON object is deserialized, this parameter is optional for backward compatibility. // Otherwise, it shouldn't be considered optional. @JsonProperty("visibility") @Nullable Segments visibility @@ -104,14 +100,14 @@ public RetrieveUsedSegmentsAction( theIntervals = JodaUtils.condenseIntervals(intervals); } this.intervals = Preconditions.checkNotNull(theIntervals, "no intervals found"); - this.versions = versions; + // Defaulting to the former behaviour when visibility wasn't explicitly specified for backward compatibility this.visibility = visibility != null ? visibility : Segments.ONLY_VISIBLE; } public RetrieveUsedSegmentsAction(String dataSource, Collection intervals) { - this(dataSource, null, intervals, null, Segments.ONLY_VISIBLE); + this(dataSource, null, intervals, Segments.ONLY_VISIBLE); } @JsonProperty @@ -126,13 +122,6 @@ public List getIntervals() return intervals; } - @Nullable - @JsonProperty - public List getVersions() - { - return versions; - } - @JsonProperty public Segments getVisibility() { @@ -218,7 +207,7 @@ public Collection perform(Task task, TaskActionToolbox toolbox) private Collection retrieveUsedSegments(TaskActionToolbox toolbox) { return toolbox.getIndexerMetadataStorageCoordinator() - .retrieveUsedSegmentsForIntervals(dataSource, intervals, versions, visibility); + .retrieveUsedSegmentsForIntervals(dataSource, intervals, visibility); } @Override @@ -236,17 +225,22 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } + RetrieveUsedSegmentsAction that = (RetrieveUsedSegmentsAction) o; - return Objects.equals(dataSource, that.dataSource) - && Objects.equals(intervals, that.intervals) - && Objects.equals(versions, that.versions) - && visibility == that.visibility; + + if (!dataSource.equals(that.dataSource)) { + return false; + } + if (!intervals.equals(that.intervals)) { + return false; + } + return visibility.equals(that.visibility); } @Override public int hashCode() { - return Objects.hash(dataSource, intervals, versions, visibility); + return Objects.hash(dataSource, intervals, visibility); } @Override @@ -255,8 +249,7 @@ public String toString() return getClass().getSimpleName() + "{" + "dataSource='" + dataSource + '\'' + ", intervals=" + intervals + - ", versions=" + versions + ", visibility=" + visibility + '}'; } -} +} \ No newline at end of file diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index b9207bccd24c..dbefb7cca65a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -239,7 +239,6 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception getDataSource(), null, ImmutableList.of(getInterval()), - getVersions(), Segments.INCLUDING_OVERSHADOWED ); // Fetch the load specs of all segments overlapping with the unused segment intervals diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/hadoop/OverlordActionBasedUsedSegmentsRetriever.java b/indexing-service/src/main/java/org/apache/druid/indexing/hadoop/OverlordActionBasedUsedSegmentsRetriever.java index 3846eb10a19b..73bc411fb01d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/hadoop/OverlordActionBasedUsedSegmentsRetriever.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/hadoop/OverlordActionBasedUsedSegmentsRetriever.java @@ -53,6 +53,6 @@ public Collection retrieveUsedSegmentsForIntervals( { return toolbox .getTaskActionClient() - .submit(new RetrieveUsedSegmentsAction(dataSource, null, intervals, null, visibility)); + .submit(new RetrieveUsedSegmentsAction(dataSource, null, intervals, visibility)); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java index 9420caf9d220..9225d0b14514 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java @@ -114,34 +114,6 @@ public void testRetrieveUsedSegmentsAction() Assert.assertEquals(expectedUsedSegments, actualSegments); } - @Test - public void testRetrieveVisibleOnlyUsedSegmentsWithVersions() - { - final RetrieveUsedSegmentsAction action = new RetrieveUsedSegmentsAction( - task.getDataSource(), - null, - ImmutableList.of(INTERVAL), - VERSIONS, - Segments.ONLY_VISIBLE - ); - final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUsedSegments, actualSegments); - } - - @Test - public void testRetrieveIncludingOvershadowedUsedSegmentsWithVersions() - { - final RetrieveUsedSegmentsAction action = new RetrieveUsedSegmentsAction( - task.getDataSource(), - null, - ImmutableList.of(INTERVAL), - VERSIONS, - Segments.INCLUDING_OVERSHADOWED - ); - final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUsedSegments, actualSegments); - } - @Test public void testRetrieveUnusedSegmentsActionWithVersion() { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java index f6aea3bf25d9..3e7859cd01f5 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java @@ -43,7 +43,7 @@ public void testSingleIntervalSerde() throws Exception Interval interval = Intervals.of("2014/2015"); RetrieveUsedSegmentsAction expected = - new RetrieveUsedSegmentsAction("dataSource", interval, null, null, Segments.ONLY_VISIBLE); + new RetrieveUsedSegmentsAction("dataSource", interval, null, Segments.ONLY_VISIBLE); RetrieveUsedSegmentsAction actual = MAPPER.readValue(MAPPER.writeValueAsString(expected), RetrieveUsedSegmentsAction.class); @@ -66,23 +66,6 @@ public void testMultiIntervalSerde() throws Exception Assert.assertEquals(expected, actual); } - @Test - public void testMultiIntervalWithVersionSerde() throws Exception - { - List intervals = ImmutableList.of(Intervals.of("2014/2015"), Intervals.of("2016/2017")); - RetrieveUsedSegmentsAction expected = new RetrieveUsedSegmentsAction( - "dataSource", - null, - intervals, - ImmutableList.of(DateTimes.nowUtc().toString()), - Segments.ONLY_VISIBLE - ); - - RetrieveUsedSegmentsAction actual = - MAPPER.readValue(MAPPER.writeValueAsString(expected), RetrieveUsedSegmentsAction.class); - Assert.assertEquals(expected, actual); - } - @Test public void testOldJsonDeserialization() throws Exception { @@ -90,7 +73,7 @@ public void testOldJsonDeserialization() throws Exception RetrieveUsedSegmentsAction actual = (RetrieveUsedSegmentsAction) MAPPER.readValue(jsonStr, TaskAction.class); Assert.assertEquals( - new RetrieveUsedSegmentsAction("test", Intervals.of("2014/2015"), null, null, Segments.ONLY_VISIBLE), + new RetrieveUsedSegmentsAction("test", Intervals.of("2014/2015"), null, Segments.ONLY_VISIBLE), actual ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java index 32e260d918d2..03994e35e518 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java @@ -196,7 +196,7 @@ public ListenableFuture> fetchUsedSegments( { return Futures.immediateFuture( ImmutableList.copyOf( - getStorageCoordinator().retrieveUsedSegmentsForIntervals(dataSource, intervals, null, Segments.ONLY_VISIBLE) + getStorageCoordinator().retrieveUsedSegmentsForIntervals(dataSource, intervals, Segments.ONLY_VISIBLE) ) ); } @@ -1119,7 +1119,6 @@ public void testCompactThenAppend() throws Exception getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE ) ); @@ -1186,7 +1185,6 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T00:00:00/2014-01-01T01:00:00")), - null, Segments.ONLY_VISIBLE ) ); @@ -1195,7 +1193,6 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T02:00:00/2014-01-01T03:00:00")), - null, Segments.ONLY_VISIBLE ) ); @@ -1208,7 +1205,6 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE ) ); @@ -1250,7 +1246,6 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE ) ); @@ -1346,7 +1341,6 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T00:00:00/2014-01-01T01:00:00")), - null, Segments.ONLY_VISIBLE ) ); @@ -1355,7 +1349,6 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T02:00:00/2014-01-01T03:00:00")), - null, Segments.ONLY_VISIBLE ) ); @@ -1368,7 +1361,6 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE ) ); @@ -1418,7 +1410,6 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE ) ); @@ -1445,7 +1436,6 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE ) ); @@ -1465,7 +1455,6 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva getStorageCoordinator().retrieveUsedSegmentsForIntervals( DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), - null, Segments.ONLY_VISIBLE ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index 420c93f13d71..f438b7bba844 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -1042,7 +1042,7 @@ public ListenableFuture> fetchUsedSegments( { return exec.submit( () -> ImmutableList.copyOf( - getStorageCoordinator().retrieveUsedSegmentsForIntervals(dataSource, intervals, null, Segments.ONLY_VISIBLE) + getStorageCoordinator().retrieveUsedSegmentsForIntervals(dataSource, intervals, Segments.ONLY_VISIBLE) ) ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java index a24c7245aa81..91c7d6a71755 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java @@ -977,7 +977,6 @@ private void verifySegments(Interval interval, Segments visibility, DataSegment. WIKI, null, ImmutableList.of(interval), - null, visibility ) ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 827f798eb7f4..37915c37114a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -98,7 +98,6 @@ public List> retrieveUsedSegmentsAndCreatedDates(Strin public List retrieveUsedSegmentsForIntervals( String dataSource, List intervals, - @Nullable List versions, Segments visibility ) { diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 6ef8eea2af0b..72abff93df7a 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -68,7 +68,7 @@ default Collection retrieveUsedSegmentsForInterval( Segments visibility ) { - return retrieveUsedSegmentsForIntervals(dataSource, Collections.singletonList(interval), null, visibility); + return retrieveUsedSegmentsForIntervals(dataSource, Collections.singletonList(interval), visibility); } /** @@ -109,8 +109,6 @@ default Collection retrieveUsedSegmentsForInterval( * * @param dataSource The data source to query * @param intervals The intervals for which all applicable and used segments are requested. - * @param versions An optional list of segment versions to retrieve in the given {@code intervals}. If unspecified, - * all versions of used segments in the {@code intervals} must be retrieved. * @param visibility Whether only visible or visible as well as overshadowed segments should be returned. The * visibility is considered within the specified intervals: that is, a segment which is visible * outside of the specified intervals, but overshadowed on the specified intervals will not be @@ -127,7 +125,6 @@ default Collection retrieveUsedSegmentsForInterval( Collection retrieveUsedSegmentsForIntervals( String dataSource, List intervals, - @Nullable List versions, Segments visibility ); @@ -150,7 +147,8 @@ default List retrieveUnusedSegmentsForInterval( Interval interval, @Nullable Integer limit, @Nullable DateTime maxUsedStatusLastUpdatedTime - ) { + ) + { return retrieveUnusedSegmentsForInterval(dataSource, interval, null, limit, maxUsedStatusLastUpdatedTime); } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index e9140a7db085..c886af6a6d93 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -136,20 +136,19 @@ public void start() public Collection retrieveUsedSegmentsForIntervals( final String dataSource, final List intervals, - @Nullable final List versions, final Segments visibility ) { if (intervals == null || intervals.isEmpty()) { throw new IAE("null/empty intervals"); } - return doRetrieveUsedSegments(dataSource, intervals, versions, visibility); + return doRetrieveUsedSegments(dataSource, intervals, visibility); } @Override public Collection retrieveAllUsedSegments(String dataSource, Segments visibility) { - return doRetrieveUsedSegments(dataSource, Collections.emptyList(), null, visibility); + return doRetrieveUsedSegments(dataSource, Collections.emptyList(), visibility); } /** @@ -158,7 +157,6 @@ public Collection retrieveAllUsedSegments(String dataSource, Segmen private Collection doRetrieveUsedSegments( final String dataSource, final List intervals, - @Nullable final List versions, final Segments visibility ) { @@ -166,10 +164,10 @@ private Collection doRetrieveUsedSegments( handle -> { if (visibility == Segments.ONLY_VISIBLE) { final SegmentTimeline timeline = - getTimelineForIntervalsWithHandle(handle, dataSource, intervals, versions); + getTimelineForIntervalsWithHandle(handle, dataSource, intervals); return timeline.findNonOvershadowedObjectsInInterval(Intervals.ETERNITY, Partitions.ONLY_COMPLETE); } else { - return retrieveAllUsedSegmentsForIntervalsWithHandle(handle, dataSource, intervals, versions); + return retrieveAllUsedSegmentsForIntervalsWithHandle(handle, dataSource, intervals); } } ); @@ -378,13 +376,12 @@ private Map getPendingSegmentsForIntervalWithHan private SegmentTimeline getTimelineForIntervalsWithHandle( final Handle handle, final String dataSource, - final List intervals, - @Nullable final List versions + final List intervals ) throws IOException { try (final CloseableIterator iterator = SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUsedSegments(dataSource, intervals, versions)) { + .retrieveUsedSegments(dataSource, intervals)) { return SegmentTimeline.forSegments(iterator); } } @@ -392,13 +389,12 @@ private SegmentTimeline getTimelineForIntervalsWithHandle( private Collection retrieveAllUsedSegmentsForIntervalsWithHandle( final Handle handle, final String dataSource, - final List intervals, - @Nullable final List versions + final List intervals ) throws IOException { try (final CloseableIterator iterator = SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUsedSegments(dataSource, intervals, versions)) { + .retrieveUsedSegments(dataSource, intervals)) { final List retVal = new ArrayList<>(); iterator.forEachRemaining(retVal::add); return retVal; @@ -674,7 +670,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( handle -> { // Get the time chunk and associated data segments for the given interval, if any final List> existingChunks = - getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval), null) + getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval)) .lookup(interval); if (existingChunks.size() > 1) { // Not possible to expand more than one chunk with a single segment. @@ -927,7 +923,7 @@ private Map allocatePendingSegment { // Get the time chunk and associated data segments for the given interval, if any final List> existingChunks = - getTimelineForIntervalsWithHandle(handle, dataSource, Collections.singletonList(interval), null) + getTimelineForIntervalsWithHandle(handle, dataSource, Collections.singletonList(interval)) .lookup(interval); if (existingChunks.size() > 1) { log.warn( @@ -1462,7 +1458,6 @@ private Set createNewIdsForAppendSegments( final Collection overlappingSegments = retrieveUsedSegmentsForIntervals( dataSource, new ArrayList<>(appendIntervals), - null, Segments.INCLUDING_OVERSHADOWED ); diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java index 97469093b489..028f73ee42a7 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java @@ -680,7 +680,7 @@ private int doMarkAsUsedNonOvershadowedSegments(String dataSourceName, @Nullable interval == null ? Intervals.ONLY_ETERNITY : Collections.singletonList(interval); try (final CloseableIterator iterator = - queryTool.retrieveUsedSegments(dataSourceName, intervals, null)) { + queryTool.retrieveUsedSegments(dataSourceName, intervals)) { timeline.addSegments(iterator); } @@ -817,7 +817,7 @@ private CloseableIterator retrieveUsedSegmentsOverlappingIntervals( ) { return SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper) - .retrieveUsedSegments(dataSource, intervals, null); + .retrieveUsedSegments(dataSource, intervals); } private int markSegmentsAsUsed(final List segmentIds) diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index aa57866532aa..822138975143 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -117,14 +117,13 @@ public static SqlSegmentsMetadataQuery forHandle( */ public CloseableIterator retrieveUsedSegments( final String dataSource, - final Collection intervals, - @Nullable final List versions + final Collection intervals ) { return retrieveSegments( dataSource, intervals, - versions, + null, IntervalMode.OVERLAPS, true, null, diff --git a/server/src/main/java/org/apache/druid/server/http/MetadataResource.java b/server/src/main/java/org/apache/druid/server/http/MetadataResource.java index b5adbf3424b7..c53a998e238d 100644 --- a/server/src/main/java/org/apache/druid/server/http/MetadataResource.java +++ b/server/src/main/java/org/apache/druid/server/http/MetadataResource.java @@ -327,7 +327,7 @@ public Response getUsedSegmentsInDataSourceForIntervals( ) { Collection segments = metadataStorageCoordinator - .retrieveUsedSegmentsForIntervals(dataSourceName, intervals, null, Segments.INCLUDING_OVERSHADOWED); + .retrieveUsedSegmentsForIntervals(dataSourceName, intervals, Segments.INCLUDING_OVERSHADOWED); Response.ResponseBuilder builder = Response.status(Response.Status.OK); if (full != null) { diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 66965406ff5c..31035ca316cd 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -1068,7 +1068,6 @@ public void testMultiIntervalUsedList() throws IOException coordinator.retrieveUsedSegmentsForIntervals( defaultSegment.getDataSource(), ImmutableList.of(defaultSegment.getInterval()), - null, Segments.ONLY_VISIBLE ) ).containsOnlyOnce(SEGMENTS.toArray(new DataSegment[0])); @@ -1077,7 +1076,6 @@ public void testMultiIntervalUsedList() throws IOException coordinator.retrieveUsedSegmentsForIntervals( defaultSegment.getDataSource(), ImmutableList.of(defaultSegment3.getInterval()), - null, Segments.ONLY_VISIBLE ) ).containsOnlyOnce(defaultSegment3); @@ -1086,7 +1084,6 @@ public void testMultiIntervalUsedList() throws IOException coordinator.retrieveUsedSegmentsForIntervals( defaultSegment.getDataSource(), ImmutableList.of(defaultSegment.getInterval(), defaultSegment3.getInterval()), - null, Segments.ONLY_VISIBLE ) ).containsOnlyOnce(defaultSegment, defaultSegment2, defaultSegment3); @@ -1099,163 +1096,11 @@ public void testMultiIntervalUsedList() throws IOException Intervals.of("2015-01-03T00Z/2015-01-03T05Z"), Intervals.of("2015-01-03T09Z/2015-01-04T00Z") ), - null, Segments.ONLY_VISIBLE ) ).containsOnlyOnce(defaultSegment3); } - @Test - public void testRetrieveVisibleOnlyUsedSegmentsWithVersions() throws IOException - { - final DateTime now = DateTimes.nowUtc(); - final String v1 = now.toString(); - final String v2 = now.plusDays(2).toString(); - final String v3 = now.plusDays(3).toString(); - - final DataSegment segment1 = createSegment( - Intervals.of("2023-01-01/2023-01-02"), - v1, - new LinearShardSpec(0) - ); - final DataSegment segment2 = createSegment( - Intervals.of("2023-01-02/2023-01-03"), - v2, - new LinearShardSpec(0) - ); - final DataSegment segment3 = createSegment( - Intervals.of("2023-01-03/2023-01-04"), - v3, - new LinearShardSpec(0) - ); - - final ImmutableSet usedSegments = ImmutableSet.of(segment1, segment2, segment3); - Assert.assertEquals(usedSegments, coordinator.commitSegments(usedSegments)); - - for (DataSegment usedSegment : usedSegments) { - Assertions.assertThat( - coordinator.retrieveUsedSegmentsForIntervals( - DS.WIKI, - ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - ImmutableList.of(usedSegment.getVersion()), - Segments.ONLY_VISIBLE - ) - ).contains(usedSegment); - } - - Assertions.assertThat( - coordinator.retrieveUsedSegmentsForIntervals( - DS.WIKI, - ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - ImmutableList.of(v1, v2), - Segments.ONLY_VISIBLE - ) - ).contains(segment1, segment2); - - Assertions.assertThat( - coordinator.retrieveUsedSegmentsForIntervals( - DS.WIKI, - ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - null, - Segments.ONLY_VISIBLE - ) - ).containsAll(usedSegments); - - Assertions.assertThat( - coordinator.retrieveUsedSegmentsForIntervals( - DS.WIKI, - ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - ImmutableList.of("some-non-existent-version"), - Segments.ONLY_VISIBLE - ) - ).containsAll(ImmutableList.of()); - } - - @Test - public void testRetrieveIncludingOvershadowedUsedSegmentsWithVersions() throws IOException - { - final DateTime now = DateTimes.nowUtc(); - final String v1 = now.toString(); - final String v2 = now.plusDays(2).toString(); - final String v3 = now.plusDays(3).toString(); - final String v4 = now.plusDays(4).toString(); - - final DataSegment segment1 = createSegment( - Intervals.of("2023-01-01/2023-01-02"), - v1, - new LinearShardSpec(0) - ); - final DataSegment segment2 = createSegment( - Intervals.of("2023-01-02/2023-01-03"), - v2, - new LinearShardSpec(0) - ); - - // segment3 and segment4 are in the same interval. However, segment4 should overshadow segment3 - // as segment4 contains a higher version than segment3. - final DataSegment segment3 = createSegment( - Intervals.of("2023-01-03/2023-01-04"), - v3, - new LinearShardSpec(0) - ); - final DataSegment segment4 = createSegment( - Intervals.of("2023-01-03/2023-01-04"), - v4, - new LinearShardSpec(0) - ); - - final ImmutableSet allSegments = ImmutableSet.of(segment1, segment2, segment3, segment4); - Assert.assertEquals(allSegments, coordinator.commitSegments(ImmutableSet.of(segment1, segment2, segment3, segment4))); - final ImmutableSet usedSegments = ImmutableSet.of(segment1, segment2, segment4); - - for (DataSegment usedSegment : usedSegments) { - Assertions.assertThat( - coordinator.retrieveUsedSegmentsForIntervals( - DS.WIKI, - ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - ImmutableList.of(usedSegment.getVersion()), - Segments.INCLUDING_OVERSHADOWED - ) - ).contains(usedSegment); - } - - Assertions.assertThat( - coordinator.retrieveUsedSegmentsForIntervals( - DS.WIKI, - ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - ImmutableList.of(v1, v2), - Segments.INCLUDING_OVERSHADOWED - ) - ).contains(segment1, segment2); - - Assertions.assertThat( - coordinator.retrieveUsedSegmentsForIntervals( - DS.WIKI, - ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - ImmutableList.of(v3), - Segments.INCLUDING_OVERSHADOWED - ) - ).containsAll(ImmutableList.of()); - - Assertions.assertThat( - coordinator.retrieveUsedSegmentsForIntervals( - DS.WIKI, - ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - null, - Segments.INCLUDING_OVERSHADOWED - ) - ).containsAll(usedSegments); - - Assertions.assertThat( - coordinator.retrieveUsedSegmentsForIntervals( - DS.WIKI, - ImmutableList.of(Intervals.of("2023-01-01/2023-01-04")), - ImmutableList.of("some-non-existent-version"), - Segments.INCLUDING_OVERSHADOWED - ) - ).containsAll(ImmutableList.of()); - } - @Test public void testRetrieveUsedSegmentsUsingMultipleIntervals() throws IOException { @@ -1265,7 +1110,6 @@ public void testRetrieveUsedSegmentsUsingMultipleIntervals() throws IOException final Collection actualUsedSegments = coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, intervals, - null, Segments.ONLY_VISIBLE ); @@ -1285,7 +1129,6 @@ public void testRetrieveAllUsedSegmentsUsingIntervalsOutOfRange() throws IOExcep final Collection actualUsedSegments = coordinator.retrieveUsedSegmentsForIntervals( DS.WIKI, ImmutableList.of(outOfRangeInterval), - null, Segments.ONLY_VISIBLE ); @@ -2203,7 +2046,6 @@ public void testUsedHugeTimeRangeEternityFilter() throws IOException coordinator.retrieveUsedSegmentsForIntervals( hugeTimeRangeSegment1.getDataSource(), Intervals.ONLY_ETERNITY, - null, Segments.ONLY_VISIBLE ) ) From 4341955e46575fa397f6ecad5eccb75ef471f053 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 11 Mar 2024 16:18:09 +0530 Subject: [PATCH 11/15] Some updates. --- .../actions/RetrieveUsedSegmentsAction.java | 2 +- .../actions/RetrieveSegmentsActionsTest.java | 28 +++++++------------ .../RetrieveUsedSegmentsActionSerdeTest.java | 1 - .../task/KillUnusedSegmentsTaskTest.java | 15 +--------- 4 files changed, 12 insertions(+), 34 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java index a3dd3ea5c3ab..29986eeba555 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsAction.java @@ -252,4 +252,4 @@ public String toString() ", visibility=" + visibility + '}'; } -} \ No newline at end of file +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java index 9225d0b14514..5f6e5db0185a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java @@ -23,7 +23,6 @@ import com.google.common.collect.ImmutableSet; import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.overlord.Segments; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.timeline.DataSegment; @@ -35,21 +34,14 @@ import org.junit.Test; import java.io.IOException; -import java.util.Arrays; import java.util.HashSet; -import java.util.List; import java.util.Set; public class RetrieveSegmentsActionsTest { private static final Interval INTERVAL = Intervals.of("2017-10-01/2017-10-15"); - private static final String UNUSED_V1 = "unused_v1"; - private static final String UNUSED_V2 = "unused_v2"; - - private static final String USED_V1 = "used_v1"; - private static final String USED_V2 = "used_v2"; - - private static final List VERSIONS = Arrays.asList(USED_V1, USED_V2, UNUSED_V1, UNUSED_V2); + private static final String UNUSED_V0 = "v0"; + private static final String UNUSED_V1 = "v1"; @ClassRule public static TaskActionTestKit actionTestKit = new TaskActionTestKit(); @@ -66,11 +58,11 @@ public static void setup() throws IOException actionTestKit.getTaskLockbox().add(task); expectedUnusedSegments = new HashSet<>(); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), UNUSED_V1)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), UNUSED_V0)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), UNUSED_V0)); + expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), UNUSED_V0)); expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), UNUSED_V1)); expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), UNUSED_V1)); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), UNUSED_V2)); - expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), UNUSED_V2)); actionTestKit.getMetadataStorageCoordinator() .commitSegments(expectedUnusedSegments); @@ -78,9 +70,9 @@ public static void setup() throws IOException expectedUnusedSegments.forEach(s -> actionTestKit.getTaskLockbox().unlock(task, s.getInterval())); expectedUsedSegments = new HashSet<>(); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), USED_V1)); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), USED_V2)); - expectedUsedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), USED_V2)); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-05/2017-10-06"), "2")); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-06/2017-10-07"), "2")); + expectedUsedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), "2")); actionTestKit.getMetadataStorageCoordinator() .commitSegments(expectedUsedSegments); @@ -115,12 +107,12 @@ public void testRetrieveUsedSegmentsAction() } @Test - public void testRetrieveUnusedSegmentsActionWithVersion() + public void testRetrieveUnusedSegmentsActionWithVersions() { final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction( task.getDataSource(), INTERVAL, - VERSIONS, + ImmutableList.of(UNUSED_V0, UNUSED_V1), null, null ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java index 3e7859cd01f5..99675fd57bb1 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveUsedSegmentsActionSerdeTest.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import org.apache.druid.indexing.overlord.Segments; -import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.segment.TestHelper; import org.joda.time.Interval; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java index ad9a9a580966..a858f34b63e3 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java @@ -181,7 +181,7 @@ public void testKillWithMarkUnused() throws Exception @Test - public void testKillVersion() throws Exception + public void testKillWithVersions() throws Exception { final DateTime version = DateTimes.nowUtc(); final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); @@ -224,7 +224,6 @@ public void testKillVersion() throws Exception DATA_SOURCE, Intervals.of("2018/2020"), null, - null, null ); @@ -276,7 +275,6 @@ public void testKillMultipleSegmentsWithVersions() throws Exception DATA_SOURCE, Intervals.of("2018/2020"), null, - null, null ); @@ -328,7 +326,6 @@ public void testKillMultipleSegmentsWithVersionAndLimit() throws Exception DATA_SOURCE, Intervals.of("2018/2020"), null, - null, null ); @@ -380,7 +377,6 @@ public void testKillWithNonExistentVersion() throws Exception DATA_SOURCE, Intervals.of("2018/2020"), null, - null, null ); @@ -449,7 +445,6 @@ public void testKillBatchSizeOneAndLimit4() throws Exception DATA_SOURCE, Intervals.of("2019/2020"), null, - null, null ); @@ -527,7 +522,6 @@ public void testKillMultipleUnusedSegmentsWithNullMaxUsedStatusLastUpdatedTime() DATA_SOURCE, umbrellaInterval, null, - null, null ); @@ -621,7 +615,6 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT DATA_SOURCE, umbrellaInterval, null, - null, null ); @@ -648,7 +641,6 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT DATA_SOURCE, umbrellaInterval, null, - null, null ); @@ -738,7 +730,6 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT DATA_SOURCE, umbrellaInterval, null, - null, null ); @@ -764,7 +755,6 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT DATA_SOURCE, umbrellaInterval, null, - null, null ); @@ -836,7 +826,6 @@ public void testKillMultipleUnusedSegmentsWithVersionAndDifferentLastUpdatedTime DATA_SOURCE, umbrellaInterval, null, - null, null ); @@ -866,7 +855,6 @@ public void testKillMultipleUnusedSegmentsWithVersionAndDifferentLastUpdatedTime DATA_SOURCE, umbrellaInterval, null, - null, null ); @@ -908,7 +896,6 @@ public void testKillBatchSizeThree() throws Exception DATA_SOURCE, Intervals.of("2019/2020"), null, - null, null ); From 61fb960da91e6ebb9b153dd40653a0ce6f958414 Mon Sep 17 00:00:00 2001 From: Abhishek Radhakrishnan Date: Mon, 11 Mar 2024 20:34:50 +0800 Subject: [PATCH 12/15] Apply suggestions from code review Co-authored-by: Kashif Faraz --- .../client/indexing/ClientKillUnusedSegmentsTaskQuery.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java index 099ab2ada838..b2d1953f0cce 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java +++ b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java @@ -41,7 +41,8 @@ public class ClientKillUnusedSegmentsTaskQuery implements ClientTaskQuery private final String id; private final String dataSource; private final Interval interval; - @Nullable private final List versions; + @Nullable + private final List versions; private final Boolean markAsUnused; private final Integer batchSize; @Nullable private final Integer limit; @@ -52,7 +53,7 @@ public ClientKillUnusedSegmentsTaskQuery( @JsonProperty("id") String id, @JsonProperty("dataSource") String dataSource, @JsonProperty("interval") Interval interval, - @JsonProperty("versions") List versions, + @JsonProperty("versions") @Nullable List versions, @JsonProperty("markAsUnused") @Deprecated Boolean markAsUnused, @JsonProperty("batchSize") Integer batchSize, @JsonProperty("limit") @Nullable Integer limit, From 607006e6d5a9e73e74bfdc038cd2a35e775ee399 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 11 Mar 2024 18:10:32 +0530 Subject: [PATCH 13/15] /s/actual/observed/g --- .../actions/RetrieveSegmentsActionsTest.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java index 5f6e5db0185a..e61cf4128b4e 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java @@ -102,8 +102,8 @@ public void testRetrieveUsedSegmentsAction() { final RetrieveUsedSegmentsAction action = new RetrieveUsedSegmentsAction(task.getDataSource(), ImmutableList.of(INTERVAL)); - final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUsedSegments, actualSegments); + final Set observedUsedSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(expectedUsedSegments, observedUsedSegments); } @Test @@ -116,23 +116,23 @@ public void testRetrieveUnusedSegmentsActionWithVersions() null, null ); - final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUnusedSegments, actualSegments); + final Set observedUnusedSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(expectedUnusedSegments, observedUnusedSegments); } @Test public void testRetrieveUnusedSegmentsActionWithMinUsedLastUpdatedTime() { final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, null, DateTimes.MIN); - final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(ImmutableSet.of(), actualSegments); + final Set observedUnusedSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(ImmutableSet.of(), observedUnusedSegments); } @Test public void testRetrieveUnusedSegmentsActionWithNowUsedLastUpdatedTime() { final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null, null, DateTimes.nowUtc()); - final Set actualSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); - Assert.assertEquals(expectedUnusedSegments, actualSegments); + final Set observedUnusedSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); + Assert.assertEquals(expectedUnusedSegments, observedUnusedSegments); } } From bcb578ed51775b621519ff1d98c7c23f6968e10c Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 11 Mar 2024 18:28:23 +0530 Subject: [PATCH 14/15] minor test cleanup --- .../common/task/KillUnusedSegmentsTask.java | 8 +- .../task/KillUnusedSegmentsTaskTest.java | 107 ++++++------------ 2 files changed, 37 insertions(+), 78 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index dbefb7cca65a..64007441153c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -254,11 +254,9 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception break; } - unusedSegments = toolbox - .getTaskActionClient() - .submit( - new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), getVersions(), nextBatchSize, maxUsedStatusLastUpdatedTime) - ); + unusedSegments = toolbox.getTaskActionClient().submit( + new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), getVersions(), nextBatchSize, maxUsedStatusLastUpdatedTime) + ); // Fetch locks each time as a revokal could have occurred in between batches final NavigableMap> taskLockMap diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java index a858f34b63e3..92c485d49947 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java @@ -179,66 +179,19 @@ public void testKillWithMarkUnused() throws Exception Assert.assertEquals(new KillTaskReport.Stats(1, 2, 1), getReportedStats()); } - @Test - public void testKillWithVersions() throws Exception + public void testKillSegmentsWithVersions() throws Exception { - final DateTime version = DateTimes.nowUtc(); - final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); - final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.minusHours(1).toString()); - final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.minusHours(2).toString()); - final DataSegment segment4 = newSegment(Intervals.of("2019-04-01/2019-05-01"), version.minusHours(3).toString()); - - final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4); - - Assert.assertEquals( - segments, - getMetadataStorageCoordinator().commitSegments(segments) - ); - Assert.assertEquals( - segments.size(), - getSegmentsMetadataManager().markSegmentsAsUnused( - segments.stream().map(DataSegment::getId).collect(Collectors.toSet()) - ) - ); - - final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( - null, - DATA_SOURCE, - Intervals.of("2018/2020"), - ImmutableList.of(segment3.getVersion()), - null, - false, - 3, - null, - null - ); - - Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); - Assert.assertEquals( - new KillTaskReport.Stats(1, 2, 0), - getReportedStats() - ); + final DateTime now = DateTimes.nowUtc(); + final String v1 = now.toString(); + final String v2 = now.minusHours(2).toString(); + final String v3 = now.minusHours(3).toString(); - final List unusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( - DATA_SOURCE, - Intervals.of("2018/2020"), - null, - null - ); - - Assert.assertEquals(Arrays.asList(segment1, segment2, segment4), unusedSegments); - } - - @Test - public void testKillMultipleSegmentsWithVersions() throws Exception - { - final DateTime version = DateTimes.nowUtc(); - final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); - final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); - final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); - final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); - final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v1); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), v1); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), v1); + final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v2); + final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v3); final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); @@ -257,7 +210,7 @@ public void testKillMultipleSegmentsWithVersions() throws Exception null, DATA_SOURCE, Intervals.of("2018/2020"), - ImmutableList.of(version.toString(), version.minusHours(2).toString()), + ImmutableList.of(v1, v2), null, false, 3, @@ -282,14 +235,18 @@ public void testKillMultipleSegmentsWithVersions() throws Exception } @Test - public void testKillMultipleSegmentsWithVersionAndLimit() throws Exception + public void testKillSegmentsWithVersionsAndLimit() throws Exception { - final DateTime version = DateTimes.nowUtc(); - final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); - final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); - final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); - final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); - final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); + final DateTime now = DateTimes.nowUtc(); + final String v1 = now.toString(); + final String v2 = now.minusHours(2).toString(); + final String v3 = now.minusHours(3).toString(); + + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v1); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), v1); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), v1); + final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v2); + final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v3); final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); @@ -308,7 +265,7 @@ public void testKillMultipleSegmentsWithVersionAndLimit() throws Exception null, DATA_SOURCE, Intervals.of("2018/2020"), - ImmutableList.of(version.toString()), + ImmutableList.of(v1), null, false, 3, @@ -335,12 +292,16 @@ public void testKillMultipleSegmentsWithVersionAndLimit() throws Exception @Test public void testKillWithNonExistentVersion() throws Exception { - final DateTime version = DateTimes.nowUtc(); - final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.toString()); - final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version.toString()); - final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version.toString()); - final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(2).toString()); - final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version.minusHours(3).toString()); + final DateTime now = DateTimes.nowUtc(); + final String v1 = now.toString(); + final String v2 = now.minusHours(2).toString(); + final String v3 = now.minusHours(3).toString(); + + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v1); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), v1); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), v1); + final DataSegment segment4 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v2); + final DataSegment segment5 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v3); final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); @@ -359,7 +320,7 @@ public void testKillWithNonExistentVersion() throws Exception null, DATA_SOURCE, Intervals.of("2018/2020"), - ImmutableList.of(version.plusDays(100).toString()), + ImmutableList.of(now.plusDays(100).toString()), null, false, 3, From b316cb96eff886900e914a6dc33b0406acec62e0 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 11 Mar 2024 20:29:38 +0530 Subject: [PATCH 15/15] WIP: Test fixes and updates. Also add test for kill by version with used load spec. Checkpoint. --- .../task/KillUnusedSegmentsTaskTest.java | 118 +++++++++++++----- ...TestIndexerMetadataStorageCoordinator.java | 18 +-- .../ClientKillUnusedSegmentsTaskQuery.java | 6 +- 3 files changed, 102 insertions(+), 40 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java index 92c485d49947..ddc9044849ce 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.druid.error.DruidException; import org.apache.druid.error.DruidExceptionMatcher; @@ -41,7 +42,6 @@ import org.junit.Before; import org.junit.Test; -import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -184,8 +184,8 @@ public void testKillSegmentsWithVersions() throws Exception { final DateTime now = DateTimes.nowUtc(); final String v1 = now.toString(); - final String v2 = now.minusHours(2).toString(); - final String v3 = now.minusHours(3).toString(); + final String v2 = now.minusHours(2).toString(); + final String v3 = now.minusHours(3).toString(); final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v1); final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), v1); @@ -195,10 +195,7 @@ public void testKillSegmentsWithVersions() throws Exception final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); - Assert.assertEquals( - segments, - getMetadataStorageCoordinator().commitSegments(segments) - ); + Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments)); Assert.assertEquals( segments.size(), getSegmentsMetadataManager().markSegmentsAsUnused( @@ -224,14 +221,14 @@ public void testKillSegmentsWithVersions() throws Exception getReportedStats() ); - final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + final List observedUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, Intervals.of("2018/2020"), null, null ); - Assert.assertEquals(ImmutableSet.of(segment5), new HashSet<>(actualUnusedSegments)); + Assert.assertEquals(ImmutableSet.of(segment5), new HashSet<>(observedUnusedSegments)); } @Test @@ -239,8 +236,8 @@ public void testKillSegmentsWithVersionsAndLimit() throws Exception { final DateTime now = DateTimes.nowUtc(); final String v1 = now.toString(); - final String v2 = now.minusHours(2).toString(); - final String v3 = now.minusHours(3).toString(); + final String v2 = now.minusHours(2).toString(); + final String v3 = now.minusHours(3).toString(); final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v1); final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), v1); @@ -250,10 +247,7 @@ public void testKillSegmentsWithVersionsAndLimit() throws Exception final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); - Assert.assertEquals( - segments, - getMetadataStorageCoordinator().commitSegments(segments) - ); + Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments)); Assert.assertEquals( segments.size(), getSegmentsMetadataManager().markSegmentsAsUnused( @@ -279,14 +273,14 @@ public void testKillSegmentsWithVersionsAndLimit() throws Exception getReportedStats() ); - final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + final List observedUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, Intervals.of("2018/2020"), null, null ); - Assert.assertEquals(ImmutableSet.of(segment3, segment4, segment5), new HashSet<>(actualUnusedSegments)); + Assert.assertEquals(ImmutableSet.of(segment3, segment4, segment5), new HashSet<>(observedUnusedSegments)); } @Test @@ -294,8 +288,8 @@ public void testKillWithNonExistentVersion() throws Exception { final DateTime now = DateTimes.nowUtc(); final String v1 = now.toString(); - final String v2 = now.minusHours(2).toString(); - final String v3 = now.minusHours(3).toString(); + final String v2 = now.minusHours(2).toString(); + final String v3 = now.minusHours(3).toString(); final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v1); final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), v1); @@ -305,10 +299,7 @@ public void testKillWithNonExistentVersion() throws Exception final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); - Assert.assertEquals( - segments, - getMetadataStorageCoordinator().commitSegments(segments) - ); + Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments)); Assert.assertEquals( segments.size(), getSegmentsMetadataManager().markSegmentsAsUnused( @@ -334,14 +325,68 @@ public void testKillWithNonExistentVersion() throws Exception getReportedStats() ); - final List actualUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + final List observedUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.of("2018/2020"), + null, + null + ); + + Assert.assertEquals(segments, new HashSet<>(observedUnusedSegments)); + } + + @Test + public void testKillWithUsedLoadSpec() throws Exception + { + final DateTime now = DateTimes.nowUtc(); + final String v1 = now.toString(); + final String v2 = now.minusHours(2).toString(); + final String v3 = now.minusHours(3).toString(); + + final DataSegment segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), v1, ImmutableMap.of("foo", "bar")); + final DataSegment segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), v1, ImmutableMap.of("foo", "bar")); + final DataSegment segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), v1, ImmutableMap.of("foo", "bar")); + final DataSegment segment4 = newSegment(Intervals.of("2019-04-01/2019-05-01"), v2, ImmutableMap.of("foo", "bar")); + final DataSegment segment5 = newSegment(Intervals.of("2019-05-01/2019-06-01"), v3, ImmutableMap.of("foo", "bar")); + + final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); + final Set unusedSegments = ImmutableSet.of(segment1, segment2, segment3, segment4); + + Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments)); + Assert.assertEquals( + unusedSegments.size(), + getSegmentsMetadataManager().markSegmentsAsUnused( + unusedSegments.stream().map(DataSegment::getId).collect(Collectors.toSet()) + ) + ); + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTask( + null, + DATA_SOURCE, + Intervals.of("2018/2020"), + ImmutableList.of(v1, v2), + null, + false, + 1, + 100, + null + ); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + Assert.assertEquals( + new KillTaskReport.Stats(4, 1, 0), + getReportedStats() + ); + + final List observedUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, Intervals.of("2018/2020"), null, null ); - Assert.assertEquals(segments, new HashSet<>(actualUnusedSegments)); + Assert.assertEquals(ImmutableSet.of(), new HashSet<>(observedUnusedSegments)); + Assert.assertEquals(ImmutableSet.of(segment5), getMetadataStorageCoordinator().retrieveAllUsedSegments(DATA_SOURCE, Segments.ONLY_VISIBLE)); } @Test @@ -782,7 +827,7 @@ public void testKillMultipleUnusedSegmentsWithVersionAndDifferentLastUpdatedTime getReportedStats() ); - final List actualUnusedSegments = + final List observedUnusedSegments = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, umbrellaInterval, @@ -790,7 +835,7 @@ public void testKillMultipleUnusedSegmentsWithVersionAndDifferentLastUpdatedTime null ); - Assert.assertEquals(ImmutableSet.of(segment3, segment4, segment5), new HashSet<>(actualUnusedSegments)); + Assert.assertEquals(ImmutableSet.of(segment3, segment4, segment5), new HashSet<>(observedUnusedSegments)); final KillUnusedSegmentsTask task2 = new KillUnusedSegmentsTask( @@ -811,7 +856,7 @@ public void testKillMultipleUnusedSegmentsWithVersionAndDifferentLastUpdatedTime getReportedStats() ); - final List actualUnusedSegments2 = + final List observedUnusedSegments2 = getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( DATA_SOURCE, umbrellaInterval, @@ -819,7 +864,7 @@ public void testKillMultipleUnusedSegmentsWithVersionAndDifferentLastUpdatedTime null ); - Assert.assertEquals(ImmutableSet.of(segment4, segment5), new HashSet<>(actualUnusedSegments2)); + Assert.assertEquals(ImmutableSet.of(segment4, segment5), new HashSet<>(observedUnusedSegments2)); } @Test @@ -1135,4 +1180,19 @@ private static DataSegment newSegment(Interval interval, String version) 10L ); } + + private static DataSegment newSegment(Interval interval, String version, Map loadSpec) + { + return new DataSegment( + DATA_SOURCE, + interval, + version, + loadSpec, + null, + null, + null, + 9, + 10L + ); + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 37915c37114a..9ad3be6e3619 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -112,14 +112,7 @@ public List retrieveUnusedSegmentsForInterval( @Nullable DateTime maxUsedStatusLastUpdatedTime ) { - synchronized (unusedSegments) { - return ImmutableList.copyOf( - unusedSegments.stream() - .filter(ds -> !nuked.contains(ds)) - .limit(limit != null ? limit : Long.MAX_VALUE) - .iterator() - ); - } + return retrieveUnusedSegmentsForInterval(dataSource, interval, null, limit, maxUsedStatusLastUpdatedTime); } @Override @@ -131,7 +124,14 @@ public List retrieveUnusedSegmentsForInterval( @Nullable DateTime maxUsedStatusLastUpdatedTime ) { - return ImmutableList.of(); + synchronized (unusedSegments) { + return ImmutableList.copyOf( + unusedSegments.stream() + .filter(ds -> !nuked.contains(ds)) + .limit(limit != null ? limit : Long.MAX_VALUE) + .iterator() + ); + } } @Override diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java index b2d1953f0cce..4dfad3c97c0b 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java +++ b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java @@ -45,8 +45,10 @@ public class ClientKillUnusedSegmentsTaskQuery implements ClientTaskQuery private final List versions; private final Boolean markAsUnused; private final Integer batchSize; - @Nullable private final Integer limit; - @Nullable private final DateTime maxUsedStatusLastUpdatedTime; + @Nullable + private final Integer limit; + @Nullable + private final DateTime maxUsedStatusLastUpdatedTime; @JsonCreator public ClientKillUnusedSegmentsTaskQuery(