From 142d548c85ac6fef88e2dac965d81b68c02aaec7 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Tue, 25 Jul 2023 12:47:39 -0700 Subject: [PATCH 01/23] * add a `maxSegmentsToKill` config property to `KillUnusedSegmentsTask` to allow for limiting the number of segments that are killed --- .../common/task/KillUnusedSegmentsTask.java | 21 +++- ...tKillUnusedSegmentsTaskQuerySerdeTest.java | 4 +- .../task/KillUnusedSegmentsTaskTest.java | 9 +- .../indexing/overlord/TaskLifecycleTest.java | 100 +++++++++++++++++- 4 files changed, 127 insertions(+), 7 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 29cdea6c73af..53e5dae44ce1 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import org.apache.druid.client.indexing.ClientKillUnusedSegmentsTaskQuery; import org.apache.druid.indexer.TaskStatus; @@ -41,6 +42,8 @@ import org.joda.time.Interval; import javax.annotation.Nonnull; +import javax.annotation.Nullable; + import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; @@ -61,6 +64,7 @@ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask private static final Logger LOG = new Logger(KillUnusedSegmentsTask.class); private final boolean markAsUnused; + @Nullable private final Integer maxSegmentsToKill; @JsonCreator public KillUnusedSegmentsTask( @@ -68,7 +72,8 @@ public KillUnusedSegmentsTask( @JsonProperty("dataSource") String dataSource, 
@JsonProperty("interval") Interval interval, @JsonProperty("context") Map context, - @JsonProperty("markAsUnused") Boolean markAsUnused + @JsonProperty("markAsUnused") Boolean markAsUnused, + @JsonProperty("maxSegmentsToKill") Integer maxSegmentsToKill ) { super( @@ -78,6 +83,9 @@ public KillUnusedSegmentsTask( context ); this.markAsUnused = markAsUnused != null && markAsUnused; + this.maxSegmentsToKill = maxSegmentsToKill; + Preconditions.checkArgument(this.maxSegmentsToKill > 0, "maxSegmentsToKill must be > 0"); + } @JsonProperty @@ -87,6 +95,12 @@ public boolean isMarkAsUnused() return markAsUnused; } + @JsonProperty + public Integer getMaxSegmentsToKill() + { + return maxSegmentsToKill; + } + @Override public String getType() { @@ -114,7 +128,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception } // List unused segments - final List unusedSegments = toolbox + List unusedSegments = toolbox .getTaskActionClient() .submit(new RetrieveUnusedSegmentsAction(getDataSource(), getInterval())); @@ -128,6 +142,9 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception } // Kill segments + unusedSegments = maxSegmentsToKill == null + ? 
unusedSegments + : unusedSegments.subList(0, Math.min(maxSegmentsToKill, unusedSegments.size())); toolbox.getTaskActionClient().submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); for (DataSegment segment : unusedSegments) { toolbox.getDataSegmentKiller().kill(segment); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java index e4583c91abf9..d369a5acb357 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java @@ -69,7 +69,8 @@ public void testKillUnusedSegmentsTaskToClientKillUnusedSegmentsTaskQuery() thro "datasource", Intervals.of("2020-01-01/P1D"), null, - true + true, + null ); final byte[] json = objectMapper.writeValueAsBytes(task); final ClientKillUnusedSegmentsTaskQuery taskQuery = (ClientKillUnusedSegmentsTaskQuery) objectMapper.readValue( @@ -80,5 +81,6 @@ public void testKillUnusedSegmentsTaskToClientKillUnusedSegmentsTaskQuery() thro Assert.assertEquals(task.getDataSource(), taskQuery.getDataSource()); Assert.assertEquals(task.getInterval(), taskQuery.getInterval()); Assert.assertEquals(task.isMarkAsUnused(), taskQuery.getMarkAsUnused()); + Assert.assertNull(task.getMaxSegmentsToKill()); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java index 86c6aeb4095e..f084cf64a4a7 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java @@ 
-79,7 +79,8 @@ public void testKill() throws Exception DATA_SOURCE, Intervals.of("2019-03-01/2019-04-01"), null, - false + false, + null ); Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); @@ -124,7 +125,8 @@ public void testKillWithMarkUnused() throws Exception DATA_SOURCE, Intervals.of("2019-03-01/2019-04-01"), null, - true + true, + null ); Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); @@ -151,7 +153,8 @@ public void testGetInputSourceResources() DATA_SOURCE, Intervals.of("2019-03-01/2019-04-01"), null, - true + true, + null ); Assert.assertTrue(task.getInputSourceResources().isEmpty()); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java index 6b5927bf1791..5fcd9f4a030e 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java @@ -947,7 +947,8 @@ public DataSegment apply(String input) "test_kill_task", Intervals.of("2011-04-01/P4D"), null, - false + false, + null ); final TaskStatus status = runTask(killUnusedSegmentsTask); @@ -967,6 +968,103 @@ public DataSegment apply(String input) } } + @Test + public void testKillUnusedSegmentsTaskWithMaxSegmentsToKill() throws Exception + { + final File tmpSegmentDir = temporaryFolder.newFolder(); + + List expectedUnusedSegments = Lists.transform( + ImmutableList.of( + "2011-04-01/2011-04-02", + "2011-04-02/2011-04-03", + "2011-04-04/2011-04-05" + ), new Function() + { + @Override + public DataSegment apply(String input) + { + final Interval interval = Intervals.of(input); + try { + return DataSegment.builder() + .dataSource("test_kill_task") + .interval(interval) + .loadSpec( + ImmutableMap.of( + "type", + "local", + "path", + tmpSegmentDir.getCanonicalPath() + + 
"/druid/localStorage/wikipedia/" + + interval.getStart() + + "-" + + interval.getEnd() + + "/" + + "2011-04-6T16:52:46.119-05:00" + + "/0/index.zip" + ) + ) + .version("2011-04-6T16:52:46.119-05:00") + .dimensions(ImmutableList.of()) + .metrics(ImmutableList.of()) + .shardSpec(NoneShardSpec.instance()) + .binaryVersion(9) + .size(0) + .build(); + } + catch (IOException e) { + throw new ISE(e, "Error creating segments"); + } + } + } + ); + + mdc.setUnusedSegments(expectedUnusedSegments); + + // manually create local segments files + List segmentFiles = new ArrayList<>(); + for (DataSegment segment : mdc.retrieveUnusedSegmentsForInterval("test_kill_task", Intervals.of("2011-04-01/P4D"))) { + File file = new File((String) segment.getLoadSpec().get("path")); + FileUtils.mkdirp(file.getParentFile()); + Files.write(file.toPath(), ByteArrays.EMPTY_ARRAY); + segmentFiles.add(file); + } + + final int maxSegmentsToKill = 2; + final Task killUnusedSegmentsTask = + new KillUnusedSegmentsTask( + null, + "test_kill_task", + Intervals.of("2011-04-01/P4D"), + null, + false, + maxSegmentsToKill + ); + + final TaskStatus status = runTask(killUnusedSegmentsTask); + Assert.assertEquals(taskLocation, status.getLocation()); + Assert.assertEquals("merged statusCode", TaskState.SUCCESS, status.getStatusCode()); + Assert.assertEquals("num segments published", 0, mdc.getPublished().size()); + Assert.assertEquals("num segments nuked", maxSegmentsToKill, mdc.getNuked().size()); + Assert.assertTrue( + "expected unused segments get killed", + expectedUnusedSegments.containsAll(mdc.getNuked()) + ); + + int expectedNumOfSegmentsRemaining = segmentFiles.size() - maxSegmentsToKill; + int actualNumOfSegmentsRemaining = 0; + for (File file : segmentFiles) { + if (file.exists()) { + actualNumOfSegmentsRemaining++; + } + } + + Assert.assertEquals( + "Expected of segments deleted did not match expectations", + expectedNumOfSegmentsRemaining, + actualNumOfSegmentsRemaining + ); + } + @Test public void 
testRealtimeishTask() throws Exception { From fa48bd0c7d115e139ce511b56892adcfab36d31a Mon Sep 17 00:00:00 2001 From: zachjsh Date: Tue, 25 Jul 2023 13:06:37 -0700 Subject: [PATCH 02/23] * pipe `maxSegments` config in autokill to kill task submitted --- .../common/task/KillUnusedSegmentsTask.java | 4 ++-- ...tKillUnusedSegmentsTaskQuerySerdeTest.java | 4 +++- .../ClientKillUnusedSegmentsTaskQuery.java | 21 ++++++++++++++++--- .../indexing/HttpIndexingServiceClient.java | 15 +++++++++++-- .../indexing/IndexingServiceClient.java | 2 +- .../coordinator/duty/KillUnusedSegments.java | 2 +- .../server/http/DataSourcesResource.java | 2 +- ...ClientKillUnusedSegmentsTaskQueryTest.java | 8 ++++++- .../rpc/indexing/OverlordClientImplTest.java | 2 +- .../duty/KillUnusedSegmentsTest.java | 7 ++++--- .../server/http/DataSourcesResourceTest.java | 2 +- 11 files changed, 52 insertions(+), 17 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 53e5dae44ce1..9d1587f4dda9 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -73,7 +73,7 @@ public KillUnusedSegmentsTask( @JsonProperty("interval") Interval interval, @JsonProperty("context") Map context, @JsonProperty("markAsUnused") Boolean markAsUnused, - @JsonProperty("maxSegmentsToKill") Integer maxSegmentsToKill + @JsonProperty("maxSegmentsToKill") @Nullable Integer maxSegmentsToKill ) { super( @@ -83,8 +83,8 @@ public KillUnusedSegmentsTask( context ); this.markAsUnused = markAsUnused != null && markAsUnused; + Preconditions.checkArgument(maxSegmentsToKill == null || maxSegmentsToKill > 0, "maxSegmentsToKill must be > 0"); this.maxSegmentsToKill = maxSegmentsToKill; - Preconditions.checkArgument(this.maxSegmentsToKill > 0, 
"maxSegmentsToKill must be > 0"); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java index d369a5acb357..e917a48ae391 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java @@ -51,7 +51,8 @@ public void testClientKillUnusedSegmentsTaskQueryToKillUnusedSegmentsTask() thro "killTaskId", "datasource", Intervals.of("2020-01-01/P1D"), - true + true, + 5 ); final byte[] json = objectMapper.writeValueAsBytes(taskQuery); final KillUnusedSegmentsTask fromJson = (KillUnusedSegmentsTask) objectMapper.readValue(json, Task.class); @@ -59,6 +60,7 @@ public void testClientKillUnusedSegmentsTaskQueryToKillUnusedSegmentsTask() thro Assert.assertEquals(taskQuery.getDataSource(), fromJson.getDataSource()); Assert.assertEquals(taskQuery.getInterval(), fromJson.getInterval()); Assert.assertEquals(taskQuery.getMarkAsUnused(), fromJson.isMarkAsUnused()); + Assert.assertEquals(taskQuery.getMaxSegmentsToKill(), fromJson.getMaxSegmentsToKill()); } @Test diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java index 4435e5fac4c8..817f966ac48e 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java +++ b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java @@ -24,6 +24,8 @@ import com.google.common.base.Preconditions; import org.joda.time.Interval; +import javax.annotation.Nullable; + import java.util.Objects; /** @@ -39,19 +41,23 @@ public class 
ClientKillUnusedSegmentsTaskQuery implements ClientTaskQuery private final String dataSource; private final Interval interval; private final Boolean markAsUnused; + @Nullable private final Integer maxSegmentsToKill; @JsonCreator public ClientKillUnusedSegmentsTaskQuery( @JsonProperty("id") String id, @JsonProperty("dataSource") String dataSource, @JsonProperty("interval") Interval interval, - @JsonProperty("markAsUnused") Boolean markAsUnused + @JsonProperty("markAsUnused") Boolean markAsUnused, + @JsonProperty("maxSegmentsToKill") Integer maxSegmentsToKill ) { this.id = Preconditions.checkNotNull(id, "id"); this.dataSource = dataSource; this.interval = interval; this.markAsUnused = markAsUnused; + Preconditions.checkArgument(maxSegmentsToKill == null || maxSegmentsToKill > 0, "maxSegmentsToKill must be > 0"); + this.maxSegmentsToKill = maxSegmentsToKill; } @JsonProperty @@ -87,6 +93,14 @@ public Boolean getMarkAsUnused() return markAsUnused; } + @JsonProperty + @Nullable + public Integer getMaxSegmentsToKill() + { + return maxSegmentsToKill; + } + + @Override public boolean equals(Object o) { @@ -100,12 +114,13 @@ public boolean equals(Object o) return Objects.equals(id, that.id) && Objects.equals(dataSource, that.dataSource) && Objects.equals(interval, that.interval) - && Objects.equals(markAsUnused, that.markAsUnused); + && Objects.equals(markAsUnused, that.markAsUnused) + && Objects.equals(maxSegmentsToKill, that.maxSegmentsToKill); } @Override public int hashCode() { - return Objects.hash(id, dataSource, interval, markAsUnused); + return Objects.hash(id, dataSource, interval, markAsUnused, maxSegmentsToKill); } } diff --git a/server/src/main/java/org/apache/druid/client/indexing/HttpIndexingServiceClient.java b/server/src/main/java/org/apache/druid/client/indexing/HttpIndexingServiceClient.java index 821e4343901f..3ad45ade33c5 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/HttpIndexingServiceClient.java +++ 
b/server/src/main/java/org/apache/druid/client/indexing/HttpIndexingServiceClient.java @@ -71,10 +71,21 @@ public HttpIndexingServiceClient( } @Override - public void killUnusedSegments(String idPrefix, String dataSource, Interval interval) + public void killUnusedSegments( + String idPrefix, + String dataSource, + Interval interval, + @Nullable Integer maxSegmentsToKill + ) { final String taskId = IdUtils.newTaskId(idPrefix, ClientKillUnusedSegmentsTaskQuery.TYPE, dataSource, interval); - final ClientTaskQuery taskQuery = new ClientKillUnusedSegmentsTaskQuery(taskId, dataSource, interval, false); + final ClientTaskQuery taskQuery = new ClientKillUnusedSegmentsTaskQuery( + taskId, + dataSource, + interval, + false, + maxSegmentsToKill + ); runTask(taskId, taskQuery); } diff --git a/server/src/main/java/org/apache/druid/client/indexing/IndexingServiceClient.java b/server/src/main/java/org/apache/druid/client/indexing/IndexingServiceClient.java index 2658d57d2375..d7757ed73693 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/IndexingServiceClient.java +++ b/server/src/main/java/org/apache/druid/client/indexing/IndexingServiceClient.java @@ -37,7 +37,7 @@ */ public interface IndexingServiceClient { - void killUnusedSegments(String idPrefix, String dataSource, Interval interval); + void killUnusedSegments(String idPrefix, String dataSource, Interval interval, @Nullable Integer maxSegmentsToKill); int killPendingSegments(String dataSource, DateTime end); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java index f1659b333503..9bad0ad855ca 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java @@ -129,7 +129,7 @@ private void killUnusedSegments(Collection dataSourcesToKill) } try { - 
indexingServiceClient.killUnusedSegments("coordinator-issued", dataSource, intervalToKill); + indexingServiceClient.killUnusedSegments("coordinator-issued", dataSource, intervalToKill, maxSegmentsToKill); ++submittedTasks; } catch (Exception ex) { diff --git a/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java b/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java index 291bfbc8fa05..ec569d04ee03 100644 --- a/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java +++ b/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java @@ -341,7 +341,7 @@ public Response killUnusedSegmentsInInterval( } final Interval theInterval = Intervals.of(interval.replace('_', '/')); try { - indexingServiceClient.killUnusedSegments("api-issued", dataSourceName, theInterval); + indexingServiceClient.killUnusedSegments("api-issued", dataSourceName, theInterval, null); return Response.ok().build(); } catch (Exception e) { diff --git a/server/src/test/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQueryTest.java b/server/src/test/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQueryTest.java index 0e6c0c86cb53..35cf80384cc4 100644 --- a/server/src/test/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQueryTest.java +++ b/server/src/test/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQueryTest.java @@ -40,7 +40,13 @@ public class ClientKillUnusedSegmentsTaskQueryTest @Before public void setUp() { - clientKillUnusedSegmentsQuery = new ClientKillUnusedSegmentsTaskQuery("killTaskId", DATA_SOURCE, INTERVAL, true); + clientKillUnusedSegmentsQuery = new ClientKillUnusedSegmentsTaskQuery( + "killTaskId", + DATA_SOURCE, + INTERVAL, + true, + null + ); } @After diff --git a/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java b/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java index 
26fb972ac5cd..8a2d31ca3da8 100644 --- a/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java +++ b/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java @@ -45,7 +45,7 @@ public void testTaskPayload() throws ExecutionException, InterruptedException, J MockServiceClient client = new MockServiceClient(); final OverlordClientImpl overlordClient = new OverlordClientImpl(client, DefaultObjectMapper.INSTANCE); - ClientTaskQuery clientTaskQuery = new ClientKillUnusedSegmentsTaskQuery(taskID, "test", null, null); + ClientTaskQuery clientTaskQuery = new ClientKillUnusedSegmentsTaskQuery(taskID, "test", null, null, null); client.expect(new RequestBuilder(HttpMethod.GET, "/druid/indexer/v1/task/" + taskID), HttpResponseStatus.OK, diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index 3bd25f4c197e..650841c813a6 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ -145,7 +145,7 @@ public void testRunWithNoIntervalShouldNotKillAnySegments() target.run(params); Mockito.verify(indexingServiceClient, Mockito.never()) - .killUnusedSegments(anyString(), anyString(), any(Interval.class)); + .killUnusedSegments(anyString(), anyString(), any(Interval.class), any(Integer.class)); } @Test @@ -158,7 +158,7 @@ public void testRunWithSpecificDatasourceAndNoIntervalShouldNotKillAnySegments() // No unused segment is older than the retention period target.run(params); Mockito.verify(indexingServiceClient, Mockito.never()) - .killUnusedSegments(anyString(), anyString(), any(Interval.class)); + .killUnusedSegments(anyString(), anyString(), any(Interval.class), any(Integer.class)); } @Test @@ -220,7 +220,8 @@ private void runAndVerifyKillInterval(Interval 
expectedKillInterval) Mockito.verify(indexingServiceClient, Mockito.times(1)).killUnusedSegments( ArgumentMatchers.anyString(), ArgumentMatchers.eq("DS1"), - ArgumentMatchers.eq(expectedKillInterval) + ArgumentMatchers.eq(expectedKillInterval), + ArgumentMatchers.eq(config.getCoordinatorKillMaxSegments()) ); } diff --git a/server/src/test/java/org/apache/druid/server/http/DataSourcesResourceTest.java b/server/src/test/java/org/apache/druid/server/http/DataSourcesResourceTest.java index 4e59e913e23b..5ea824d4ac8d 100644 --- a/server/src/test/java/org/apache/druid/server/http/DataSourcesResourceTest.java +++ b/server/src/test/java/org/apache/druid/server/http/DataSourcesResourceTest.java @@ -592,7 +592,7 @@ public void testKillSegmentsInIntervalInDataSource() Interval theInterval = Intervals.of(interval.replace('_', '/')); IndexingServiceClient indexingServiceClient = EasyMock.createStrictMock(IndexingServiceClient.class); - indexingServiceClient.killUnusedSegments("api-issued", "datasource1", theInterval); + indexingServiceClient.killUnusedSegments("api-issued", "datasource1", theInterval, null); EasyMock.expectLastCall().once(); EasyMock.replay(indexingServiceClient, server); From 72e2a8bc2bdc962e8e650c17fe42381eecc08586 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Tue, 25 Jul 2023 14:21:12 -0700 Subject: [PATCH 03/23] * add coordinator dynamic config `killTaskSlotRatio` which allows user to control the maximum ratio of total task slots available in cluster. If unset, will default to null which behaves as though all available task slots can be used in cluster, which was the existing behavior. 
--- .../common/task/KillUnusedSegmentsTask.java | 1 + .../druid/indexing/common/task/Task.java | 2 +- .../coordinator/CoordinatorDynamicConfig.java | 20 +++++++ .../coordinator/duty/KillUnusedSegments.java | 55 ++++++++++++++++++- .../http/CoordinatorDynamicConfigTest.java | 53 ++++++++++++++++++ 5 files changed, 128 insertions(+), 3 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 9d1587f4dda9..ed99feb92a11 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -61,6 +61,7 @@ */ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask { + public static final String TYPE = "kill"; private static final Logger LOG = new Logger(KillUnusedSegmentsTask.class); private final boolean markAsUnused; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java index 5c3186ef3923..58a2ad435b0f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java @@ -61,7 +61,7 @@ */ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") @JsonSubTypes(value = { - @Type(name = "kill", value = KillUnusedSegmentsTask.class), + @Type(name = KillUnusedSegmentsTask.TYPE, value = KillUnusedSegmentsTask.class), @Type(name = "move", value = MoveTask.class), @Type(name = "archive", value = ArchiveTask.class), @Type(name = "restore", value = RestoreTask.class), diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java 
b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 9a3802940320..c96c093e781f 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -69,6 +69,7 @@ public class CoordinatorDynamicConfig * List of specific data sources for which kill tasks are sent in {@link KillUnusedSegments}. */ private final Set specificDataSourcesToKillUnusedSegmentsIn; + @Nullable private final Double killTaskSlotRatio; private final Set decommissioningNodes; private final Map debugDimensions; @@ -130,6 +131,7 @@ public CoordinatorDynamicConfig( // Keeping the legacy 'killDataSourceWhitelist' property name for backward compatibility. When the project is // updated to Jackson 2.9 it could be changed, see https://github.com/apache/druid/issues/7152 @JsonProperty("killDataSourceWhitelist") Object specificDataSourcesToKillUnusedSegmentsIn, + @JsonProperty("killTaskSlotRatio") @Nullable Double killTaskSlotRatio, // Type is Object here so that we can support both string and list as Coordinator console can not send array of // strings in the update request, as well as for specificDataSourcesToKillUnusedSegmentsIn. // Keeping the legacy 'killPendingSegmentsSkipList' property name for backward compatibility. 
When the project is @@ -158,6 +160,7 @@ public CoordinatorDynamicConfig( this.balancerComputeThreads = Math.max(balancerComputeThreads, 1); this.specificDataSourcesToKillUnusedSegmentsIn = parseJsonStringOrArray(specificDataSourcesToKillUnusedSegmentsIn); + this.killTaskSlotRatio = killTaskSlotRatio; this.dataSourcesToNotKillStalePendingSegmentsIn = parseJsonStringOrArray(dataSourcesToNotKillStalePendingSegmentsIn); this.maxSegmentsInNodeLoadingQueue = Builder.valueOrDefault( @@ -297,6 +300,12 @@ public Set getSpecificDataSourcesToKillUnusedSegmentsIn() return specificDataSourcesToKillUnusedSegmentsIn; } + @JsonProperty("killTaskSlotRatio") + public Double getKillTaskSlotRatio() + { + return killTaskSlotRatio; + } + @JsonIgnore public boolean isKillUnusedSegmentsInAllDataSources() { @@ -507,6 +516,7 @@ public static class Builder private Integer replicationThrottleLimit; private Integer balancerComputeThreads; private Object specificDataSourcesToKillUnusedSegmentsIn; + private @Nullable Double killTaskSlotRatio; private Object dataSourcesToNotKillStalePendingSegmentsIn; private Integer maxSegmentsInNodeLoadingQueue; private Object decommissioningNodes; @@ -532,6 +542,7 @@ public Builder( @JsonProperty("replicationThrottleLimit") @Nullable Integer replicationThrottleLimit, @JsonProperty("balancerComputeThreads") @Nullable Integer balancerComputeThreads, @JsonProperty("killDataSourceWhitelist") @Nullable Object specificDataSourcesToKillUnusedSegmentsIn, + @JsonProperty("killTaskSlotRatio") @Nullable Double killTaskSlotRatio, @JsonProperty("killPendingSegmentsSkipList") @Nullable Object dataSourcesToNotKillStalePendingSegmentsIn, @JsonProperty("maxSegmentsInNodeLoadingQueue") @Nullable Integer maxSegmentsInNodeLoadingQueue, @JsonProperty("decommissioningNodes") @Nullable Object decommissioningNodes, @@ -553,6 +564,7 @@ public Builder( this.replicationThrottleLimit = replicationThrottleLimit; this.balancerComputeThreads = balancerComputeThreads; 
this.specificDataSourcesToKillUnusedSegmentsIn = specificDataSourcesToKillUnusedSegmentsIn; + this.killTaskSlotRatio = killTaskSlotRatio; this.dataSourcesToNotKillStalePendingSegmentsIn = dataSourcesToNotKillStalePendingSegmentsIn; this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; this.decommissioningNodes = decommissioningNodes; @@ -625,6 +637,12 @@ public Builder withSpecificDataSourcesToKillUnusedSegmentsIn(Set dataSou return this; } + public Builder withKillTaskSlotRatio(Double killTaskSlotRatio) + { + this.killTaskSlotRatio = killTaskSlotRatio; + return this; + } + public Builder withMaxSegmentsInNodeLoadingQueue(int maxSegmentsInNodeLoadingQueue) { this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; @@ -685,6 +703,7 @@ public CoordinatorDynamicConfig build() valueOrDefault(replicationThrottleLimit, Defaults.REPLICATION_THROTTLE_LIMIT), valueOrDefault(balancerComputeThreads, Defaults.BALANCER_COMPUTE_THREADS), specificDataSourcesToKillUnusedSegmentsIn, + killTaskSlotRatio, dataSourcesToNotKillStalePendingSegmentsIn, valueOrDefault(maxSegmentsInNodeLoadingQueue, Defaults.MAX_SEGMENTS_IN_NODE_LOADING_QUEUE), decommissioningNodes, @@ -720,6 +739,7 @@ public CoordinatorDynamicConfig build(CoordinatorDynamicConfig defaults) valueOrDefault(replicationThrottleLimit, defaults.getReplicationThrottleLimit()), valueOrDefault(balancerComputeThreads, defaults.getBalancerComputeThreads()), valueOrDefault(specificDataSourcesToKillUnusedSegmentsIn, defaults.getSpecificDataSourcesToKillUnusedSegmentsIn()), + valueOrDefault(killTaskSlotRatio, defaults.getKillTaskSlotRatio()), valueOrDefault(dataSourcesToNotKillStalePendingSegmentsIn, defaults.getDataSourcesToNotKillStalePendingSegmentsIn()), valueOrDefault(maxSegmentsInNodeLoadingQueue, defaults.getMaxSegmentsInNodeLoadingQueue()), valueOrDefault(decommissioningNodes, defaults.getDecommissioningNodes()), diff --git 
a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java index 9bad0ad855ca..741b21a45992 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java @@ -32,6 +32,8 @@ import org.joda.time.DateTime; import org.joda.time.Interval; +import javax.annotation.Nullable; + import java.util.Collection; import java.util.List; @@ -46,6 +48,7 @@ */ public class KillUnusedSegments implements CoordinatorDuty { + public static final String KILL_TASK_TYPE = "kill"; private static final Logger log = new Logger(KillUnusedSegments.class); private final long period; @@ -99,6 +102,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { Collection dataSourcesToKill = params.getCoordinatorDynamicConfig().getSpecificDataSourcesToKillUnusedSegmentsIn(); + final Double killTaskSlotRatio = params.getCoordinatorDynamicConfig().getKillTaskSlotRatio(); // If no datasource has been specified, all are eligible for killing unused segments if (CollectionUtils.isNullOrEmpty(dataSourcesToKill)) { @@ -113,15 +117,19 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) } else { log.debug("Killing unused segments in datasources: %s", dataSourcesToKill); lastKillTime = currentTimeMillis; - killUnusedSegments(dataSourcesToKill); + killUnusedSegments(dataSourcesToKill, killTaskSlotRatio); } return params; } - private void killUnusedSegments(Collection dataSourcesToKill) + private void killUnusedSegments(Collection dataSourcesToKill, @Nullable Double killTaskSlotRatio) { int submittedTasks = 0; + int availableKillTaskSlots = getAvailableKillTaskSlots(killTaskSlotRatio); + if (0 == availableKillTaskSlots) { + log.warn("Not killing any unused segments because there are no available kill task slots at this 
time."); + } for (String dataSource : dataSourcesToKill) { final Interval intervalToKill = findIntervalForKill(dataSource); if (intervalToKill == null) { @@ -139,6 +147,11 @@ private void killUnusedSegments(Collection dataSourcesToKill) break; } } + + if (submittedTasks >= availableKillTaskSlots) { + log.info("Reached kill task slot limit with pending unused segments to kill. Will resume " + + "on the next coordinator cycle."); + } } log.debug("Submitted kill tasks for [%d] datasources.", submittedTasks); @@ -147,6 +160,7 @@ private void killUnusedSegments(Collection dataSourcesToKill) /** * Calculates the interval for which segments are to be killed in a datasource. */ + @Nullable private Interval findIntervalForKill(String dataSource) { final DateTime maxEndTime = ignoreRetainDuration @@ -165,4 +179,41 @@ private Interval findIntervalForKill(String dataSource) } } + private int getAvailableKillTaskSlots(@Nullable Double killTaskSlotRatio) + { + return Math.max(0, getKillTaskCapacity(killTaskSlotRatio) - getNumActiveKillTaskSlots()); + } + + private int getNumActiveKillTaskSlots() + { + // Fetch currently running kill tasks + return (int) indexingServiceClient.getActiveTasks().stream() + .filter(status -> { + final String taskType = status.getType(); + // taskType can be null if middleManagers are running with an older version. Here, we conservatively regard + // the tasks of the unknown taskType as the killTask. This is because it's important to not run + // killTasks more than the configured limit at any time which might impact to the ingestion + // performance. 
+ return taskType == null || KILL_TASK_TYPE.equals(taskType); + }).count(); + } + + private int getKillTaskCapacity(@Nullable Double killTaskSlotRatio) + { + int totalWorkerCapacity; + try { + totalWorkerCapacity = indexingServiceClient.getTotalWorkerCapacityWithAutoScale(); + if (totalWorkerCapacity < 0) { + totalWorkerCapacity = indexingServiceClient.getTotalWorkerCapacity(); + } + } + catch (Exception e) { + log.warn("Failed to get total worker capacity with auto scale slots. Falling back to current capacity count"); + totalWorkerCapacity = indexingServiceClient.getTotalWorkerCapacity(); + } + + return killTaskSlotRatio == null + ? totalWorkerCapacity + : (int) (totalWorkerCapacity * killTaskSlotRatio); + } } diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index a7744256d5d4..0056bb8cd8d3 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -79,6 +79,7 @@ public void testSerde() throws Exception 1, 2, whitelist, + null, false, 1, decommissioning, @@ -99,6 +100,7 @@ public void testSerde() throws Exception 1, 2, whitelist, + null, false, 1, ImmutableSet.of("host1"), @@ -119,6 +121,7 @@ public void testSerde() throws Exception 1, 2, whitelist, + null, false, 1, ImmutableSet.of("host1"), @@ -139,6 +142,7 @@ public void testSerde() throws Exception 1, 2, whitelist, + null, false, 1, ImmutableSet.of("host1"), @@ -159,6 +163,7 @@ public void testSerde() throws Exception 1, 2, whitelist, + null, false, 1, ImmutableSet.of("host1"), @@ -179,6 +184,7 @@ public void testSerde() throws Exception 1, 2, whitelist, + null, false, 1, ImmutableSet.of("host1"), @@ -216,6 +222,7 @@ public void testConstructorWithNullsShouldKillUnusedSegmentsInAllDataSources() null, null, null, + null, ImmutableSet.of("host1"), 5, 
true, @@ -243,6 +250,7 @@ public void testConstructorWithSpecificDataSourcesToKillShouldNotKillUnusedSegme ImmutableSet.of("test1"), null, null, + null, ImmutableSet.of("host1"), 5, true, @@ -292,6 +300,7 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio 1, 2, whitelist, + null, false, 1, decommissioning, @@ -312,6 +321,7 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio 1, 2, whitelist, + null, false, 1, ImmutableSet.of("host1"), @@ -332,6 +342,7 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio 1, 2, whitelist, + null, false, 1, ImmutableSet.of("host1"), @@ -376,6 +387,7 @@ public void testSerdeWithStringInKillDataSourceWhitelist() throws Exception 1, 2, ImmutableSet.of("test1", "test2"), + null, false, 1, ImmutableSet.of(), @@ -435,6 +447,7 @@ public void testHandleMissingPercentOfSegmentsToConsiderPerMove() throws Excepti 1, 2, whitelist, + null, false, 1, decommissioning, @@ -477,6 +490,7 @@ public void testSerdeWithKillAllDataSources() throws Exception 1, 2, ImmutableSet.of(), + null, true, 1, ImmutableSet.of(), @@ -530,6 +544,7 @@ public void testDeserializeWithoutMaxSegmentsInNodeLoadingQueue() throws Excepti 1, 2, ImmutableSet.of(), + null, true, EXPECTED_DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE, ImmutableSet.of(), @@ -555,6 +570,7 @@ public void testBuilderDefaults() 500, 1, emptyList, + null, true, EXPECTED_DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE, emptyList, @@ -583,6 +599,7 @@ public void testBuilderWithDefaultSpecificDataSourcesToKillUnusedSegmentsInSpeci 500, 1, ImmutableSet.of("DATASOURCE"), + null, false, EXPECTED_DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE, ImmutableSet.of(), @@ -593,6 +610,36 @@ public void testBuilderWithDefaultSpecificDataSourcesToKillUnusedSegmentsInSpeci ); } + @Test + public void testBuilderWithKillTaskSlotRatio() + { + double killTaskSlotRatio = 0.2; + CoordinatorDynamicConfig defaultConfig = + 
CoordinatorDynamicConfig.builder() + .withKillTaskSlotRatio(killTaskSlotRatio) + .build(); + CoordinatorDynamicConfig config = CoordinatorDynamicConfig.builder().build(defaultConfig); + assertConfig( + config, + 900000, + 524288000, + 100, + 100, + 15, + 500, + 1, + ImmutableSet.of(), + killTaskSlotRatio, + true, + EXPECTED_DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE, + ImmutableSet.of(), + 70, + false, + false, + Integer.MAX_VALUE + ); + } + @Test public void testUpdate() { @@ -621,6 +668,7 @@ public void testUpdate() null, null, null, + null, null ).build(current) ); @@ -670,6 +718,7 @@ private void assertConfig( int expectedReplicationThrottleLimit, int expectedBalancerComputeThreads, Set expectedSpecificDataSourcesToKillUnusedSegmentsIn, + Double expectedKillTaskSlotRatio, boolean expectedKillUnusedSegmentsInAllDataSources, int expectedMaxSegmentsInNodeLoadingQueue, Set decommissioningNodes, @@ -693,6 +742,10 @@ private void assertConfig( expectedSpecificDataSourcesToKillUnusedSegmentsIn, config.getSpecificDataSourcesToKillUnusedSegmentsIn() ); + Assert.assertEquals( + expectedKillTaskSlotRatio, + config.getKillTaskSlotRatio() + ); Assert.assertEquals(expectedKillUnusedSegmentsInAllDataSources, config.isKillUnusedSegmentsInAllDataSources()); Assert.assertEquals(expectedMaxSegmentsInNodeLoadingQueue, config.getMaxSegmentsInNodeLoadingQueue()); Assert.assertEquals(decommissioningNodes, config.getDecommissioningNodes()); From 4364f6352eaef0c06b19fd2c89e8ef74c34a444e Mon Sep 17 00:00:00 2001 From: zachjsh Date: Tue, 25 Jul 2023 14:24:51 -0700 Subject: [PATCH 04/23] * fix CoordinatorDynamicConfig equals, toString, and hashCode methods --- .../druid/server/coordinator/CoordinatorDynamicConfig.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index c96c093e781f..7dd9b1f20a88 100644 --- 
a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -415,6 +415,7 @@ public String toString() ", replicationThrottleLimit=" + replicationThrottleLimit + ", balancerComputeThreads=" + balancerComputeThreads + ", specificDataSourcesToKillUnusedSegmentsIn=" + specificDataSourcesToKillUnusedSegmentsIn + + ", killTaskSlotRatio=" + killTaskSlotRatio + ", dataSourcesToNotKillStalePendingSegmentsIn=" + dataSourcesToNotKillStalePendingSegmentsIn + ", maxSegmentsInNodeLoadingQueue=" + maxSegmentsInNodeLoadingQueue + ", decommissioningNodes=" + decommissioningNodes + @@ -453,6 +454,9 @@ public boolean equals(Object o) && Objects.equals( specificDataSourcesToKillUnusedSegmentsIn, that.specificDataSourcesToKillUnusedSegmentsIn) + && Objects.equals( + killTaskSlotRatio, + that.killTaskSlotRatio) && Objects.equals( dataSourcesToNotKillStalePendingSegmentsIn, that.dataSourcesToNotKillStalePendingSegmentsIn) @@ -473,6 +477,7 @@ public int hashCode() balancerComputeThreads, maxSegmentsInNodeLoadingQueue, specificDataSourcesToKillUnusedSegmentsIn, + killTaskSlotRatio, dataSourcesToNotKillStalePendingSegmentsIn, decommissioningNodes, decommissioningMaxPercentOfMaxSegmentsToMove, From c71c69f97bd2e2a48b2421afcf0dc5a52a227169 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Tue, 25 Jul 2023 14:41:55 -0700 Subject: [PATCH 05/23] * minimum of 1 task used for kill tasks with auto kill duty --- .../server/coordinator/duty/KillUnusedSegments.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java index 741b21a45992..0a29bde61b71 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java +++ 
b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java @@ -212,8 +212,11 @@ private int getKillTaskCapacity(@Nullable Double killTaskSlotRatio) totalWorkerCapacity = indexingServiceClient.getTotalWorkerCapacity(); } - return killTaskSlotRatio == null - ? totalWorkerCapacity - : (int) (totalWorkerCapacity * killTaskSlotRatio); + return Math.min( + killTaskSlotRatio == null + ? totalWorkerCapacity + : (int) (totalWorkerCapacity * killTaskSlotRatio), + 1 + ); } } From c90e8047565c6308a7d028a07c253c3a5c0abc72 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Tue, 25 Jul 2023 15:18:30 -0700 Subject: [PATCH 06/23] * fixes after merge --- .../druid/rpc/indexing/OverlordClient.java | 18 ++++- .../coordinator/duty/KillUnusedSegments.java | 75 +++++++++++++++---- .../server/http/DataSourcesResource.java | 2 +- .../rpc/indexing/OverlordClientImplTest.java | 2 +- 4 files changed, 79 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java index 0cb77ee7045a..c0882976e1ff 100644 --- a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java +++ b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java @@ -84,9 +84,25 @@ public interface OverlordClient * @return future with task ID */ default ListenableFuture runKillTask(String idPrefix, String dataSource, Interval interval) + { + return runKillTask(idPrefix, dataSource, interval, null); + } + + default ListenableFuture runKillTask( + String idPrefix, + String dataSource, + Interval interval, + @Nullable Integer maxSegmentsToKill + ) { final String taskId = IdUtils.newTaskId(idPrefix, ClientKillUnusedSegmentsTaskQuery.TYPE, dataSource, interval); - final ClientTaskQuery taskQuery = new ClientKillUnusedSegmentsTaskQuery(taskId, dataSource, interval, false); + final ClientTaskQuery taskQuery = new ClientKillUnusedSegmentsTaskQuery( + taskId, + 
dataSource, + interval, + false, + maxSegmentsToKill + ); return FutureUtils.transform(runTask(taskId, taskQuery), ignored -> taskId); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java index 4dad48349df4..527ed8389a44 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java @@ -21,22 +21,30 @@ import com.google.common.base.Preconditions; import com.google.inject.Inject; +import org.apache.druid.client.indexing.IndexingTotalWorkerCapacityInfo; import org.apache.druid.common.guava.FutureUtils; +import org.apache.druid.indexer.TaskStatusPlus; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.JodaUtils; +import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.SegmentsMetadataManager; +import org.apache.druid.rpc.HttpResponseException; import org.apache.druid.rpc.indexing.OverlordClient; import org.apache.druid.server.coordinator.DruidCoordinatorConfig; import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; import org.apache.druid.utils.CollectionUtils; +import org.jboss.netty.handler.codec.http.HttpResponseStatus; import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; +import java.io.IOException; import java.util.Collection; import java.util.List; +import java.util.concurrent.ExecutionException; /** * Completely removes information about unused segments who have an interval end that comes before @@ -138,7 +146,12 @@ private void killUnusedSegments(Collection dataSourcesToKill, @Nullable } try { - 
FutureUtils.getUnchecked(overlordClient.runKillTask("coordinator-issued", dataSource, intervalToKill), true, maxSegmentsToKill); + FutureUtils.getUnchecked(overlordClient.runKillTask( + "coordinator-issued", + dataSource, + intervalToKill, + maxSegmentsToKill + ), true); ++submittedTasks; } catch (Exception ex) { @@ -152,6 +165,7 @@ private void killUnusedSegments(Collection dataSourcesToKill, @Nullable if (submittedTasks >= availableKillTaskSlots) { log.info("Reached kill task slot limit with pending unused segments to kill. Will resume " + "on the next coordinator cycle."); + break; } } @@ -187,31 +201,62 @@ private int getAvailableKillTaskSlots(@Nullable Double killTaskSlotRatio) private int getNumActiveKillTaskSlots() { + final CloseableIterator activeTasks = + FutureUtils.getUnchecked(overlordClient.taskStatuses(null, null, 0), true); // Fetch currently running kill tasks - return (int) indexingServiceClient.getActiveTasks().stream() - .filter(status -> { - final String taskType = status.getType(); - // taskType can be null if middleManagers are running with an older version. Here, we consevatively regard - // the tasks of the unknown taskType as the killTask. This is because it's important to not run - // killTasks more than the configured limit at any time which might impact to the ingestion - // performance. - return taskType == null || KILL_TASK_TYPE.equals(taskType); - }).count(); + int numActiveKillTasks = 0; + + try (final Closer closer = Closer.create()) { + closer.register(activeTasks); + while (activeTasks.hasNext()) { + final TaskStatusPlus status = activeTasks.next(); + + // taskType can be null if middleManagers are running with an older version. Here, we consevatively regard + // the tasks of the unknown taskType as the killTask. This is because it's important to not run + // killTasks more than the configured limit at any time which might impact to the ingestion + // performance. 
+ if (status.getType() == null || KILL_TASK_TYPE.equals(status.getType())) { + numActiveKillTasks++; + } + } + } + catch (IOException e) { + throw new RuntimeException(e); + } + + return numActiveKillTasks; } private int getKillTaskCapacity(@Nullable Double killTaskSlotRatio) { int totalWorkerCapacity; try { - totalWorkerCapacity = indexingServiceClient.getTotalWorkerCapacityWithAutoScale(); + final IndexingTotalWorkerCapacityInfo workerCapacityInfo = + FutureUtils.get(overlordClient.getTotalWorkerCapacity(), true); + totalWorkerCapacity = workerCapacityInfo.getMaximumCapacityWithAutoScale(); if (totalWorkerCapacity < 0) { - totalWorkerCapacity = indexingServiceClient.getTotalWorkerCapacity(); + totalWorkerCapacity = workerCapacityInfo.getCurrentClusterCapacity(); } } - catch (Exception e) { - log.warn("Failed to get total worker capacity with auto scale slots. Falling back to current capacity count"); - totalWorkerCapacity = indexingServiceClient.getTotalWorkerCapacity(); + catch (ExecutionException e) { + // Call to getTotalWorkerCapacity may fail during a rolling upgrade: API was added in 0.23.0. + if (e.getCause() instanceof HttpResponseException + && ((HttpResponseException) e.getCause()).getResponse().getStatus().equals(HttpResponseStatus.NOT_FOUND)) { + log.noStackTrace().warn(e, "Call to getTotalWorkerCapacity failed. 
Falling back to getWorkers."); + totalWorkerCapacity = + FutureUtils.getUnchecked(overlordClient.getWorkers(), true) + .stream() + .mapToInt(worker -> worker.getWorker().getCapacity()) + .sum(); + } else { + throw new RuntimeException(e.getCause()); + } } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + return Math.min( killTaskSlotRatio == null diff --git a/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java b/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java index 68cfc49e369f..1e146736da13 100644 --- a/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java +++ b/server/src/main/java/org/apache/druid/server/http/DataSourcesResource.java @@ -342,7 +342,7 @@ public Response killUnusedSegmentsInInterval( } final Interval theInterval = Intervals.of(interval.replace('_', '/')); try { - FutureUtils.getUnchecked(overlordClient.runKillTask("api-issued", dataSourceName, theInterval), true, null); + FutureUtils.getUnchecked(overlordClient.runKillTask("api-issued", dataSourceName, theInterval, null), true); return Response.ok().build(); } catch (Exception e) { diff --git a/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java b/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java index 2d89bded8022..eae0d11187dd 100644 --- a/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java +++ b/server/src/test/java/org/apache/druid/rpc/indexing/OverlordClientImplTest.java @@ -422,7 +422,7 @@ public void test_killPendingSegments() throws Exception public void test_taskPayload() throws ExecutionException, InterruptedException, JsonProcessingException { final String taskID = "taskId_1"; - final ClientTaskQuery clientTaskQuery = new ClientKillUnusedSegmentsTaskQuery(taskID, "test", null, null); + final ClientTaskQuery clientTaskQuery = new ClientKillUnusedSegmentsTaskQuery(taskID, 
"test", null, null, null); serviceClient.expectAndRespond( new RequestBuilder(HttpMethod.GET, "/druid/indexer/v1/task/" + taskID), From 3f7ddc9910ed989d557f315837534814f5331489 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Wed, 26 Jul 2023 12:33:53 -0700 Subject: [PATCH 07/23] * fix failing tests --- .../druid/indexing/common/task/KillUnusedSegmentsTask.java | 4 +++- .../client/indexing/ClientKillUnusedSegmentsTaskQuery.java | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index ed99feb92a11..fc9ab978822a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -83,8 +83,10 @@ public KillUnusedSegmentsTask( interval, context ); + if (null != maxSegmentsToKill) { + Preconditions.checkArgument(maxSegmentsToKill > 0, "maxSegmentsToKill must be > 0"); + } this.markAsUnused = markAsUnused != null && markAsUnused; - Preconditions.checkArgument(maxSegmentsToKill > 0, "maxSegmentsToKill must be > 0"); this.maxSegmentsToKill = maxSegmentsToKill; } diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java index 817f966ac48e..a805018f9057 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java +++ b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java @@ -56,7 +56,9 @@ public ClientKillUnusedSegmentsTaskQuery( this.dataSource = dataSource; this.interval = interval; this.markAsUnused = markAsUnused; - Preconditions.checkArgument(maxSegmentsToKill > 0, "maxSegmentsToKill must be > 
0"); + if (null != maxSegmentsToKill) { + Preconditions.checkArgument(maxSegmentsToKill > 0, "maxSegmentsToKill must be > 0"); + } this.maxSegmentsToKill = maxSegmentsToKill; } From be58f1c2626e2aeb78002fcefd0998da0bae1063 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Wed, 26 Jul 2023 12:36:55 -0700 Subject: [PATCH 08/23] * add @Nullable --- .../druid/indexing/common/task/KillUnusedSegmentsTask.java | 1 + 1 file changed, 1 insertion(+) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index fc9ab978822a..e2193c54fe35 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -98,6 +98,7 @@ public boolean isMarkAsUnused() return markAsUnused; } + @Nullable @JsonProperty public Integer getMaxSegmentsToKill() { From 3a165858e28a426434c5eb063ccc0cf58168c762 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Wed, 26 Jul 2023 13:32:22 -0700 Subject: [PATCH 09/23] * add more tests --- .../coordinator/duty/KillUnusedSegments.java | 5 +- .../duty/KillUnusedSegmentsTest.java | 90 ++++++++++++++++++- 2 files changed, 92 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java index 527ed8389a44..078ea9600867 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java @@ -138,6 +138,7 @@ private void killUnusedSegments(Collection dataSourcesToKill, @Nullable int availableKillTaskSlots = getAvailableKillTaskSlots(killTaskSlotRatio); if (0 == availableKillTaskSlots) { log.warn("Not killing 
any unused segments because there are no available kill task slots at this time."); + return; } for (String dataSource : dataSourcesToKill) { final Interval intervalToKill = findIntervalForKill(dataSource); @@ -196,7 +197,7 @@ private Interval findIntervalForKill(String dataSource) private int getAvailableKillTaskSlots(@Nullable Double killTaskSlotRatio) { - return Math.min(0, getKillTaskCapacity(killTaskSlotRatio) - getNumActiveKillTaskSlots()); + return Math.max(0, getKillTaskCapacity(killTaskSlotRatio) - getNumActiveKillTaskSlots()); } private int getNumActiveKillTaskSlots() @@ -258,7 +259,7 @@ private int getKillTaskCapacity(@Nullable Double killTaskSlotRatio) } - return Math.min( + return Math.max( killTaskSlotRatio == null ? totalWorkerCapacity : (int) (totalWorkerCapacity * killTaskSlotRatio), diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index eb71c09a86ee..5337fac41055 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ -20,6 +20,13 @@ package org.apache.druid.server.coordinator.duty; import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.Futures; +import org.apache.druid.client.indexing.IndexingTotalWorkerCapacityInfo; +import org.apache.druid.indexer.RunnerTaskState; +import org.apache.druid.indexer.TaskLocation; +import org.apache.druid.indexer.TaskState; +import org.apache.druid.indexer.TaskStatusPlus; +import org.apache.druid.java.util.common.CloseableIterators; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.metadata.SegmentsMetadataManager; import org.apache.druid.rpc.indexing.OverlordClient; @@ -47,7 +54,10 @@ import java.util.List; import java.util.stream.Collectors; +import 
javax.annotation.Nullable; + import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyString; /** @@ -143,6 +153,7 @@ public void testRunWithNoIntervalShouldNotKillAnySegments() ArgumentMatchers.anyInt() ); + mockAvailableTaskSlotsForKill(null); target.run(params); Mockito.verify(overlordClient, Mockito.never()) .runKillTask(anyString(), anyString(), any(Interval.class)); @@ -156,6 +167,7 @@ public void testRunWithSpecificDatasourceAndNoIntervalShouldNotKillAnySegments() target = new KillUnusedSegments(segmentsMetadataManager, overlordClient, config); // No unused segment is older than the retention period + mockAvailableTaskSlotsForKill(null); target.run(params); Mockito.verify(overlordClient, Mockito.never()) .runKillTask(anyString(), anyString(), any(Interval.class), any(Integer.class)); @@ -169,9 +181,34 @@ public void testDurationToRetain() yearOldSegment.getInterval().getStart(), dayOldSegment.getInterval().getEnd() ); + mockAvailableTaskSlotsForKill(null); runAndVerifyKillInterval(expectedKillInterval); } + @Test + public void testAtLeastOneTaskCapacityEvenIfKillTaskRatio0() + { + // Only segments more than a day old are killed + Interval expectedKillInterval = new Interval( + yearOldSegment.getInterval().getStart(), + dayOldSegment.getInterval().getEnd() + ); + mockAvailableTaskSlotsForKill(0.0); + runAndVerifyKillInterval(expectedKillInterval); + } + + @Test + public void testNoAvailableTaskCapacityForKill() + { + // Only segments more than a day old are killed + Interval expectedKillInterval = new Interval( + yearOldSegment.getInterval().getStart(), + dayOldSegment.getInterval().getEnd() + ); + mockNoAvailableTaskSlotsForKill(); + runAndVerifyNoKill(); + } + @Test public void testNegativeDurationToRetain() { @@ -185,6 +222,7 @@ public void testNegativeDurationToRetain() yearOldSegment.getInterval().getStart(), nextDaySegment.getInterval().getEnd() ); + 
mockAvailableTaskSlotsForKill(null); runAndVerifyKillInterval(expectedKillInterval); } @@ -200,6 +238,7 @@ public void testIgnoreDurationToRetain() yearOldSegment.getInterval().getStart(), nextMonthSegment.getInterval().getEnd() ); + mockAvailableTaskSlotsForKill(null); runAndVerifyKillInterval(expectedKillInterval); } @@ -210,18 +249,31 @@ public void testMaxSegmentsToKill() .when(config).getCoordinatorKillMaxSegments(); target = new KillUnusedSegments(segmentsMetadataManager, overlordClient, config); + mockAvailableTaskSlotsForKill(null); // Only 1 unused segment is killed runAndVerifyKillInterval(yearOldSegment.getInterval()); } private void runAndVerifyKillInterval(Interval expectedKillInterval) { + int maxSegmentsToKill = config.getCoordinatorKillMaxSegments(); target.run(params); Mockito.verify(overlordClient, Mockito.times(1)).runKillTask( ArgumentMatchers.anyString(), ArgumentMatchers.eq("DS1"), ArgumentMatchers.eq(expectedKillInterval), - ArgumentMatchers.eq(config.getCoordinatorKillMaxSegments()) + ArgumentMatchers.eq(maxSegmentsToKill) + ); + } + + private void runAndVerifyNoKill() + { + target.run(params); + Mockito.verify(overlordClient, Mockito.never()).runKillTask( + ArgumentMatchers.anyString(), + ArgumentMatchers.anyString(), + ArgumentMatchers.any(Interval.class), + ArgumentMatchers.anyInt() ); } @@ -239,4 +291,40 @@ private DataSegment createSegmentWithEnd(DateTime endTime) 0 ); } + + private void mockAvailableTaskSlotsForKill(@Nullable Double killTaskSlotRatio) + { + Mockito.doReturn(killTaskSlotRatio) + .when(coordinatorDynamicConfig).getKillTaskSlotRatio(); + Mockito.doReturn(Futures.immediateFuture(new IndexingTotalWorkerCapacityInfo(1, 10))) + .when(overlordClient) + .getTotalWorkerCapacity(); + Mockito.doReturn(Futures.immediateFuture( + CloseableIterators.withEmptyBaggage(ImmutableList.of().iterator()))) + .when(overlordClient) + .taskStatuses(null, null, 0); + } + private void mockNoAvailableTaskSlotsForKill() + { + 
Mockito.doReturn(Futures.immediateFuture(new IndexingTotalWorkerCapacityInfo(1, 1))) + .when(overlordClient) + .getTotalWorkerCapacity(); + final TaskStatusPlus runningConflictCompactionTask = new TaskStatusPlus( + "taskId", + "groupId", + KillUnusedSegments.KILL_TASK_TYPE, + DateTimes.EPOCH, + DateTimes.EPOCH, + TaskState.RUNNING, + RunnerTaskState.RUNNING, + -1L, + TaskLocation.unknown(), + "datasource", + null + ); + Mockito.doReturn(Futures.immediateFuture( + CloseableIterators.withEmptyBaggage(ImmutableList.of(runningConflictCompactionTask).iterator()))) + .when(overlordClient) + .taskStatuses(null, null, 0); + } } From f08bf89722e47a21973b5eb2ff3100a483eb1774 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Wed, 26 Jul 2023 13:56:39 -0700 Subject: [PATCH 10/23] * add docs --- docs/configuration/index.md | 40 ++++++++++--------- docs/data-management/delete.md | 8 +++- docs/operations/clean-metadata-store.md | 1 + .../duty/KillUnusedSegmentsTest.java | 5 +-- 4 files changed, 31 insertions(+), 23 deletions(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 4690af390b66..0e47c532be2e 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -934,6 +934,7 @@ A sample Coordinator dynamic config JSON object is shown below: "replicantLifetime": 15, "replicationThrottleLimit": 10, "killDataSourceWhitelist": ["wikipedia", "testDatasource"], + "killTaskSlotRatio": 0.10, "decommissioningNodes": ["localhost:8182", "localhost:8282"], "decommissioningMaxPercentOfMaxSegmentsToMove": 70, "pauseCoordination": false, @@ -944,25 +945,26 @@ A sample Coordinator dynamic config JSON object is shown below: Issuing a GET request at the same URL will return the spec that is currently in place. A description of the config setup spec is shown below. 
-|Property|Description|Default| -|--------|-----------|-------| -|`millisToWaitBeforeDeleting`|How long does the Coordinator need to be a leader before it can start marking overshadowed segments as unused in metadata storage.|900000 (15 mins)| -|`mergeBytesLimit`|The maximum total uncompressed size in bytes of segments to merge.|524288000L| -|`mergeSegmentsLimit`|The maximum number of segments that can be in a single [append task](../ingestion/tasks.md).|100| -|`smartSegmentLoading`|Enables ["smart" segment loading mode](#smart-segment-loading) which dynamically computes the optimal values of several properties that maximize Coordinator performance.|true| -|`maxSegmentsToMove`|The maximum number of segments that can be moved at any given time.|100| -|`replicantLifetime`|The maximum number of Coordinator runs for which a segment can wait in the load queue of a Historical before Druid raises an alert.|15| -|`replicationThrottleLimit`|The maximum number of segment replicas that can be assigned to a historical tier in a single Coordinator run. This property prevents historicals from becoming overwhelmed when loading extra replicas of segments that are already available in the cluster.|500| -|`balancerComputeThreads`|Thread pool size for computing moving cost of segments during segment balancing. Consider increasing this if you have a lot of segments and moving segments begins to stall.|1| -|`killDataSourceWhitelist`|List of specific data sources for which kill tasks are sent if property `druid.coordinator.kill.on` is true. This can be a list of comma-separated data source names or a JSON array.|none| -|`killPendingSegmentsSkipList`|List of data sources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated data sources or a JSON array.|none| -|`maxSegmentsInNodeLoadingQueue`|The maximum number of segments allowed in the load queue of any given server. 
Use this parameter to load segments faster if, for example, the cluster contains slow-loading nodes or if there are too many segments to be replicated to a particular node (when faster loading is preferred to better segments distribution). The optimal value depends on the loading speed of segments, acceptable replication time and number of nodes. |500| -|`useRoundRobinSegmentAssignment`|Boolean flag for whether segments should be assigned to historicals in a round robin fashion. When disabled, segment assignment is done using the chosen balancer strategy. When enabled, this can speed up segment assignments leaving balancing to move the segments to their optimal locations (based on the balancer strategy) lazily. |true| -|`decommissioningNodes`| List of historical servers to 'decommission'. Coordinator will not assign new segments to 'decommissioning' servers, and segments will be moved away from them to be placed on non-decommissioning servers at the maximum rate specified by `decommissioningMaxPercentOfMaxSegmentsToMove`.|none| -|`decommissioningMaxPercentOfMaxSegmentsToMove`| Upper limit of segments the Coordinator can move from decommissioning servers to active non-decommissioning servers during a single run. This value is relative to the total maximum number of segments that can be moved at any given time based upon the value of `maxSegmentsToMove`.

If `decommissioningMaxPercentOfMaxSegmentsToMove` is 0, the Coordinator does not move segments to decommissioning servers, effectively putting them in a type of "maintenance" mode. In this case, decommissioning servers do not participate in balancing or assignment by load rules. The Coordinator still considers segments on decommissioning servers as candidates to replicate on active servers.

Decommissioning can stall if there are no available active servers to move the segments to. You can use the maximum percent of decommissioning segment movements to prioritize balancing or to decrease commissioning time to prevent active servers from being overloaded. The value must be between 0 and 100.|70| -|`pauseCoordination`| Boolean flag for whether or not the coordinator should execute its various duties of coordinating the cluster. Setting this to true essentially pauses all coordination work while allowing the API to remain up. Duties that are paused include all classes that implement the `CoordinatorDuty` Interface. Such duties include: Segment balancing, Segment compaction, Submitting kill tasks for unused segments (if enabled), Logging of used segments in the cluster, Marking of newly unused or overshadowed segments, Matching and execution of load/drop rules for used segments, Unloading segments that are no longer marked as used from Historical servers. An example of when an admin may want to pause coordination would be if they are doing deep storage maintenance on HDFS Name Nodes with downtime and don't want the coordinator to be directing Historical Nodes to hit the Name Node with API requests until maintenance is done and the deep store is declared healthy for use again. |false| -|`replicateAfterLoadTimeout`| Boolean flag for whether or not additional replication is needed for segments that have failed to load due to the expiry of `druid.coordinator.load.timeout`. If this is set to true, the coordinator will attempt to replicate the failed segment on a different historical server. This helps improve the segment availability if there are a few slow historicals in the cluster. However, the slow historical may still load the segment later and the coordinator may issue drop requests if the segment is over-replicated.|false| -|`maxNonPrimaryReplicantsToLoad`|The maximum number of replicas that can be assigned across all tiers in a single Coordinator run. 
This parameter serves the same purpose as `replicationThrottleLimit` except this limit applies at the cluster-level instead of per tier. The default value does not apply a limit to the number of replicas assigned per coordination cycle. If you want to use a non-default value for this property, you may want to start with `~20%` of the number of segments found on the historical server with the most segments. Use the Druid metric, `coordinator/time` with the filter `duty=org.apache.druid.server.coordinator.duty.RunRules` to see how different values of this property impact your Coordinator execution time.|`Integer.MAX_VALUE` (no limit)| +|Property| Description | Default | +|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------| +|`millisToWaitBeforeDeleting`| How long does the Coordinator need to be a leader before it can start marking overshadowed segments as unused in metadata storage. | 900000 (15 mins) | +|`mergeBytesLimit`| The maximum total uncompressed size in bytes of segments to merge. 
| 524288000L | +|`mergeSegmentsLimit`| The maximum number of segments that can be in a single [append task](../ingestion/tasks.md). | 100 | +|`smartSegmentLoading`| Enables ["smart" segment loading mode](#smart-segment-loading) which dynamically computes the optimal values of several properties that maximize Coordinator performance. | true | +|`maxSegmentsToMove`| The maximum number of segments that can be moved at any given time. | 100 | +|`replicantLifetime`| The maximum number of Coordinator runs for which a segment can wait in the load queue of a Historical before Druid raises an alert. | 15 | +|`replicationThrottleLimit`| The maximum number of segment replicas that can be assigned to a historical tier in a single Coordinator run. This property prevents historicals from becoming overwhelmed when loading extra replicas of segments that are already available in the cluster. | 500 | +|`balancerComputeThreads`| Thread pool size for computing moving cost of segments during segment balancing. Consider increasing this if you have a lot of segments and moving segments begins to stall. | 1 | +|`killDataSourceWhitelist`| List of specific data sources for which kill tasks are sent if property `druid.coordinator.kill.on` is true. This can be a list of comma-separated data source names or a JSON array. | none | +|`killTaskSlotRatio`| Ratio of total available task slots, including autoscaling if applicable that will be allowed for kill tasks. This limit only applies for kill tasks that are spawned automatically by the coordinator's auto kill duty, which is enabled when `druid.coordinator.kill.on` is true. | none - no limit | +|`killPendingSegmentsSkipList`| List of data sources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated data sources or a JSON array. | none | +|`maxSegmentsInNodeLoadingQueue`| The maximum number of segments allowed in the load queue of any given server. 
Use this parameter to load segments faster if, for example, the cluster contains slow-loading nodes or if there are too many segments to be replicated to a particular node (when faster loading is preferred to better segments distribution). The optimal value depends on the loading speed of segments, acceptable replication time and number of nodes. | 500 | +|`useRoundRobinSegmentAssignment`| Boolean flag for whether segments should be assigned to historicals in a round robin fashion. When disabled, segment assignment is done using the chosen balancer strategy. When enabled, this can speed up segment assignments leaving balancing to move the segments to their optimal locations (based on the balancer strategy) lazily. | true | +|`decommissioningNodes`| List of historical servers to 'decommission'. Coordinator will not assign new segments to 'decommissioning' servers, and segments will be moved away from them to be placed on non-decommissioning servers at the maximum rate specified by `decommissioningMaxPercentOfMaxSegmentsToMove`. | none | +|`decommissioningMaxPercentOfMaxSegmentsToMove`| Upper limit of segments the Coordinator can move from decommissioning servers to active non-decommissioning servers during a single run. This value is relative to the total maximum number of segments that can be moved at any given time based upon the value of `maxSegmentsToMove`.

If `decommissioningMaxPercentOfMaxSegmentsToMove` is 0, the Coordinator does not move segments to decommissioning servers, effectively putting them in a type of "maintenance" mode. In this case, decommissioning servers do not participate in balancing or assignment by load rules. The Coordinator still considers segments on decommissioning servers as candidates to replicate on active servers.

Decommissioning can stall if there are no available active servers to move the segments to. You can use the maximum percent of decommissioning segment movements to prioritize balancing or to decrease commissioning time to prevent active servers from being overloaded. The value must be between 0 and 100. | 70 | +|`pauseCoordination`| Boolean flag for whether or not the coordinator should execute its various duties of coordinating the cluster. Setting this to true essentially pauses all coordination work while allowing the API to remain up. Duties that are paused include all classes that implement the `CoordinatorDuty` Interface. Such duties include: Segment balancing, Segment compaction, Submitting kill tasks for unused segments (if enabled), Logging of used segments in the cluster, Marking of newly unused or overshadowed segments, Matching and execution of load/drop rules for used segments, Unloading segments that are no longer marked as used from Historical servers. An example of when an admin may want to pause coordination would be if they are doing deep storage maintenance on HDFS Name Nodes with downtime and don't want the coordinator to be directing Historical Nodes to hit the Name Node with API requests until maintenance is done and the deep store is declared healthy for use again. | false | +|`replicateAfterLoadTimeout`| Boolean flag for whether or not additional replication is needed for segments that have failed to load due to the expiry of `druid.coordinator.load.timeout`. If this is set to true, the coordinator will attempt to replicate the failed segment on a different historical server. This helps improve the segment availability if there are a few slow historicals in the cluster. However, the slow historical may still load the segment later and the coordinator may issue drop requests if the segment is over-replicated. 
| false | +|`maxNonPrimaryReplicantsToLoad`| The maximum number of replicas that can be assigned across all tiers in a single Coordinator run. This parameter serves the same purpose as `replicationThrottleLimit` except this limit applies at the cluster-level instead of per tier. The default value does not apply a limit to the number of replicas assigned per coordination cycle. If you want to use a non-default value for this property, you may want to start with `~20%` of the number of segments found on the historical server with the most segments. Use the Druid metric, `coordinator/time` with the filter `duty=org.apache.druid.server.coordinator.duty.RunRules` to see how different values of this property impact your Coordinator execution time. | `Integer.MAX_VALUE` (no limit) | ##### Smart segment loading diff --git a/docs/data-management/delete.md b/docs/data-management/delete.md index 60377d858ca6..3a460f6dda79 100644 --- a/docs/data-management/delete.md +++ b/docs/data-management/delete.md @@ -95,9 +95,15 @@ The available grammar is: "id": , "dataSource": , "interval" : , - "context": + "context": , + "maxSegmentsToKill": } ``` **WARNING:** The `kill` task permanently removes all information about the affected segments from the metadata store and deep storage. This operation cannot be undone. + +Note: If `maxSegmentsToKill` is not specified, all matched segments are deleted. If `maxSegmentsToKill` is less than +the number of matching segments found, then only that number of matching segments will be deleted, but all matching +segments will still be marked unused, if specified to be. 
In this case, any remaining unusued segments can be deleted +with a subsequent kill task issued, or via [Automated unused segment deletion](../operations/clean-metadata-store.md#segment-records-and-segments-in-deep-storage-kill-task) diff --git a/docs/operations/clean-metadata-store.md b/docs/operations/clean-metadata-store.md index 8aa3c7dc32bb..e13d3a2591f4 100644 --- a/docs/operations/clean-metadata-store.md +++ b/docs/operations/clean-metadata-store.md @@ -85,6 +85,7 @@ If `killDataSourceWhitelist` is not set or empty, then kill tasks can be submitt - `druid.coordinator.kill.period`: Defines the frequency in [ISO 8601 format](https://en.wikipedia.org/wiki/ISO_8601#Durations) for the cleanup job to check for and delete eligible segments. Defaults to `P1D`. Must be greater than `druid.coordinator.period.indexingPeriod`. - `druid.coordinator.kill.durationToRetain`: Defines the retention period in [ISO 8601 format](https://en.wikipedia.org/wiki/ISO_8601#Durations) after creation that segments become eligible for deletion. - `druid.coordinator.kill.maxSegments`: Defines the maximum number of segments to delete per kill task. +- `druid.coordinator.kill.maxSegments`: Defines the maximum number of segments to delete per kill task. 
### Audit records diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index 5337fac41055..5a7a7589711a 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ -48,16 +48,15 @@ import org.mockito.Mockito; import org.mockito.junit.MockitoJUnitRunner; +import javax.annotation.Nullable; + import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.stream.Collectors; -import javax.annotation.Nullable; - import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyString; /** From b98c30f41b53393a67c0f1a7ddb5c5f498f3b73e Mon Sep 17 00:00:00 2001 From: zachjsh Date: Wed, 26 Jul 2023 17:21:41 -0700 Subject: [PATCH 11/23] * fix docs --- docs/configuration/index.md | 40 ++++++++++++------------- docs/data-management/delete.md | 2 +- docs/operations/clean-metadata-store.md | 1 - 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 0e47c532be2e..6fcace0650ae 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -945,26 +945,26 @@ A sample Coordinator dynamic config JSON object is shown below: Issuing a GET request at the same URL will return the spec that is currently in place. A description of the config setup spec is shown below. 
-|Property| Description | Default | -|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------| -|`millisToWaitBeforeDeleting`| How long does the Coordinator need to be a leader before it can start marking overshadowed segments as unused in metadata storage. | 900000 (15 mins) | -|`mergeBytesLimit`| The maximum total uncompressed size in bytes of segments to merge. | 524288000L | -|`mergeSegmentsLimit`| The maximum number of segments that can be in a single [append task](../ingestion/tasks.md). | 100 | -|`smartSegmentLoading`| Enables ["smart" segment loading mode](#smart-segment-loading) which dynamically computes the optimal values of several properties that maximize Coordinator performance. | true | -|`maxSegmentsToMove`| The maximum number of segments that can be moved at any given time. | 100 | -|`replicantLifetime`| The maximum number of Coordinator runs for which a segment can wait in the load queue of a Historical before Druid raises an alert. 
| 15 | -|`replicationThrottleLimit`| The maximum number of segment replicas that can be assigned to a historical tier in a single Coordinator run. This property prevents historicals from becoming overwhelmed when loading extra replicas of segments that are already available in the cluster. | 500 | -|`balancerComputeThreads`| Thread pool size for computing moving cost of segments during segment balancing. Consider increasing this if you have a lot of segments and moving segments begins to stall. | 1 | -|`killDataSourceWhitelist`| List of specific data sources for which kill tasks are sent if property `druid.coordinator.kill.on` is true. This can be a list of comma-separated data source names or a JSON array. | none | -|`killTaskSlotRatio`| Ratio of total available task slots, including autoscaling if applicable that will be allowed for kill tasks. This limit only applies for kill tasks that are spawned automatically by the coordinator's auto kill duty, which is enabled when `druid.coordinator.kill.on` is true. | none - no limit | -|`killPendingSegmentsSkipList`| List of data sources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated data sources or a JSON array. | none | -|`maxSegmentsInNodeLoadingQueue`| The maximum number of segments allowed in the load queue of any given server. Use this parameter to load segments faster if, for example, the cluster contains slow-loading nodes or if there are too many segments to be replicated to a particular node (when faster loading is preferred to better segments distribution). The optimal value depends on the loading speed of segments, acceptable replication time and number of nodes. | 500 | -|`useRoundRobinSegmentAssignment`| Boolean flag for whether segments should be assigned to historicals in a round robin fashion. When disabled, segment assignment is done using the chosen balancer strategy. 
When enabled, this can speed up segment assignments leaving balancing to move the segments to their optimal locations (based on the balancer strategy) lazily. | true | -|`decommissioningNodes`| List of historical servers to 'decommission'. Coordinator will not assign new segments to 'decommissioning' servers, and segments will be moved away from them to be placed on non-decommissioning servers at the maximum rate specified by `decommissioningMaxPercentOfMaxSegmentsToMove`. | none | -|`decommissioningMaxPercentOfMaxSegmentsToMove`| Upper limit of segments the Coordinator can move from decommissioning servers to active non-decommissioning servers during a single run. This value is relative to the total maximum number of segments that can be moved at any given time based upon the value of `maxSegmentsToMove`.

If `decommissioningMaxPercentOfMaxSegmentsToMove` is 0, the Coordinator does not move segments to decommissioning servers, effectively putting them in a type of "maintenance" mode. In this case, decommissioning servers do not participate in balancing or assignment by load rules. The Coordinator still considers segments on decommissioning servers as candidates to replicate on active servers.

Decommissioning can stall if there are no available active servers to move the segments to. You can use the maximum percent of decommissioning segment movements to prioritize balancing or to decrease commissioning time to prevent active servers from being overloaded. The value must be between 0 and 100. | 70 | -|`pauseCoordination`| Boolean flag for whether or not the coordinator should execute its various duties of coordinating the cluster. Setting this to true essentially pauses all coordination work while allowing the API to remain up. Duties that are paused include all classes that implement the `CoordinatorDuty` Interface. Such duties include: Segment balancing, Segment compaction, Submitting kill tasks for unused segments (if enabled), Logging of used segments in the cluster, Marking of newly unused or overshadowed segments, Matching and execution of load/drop rules for used segments, Unloading segments that are no longer marked as used from Historical servers. An example of when an admin may want to pause coordination would be if they are doing deep storage maintenance on HDFS Name Nodes with downtime and don't want the coordinator to be directing Historical Nodes to hit the Name Node with API requests until maintenance is done and the deep store is declared healthy for use again. | false | -|`replicateAfterLoadTimeout`| Boolean flag for whether or not additional replication is needed for segments that have failed to load due to the expiry of `druid.coordinator.load.timeout`. If this is set to true, the coordinator will attempt to replicate the failed segment on a different historical server. This helps improve the segment availability if there are a few slow historicals in the cluster. However, the slow historical may still load the segment later and the coordinator may issue drop requests if the segment is over-replicated. 
| false | -|`maxNonPrimaryReplicantsToLoad`| The maximum number of replicas that can be assigned across all tiers in a single Coordinator run. This parameter serves the same purpose as `replicationThrottleLimit` except this limit applies at the cluster-level instead of per tier. The default value does not apply a limit to the number of replicas assigned per coordination cycle. If you want to use a non-default value for this property, you may want to start with `~20%` of the number of segments found on the historical server with the most segments. Use the Druid metric, `coordinator/time` with the filter `duty=org.apache.druid.server.coordinator.duty.RunRules` to see how different values of this property impact your Coordinator execution time. | `Integer.MAX_VALUE` (no limit) | +|Property|Description|Default| +|--------|-----------|-------| +|`millisToWaitBeforeDeleting`|How long does the Coordinator need to be a leader before it can start marking overshadowed segments as unused in metadata storage.|900000 (15 mins)| +|`mergeBytesLimit`|The maximum total uncompressed size in bytes of segments to merge.|524288000L| +|`mergeSegmentsLimit`|The maximum number of segments that can be in a single [append task](../ingestion/tasks.md).|100| +|`smartSegmentLoading`|Enables ["smart" segment loading mode](#smart-segment-loading) which dynamically computes the optimal values of several properties that maximize Coordinator performance.|true| +|`maxSegmentsToMove`|The maximum number of segments that can be moved at any given time.|100| +|`replicantLifetime`|The maximum number of Coordinator runs for which a segment can wait in the load queue of a Historical before Druid raises an alert.|15| +|`replicationThrottleLimit`|The maximum number of segment replicas that can be assigned to a historical tier in a single Coordinator run. 
This property prevents historicals from becoming overwhelmed when loading extra replicas of segments that are already available in the cluster.|500| +|`balancerComputeThreads`|Thread pool size for computing moving cost of segments during segment balancing. Consider increasing this if you have a lot of segments and moving segments begins to stall.|1| +|`killDataSourceWhitelist`|List of specific data sources for which kill tasks are sent if property `druid.coordinator.kill.on` is true. This can be a list of comma-separated data source names or a JSON array.|none| +|`killTaskSlotRatio`| Ratio of total available task slots, including autoscaling if applicable that will be allowed for kill tasks. This limit only applies for kill tasks that are spawned automatically by the coordinator's auto kill duty, which is enabled when `druid.coordinator.kill.on` is true.| none - no limit | +|`killPendingSegmentsSkipList`|List of data sources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated data sources or a JSON array.|none| +|`maxSegmentsInNodeLoadingQueue`|The maximum number of segments allowed in the load queue of any given server. Use this parameter to load segments faster if, for example, the cluster contains slow-loading nodes or if there are too many segments to be replicated to a particular node (when faster loading is preferred to better segments distribution). The optimal value depends on the loading speed of segments, acceptable replication time and number of nodes. |500| +|`useRoundRobinSegmentAssignment`|Boolean flag for whether segments should be assigned to historicals in a round robin fashion. When disabled, segment assignment is done using the chosen balancer strategy. When enabled, this can speed up segment assignments leaving balancing to move the segments to their optimal locations (based on the balancer strategy) lazily. 
|true| +|`decommissioningNodes`| List of historical servers to 'decommission'. Coordinator will not assign new segments to 'decommissioning' servers, and segments will be moved away from them to be placed on non-decommissioning servers at the maximum rate specified by `decommissioningMaxPercentOfMaxSegmentsToMove`.|none| +|`decommissioningMaxPercentOfMaxSegmentsToMove`| Upper limit of segments the Coordinator can move from decommissioning servers to active non-decommissioning servers during a single run. This value is relative to the total maximum number of segments that can be moved at any given time based upon the value of `maxSegmentsToMove`.

If `decommissioningMaxPercentOfMaxSegmentsToMove` is 0, the Coordinator does not move segments to decommissioning servers, effectively putting them in a type of "maintenance" mode. In this case, decommissioning servers do not participate in balancing or assignment by load rules. The Coordinator still considers segments on decommissioning servers as candidates to replicate on active servers.

Decommissioning can stall if there are no available active servers to move the segments to. You can use the maximum percent of decommissioning segment movements to prioritize balancing or to decrease commissioning time to prevent active servers from being overloaded. The value must be between 0 and 100.|70| +|`pauseCoordination`| Boolean flag for whether or not the coordinator should execute its various duties of coordinating the cluster. Setting this to true essentially pauses all coordination work while allowing the API to remain up. Duties that are paused include all classes that implement the `CoordinatorDuty` Interface. Such duties include: Segment balancing, Segment compaction, Submitting kill tasks for unused segments (if enabled), Logging of used segments in the cluster, Marking of newly unused or overshadowed segments, Matching and execution of load/drop rules for used segments, Unloading segments that are no longer marked as used from Historical servers. An example of when an admin may want to pause coordination would be if they are doing deep storage maintenance on HDFS Name Nodes with downtime and don't want the coordinator to be directing Historical Nodes to hit the Name Node with API requests until maintenance is done and the deep store is declared healthy for use again. |false| +|`replicateAfterLoadTimeout`| Boolean flag for whether or not additional replication is needed for segments that have failed to load due to the expiry of `druid.coordinator.load.timeout`. If this is set to true, the coordinator will attempt to replicate the failed segment on a different historical server. This helps improve the segment availability if there are a few slow historicals in the cluster. However, the slow historical may still load the segment later and the coordinator may issue drop requests if the segment is over-replicated.|false| +|`maxNonPrimaryReplicantsToLoad`|The maximum number of replicas that can be assigned across all tiers in a single Coordinator run. 
This parameter serves the same purpose as `replicationThrottleLimit` except this limit applies at the cluster-level instead of per tier. The default value does not apply a limit to the number of replicas assigned per coordination cycle. If you want to use a non-default value for this property, you may want to start with `~20%` of the number of segments found on the historical server with the most segments. Use the Druid metric, `coordinator/time` with the filter `duty=org.apache.druid.server.coordinator.duty.RunRules` to see how different values of this property impact your Coordinator execution time.|`Integer.MAX_VALUE` (no limit)| ##### Smart segment loading diff --git a/docs/data-management/delete.md b/docs/data-management/delete.md index 3a460f6dda79..51d4e1f25dc7 100644 --- a/docs/data-management/delete.md +++ b/docs/data-management/delete.md @@ -105,5 +105,5 @@ deep storage. This operation cannot be undone. Note: If `maxSegmentsToKill` is not specified, all matched segments are deleted. If `maxSegmentsToKill` is less than the number of matching segments found, then only that number of matching segments will be deleted, but all matching -segments will still be marked unused, if specified to be. In this case, any remaining unusued segments can be deleted +segments will still be marked unused, if specified to be. 
In this case, any remaining unused segments can be deleted with a subsequent kill task issued, or via [Automated unused segment deletion](../operations/clean-metadata-store.md#segment-records-and-segments-in-deep-storage-kill-task) diff --git a/docs/operations/clean-metadata-store.md b/docs/operations/clean-metadata-store.md index e13d3a2591f4..8aa3c7dc32bb 100644 --- a/docs/operations/clean-metadata-store.md +++ b/docs/operations/clean-metadata-store.md @@ -85,7 +85,6 @@ If `killDataSourceWhitelist` is not set or empty, then kill tasks can be submitt - `druid.coordinator.kill.period`: Defines the frequency in [ISO 8601 format](https://en.wikipedia.org/wiki/ISO_8601#Durations) for the cleanup job to check for and delete eligible segments. Defaults to `P1D`. Must be greater than `druid.coordinator.period.indexingPeriod`. - `druid.coordinator.kill.durationToRetain`: Defines the retention period in [ISO 8601 format](https://en.wikipedia.org/wiki/ISO_8601#Durations) after creation that segments become eligible for deletion. - `druid.coordinator.kill.maxSegments`: Defines the maximum number of segments to delete per kill task. -- `druid.coordinator.kill.maxSegments`: Defines the maximum number of segments to delete per kill task. 
### Audit records From e52410af6b61ed935777bb55111528ec690f09a1 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Wed, 26 Jul 2023 17:24:41 -0700 Subject: [PATCH 12/23] * add missing javadoc --- .../apache/druid/rpc/indexing/OverlordClient.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java index c0882976e1ff..dde6246c8a68 100644 --- a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java +++ b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java @@ -88,6 +88,20 @@ default ListenableFuture runKillTask(String idPrefix, String dataSource, return runKillTask(idPrefix, dataSource, interval, null); } + /** + * Run a "kill" task for a particular datasource and interval. Shortcut to {@link #runTask(String, Object)}. + * + * The kill task deletes all unused segment records from deep storage and the metadata store. The task runs + * asynchronously after the API call returns. The resolved future is the ID of the task, which can be used to + * monitor its progress through the {@link #taskStatus(String)} API. 
+ * + * @param idPrefix Descriptive prefix to include at the start of task IDs + * @param dataSource Datasource to kill + * @param interval Interval to kill + * @param maxSegmentsToKill The maximum number of segments to kill + * + * @return future with task ID + */ default ListenableFuture runKillTask( String idPrefix, String dataSource, From 0347e08d4eedada169f7cde302f04798f9237cd8 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Wed, 26 Jul 2023 17:26:05 -0700 Subject: [PATCH 13/23] * review comments * QL --- .../druid/server/coordinator/CoordinatorDynamicConfig.java | 4 +--- .../server/coordinator/duty/KillUnusedSegmentsTest.java | 5 ----- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 7dd9b1f20a88..9f64bbac94bf 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -454,9 +454,7 @@ public boolean equals(Object o) && Objects.equals( specificDataSourcesToKillUnusedSegmentsIn, that.specificDataSourcesToKillUnusedSegmentsIn) - && Objects.equals( - killTaskSlotRatio, - that.killTaskSlotRatio) + && Objects.equals(killTaskSlotRatio, that.killTaskSlotRatio) && Objects.equals( dataSourcesToNotKillStalePendingSegmentsIn, that.dataSourcesToNotKillStalePendingSegmentsIn) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index 5a7a7589711a..cd58e2c1941d 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ -199,11 +199,6 @@ public void 
testAtLeastOneTaskCapacityEvenIfKillTaskRatio0() @Test public void testNoAvailableTaskCapacityForKill() { - // Only segments more than a day old are killed - Interval expectedKillInterval = new Interval( - yearOldSegment.getInterval().getStart(), - dayOldSegment.getInterval().getEnd() - ); mockNoAvailableTaskSlotsForKill(); runAndVerifyNoKill(); } From 3d338191b24572ef02d726c98c4d98c0f1c60d80 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Wed, 26 Jul 2023 17:38:25 -0700 Subject: [PATCH 14/23] * add validation for killTaskSlotRatio --- .../server/coordinator/CoordinatorDynamicConfig.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 9f64bbac94bf..916e6db14a1c 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -69,6 +69,8 @@ public class CoordinatorDynamicConfig * List of specific data sources for which kill tasks are sent in {@link KillUnusedSegments}. */ private final Set specificDataSourcesToKillUnusedSegmentsIn; + + private static final double DEFAULT_KILL_TASK_SLOT_RATIO = 1.0; @Nullable private final Double killTaskSlotRatio; private final Set decommissioningNodes; @@ -160,7 +162,14 @@ public CoordinatorDynamicConfig( this.balancerComputeThreads = Math.max(balancerComputeThreads, 1); this.specificDataSourcesToKillUnusedSegmentsIn = parseJsonStringOrArray(specificDataSourcesToKillUnusedSegmentsIn); - this.killTaskSlotRatio = killTaskSlotRatio; + + if (null != killTaskSlotRatio) { + Preconditions.checkArgument( + killTaskSlotRatio >= 0 && killTaskSlotRatio <= 1, + "killTaskSlotRatio must be >= 0 and <= 1" + ); + } + this.killTaskSlotRatio = killTaskSlotRatio != null ? 
killTaskSlotRatio : DEFAULT_KILL_TASK_SLOT_RATIO; this.dataSourcesToNotKillStalePendingSegmentsIn = parseJsonStringOrArray(dataSourcesToNotKillStalePendingSegmentsIn); this.maxSegmentsInNodeLoadingQueue = Builder.valueOrDefault( From ea997150757f4000954b52e348e5cd46869e7a73 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Wed, 26 Jul 2023 19:12:14 -0700 Subject: [PATCH 15/23] * fix failing test --- .../http/CoordinatorDynamicConfigTest.java | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index 0056bb8cd8d3..cd7bfdb1e4ff 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -79,7 +79,7 @@ public void testSerde() throws Exception 1, 2, whitelist, - null, + 1.0, false, 1, decommissioning, @@ -100,7 +100,7 @@ public void testSerde() throws Exception 1, 2, whitelist, - null, + 1.0, false, 1, ImmutableSet.of("host1"), @@ -121,7 +121,7 @@ public void testSerde() throws Exception 1, 2, whitelist, - null, + 1.0, false, 1, ImmutableSet.of("host1"), @@ -142,7 +142,7 @@ public void testSerde() throws Exception 1, 2, whitelist, - null, + 1.0, false, 1, ImmutableSet.of("host1"), @@ -163,7 +163,7 @@ public void testSerde() throws Exception 1, 2, whitelist, - null, + 1.0, false, 1, ImmutableSet.of("host1"), @@ -184,7 +184,7 @@ public void testSerde() throws Exception 1, 2, whitelist, - null, + 1.0, false, 1, ImmutableSet.of("host1"), @@ -300,7 +300,7 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio 1, 2, whitelist, - null, + 1.0, false, 1, decommissioning, @@ -321,7 +321,7 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio 1, 2, whitelist, - null, + 1.0, false, 1, 
ImmutableSet.of("host1"), @@ -342,7 +342,7 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio 1, 2, whitelist, - null, + 1.0, false, 1, ImmutableSet.of("host1"), @@ -387,7 +387,7 @@ public void testSerdeWithStringInKillDataSourceWhitelist() throws Exception 1, 2, ImmutableSet.of("test1", "test2"), - null, + 1.0, false, 1, ImmutableSet.of(), @@ -447,7 +447,7 @@ public void testHandleMissingPercentOfSegmentsToConsiderPerMove() throws Excepti 1, 2, whitelist, - null, + 1.0, false, 1, decommissioning, @@ -490,7 +490,7 @@ public void testSerdeWithKillAllDataSources() throws Exception 1, 2, ImmutableSet.of(), - null, + 1.0, true, 1, ImmutableSet.of(), @@ -544,7 +544,7 @@ public void testDeserializeWithoutMaxSegmentsInNodeLoadingQueue() throws Excepti 1, 2, ImmutableSet.of(), - null, + 1.0, true, EXPECTED_DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE, ImmutableSet.of(), @@ -570,7 +570,7 @@ public void testBuilderDefaults() 500, 1, emptyList, - null, + 1.0, true, EXPECTED_DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE, emptyList, @@ -599,7 +599,7 @@ public void testBuilderWithDefaultSpecificDataSourcesToKillUnusedSegmentsInSpeci 500, 1, ImmutableSet.of("DATASOURCE"), - null, + 1.0, false, EXPECTED_DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE, ImmutableSet.of(), From e15cfce45ca53e1a5a1810e4aa421cae0810801c Mon Sep 17 00:00:00 2001 From: zachjsh Date: Thu, 27 Jul 2023 15:24:56 -0700 Subject: [PATCH 16/23] Apply suggestions from code review Co-authored-by: Abhishek Radhakrishnan --- docs/data-management/delete.md | 2 +- .../druid/indexing/common/task/KillUnusedSegmentsTask.java | 4 ++-- .../druid/server/coordinator/CoordinatorDynamicConfig.java | 7 ++----- .../druid/server/coordinator/duty/KillUnusedSegments.java | 4 ++-- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/docs/data-management/delete.md b/docs/data-management/delete.md index 51d4e1f25dc7..c30cc2a42bd6 100644 --- a/docs/data-management/delete.md +++ 
b/docs/data-management/delete.md @@ -96,7 +96,7 @@ The available grammar is: "dataSource": , "interval" : , "context": , - "maxSegmentsToKill": + "maxSegmentsToKill": } ``` diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index e2193c54fe35..9ba36411e39f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -83,8 +83,8 @@ public KillUnusedSegmentsTask( interval, context ); - if (null != maxSegmentsToKill) { - Preconditions.checkArgument(maxSegmentsToKill > 0, "maxSegmentsToKill must be > 0"); + if (null != maxSegmentsToKill && maxSegmentsToKill <= 0) { + throw InvalidInput.exception("maxSegmentsToKill [%d] is invalid. It must be a positive integer.", maxSegmentsToKill); } this.markAsUnused = markAsUnused != null && markAsUnused; this.maxSegmentsToKill = maxSegmentsToKill; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 916e6db14a1c..00c438202406 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -163,11 +163,8 @@ public CoordinatorDynamicConfig( this.specificDataSourcesToKillUnusedSegmentsIn = parseJsonStringOrArray(specificDataSourcesToKillUnusedSegmentsIn); - if (null != killTaskSlotRatio) { - Preconditions.checkArgument( - killTaskSlotRatio >= 0 && killTaskSlotRatio <= 1, - "killTaskSlotRatio must be >= 0 and <= 1" - ); + if (null != killTaskSlotRatio && (killTaskSlotRatio < 0 || killTaskSlotRatio > 1)) { + throw InvalidInput.exception("killTaskSlotRatio 
[%.2f] is invalid. It must be >= 0 and <= 1.", killTaskSlotRatio) } this.killTaskSlotRatio = killTaskSlotRatio != null ? killTaskSlotRatio : DEFAULT_KILL_TASK_SLOT_RATIO; this.dataSourcesToNotKillStalePendingSegmentsIn diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java index 078ea9600867..0746fd46943b 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java @@ -164,13 +164,13 @@ private void killUnusedSegments(Collection dataSourcesToKill, @Nullable } if (submittedTasks >= availableKillTaskSlots) { - log.info("Reached kill task slot limit with pending unused segments to kill. Will resume " + log.info("Submitted [%d] kill tasks and reached kill task slot limit [%d]. Will resume " + "on the next coordinator cycle."); break; } } - log.debug("Submitted kill tasks for [%d] datasources.", submittedTasks); + log.debug("Submitted [%d] kill tasks for [%d] datasources.", submittedTasks, dataSourcesToKill.size()); } /** From 3cf2bd8e2a41ef942e83f217f747137493f4ca6d Mon Sep 17 00:00:00 2001 From: zachjsh Date: Thu, 27 Jul 2023 15:42:25 -0700 Subject: [PATCH 17/23] * address review comments * fix compilation / checkstyle issues brought on by review suggestion commits --- docs/configuration/index.md | 2 +- .../druid/indexing/common/task/KillUnusedSegmentsTask.java | 7 +++++-- .../druid/server/coordinator/CoordinatorDynamicConfig.java | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 6fcace0650ae..d4ed2081e120 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -956,7 +956,7 @@ Issuing a GET request at the same URL will return the spec that is currently in |`replicationThrottleLimit`|The maximum number of 
segment replicas that can be assigned to a historical tier in a single Coordinator run. This property prevents historicals from becoming overwhelmed when loading extra replicas of segments that are already available in the cluster.|500| |`balancerComputeThreads`|Thread pool size for computing moving cost of segments during segment balancing. Consider increasing this if you have a lot of segments and moving segments begins to stall.|1| |`killDataSourceWhitelist`|List of specific data sources for which kill tasks are sent if property `druid.coordinator.kill.on` is true. This can be a list of comma-separated data source names or a JSON array.|none| -|`killTaskSlotRatio`| Ratio of total available task slots, including autoscaling if applicable that will be allowed for kill tasks. This limit only applies for kill tasks that are spawned automatically by the coordinator's auto kill duty, which is enabled when `druid.coordinator.kill.on` is true.| none - no limit | +|`killTaskSlotRatio`| Ratio of total available task slots, including autoscaling if applicable that will be allowed for kill tasks. This limit only applies for kill tasks that are spawned automatically by the coordinator's auto kill duty, which is enabled when `druid.coordinator.kill.on` is true.| 1 - all task slots can be used | |`killPendingSegmentsSkipList`|List of data sources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated data sources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments allowed in the load queue of any given server. Use this parameter to load segments faster if, for example, the cluster contains slow-loading nodes or if there are too many segments to be replicated to a particular node (when faster loading is preferred to better segments distribution). 
The optimal value depends on the loading speed of segments, acceptable replication time and number of nodes. |500| |`useRoundRobinSegmentAssignment`|Boolean flag for whether segments should be assigned to historicals in a round robin fashion. When disabled, segment assignment is done using the chosen balancer strategy. When enabled, this can speed up segment assignments leaving balancing to move the segments to their optimal locations (based on the balancer strategy) lazily. |true| diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 9ba36411e39f..a46d5c129535 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -23,9 +23,9 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import org.apache.druid.client.indexing.ClientKillUnusedSegmentsTaskQuery; +import org.apache.druid.error.InvalidInput; import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskToolbox; @@ -84,7 +84,10 @@ public KillUnusedSegmentsTask( context ); if (null != maxSegmentsToKill && maxSegmentsToKill <= 0) { - throw InvalidInput.exception("maxSegmentsToKill [%d] is invalid. It must be a positive integer.", maxSegmentsToKill); + throw InvalidInput.exception( + "maxSegmentsToKill [%d] is invalid. 
It must be a positive integer.", + maxSegmentsToKill + ); } this.markAsUnused = markAsUnused != null && markAsUnused; this.maxSegmentsToKill = maxSegmentsToKill; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 00c438202406..2f6823ee373d 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -25,6 +25,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import org.apache.druid.common.config.JacksonConfigManager; +import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.server.coordinator.duty.KillUnusedSegments; import org.apache.druid.server.coordinator.loading.LoadQueuePeon; @@ -164,7 +165,7 @@ public CoordinatorDynamicConfig( = parseJsonStringOrArray(specificDataSourcesToKillUnusedSegmentsIn); if (null != killTaskSlotRatio && (killTaskSlotRatio < 0 || killTaskSlotRatio > 1)) { - throw InvalidInput.exception("killTaskSlotRatio [%.2f] is invalid. It must be >= 0 and <= 1.", killTaskSlotRatio) + throw InvalidInput.exception("killTaskSlotRatio [%.2f] is invalid. It must be >= 0 and <= 1.", killTaskSlotRatio); } this.killTaskSlotRatio = killTaskSlotRatio != null ? 
killTaskSlotRatio : DEFAULT_KILL_TASK_SLOT_RATIO; this.dataSourcesToNotKillStalePendingSegmentsIn From d853a031b2d79d09fda0c5c3bc0d46d110b032d2 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Thu, 27 Jul 2023 22:50:59 -0700 Subject: [PATCH 18/23] * propogate maxSegments config when retreiving unused segments --- .../actions/RetrieveUnusedSegmentsAction.java | 19 +++++++++++++++++-- .../indexing/common/task/ArchiveTask.java | 2 +- .../common/task/KillUnusedSegmentsTask.java | 2 +- .../druid/indexing/common/task/MoveTask.java | 2 +- .../indexing/common/task/RestoreTask.java | 2 +- .../actions/RetrieveSegmentsActionsTest.java | 2 +- ...TestIndexerMetadataStorageCoordinator.java | 12 ++++++++++++ .../IndexerMetadataStorageCoordinator.java | 19 ++++++++++++++++--- .../IndexerSQLMetadataStorageCoordinator.java | 12 +++++++++++- .../metadata/SqlSegmentsMetadataManager.java | 2 +- .../metadata/SqlSegmentsMetadataQuery.java | 19 +++++++++++++------ .../coordinator/duty/KillUnusedSegments.java | 6 ++++-- ...exerSQLMetadataStorageCoordinatorTest.java | 16 ++++++++++++++++ 13 files changed, 95 insertions(+), 20 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java index f114ec456465..1b7b98d283a0 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java @@ -27,6 +27,8 @@ import org.apache.druid.timeline.DataSegment; import org.joda.time.Interval; +import javax.annotation.Nullable; + import java.util.List; public class RetrieveUnusedSegmentsAction implements TaskAction> @@ -37,14 +39,19 @@ public class RetrieveUnusedSegmentsAction implements TaskAction> getReturnTypeReference() { @@ -68,7 +82,8 @@ public TypeReference> 
getReturnTypeReference() @Override public List perform(Task task, TaskActionToolbox toolbox) { - return toolbox.getIndexerMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval(dataSource, interval); + return toolbox.getIndexerMetadataStorageCoordinator() + .retrieveUnusedSegmentsForInterval(dataSource, interval, maxSegments); } @Override diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/ArchiveTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/ArchiveTask.java index 904181a2431c..42d04316a41f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/ArchiveTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/ArchiveTask.java @@ -79,7 +79,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // List unused segments final List unusedSegments = toolbox .getTaskActionClient() - .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval())); + .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval(), null)); // Verify none of these segments have versions > lock version for (final DataSegment unusedSegment : unusedSegments) { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index a46d5c129535..dbe89be2a61c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -137,7 +137,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // List unused segments List unusedSegments = toolbox .getTaskActionClient() - .submit(new RetrieveUnusedSegmentsAction(getDataSource(), getInterval())); + .submit(new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), 
maxSegmentsToKill)); if (!TaskLocks.isLockCoversSegments(taskLockMap, unusedSegments)) { throw new ISE( diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/MoveTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/MoveTask.java index 3e8b792eb885..d23b3820db74 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/MoveTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/MoveTask.java @@ -87,7 +87,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // List unused segments final List unusedSegments = toolbox .getTaskActionClient() - .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval())); + .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval(), null)); // Verify none of these segments have versions > lock version for (final DataSegment unusedSegment : unusedSegments) { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RestoreTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RestoreTask.java index 5e91ad3375aa..1364bcb597fe 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RestoreTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RestoreTask.java @@ -80,7 +80,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // List unused segments final List unusedSegments = toolbox .getTaskActionClient() - .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval())); + .submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval(), null)); // Verify none of these segments have versions > lock version for (final DataSegment unusedSegment : unusedSegments) { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java index 6149208fc391..2d360dfeb521 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java @@ -104,7 +104,7 @@ public void testRetrieveUsedSegmentsAction() @Test public void testRetrieveUnusedSegmentsAction() { - final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL); + final RetrieveUnusedSegmentsAction action = new RetrieveUnusedSegmentsAction(task.getDataSource(), INTERVAL, null); final Set resultSegments = new HashSet<>(action.perform(task, actionTestKit.getTaskActionToolbox())); Assert.assertEquals(expectedUnusedSegments, resultSegments); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index d64bd1d22263..ef0b99c21dc5 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -109,6 +109,18 @@ public List retrieveUnusedSegmentsForInterval(String dataSource, In } } + @Override + public List retrieveUnusedSegmentsForInterval( + String dataSource, + Interval interval, + @Nullable Integer maxSegments + ) + { + synchronized (unusedSegments) { + return unusedSegments.subList(0, maxSegments); + } + } + @Override public int markSegmentsAsUnusedWithinInterval(String dataSource, Interval interval) { diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java 
b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index b3c70f0cdbe9..e7892d69269d 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -123,17 +123,30 @@ Collection retrieveUsedSegmentsForIntervals( Segments visibility ); + /** + * see {@link #retrieveUnusedSegmentsForInterval(String, Interval, Integer)} + */ + List retrieveUnusedSegmentsForInterval(String dataSource, Interval interval); + /** * Retrieve all published segments which include ONLY data within the given interval and are marked as unused from the * metadata store. * - * @param dataSource The data source the segments belong to - * @param interval Filter the data segments to ones that include data in this interval exclusively. + * @param dataSource The data source the segments belong to + * @param interval Filter the data segments to ones that include data in this interval exclusively. + * @param maxSegments The maximum number of unused segments to retreive. If null, no limit is applied. * * @return DataSegments which include ONLY data within the requested interval and are marked as unused. Segments NOT * returned here may include data in the interval */ - List retrieveUnusedSegmentsForInterval(String dataSource, Interval interval); + default List retrieveUnusedSegmentsForInterval( + String dataSource, + Interval interval, + @Nullable Integer maxSegments + ) + { + return retrieveUnusedSegmentsForInterval(dataSource, interval); + } /** * Mark as unused segments which include ONLY data within the given interval. 
diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 107dc4b9f97a..24fd064c5c00 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -190,12 +190,22 @@ public List> retrieveUsedSegmentsAndCreatedDates(Strin @Override public List retrieveUnusedSegmentsForInterval(final String dataSource, final Interval interval) + { + return retrieveUnusedSegmentsForInterval(dataSource, interval, null); + } + + @Override + public List retrieveUnusedSegmentsForInterval( + String dataSource, + Interval interval, + @Nullable Integer maxSegments + ) { final List matchingSegments = connector.inReadOnlyTransaction( (handle, status) -> { try (final CloseableIterator iterator = SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUnusedSegments(dataSource, Collections.singletonList(interval))) { + .retrieveUnusedSegments(dataSource, Collections.singletonList(interval), maxSegments)) { return ImmutableList.copyOf(iterator); } } diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java index d98c5d4d0861..46db26a56d6c 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java @@ -574,7 +574,7 @@ private int doMarkAsUsedNonOvershadowedSegments(String dataSourceName, @Nullable } try (final CloseableIterator iterator = - queryTool.retrieveUnusedSegments(dataSourceName, intervals)) { + queryTool.retrieveUnusedSegments(dataSourceName, intervals, null)) { while (iterator.hasNext()) { final DataSegment dataSegment = iterator.next(); 
timeline.addSegments(Iterators.singletonIterator(dataSegment)); diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index 13460e2695d0..49b100c3608c 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -37,6 +37,8 @@ import org.skife.jdbi.v2.Query; import org.skife.jdbi.v2.ResultIterator; +import javax.annotation.Nullable; + import java.util.Collection; import java.util.Collections; import java.util.Iterator; @@ -104,7 +106,7 @@ public CloseableIterator retrieveUsedSegments( final Collection intervals ) { - return retrieveSegments(dataSource, intervals, IntervalMode.OVERLAPS, true); + return retrieveSegments(dataSource, intervals, IntervalMode.OVERLAPS, true, null); } /** @@ -118,10 +120,11 @@ public CloseableIterator retrieveUsedSegments( */ public CloseableIterator retrieveUnusedSegments( final String dataSource, - final Collection intervals + final Collection intervals, + @Nullable final Integer maxSegments ) { - return retrieveSegments(dataSource, intervals, IntervalMode.CONTAINS, false); + return retrieveSegments(dataSource, intervals, IntervalMode.CONTAINS, false, maxSegments); } /** @@ -201,7 +204,7 @@ public int markSegmentsUnused(final String dataSource, final Interval interval) // Retrieve, then drop, since we can't write a WHERE clause directly. 
final List segments = ImmutableList.copyOf( Iterators.transform( - retrieveSegments(dataSource, Collections.singletonList(interval), IntervalMode.CONTAINS, true), + retrieveSegments(dataSource, Collections.singletonList(interval), IntervalMode.CONTAINS, true, null), DataSegment::getId ) ); @@ -213,7 +216,8 @@ private CloseableIterator retrieveSegments( final String dataSource, final Collection intervals, final IntervalMode matchMode, - final boolean used + final boolean used, + @Nullable final Integer maxSegments ) { // Check if the intervals all support comparing as strings. If so, bake them into the SQL. @@ -254,11 +258,14 @@ private CloseableIterator retrieveSegments( sb.append(")"); } - final Query> sql = handle + Query> sql = handle .createQuery(StringUtils.format(sb.toString(), dbTables.getSegmentsTable())) .setFetchSize(connector.getStreamingFetchSize()) .bind("used", used) .bind("dataSource", dataSource); + if (null != maxSegments) { + sql = sql.setMaxRows(maxSegments); + } if (compareAsString) { final Iterator iterator = intervals.iterator(); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java index 0746fd46943b..fea506e6899f 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java @@ -26,6 +26,7 @@ import org.apache.druid.indexer.TaskStatusPlus; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.JodaUtils; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CloseableIterator; @@ -164,8 +165,9 @@ private void killUnusedSegments(Collection dataSourcesToKill, @Nullable } if (submittedTasks >= 
availableKillTaskSlots) { - log.info("Submitted [%d] kill tasks and reached kill task slot limit [%d]. Will resume " - + "on the next coordinator cycle."); + log.info(StringUtils.format( + "Submitted [%d] kill tasks and reached kill task slot limit [%d]. Will resume " + + "on the next coordinator cycle.", submittedTasks, availableKillTaskSlots)); break; } } diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 74e06bfb6645..444e159411e3 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -932,6 +932,22 @@ public void testSimpleUnusedList() throws IOException ); } + @Test + public void testSimpleUnusedListWithLimit() throws IOException + { + coordinator.announceHistoricalSegments(SEGMENTS); + markAllSegmentsUnused(); + int limit = SEGMENTS.size() - 1; + Set retrievedUnusedSegments = ImmutableSet.copyOf( + coordinator.retrieveUnusedSegmentsForInterval( + defaultSegment.getDataSource(), + defaultSegment.getInterval(), + limit + ) + ); + Assert.assertEquals(limit, retrievedUnusedSegments.size()); + Assert.assertTrue(SEGMENTS.containsAll(retrievedUnusedSegments)); + } @Test public void testUsedOverlapLow() throws IOException From 2eccb3302d7c8d6aafc113955dc1334d8cf2894f Mon Sep 17 00:00:00 2001 From: zachjsh Date: Wed, 2 Aug 2023 14:18:52 -0400 Subject: [PATCH 19/23] * revert back to existing kill task with limit --- .../common/task/KillUnusedSegmentsTask.java | 30 +++++++++++-------- ...tKillUnusedSegmentsTaskQuerySerdeTest.java | 4 +-- .../ClientKillUnusedSegmentsTaskQuery.java | 20 ++++++------- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index dbe89be2a61c..77d812a88d95 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -65,7 +65,7 @@ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask private static final Logger LOG = new Logger(KillUnusedSegmentsTask.class); private final boolean markAsUnused; - @Nullable private final Integer maxSegmentsToKill; + @Nullable private final Integer limit; @JsonCreator public KillUnusedSegmentsTask( @@ -73,8 +73,8 @@ public KillUnusedSegmentsTask( @JsonProperty("dataSource") String dataSource, @JsonProperty("interval") Interval interval, @JsonProperty("context") Map context, - @JsonProperty("markAsUnused") Boolean markAsUnused, - @JsonProperty("maxSegmentsToKill") @Nullable Integer maxSegmentsToKill + @JsonProperty("markAsUnused") @Deprecated Boolean markAsUnused, + @JsonProperty("limit") @Nullable Integer limit ) { super( @@ -83,14 +83,20 @@ public KillUnusedSegmentsTask( interval, context ); - if (null != maxSegmentsToKill && maxSegmentsToKill <= 0) { + if (null != limit && limit <= 0) { throw InvalidInput.exception( - "maxSegmentsToKill [%d] is invalid. It must be a positive integer.", - maxSegmentsToKill + "limit [%d] is invalid. 
It must be a positive integer.", + limit + ); + } + if (limit != null && markAsUnused != null && markAsUnused) { + throw InvalidInput.exception( + "limit cannot be provided with markAsUnused.", + limit ); } this.markAsUnused = markAsUnused != null && markAsUnused; - this.maxSegmentsToKill = maxSegmentsToKill; + this.limit = limit; } @@ -103,9 +109,9 @@ public boolean isMarkAsUnused() @Nullable @JsonProperty - public Integer getMaxSegmentsToKill() + public Integer getLimit() { - return maxSegmentsToKill; + return limit; } @Override @@ -137,7 +143,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // List unused segments List unusedSegments = toolbox .getTaskActionClient() - .submit(new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), maxSegmentsToKill)); + .submit(new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), limit)); if (!TaskLocks.isLockCoversSegments(taskLockMap, unusedSegments)) { throw new ISE( @@ -149,9 +155,9 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception } // Kill segments - unusedSegments = maxSegmentsToKill == null + unusedSegments = limit == null ? 
unusedSegments - : unusedSegments.subList(0, Math.min(maxSegmentsToKill, unusedSegments.size())); + : unusedSegments.subList(0, Math.min(limit, unusedSegments.size())); toolbox.getTaskActionClient().submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); for (DataSegment segment : unusedSegments) { toolbox.getDataSegmentKiller().kill(segment); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java index e917a48ae391..59b07fc9e0df 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java @@ -60,7 +60,7 @@ public void testClientKillUnusedSegmentsTaskQueryToKillUnusedSegmentsTask() thro Assert.assertEquals(taskQuery.getDataSource(), fromJson.getDataSource()); Assert.assertEquals(taskQuery.getInterval(), fromJson.getInterval()); Assert.assertEquals(taskQuery.getMarkAsUnused(), fromJson.isMarkAsUnused()); - Assert.assertEquals(taskQuery.getMaxSegmentsToKill(), fromJson.getMaxSegmentsToKill()); + Assert.assertEquals(taskQuery.getLimit(), fromJson.getLimit()); } @Test @@ -83,6 +83,6 @@ public void testKillUnusedSegmentsTaskToClientKillUnusedSegmentsTaskQuery() thro Assert.assertEquals(task.getDataSource(), taskQuery.getDataSource()); Assert.assertEquals(task.getInterval(), taskQuery.getInterval()); Assert.assertEquals(task.isMarkAsUnused(), taskQuery.getMarkAsUnused()); - Assert.assertNull(task.getMaxSegmentsToKill()); + Assert.assertNull(task.getLimit()); } } diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java index 
a805018f9057..5ea9f10c23d4 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java +++ b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java @@ -41,25 +41,25 @@ public class ClientKillUnusedSegmentsTaskQuery implements ClientTaskQuery private final String dataSource; private final Interval interval; private final Boolean markAsUnused; - @Nullable private final Integer maxSegmentsToKill; + @Nullable private final Integer limit; @JsonCreator public ClientKillUnusedSegmentsTaskQuery( @JsonProperty("id") String id, @JsonProperty("dataSource") String dataSource, @JsonProperty("interval") Interval interval, - @JsonProperty("markAsUnused") Boolean markAsUnused, - @JsonProperty("maxSegmentsToKill") Integer maxSegmentsToKill + @JsonProperty("markAsUnused") @Deprecated Boolean markAsUnused, + @JsonProperty("limit") Integer limit ) { this.id = Preconditions.checkNotNull(id, "id"); this.dataSource = dataSource; this.interval = interval; this.markAsUnused = markAsUnused; - if (null != maxSegmentsToKill) { - Preconditions.checkArgument(maxSegmentsToKill > 0, "maxSegmentsToKill must be > 0"); + if (null != limit) { + Preconditions.checkArgument(limit > 0, "limit must be > 0"); } - this.maxSegmentsToKill = maxSegmentsToKill; + this.limit = limit; } @JsonProperty @@ -97,9 +97,9 @@ public Boolean getMarkAsUnused() @JsonProperty @Nullable - public Integer getMaxSegmentsToKill() + public Integer getLimit() { - return maxSegmentsToKill; + return limit; } @@ -117,12 +117,12 @@ public boolean equals(Object o) && Objects.equals(dataSource, that.dataSource) && Objects.equals(interval, that.interval) && Objects.equals(markAsUnused, that.markAsUnused) - && Objects.equals(maxSegmentsToKill, that.maxSegmentsToKill); + && Objects.equals(limit, that.limit); } @Override public int hashCode() { - return Objects.hash(id, dataSource, interval, markAsUnused, maxSegmentsToKill); + return Objects.hash(id, 
dataSource, interval, markAsUnused, limit); } } From 66153db62eb664f5699f13a48d399730062d10d5 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Wed, 2 Aug 2023 17:45:06 -0400 Subject: [PATCH 20/23] * revert killTaskSlotRatio --- docs/configuration/index.md | 2 - docs/data-management/delete.md | 5 +- .../coordinator/CoordinatorDynamicConfig.java | 30 ------ .../coordinator/duty/KillUnusedSegments.java | 100 +----------------- .../duty/KillUnusedSegmentsTest.java | 87 +-------------- .../http/CoordinatorDynamicConfigTest.java | 53 ---------- 6 files changed, 8 insertions(+), 269 deletions(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index d4ed2081e120..4690af390b66 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -934,7 +934,6 @@ A sample Coordinator dynamic config JSON object is shown below: "replicantLifetime": 15, "replicationThrottleLimit": 10, "killDataSourceWhitelist": ["wikipedia", "testDatasource"], - "killTaskSlotRatio": 0.10, "decommissioningNodes": ["localhost:8182", "localhost:8282"], "decommissioningMaxPercentOfMaxSegmentsToMove": 70, "pauseCoordination": false, @@ -956,7 +955,6 @@ Issuing a GET request at the same URL will return the spec that is currently in |`replicationThrottleLimit`|The maximum number of segment replicas that can be assigned to a historical tier in a single Coordinator run. This property prevents historicals from becoming overwhelmed when loading extra replicas of segments that are already available in the cluster.|500| |`balancerComputeThreads`|Thread pool size for computing moving cost of segments during segment balancing. Consider increasing this if you have a lot of segments and moving segments begins to stall.|1| |`killDataSourceWhitelist`|List of specific data sources for which kill tasks are sent if property `druid.coordinator.kill.on` is true. 
This can be a list of comma-separated data source names or a JSON array.|none| -|`killTaskSlotRatio`| Ratio of total available task slots, including autoscaling if applicable that will be allowed for kill tasks. This limit only applies for kill tasks that are spawned automatically by the coordinator's auto kill duty, which is enabled when `druid.coordinator.kill.on` is true.| 1 - all task slots can be used | |`killPendingSegmentsSkipList`|List of data sources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated data sources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments allowed in the load queue of any given server. Use this parameter to load segments faster if, for example, the cluster contains slow-loading nodes or if there are too many segments to be replicated to a particular node (when faster loading is preferred to better segments distribution). The optimal value depends on the loading speed of segments, acceptable replication time and number of nodes. |500| |`useRoundRobinSegmentAssignment`|Boolean flag for whether segments should be assigned to historicals in a round robin fashion. When disabled, segment assignment is done using the chosen balancer strategy. When enabled, this can speed up segment assignments leaving balancing to move the segments to their optimal locations (based on the balancer strategy) lazily. |true| diff --git a/docs/data-management/delete.md b/docs/data-management/delete.md index c30cc2a42bd6..720f7d10440e 100644 --- a/docs/data-management/delete.md +++ b/docs/data-management/delete.md @@ -104,6 +104,5 @@ The available grammar is: deep storage. This operation cannot be undone. Note: If `maxSegmentsToKill` is not specified, all matched segments are deleted. 
If `maxSegmentsToKill` is less than -the number of matching segments found, then only that number of matching segments will be deleted, but all matching -segments will still be marked unused, if specified to be. In this case, any remaining unused segments can be deleted -with a subsequent kill task issued, or via [Automated unused segment deletion](../operations/clean-metadata-store.md#segment-records-and-segments-in-deep-storage-kill-task) +the number of matching segments found, then only that number of matching segments will be deleted. `maxSegmentsToKill` +cannot be used when `markAsUnused` is set to true, or a validation error will occur. \ No newline at end of file diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 2f6823ee373d..9a3802940320 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -25,7 +25,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import org.apache.druid.common.config.JacksonConfigManager; -import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.server.coordinator.duty.KillUnusedSegments; import org.apache.druid.server.coordinator.loading.LoadQueuePeon; @@ -70,9 +69,6 @@ public class CoordinatorDynamicConfig * List of specific data sources for which kill tasks are sent in {@link KillUnusedSegments}. 
*/ private final Set specificDataSourcesToKillUnusedSegmentsIn; - - private static final double DEFAULT_KILL_TASK_SLOT_RATIO = 1.0; - @Nullable private final Double killTaskSlotRatio; private final Set decommissioningNodes; private final Map debugDimensions; @@ -134,7 +130,6 @@ public CoordinatorDynamicConfig( // Keeping the legacy 'killDataSourceWhitelist' property name for backward compatibility. When the project is // updated to Jackson 2.9 it could be changed, see https://github.com/apache/druid/issues/7152 @JsonProperty("killDataSourceWhitelist") Object specificDataSourcesToKillUnusedSegmentsIn, - @JsonProperty("killTaskSlotRatio") @Nullable Double killTaskSlotRatio, // Type is Object here so that we can support both string and list as Coordinator console can not send array of // strings in the update request, as well as for specificDataSourcesToKillUnusedSegmentsIn. // Keeping the legacy 'killPendingSegmentsSkipList' property name for backward compatibility. When the project is @@ -163,11 +158,6 @@ public CoordinatorDynamicConfig( this.balancerComputeThreads = Math.max(balancerComputeThreads, 1); this.specificDataSourcesToKillUnusedSegmentsIn = parseJsonStringOrArray(specificDataSourcesToKillUnusedSegmentsIn); - - if (null != killTaskSlotRatio && (killTaskSlotRatio < 0 || killTaskSlotRatio > 1)) { - throw InvalidInput.exception("killTaskSlotRatio [%.2f] is invalid. It must be >= 0 and <= 1.", killTaskSlotRatio); - } - this.killTaskSlotRatio = killTaskSlotRatio != null ? 
killTaskSlotRatio : DEFAULT_KILL_TASK_SLOT_RATIO; this.dataSourcesToNotKillStalePendingSegmentsIn = parseJsonStringOrArray(dataSourcesToNotKillStalePendingSegmentsIn); this.maxSegmentsInNodeLoadingQueue = Builder.valueOrDefault( @@ -307,12 +297,6 @@ public Set getSpecificDataSourcesToKillUnusedSegmentsIn() return specificDataSourcesToKillUnusedSegmentsIn; } - @JsonProperty("killTaskSlotRatio") - public Double getKillTaskSlotRatio() - { - return killTaskSlotRatio; - } - @JsonIgnore public boolean isKillUnusedSegmentsInAllDataSources() { @@ -422,7 +406,6 @@ public String toString() ", replicationThrottleLimit=" + replicationThrottleLimit + ", balancerComputeThreads=" + balancerComputeThreads + ", specificDataSourcesToKillUnusedSegmentsIn=" + specificDataSourcesToKillUnusedSegmentsIn + - ", killTaskSlotRatio=" + killTaskSlotRatio + ", dataSourcesToNotKillStalePendingSegmentsIn=" + dataSourcesToNotKillStalePendingSegmentsIn + ", maxSegmentsInNodeLoadingQueue=" + maxSegmentsInNodeLoadingQueue + ", decommissioningNodes=" + decommissioningNodes + @@ -461,7 +444,6 @@ public boolean equals(Object o) && Objects.equals( specificDataSourcesToKillUnusedSegmentsIn, that.specificDataSourcesToKillUnusedSegmentsIn) - && Objects.equals(killTaskSlotRatio, that.killTaskSlotRatio) && Objects.equals( dataSourcesToNotKillStalePendingSegmentsIn, that.dataSourcesToNotKillStalePendingSegmentsIn) @@ -482,7 +464,6 @@ public int hashCode() balancerComputeThreads, maxSegmentsInNodeLoadingQueue, specificDataSourcesToKillUnusedSegmentsIn, - killTaskSlotRatio, dataSourcesToNotKillStalePendingSegmentsIn, decommissioningNodes, decommissioningMaxPercentOfMaxSegmentsToMove, @@ -526,7 +507,6 @@ public static class Builder private Integer replicationThrottleLimit; private Integer balancerComputeThreads; private Object specificDataSourcesToKillUnusedSegmentsIn; - private @Nullable Double killTaskSlotRatio; private Object dataSourcesToNotKillStalePendingSegmentsIn; private Integer 
maxSegmentsInNodeLoadingQueue; private Object decommissioningNodes; @@ -552,7 +532,6 @@ public Builder( @JsonProperty("replicationThrottleLimit") @Nullable Integer replicationThrottleLimit, @JsonProperty("balancerComputeThreads") @Nullable Integer balancerComputeThreads, @JsonProperty("killDataSourceWhitelist") @Nullable Object specificDataSourcesToKillUnusedSegmentsIn, - @JsonProperty("killTaskSlotRatio") @Nullable Double killTaskSlotRatio, @JsonProperty("killPendingSegmentsSkipList") @Nullable Object dataSourcesToNotKillStalePendingSegmentsIn, @JsonProperty("maxSegmentsInNodeLoadingQueue") @Nullable Integer maxSegmentsInNodeLoadingQueue, @JsonProperty("decommissioningNodes") @Nullable Object decommissioningNodes, @@ -574,7 +553,6 @@ public Builder( this.replicationThrottleLimit = replicationThrottleLimit; this.balancerComputeThreads = balancerComputeThreads; this.specificDataSourcesToKillUnusedSegmentsIn = specificDataSourcesToKillUnusedSegmentsIn; - this.killTaskSlotRatio = killTaskSlotRatio; this.dataSourcesToNotKillStalePendingSegmentsIn = dataSourcesToNotKillStalePendingSegmentsIn; this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; this.decommissioningNodes = decommissioningNodes; @@ -647,12 +625,6 @@ public Builder withSpecificDataSourcesToKillUnusedSegmentsIn(Set dataSou return this; } - public Builder withKillTaskSlotRatio(Double killTaskSlotRatio) - { - this.killTaskSlotRatio = killTaskSlotRatio; - return this; - } - public Builder withMaxSegmentsInNodeLoadingQueue(int maxSegmentsInNodeLoadingQueue) { this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; @@ -713,7 +685,6 @@ public CoordinatorDynamicConfig build() valueOrDefault(replicationThrottleLimit, Defaults.REPLICATION_THROTTLE_LIMIT), valueOrDefault(balancerComputeThreads, Defaults.BALANCER_COMPUTE_THREADS), specificDataSourcesToKillUnusedSegmentsIn, - killTaskSlotRatio, dataSourcesToNotKillStalePendingSegmentsIn, valueOrDefault(maxSegmentsInNodeLoadingQueue, 
Defaults.MAX_SEGMENTS_IN_NODE_LOADING_QUEUE), decommissioningNodes, @@ -749,7 +720,6 @@ public CoordinatorDynamicConfig build(CoordinatorDynamicConfig defaults) valueOrDefault(replicationThrottleLimit, defaults.getReplicationThrottleLimit()), valueOrDefault(balancerComputeThreads, defaults.getBalancerComputeThreads()), valueOrDefault(specificDataSourcesToKillUnusedSegmentsIn, defaults.getSpecificDataSourcesToKillUnusedSegmentsIn()), - valueOrDefault(killTaskSlotRatio, defaults.getKillTaskSlotRatio()), valueOrDefault(dataSourcesToNotKillStalePendingSegmentsIn, defaults.getDataSourcesToNotKillStalePendingSegmentsIn()), valueOrDefault(maxSegmentsInNodeLoadingQueue, defaults.getMaxSegmentsInNodeLoadingQueue()), valueOrDefault(decommissioningNodes, defaults.getDecommissioningNodes()), diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java index fea506e6899f..205947b00392 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java @@ -21,31 +21,22 @@ import com.google.common.base.Preconditions; import com.google.inject.Inject; -import org.apache.druid.client.indexing.IndexingTotalWorkerCapacityInfo; import org.apache.druid.common.guava.FutureUtils; -import org.apache.druid.indexer.TaskStatusPlus; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.JodaUtils; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.SegmentsMetadataManager; -import org.apache.druid.rpc.HttpResponseException; import org.apache.druid.rpc.indexing.OverlordClient; import 
org.apache.druid.server.coordinator.DruidCoordinatorConfig; import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; import org.apache.druid.utils.CollectionUtils; -import org.jboss.netty.handler.codec.http.HttpResponseStatus; import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; -import java.io.IOException; import java.util.Collection; import java.util.List; -import java.util.concurrent.ExecutionException; /** * Completely removes information about unused segments who have an interval end that comes before @@ -58,7 +49,6 @@ */ public class KillUnusedSegments implements CoordinatorDuty { - public static final String KILL_TASK_TYPE = "kill"; private static final Logger log = new Logger(KillUnusedSegments.class); private final long period; @@ -112,7 +102,6 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { Collection dataSourcesToKill = params.getCoordinatorDynamicConfig().getSpecificDataSourcesToKillUnusedSegmentsIn(); - final Double killTaskSlotRatio = params.getCoordinatorDynamicConfig().getKillTaskSlotRatio(); // If no datasource has been specified, all are eligible for killing unused segments if (CollectionUtils.isNullOrEmpty(dataSourcesToKill)) { @@ -127,20 +116,15 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) } else { log.debug("Killing unused segments in datasources: %s", dataSourcesToKill); lastKillTime = currentTimeMillis; - killUnusedSegments(dataSourcesToKill, killTaskSlotRatio); + killUnusedSegments(dataSourcesToKill); } return params; } - private void killUnusedSegments(Collection dataSourcesToKill, @Nullable Double killTaskSlotRatio) + private void killUnusedSegments(Collection dataSourcesToKill) { int submittedTasks = 0; - int availableKillTaskSlots = getAvailableKillTaskSlots(killTaskSlotRatio); - if (0 == availableKillTaskSlots) { - log.warn("Not killing any unused segments because there are no available kill task slots at 
this time."); - return; - } for (String dataSource : dataSourcesToKill) { final Interval intervalToKill = findIntervalForKill(dataSource); if (intervalToKill == null) { @@ -163,16 +147,9 @@ private void killUnusedSegments(Collection dataSourcesToKill, @Nullable break; } } - - if (submittedTasks >= availableKillTaskSlots) { - log.info(StringUtils.format( - "Submitted [%d] kill tasks and reached kill task slot limit [%d]. Will resume " - + "on the next coordinator cycle.", submittedTasks, availableKillTaskSlots)); - break; - } } - log.debug("Submitted [%d] kill tasks for [%d] datasources.", submittedTasks, dataSourcesToKill.size()); + log.debug("Submitted kill tasks for [%d] datasources.", submittedTasks); } /** @@ -197,75 +174,4 @@ private Interval findIntervalForKill(String dataSource) } } - private int getAvailableKillTaskSlots(@Nullable Double killTaskSlotRatio) - { - return Math.max(0, getKillTaskCapacity(killTaskSlotRatio) - getNumActiveKillTaskSlots()); - } - - private int getNumActiveKillTaskSlots() - { - final CloseableIterator activeTasks = - FutureUtils.getUnchecked(overlordClient.taskStatuses(null, null, 0), true); - // Fetch currently running kill tasks - int numActiveKillTasks = 0; - - try (final Closer closer = Closer.create()) { - closer.register(activeTasks); - while (activeTasks.hasNext()) { - final TaskStatusPlus status = activeTasks.next(); - - // taskType can be null if middleManagers are running with an older version. Here, we consevatively regard - // the tasks of the unknown taskType as the killTask. This is because it's important to not run - // killTasks more than the configured limit at any time which might impact to the ingestion - // performance. 
- if (status.getType() == null || KILL_TASK_TYPE.equals(status.getType())) { - numActiveKillTasks++; - } - } - } - catch (IOException e) { - throw new RuntimeException(e); - } - - return numActiveKillTasks; - } - - private int getKillTaskCapacity(@Nullable Double killTaskSlotRatio) - { - int totalWorkerCapacity; - try { - final IndexingTotalWorkerCapacityInfo workerCapacityInfo = - FutureUtils.get(overlordClient.getTotalWorkerCapacity(), true); - totalWorkerCapacity = workerCapacityInfo.getMaximumCapacityWithAutoScale(); - if (totalWorkerCapacity < 0) { - totalWorkerCapacity = workerCapacityInfo.getCurrentClusterCapacity(); - } - } - catch (ExecutionException e) { - // Call to getTotalWorkerCapacity may fail during a rolling upgrade: API was added in 0.23.0. - if (e.getCause() instanceof HttpResponseException - && ((HttpResponseException) e.getCause()).getResponse().getStatus().equals(HttpResponseStatus.NOT_FOUND)) { - log.noStackTrace().warn(e, "Call to getTotalWorkerCapacity failed. Falling back to getWorkers."); - totalWorkerCapacity = - FutureUtils.getUnchecked(overlordClient.getWorkers(), true) - .stream() - .mapToInt(worker -> worker.getWorker().getCapacity()) - .sum(); - } else { - throw new RuntimeException(e.getCause()); - } - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException(e); - } - - - return Math.max( - killTaskSlotRatio == null - ? 
totalWorkerCapacity - : (int) (totalWorkerCapacity * killTaskSlotRatio), - 1 - ); - } } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index cd58e2c1941d..039174eac7e0 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ -20,13 +20,6 @@ package org.apache.druid.server.coordinator.duty; import com.google.common.collect.ImmutableList; -import com.google.common.util.concurrent.Futures; -import org.apache.druid.client.indexing.IndexingTotalWorkerCapacityInfo; -import org.apache.druid.indexer.RunnerTaskState; -import org.apache.druid.indexer.TaskLocation; -import org.apache.druid.indexer.TaskState; -import org.apache.druid.indexer.TaskStatusPlus; -import org.apache.druid.java.util.common.CloseableIterators; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.metadata.SegmentsMetadataManager; import org.apache.druid.rpc.indexing.OverlordClient; @@ -48,8 +41,6 @@ import org.mockito.Mockito; import org.mockito.junit.MockitoJUnitRunner; -import javax.annotation.Nullable; - import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -152,7 +143,6 @@ public void testRunWithNoIntervalShouldNotKillAnySegments() ArgumentMatchers.anyInt() ); - mockAvailableTaskSlotsForKill(null); target.run(params); Mockito.verify(overlordClient, Mockito.never()) .runKillTask(anyString(), anyString(), any(Interval.class)); @@ -166,10 +156,9 @@ public void testRunWithSpecificDatasourceAndNoIntervalShouldNotKillAnySegments() target = new KillUnusedSegments(segmentsMetadataManager, overlordClient, config); // No unused segment is older than the retention period - mockAvailableTaskSlotsForKill(null); target.run(params); Mockito.verify(overlordClient, 
Mockito.never()) - .runKillTask(anyString(), anyString(), any(Interval.class), any(Integer.class)); + .runKillTask(anyString(), anyString(), any(Interval.class)); } @Test @@ -180,29 +169,9 @@ public void testDurationToRetain() yearOldSegment.getInterval().getStart(), dayOldSegment.getInterval().getEnd() ); - mockAvailableTaskSlotsForKill(null); runAndVerifyKillInterval(expectedKillInterval); } - @Test - public void testAtLeastOneTaskCapacityEvenIfKillTaskRatio0() - { - // Only segments more than a day old are killed - Interval expectedKillInterval = new Interval( - yearOldSegment.getInterval().getStart(), - dayOldSegment.getInterval().getEnd() - ); - mockAvailableTaskSlotsForKill(0.0); - runAndVerifyKillInterval(expectedKillInterval); - } - - @Test - public void testNoAvailableTaskCapacityForKill() - { - mockNoAvailableTaskSlotsForKill(); - runAndVerifyNoKill(); - } - @Test public void testNegativeDurationToRetain() { @@ -216,7 +185,6 @@ public void testNegativeDurationToRetain() yearOldSegment.getInterval().getStart(), nextDaySegment.getInterval().getEnd() ); - mockAvailableTaskSlotsForKill(null); runAndVerifyKillInterval(expectedKillInterval); } @@ -232,7 +200,6 @@ public void testIgnoreDurationToRetain() yearOldSegment.getInterval().getStart(), nextMonthSegment.getInterval().getEnd() ); - mockAvailableTaskSlotsForKill(null); runAndVerifyKillInterval(expectedKillInterval); } @@ -243,31 +210,19 @@ public void testMaxSegmentsToKill() .when(config).getCoordinatorKillMaxSegments(); target = new KillUnusedSegments(segmentsMetadataManager, overlordClient, config); - mockAvailableTaskSlotsForKill(null); // Only 1 unused segment is killed runAndVerifyKillInterval(yearOldSegment.getInterval()); } private void runAndVerifyKillInterval(Interval expectedKillInterval) { - int maxSegmentsToKill = config.getCoordinatorKillMaxSegments(); + int limit = config.getCoordinatorKillMaxSegments(); target.run(params); Mockito.verify(overlordClient, Mockito.times(1)).runKillTask( 
ArgumentMatchers.anyString(), ArgumentMatchers.eq("DS1"), ArgumentMatchers.eq(expectedKillInterval), - ArgumentMatchers.eq(maxSegmentsToKill) - ); - } - - private void runAndVerifyNoKill() - { - target.run(params); - Mockito.verify(overlordClient, Mockito.never()).runKillTask( - ArgumentMatchers.anyString(), - ArgumentMatchers.anyString(), - ArgumentMatchers.any(Interval.class), - ArgumentMatchers.anyInt() + ArgumentMatchers.eq(limit) ); } @@ -285,40 +240,4 @@ private DataSegment createSegmentWithEnd(DateTime endTime) 0 ); } - - private void mockAvailableTaskSlotsForKill(@Nullable Double killTaskSlotRatio) - { - Mockito.doReturn(killTaskSlotRatio) - .when(coordinatorDynamicConfig).getKillTaskSlotRatio(); - Mockito.doReturn(Futures.immediateFuture(new IndexingTotalWorkerCapacityInfo(1, 10))) - .when(overlordClient) - .getTotalWorkerCapacity(); - Mockito.doReturn(Futures.immediateFuture( - CloseableIterators.withEmptyBaggage(ImmutableList.of().iterator()))) - .when(overlordClient) - .taskStatuses(null, null, 0); - } - private void mockNoAvailableTaskSlotsForKill() - { - Mockito.doReturn(Futures.immediateFuture(new IndexingTotalWorkerCapacityInfo(1, 1))) - .when(overlordClient) - .getTotalWorkerCapacity(); - final TaskStatusPlus runningConflictCompactionTask = new TaskStatusPlus( - "taskId", - "groupId", - KillUnusedSegments.KILL_TASK_TYPE, - DateTimes.EPOCH, - DateTimes.EPOCH, - TaskState.RUNNING, - RunnerTaskState.RUNNING, - -1L, - TaskLocation.unknown(), - "datasource", - null - ); - Mockito.doReturn(Futures.immediateFuture( - CloseableIterators.withEmptyBaggage(ImmutableList.of(runningConflictCompactionTask).iterator()))) - .when(overlordClient) - .taskStatuses(null, null, 0); - } } diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index cd7bfdb1e4ff..a7744256d5d4 100644 --- 
a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -79,7 +79,6 @@ public void testSerde() throws Exception 1, 2, whitelist, - 1.0, false, 1, decommissioning, @@ -100,7 +99,6 @@ public void testSerde() throws Exception 1, 2, whitelist, - 1.0, false, 1, ImmutableSet.of("host1"), @@ -121,7 +119,6 @@ public void testSerde() throws Exception 1, 2, whitelist, - 1.0, false, 1, ImmutableSet.of("host1"), @@ -142,7 +139,6 @@ public void testSerde() throws Exception 1, 2, whitelist, - 1.0, false, 1, ImmutableSet.of("host1"), @@ -163,7 +159,6 @@ public void testSerde() throws Exception 1, 2, whitelist, - 1.0, false, 1, ImmutableSet.of("host1"), @@ -184,7 +179,6 @@ public void testSerde() throws Exception 1, 2, whitelist, - 1.0, false, 1, ImmutableSet.of("host1"), @@ -222,7 +216,6 @@ public void testConstructorWithNullsShouldKillUnusedSegmentsInAllDataSources() null, null, null, - null, ImmutableSet.of("host1"), 5, true, @@ -250,7 +243,6 @@ public void testConstructorWithSpecificDataSourcesToKillShouldNotKillUnusedSegme ImmutableSet.of("test1"), null, null, - null, ImmutableSet.of("host1"), 5, true, @@ -300,7 +292,6 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio 1, 2, whitelist, - 1.0, false, 1, decommissioning, @@ -321,7 +312,6 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio 1, 2, whitelist, - 1.0, false, 1, ImmutableSet.of("host1"), @@ -342,7 +332,6 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio 1, 2, whitelist, - 1.0, false, 1, ImmutableSet.of("host1"), @@ -387,7 +376,6 @@ public void testSerdeWithStringInKillDataSourceWhitelist() throws Exception 1, 2, ImmutableSet.of("test1", "test2"), - 1.0, false, 1, ImmutableSet.of(), @@ -447,7 +435,6 @@ public void testHandleMissingPercentOfSegmentsToConsiderPerMove() throws Excepti 1, 2, whitelist, - 
1.0, false, 1, decommissioning, @@ -490,7 +477,6 @@ public void testSerdeWithKillAllDataSources() throws Exception 1, 2, ImmutableSet.of(), - 1.0, true, 1, ImmutableSet.of(), @@ -544,7 +530,6 @@ public void testDeserializeWithoutMaxSegmentsInNodeLoadingQueue() throws Excepti 1, 2, ImmutableSet.of(), - 1.0, true, EXPECTED_DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE, ImmutableSet.of(), @@ -570,7 +555,6 @@ public void testBuilderDefaults() 500, 1, emptyList, - 1.0, true, EXPECTED_DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE, emptyList, @@ -599,38 +583,7 @@ public void testBuilderWithDefaultSpecificDataSourcesToKillUnusedSegmentsInSpeci 500, 1, ImmutableSet.of("DATASOURCE"), - 1.0, - false, - EXPECTED_DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE, - ImmutableSet.of(), - 70, false, - false, - Integer.MAX_VALUE - ); - } - - @Test - public void testBuilderWithKillTaskSlotRatio() - { - double killTaskSlotRatio = 0.2; - CoordinatorDynamicConfig defaultConfig = - CoordinatorDynamicConfig.builder() - .withKillTaskSlotRatio(killTaskSlotRatio) - .build(); - CoordinatorDynamicConfig config = CoordinatorDynamicConfig.builder().build(defaultConfig); - assertConfig( - config, - 900000, - 524288000, - 100, - 100, - 15, - 500, - 1, - ImmutableSet.of(), - killTaskSlotRatio, - true, EXPECTED_DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE, ImmutableSet.of(), 70, @@ -668,7 +621,6 @@ public void testUpdate() null, null, null, - null, null ).build(current) ); @@ -718,7 +670,6 @@ private void assertConfig( int expectedReplicationThrottleLimit, int expectedBalancerComputeThreads, Set expectedSpecificDataSourcesToKillUnusedSegmentsIn, - Double expectedKillTaskSlotRatio, boolean expectedKillUnusedSegmentsInAllDataSources, int expectedMaxSegmentsInNodeLoadingQueue, Set decommissioningNodes, @@ -742,10 +693,6 @@ private void assertConfig( expectedSpecificDataSourcesToKillUnusedSegmentsIn, config.getSpecificDataSourcesToKillUnusedSegmentsIn() ); - Assert.assertEquals( - expectedKillTaskSlotRatio, - 
config.getKillTaskSlotRatio() - ); Assert.assertEquals(expectedKillUnusedSegmentsInAllDataSources, config.isKillUnusedSegmentsInAllDataSources()); Assert.assertEquals(expectedMaxSegmentsInNodeLoadingQueue, config.getMaxSegmentsInNodeLoadingQueue()); Assert.assertEquals(decommissioningNodes, config.getDecommissioningNodes()); From 48b493832b9cc54d2cc5ef6b21b4fc99cb97c2f9 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Thu, 3 Aug 2023 11:20:32 -0400 Subject: [PATCH 21/23] * fix failing tests --- .../actions/RetrieveUnusedSegmentsAction.java | 13 +++++++------ .../ClientKillUnusedSegmentsTaskQuerySerdeTest.java | 2 +- .../test/TestIndexerMetadataStorageCoordinator.java | 5 +++-- .../overlord/IndexerMetadataStorageCoordinator.java | 4 ++-- .../IndexerSQLMetadataStorageCoordinator.java | 4 ++-- .../druid/metadata/SqlSegmentsMetadataQuery.java | 12 ++++++------ 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java index 1b7b98d283a0..150648858c15 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUnusedSegmentsAction.java @@ -40,18 +40,18 @@ public class RetrieveUnusedSegmentsAction implements TaskAction> getReturnTypeReference() public List perform(Task task, TaskActionToolbox toolbox) { return toolbox.getIndexerMetadataStorageCoordinator() - .retrieveUnusedSegmentsForInterval(dataSource, interval, maxSegments); + .retrieveUnusedSegmentsForInterval(dataSource, interval, limit); } @Override @@ -98,6 +98,7 @@ public String toString() return getClass().getSimpleName() + "{" + "dataSource='" + dataSource + '\'' + ", interval=" + interval + + ", limit=" + limit + '}'; } } diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java index 1d6639cce170..3ab6bae4688f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientKillUnusedSegmentsTaskQuerySerdeTest.java @@ -51,7 +51,7 @@ public void testClientKillUnusedSegmentsTaskQueryToKillUnusedSegmentsTask() thro "killTaskId", "datasource", Intervals.of("2020-01-01/P1D"), - true, + false, 99, 5 ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 3f1016ae9bc8..659995e31e15 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -42,6 +42,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; public class TestIndexerMetadataStorageCoordinator implements IndexerMetadataStorageCoordinator { @@ -114,11 +115,11 @@ public List retrieveUnusedSegmentsForInterval(String dataSource, In public List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, - @Nullable Integer maxSegments + @Nullable Integer limit ) { synchronized (unusedSegments) { - return unusedSegments.subList(0, maxSegments); + return unusedSegments.stream().limit(limit).collect(Collectors.toList()); } } diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java 
b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index e7892d69269d..b426403a0241 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -134,7 +134,7 @@ Collection retrieveUsedSegmentsForIntervals( * * @param dataSource The data source the segments belong to * @param interval Filter the data segments to ones that include data in this interval exclusively. - * @param maxSegments The maximum number of unused segments to retreive. If null, no limit is applied. + * @param limit The maximum number of unused segments to retrieve. If null, no limit is applied. * * @return DataSegments which include ONLY data within the requested interval and are marked as unused. Segments NOT * returned here may include data in the interval @@ -142,7 +142,7 @@ Collection retrieveUsedSegmentsForIntervals( default List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, - @Nullable Integer maxSegments + @Nullable Integer limit ) { return retrieveUnusedSegmentsForInterval(dataSource, interval); diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 24fd064c5c00..6c4d523133a7 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -198,14 +198,14 @@ public List retrieveUnusedSegmentsForInterval(final String dataSour public List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, - @Nullable Integer maxSegments + @Nullable Integer limit ) { final List matchingSegments = connector.inReadOnlyTransaction( (handle, status) -> { try (final CloseableIterator iterator = 
SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUnusedSegments(dataSource, Collections.singletonList(interval), maxSegments)) { + .retrieveUnusedSegments(dataSource, Collections.singletonList(interval), limit)) { return ImmutableList.copyOf(iterator); } } diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index 49b100c3608c..45896a865ef9 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -121,10 +121,10 @@ public CloseableIterator retrieveUsedSegments( public CloseableIterator retrieveUnusedSegments( final String dataSource, final Collection intervals, - @Nullable final Integer maxSegments + @Nullable final Integer limit ) { - return retrieveSegments(dataSource, intervals, IntervalMode.CONTAINS, false, maxSegments); + return retrieveSegments(dataSource, intervals, IntervalMode.CONTAINS, false, limit); } /** @@ -217,7 +217,7 @@ private CloseableIterator retrieveSegments( final Collection intervals, final IntervalMode matchMode, final boolean used, - @Nullable final Integer maxSegments + @Nullable final Integer limit ) { // Check if the intervals all support comparing as strings. If so, bake them into the SQL. 
@@ -258,13 +258,13 @@ private CloseableIterator retrieveSegments( sb.append(")"); } - Query> sql = handle + final Query> sql = handle .createQuery(StringUtils.format(sb.toString(), dbTables.getSegmentsTable())) .setFetchSize(connector.getStreamingFetchSize()) .bind("used", used) .bind("dataSource", dataSource); - if (null != maxSegments) { - sql = sql.setMaxRows(maxSegments); + if (null != limit) { + sql.setMaxRows(limit); } if (compareAsString) { From c95f13b66bde5ce2975a8c055d75117de00040a0 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Thu, 3 Aug 2023 11:47:02 -0400 Subject: [PATCH 22/23] * address review comments --- .../common/task/KillUnusedSegmentsTask.java | 20 ++++++++++--------- .../indexing/overlord/TaskLifecycleTest.java | 2 +- ...TestIndexerMetadataStorageCoordinator.java | 18 ++++++++++------- .../ClientKillUnusedSegmentsTaskQuery.java | 4 +--- .../IndexerMetadataStorageCoordinator.java | 12 +++++------ 5 files changed, 30 insertions(+), 26 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 3dd525da86cd..35653aa2303f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -37,6 +37,7 @@ import org.apache.druid.indexing.common.actions.TaskActionClient; import org.apache.druid.indexing.common.actions.TaskLocks; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.server.security.ResourceAction; import org.apache.druid.timeline.DataSegment; @@ -189,7 +190,16 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception int nextBatchSize = computeNextBatchSize(numSegmentsKilled); @Nullable 
Integer numTotalBatches = getNumTotalBatches(); List unusedSegments; + LOG.info( + "Starting kill with batchSize[%d], up to limit[%d] segments will be deleted%s", + batchSize, + limit, + numTotalBatches != null ? StringUtils.format(" in([%d] batches]).", numTotalBatches) : "." + ); do { + if (nextBatchSize <= 0) { + break; + } unusedSegments = toolbox .getTaskActionClient() .submit(new RetrieveUnusedSegmentsAction(getDataSource(), getInterval(), nextBatchSize)); @@ -214,15 +224,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception numBatchesProcessed++; numSegmentsKilled += unusedSegments.size(); - if (numBatchesProcessed % 10 == 0) { - if (null != numTotalBatches) { - LOG.info("Processed [%d/%d] batches for kill task[%s].", - numBatchesProcessed, numTotalBatches, getId() - ); - } else { - LOG.info("Processed [%d] batches for kill task[%s].", numBatchesProcessed, getId()); - } - } + LOG.info("Processed [%d] batches for kill task[%s].", numBatchesProcessed, getId()); nextBatchSize = computeNextBatchSize(numSegmentsKilled); } while (unusedSegments.size() != 0 && (null == numTotalBatches || numBatchesProcessed < numTotalBatches)); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java index cf02ddce7271..31ec645b3fdd 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java @@ -958,7 +958,7 @@ public DataSegment apply(String input) Assert.assertEquals("merged statusCode", TaskState.SUCCESS, status.getStatusCode()); Assert.assertEquals("num segments published", 0, mdc.getPublished().size()); Assert.assertEquals("num segments nuked", 3, mdc.getNuked().size()); - Assert.assertEquals("delete segment batch call count", 1, mdc.getDeleteSegmentsCount()); + Assert.assertEquals("delete 
segment batch call count", 2, mdc.getDeleteSegmentsCount()); Assert.assertTrue( "expected unused segments get killed", expectedUnusedSegments.containsAll(mdc.getNuked()) && mdc.getNuked().containsAll( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 659995e31e15..66af0a973043 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -42,7 +42,7 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; +import java.util.stream.Stream; public class TestIndexerMetadataStorageCoordinator implements IndexerMetadataStorageCoordinator { @@ -112,14 +112,18 @@ public List retrieveUnusedSegmentsForInterval(String dataSource, In } @Override - public List retrieveUnusedSegmentsForInterval( - String dataSource, - Interval interval, - @Nullable Integer limit - ) + public List retrieveUnusedSegmentsForInterval(String dataSource, Interval interval, @Nullable Integer limit) { synchronized (unusedSegments) { - return unusedSegments.stream().limit(limit).collect(Collectors.toList()); + Stream resultStream = unusedSegments.stream(); + + resultStream = resultStream.filter(ds -> !nuked.contains(ds)); + + if (limit != null) { + resultStream = resultStream.limit(limit); + } + + return ImmutableList.copyOf(resultStream.iterator()); } } diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java index 77e52159775c..3676d684097f 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java +++ 
b/server/src/main/java/org/apache/druid/client/indexing/ClientKillUnusedSegmentsTaskQuery.java @@ -59,9 +59,7 @@ public ClientKillUnusedSegmentsTaskQuery( this.interval = interval; this.markAsUnused = markAsUnused; this.batchSize = batchSize; - if (null != limit) { - Preconditions.checkArgument(limit > 0, "limit must be > 0"); - } + Preconditions.checkArgument(limit == null || limit > 0, "limit must be > 0"); this.limit = limit; } diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index b426403a0241..3d8f4b858657 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -126,7 +126,10 @@ Collection retrieveUsedSegmentsForIntervals( /** * see {@link #retrieveUnusedSegmentsForInterval(String, Interval, Integer)} */ - List retrieveUnusedSegmentsForInterval(String dataSource, Interval interval); + default List retrieveUnusedSegmentsForInterval(String dataSource, Interval interval) + { + return retrieveUnusedSegmentsForInterval(dataSource, interval, null); + } /** * Retrieve all published segments which include ONLY data within the given interval and are marked as unused from the @@ -139,14 +142,11 @@ Collection retrieveUsedSegmentsForIntervals( * @return DataSegments which include ONLY data within the requested interval and are marked as unused. Segments NOT * returned here may include data in the interval */ - default List retrieveUnusedSegmentsForInterval( + List retrieveUnusedSegmentsForInterval( String dataSource, Interval interval, @Nullable Integer limit - ) - { - return retrieveUnusedSegmentsForInterval(dataSource, interval); - } + ); /** * Mark as unused segments which include ONLY data within the given interval. 
From 48e6fdcf3cd192c416b0ee67b6007891abd352f5 Mon Sep 17 00:00:00 2001 From: zachjsh Date: Thu, 3 Aug 2023 17:55:57 -0400 Subject: [PATCH 23/23] * docs update --- docs/data-management/delete.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/data-management/delete.md b/docs/data-management/delete.md index 556791971406..9e59c751bc2d 100644 --- a/docs/data-management/delete.md +++ b/docs/data-management/delete.md @@ -97,16 +97,16 @@ The available grammar is: "interval" : , "context": , "batchSize": , - "maxSegmentsToKill": + "limit": } ``` Some of the parameters used in the task payload are further explained below: -| Parameter | Default | Explanation | -|-------------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Parameter | Default | Explanation | +|-------------|-----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `batchSize` |100 | Maximum number of segments that are deleted in one kill batch. Some operations on the Overlord may get stuck while a `kill` task is in progress due to concurrency constraints (such as in `TaskLockbox`). 
Thus, a `kill` task splits the list of unused segments to be deleted into smaller batches to yield the Overlord resources intermittently to other task operations.| -| `limit` | null - no limit | Maximum number of segments that are deleted in the kill task. This property may not be combined with `markAsUnused: true` | +| `limit` | null - no limit | Maximum number of segments for the kill task to delete.| **WARNING:** The `kill` task permanently removes all information about the affected segments from the metadata store and