From 48b05e3510f96057333bd9309097362bea87d074 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Fri, 28 Mar 2025 13:22:56 +0530 Subject: [PATCH 01/24] Add unmanaged dynamic config --- .../coordinator/CoordinatorDynamicConfig.java | 48 +++++++++++++++++-- .../server/coordinator/ServerHolder.java | 20 ++++++++ .../balancer/TierSegmentBalancer.java | 3 +- .../duty/PrepareBalancerAndLoadQueues.java | 2 + .../loading/StrategicSegmentAssigner.java | 2 +- .../http/CoordinatorDynamicConfigTest.java | 8 +++- 6 files changed, 75 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 806b6ebbee1e..87aaa6739e2b 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -73,6 +73,8 @@ public class CoordinatorDynamicConfig private final Map validDebugDimensions; private final Set turboLoadingNodes; + private final Set unmanagedNodes; + private final Map cloneServers; /** * Stale pending segments belonging to the data sources in this list are not killed by {@code @@ -123,7 +125,9 @@ public CoordinatorDynamicConfig( @JsonProperty("useRoundRobinSegmentAssignment") @Nullable Boolean useRoundRobinSegmentAssignment, @JsonProperty("smartSegmentLoading") @Nullable Boolean smartSegmentLoading, @JsonProperty("debugDimensions") @Nullable Map debugDimensions, - @JsonProperty("turboLoadingNodes") @Nullable Set turboLoadingNodes + @JsonProperty("turboLoadingNodes") @Nullable Set turboLoadingNodes, + @JsonProperty("unmanagedNodes") @Nullable Set unmanagedNodes, + @JsonProperty("cloneServers") @Nullable Map cloneServers ) { this.markSegmentAsUnusedDelayMillis = @@ -168,6 +172,8 @@ public CoordinatorDynamicConfig( this.debugDimensions = debugDimensions; this.validDebugDimensions = 
validateDebugDimensions(debugDimensions); this.turboLoadingNodes = Configs.valueOrDefault(turboLoadingNodes, Set.of()); + this.unmanagedNodes = Configs.valueOrDefault(unmanagedNodes, Set.of()); + this.cloneServers = Configs.valueOrDefault(cloneServers, Map.of()); } private Map validateDebugDimensions(Map debugDimensions) @@ -321,6 +327,18 @@ public boolean getReplicateAfterLoadTimeout() return replicateAfterLoadTimeout; } + @JsonProperty + public Set getUnmanagedNodes() + { + return unmanagedNodes; + } + + @JsonProperty + public Map getCloneServers() + { + return cloneServers; + } + /** * List of servers to put in turbo-loading mode. These servers will use a larger thread pool to load * segments. This causes decreases the average time taken to load segments. However, this also means less resources @@ -460,6 +478,8 @@ public static class Builder private Boolean useRoundRobinSegmentAssignment; private Boolean smartSegmentLoading; private Set turboLoadingNodes; + private Set unmanagedNodes; + private Map cloneServers; public Builder() { @@ -483,7 +503,9 @@ public Builder( @JsonProperty("useRoundRobinSegmentAssignment") @Nullable Boolean useRoundRobinSegmentAssignment, @JsonProperty("smartSegmentLoading") @Nullable Boolean smartSegmentLoading, @JsonProperty("debugDimensions") @Nullable Map debugDimensions, - @JsonProperty("turboLoadingNodes") @Nullable Set turboLoadingNodes + @JsonProperty("turboLoadingNodes") @Nullable Set turboLoadingNodes, + @JsonProperty("unmanagedNodes") @Nullable Set unmanagedNodes, + @JsonProperty("cloneServers") @Nullable Map cloneServers ) { this.markSegmentAsUnusedDelayMillis = markSegmentAsUnusedDelayMillis; @@ -503,6 +525,8 @@ public Builder( this.smartSegmentLoading = smartSegmentLoading; this.debugDimensions = debugDimensions; this.turboLoadingNodes = turboLoadingNodes; + this.unmanagedNodes = unmanagedNodes; + this.cloneServers = cloneServers; } public Builder withMarkSegmentAsUnusedDelayMillis(long leadingTimeMillis) @@ -595,6 +619,18 
@@ public Builder withUseRoundRobinSegmentAssignment(boolean useRoundRobinSegmentAs return this; } + public Builder withUnmanagedNodes(Set unmanagedNodes) + { + this.unmanagedNodes = unmanagedNodes; + return this; + } + + public Builder withCloneServers(Map cloneServers) + { + this.cloneServers = cloneServers; + return this; + } + /** * Builds a CoordinatoryDynamicConfig using either the configured values, or * the default value if not configured. @@ -621,7 +657,9 @@ public CoordinatorDynamicConfig build() valueOrDefault(useRoundRobinSegmentAssignment, Defaults.USE_ROUND_ROBIN_ASSIGNMENT), valueOrDefault(smartSegmentLoading, Defaults.SMART_SEGMENT_LOADING), debugDimensions, - turboLoadingNodes + turboLoadingNodes, + unmanagedNodes, + cloneServers ); } @@ -652,7 +690,9 @@ public CoordinatorDynamicConfig build(CoordinatorDynamicConfig defaults) valueOrDefault(useRoundRobinSegmentAssignment, defaults.isUseRoundRobinSegmentAssignment()), valueOrDefault(smartSegmentLoading, defaults.isSmartSegmentLoading()), valueOrDefault(debugDimensions, defaults.getDebugDimensions()), - valueOrDefault(turboLoadingNodes, defaults.getTurboLoadingNodes()) + valueOrDefault(turboLoadingNodes, defaults.getTurboLoadingNodes()), + valueOrDefault(unmanagedNodes, defaults.getUnmanagedNodes()), + valueOrDefault(cloneServers, defaults.getCloneServers()) ); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java index 5de1bd5ee060..d6b4d73816d8 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java @@ -57,6 +57,7 @@ public class ServerHolder implements Comparable private final ImmutableDruidServer server; private final LoadQueuePeon peon; private final boolean isDecommissioning; + private final boolean isUnmanaged; private final int maxAssignmentsInRun; private final int 
maxLifetimeInQueue; @@ -85,6 +86,17 @@ public ServerHolder(ImmutableDruidServer server, LoadQueuePeon peon, boolean isD this(server, peon, isDecommissioning, 0, 1); } + public ServerHolder( + ImmutableDruidServer server, + LoadQueuePeon peon, + boolean isDecommissioning, + int maxSegmentsInLoadQueue, + int maxLifetimeInQueue + ) + { + this(server, peon, isDecommissioning, false, maxSegmentsInLoadQueue, maxLifetimeInQueue); + } + /** * Creates a new ServerHolder valid for a single coordinator run. * @@ -101,6 +113,7 @@ public ServerHolder( ImmutableDruidServer server, LoadQueuePeon peon, boolean isDecommissioning, + boolean isUnmanaged, int maxSegmentsInLoadQueue, int maxLifetimeInQueue ) @@ -108,6 +121,7 @@ public ServerHolder( this.server = server; this.peon = peon; this.isDecommissioning = isDecommissioning; + this.isUnmanaged = isUnmanaged; this.maxAssignmentsInRun = maxSegmentsInLoadQueue == 0 ? Integer.MAX_VALUE @@ -213,6 +227,11 @@ public boolean isDecommissioning() return isDecommissioning; } + public boolean isUnmanaged() + { + return isUnmanaged; + } + public boolean isLoadQueueFull() { return totalAssignmentsInRun >= maxAssignmentsInRun; @@ -238,6 +257,7 @@ public long getAvailableSize() public boolean canLoadSegment(DataSegment segment) { return !isDecommissioning + && !isUnmanaged && !hasSegmentLoaded(segment.getId()) && getActionOnSegment(segment) == null && totalAssignmentsInRun < maxAssignmentsInRun diff --git a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java index 36be8a61de93..39c60a75abac 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java @@ -77,7 +77,8 @@ public TierSegmentBalancer( Map> partitions = 
servers.stream().collect(Collectors.partitioningBy(ServerHolder::isDecommissioning)); this.decommissioningServers = partitions.get(true); - this.activeServers = partitions.get(false); + this.activeServers = // TODO: cleanup + partitions.get(false).stream().collect(Collectors.partitioningBy(ServerHolder::isUnmanaged)).get(false); this.movingSegmentCount = activeServers.stream().mapToInt(ServerHolder::getNumMovingSegments).sum(); this.maxSegmentsToMove = maxSegmentsToMove; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java index a9e926ea4f7d..d62827df6709 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java @@ -152,6 +152,7 @@ private DruidCluster prepareCluster( ) { final Set decommissioningServers = dynamicConfig.getDecommissioningNodes(); + final Set unmanagedServers = dynamicConfig.getUnmanagedNodes(); final DruidCluster.Builder cluster = DruidCluster.builder(); for (ImmutableDruidServer server : currentServers) { cluster.add( @@ -159,6 +160,7 @@ private DruidCluster prepareCluster( server, taskMaster.getPeonForServer(server), decommissioningServers.contains(server.getHost()), + unmanagedServers.contains(server.getHost()), segmentLoadingConfig.getMaxSegmentsInLoadQueue(), segmentLoadingConfig.getMaxLifetimeInLoadQueue() ) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java index a5e38eee7d4b..2a5825f25bd6 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java +++ 
b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java @@ -446,7 +446,7 @@ private int dropReplicas( for (ServerHolder server : eligibleServers) { if (server.isDecommissioning()) { eligibleDyingServers.add(server); - } else { + } else if (!server.isUnmanaged()){ eligibleLiveServers.add(server); } } diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index 1be987ddc278..30702e8af368 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -244,7 +244,9 @@ public void testConstructorWithNullsShouldKillUnusedSegmentsInAllDataSources() false, false, null, - ImmutableSet.of("host1") + ImmutableSet.of("host1"), + null, + null ); Assert.assertTrue(config.getSpecificDataSourcesToKillUnusedSegmentsIn().isEmpty()); } @@ -269,7 +271,9 @@ public void testConstructorWithSpecificDataSourcesToKillShouldNotKillUnusedSegme false, false, null, - ImmutableSet.of("host1") + ImmutableSet.of("host1"), + null, + null ); Assert.assertEquals(ImmutableSet.of("test1"), config.getSpecificDataSourcesToKillUnusedSegmentsIn()); } From da180ff08bc4f1229a08b81e5f165dc3b6105052 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 1 Apr 2025 15:38:04 +0530 Subject: [PATCH 02/24] Add clone duty --- .../server/coordinator/DruidCoordinator.java | 4 +- .../server/coordinator/duty/HandleClones.java | 55 +++++++++++++++++++ .../loading/HttpLoadQueuePeon.java | 1 + 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java index 
0fd3956dfdd3..c07805f9f2ce 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java @@ -69,6 +69,7 @@ import org.apache.druid.server.coordinator.duty.CoordinatorDuty; import org.apache.druid.server.coordinator.duty.CoordinatorDutyGroup; import org.apache.druid.server.coordinator.duty.DutyGroupStatus; +import org.apache.druid.server.coordinator.duty.HandleClones; import org.apache.druid.server.coordinator.duty.KillAuditLog; import org.apache.druid.server.coordinator.duty.KillCompactionConfig; import org.apache.druid.server.coordinator.duty.KillDatasourceMetadata; @@ -557,7 +558,8 @@ private List makeHistoricalManagementDuties() new MarkOvershadowedSegmentsAsUnused(deleteSegments), new MarkEternityTombstonesAsUnused(deleteSegments), new BalanceSegments(config.getCoordinatorPeriod()), - new CollectLoadQueueStats() + new CollectLoadQueueStats(), + new HandleClones() ); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java new file mode 100644 index 000000000000..aaae0ff68c07 --- /dev/null +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java @@ -0,0 +1,55 @@ +package org.apache.druid.server.coordinator.duty; + +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; +import org.apache.druid.server.coordinator.ServerHolder; +import org.apache.druid.server.coordinator.loading.SegmentAction; +import org.apache.druid.timeline.DataSegment; + +import javax.annotation.Nullable; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +public class HandleClones implements CoordinatorDuty +{ + private static final Logger log = new Logger(HandleClones.class); + + @Nullable + @Override + public 
DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) + { + Map cloneServers = params.getCoordinatorDynamicConfig().getCloneServers(); + Map historicalMap = new HashMap<>(); // TODO: redo + params.getDruidCluster().getHistoricals().forEach((tier, historicals) -> { + historicals.forEach( + historical -> { + historicalMap.put(historical.getServer().getHost(), historical); + } + ); + }); + + for (Map.Entry entry : cloneServers.entrySet()) { + String sourceHistoricalName = entry.getKey(); + ServerHolder sourceServer = historicalMap.get(sourceHistoricalName); + + String targetHistorical = entry.getValue(); + ServerHolder targetServer = historicalMap.get(targetHistorical); + + for (DataSegment segment : sourceServer.getServedSegments()) { + if (!targetServer.getServedSegments().contains(segment)) { + log.warn("Cloning load of [%s] from [%s] to [%s]", segment, sourceServer, targetServer); + targetServer.getPeon().loadSegment(segment, SegmentAction.LOAD, null); + } + } + + for (DataSegment segment : targetServer.getServedSegments()) { + if (!sourceServer.getServedSegments().contains(segment)) { + log.warn("Cloning drop of [%s] from [%s] to [%s]", segment, sourceServer, targetServer); + targetServer.getPeon().dropSegment(segment, null); + } + } + } + return params; + } +} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java index 38eeb42413e7..793c5ab84bdb 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java @@ -209,6 +209,7 @@ private void doSegmentManagement() while (newRequests.size() < batchSize && queuedSegmentIterator.hasNext()) { final SegmentHolder holder = queuedSegmentIterator.next(); final DataSegment segment = holder.getSegment(); + log.warn("HTTP PEON OPERATION TO 
[%s]: [%s][%s]", serverId, loadingMode, segment); if (holder.hasRequestTimedOut()) { onRequestFailed(holder, SegmentChangeStatus.failed("timed out")); queuedSegmentIterator.remove(); From 555f7421a6917728d565fc3272db568856d189b1 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 1 Apr 2025 16:03:15 +0530 Subject: [PATCH 03/24] clean --- .../server/coordinator/duty/HandleClones.java | 24 ++++++++++--------- .../loading/HttpLoadQueuePeon.java | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java index aaae0ff68c07..346e6a2c5f9a 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java @@ -8,8 +8,8 @@ import javax.annotation.Nullable; import java.util.Collection; -import java.util.HashMap; import java.util.Map; +import java.util.stream.Collectors; public class HandleClones implements CoordinatorDuty { @@ -20,14 +20,16 @@ public class HandleClones implements CoordinatorDuty public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { Map cloneServers = params.getCoordinatorDynamicConfig().getCloneServers(); - Map historicalMap = new HashMap<>(); // TODO: redo - params.getDruidCluster().getHistoricals().forEach((tier, historicals) -> { - historicals.forEach( - historical -> { - historicalMap.put(historical.getServer().getHost(), historical); - } - ); - }); + // TODO: clean up + Map historicalMap = params.getDruidCluster() + .getHistoricals() + .values() + .stream() + .flatMap(Collection::stream) + .collect(Collectors.toMap( + serverHolder -> serverHolder.getServer().getHost(), + serverHolder -> serverHolder + )); for (Map.Entry entry : cloneServers.entrySet()) { String sourceHistoricalName = entry.getKey(); @@ -38,14 +40,14 @@ public 
DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) for (DataSegment segment : sourceServer.getServedSegments()) { if (!targetServer.getServedSegments().contains(segment)) { - log.warn("Cloning load of [%s] from [%s] to [%s]", segment, sourceServer, targetServer); + log.info("Cloning load of [%s] from [%s] to [%s]", segment, sourceServer, targetServer); targetServer.getPeon().loadSegment(segment, SegmentAction.LOAD, null); } } for (DataSegment segment : targetServer.getServedSegments()) { if (!sourceServer.getServedSegments().contains(segment)) { - log.warn("Cloning drop of [%s] from [%s] to [%s]", segment, sourceServer, targetServer); + log.info("Cloning drop of [%s] from [%s] to [%s]", segment, sourceServer, targetServer); targetServer.getPeon().dropSegment(segment, null); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java index 793c5ab84bdb..c4b722976bd1 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java @@ -209,7 +209,7 @@ private void doSegmentManagement() while (newRequests.size() < batchSize && queuedSegmentIterator.hasNext()) { final SegmentHolder holder = queuedSegmentIterator.next(); final DataSegment segment = holder.getSegment(); - log.warn("HTTP PEON OPERATION TO [%s]: [%s][%s]", serverId, loadingMode, segment); + log.info("HTTP PEON OPERATION TO [%s]: [%s][%s]", serverId, loadingMode, segment); if (holder.hasRequestTimedOut()) { onRequestFailed(holder, SegmentChangeStatus.failed("timed out")); queuedSegmentIterator.remove(); From f3b699607b26b37bf07e02154bd865296448cb3e Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 2 Apr 2025 13:15:21 +0530 Subject: [PATCH 04/24] Fix bugs --- .../server/coordinator/DruidCoordinator.java | 4 ++-- 
.../coordinator/SegmentCountsPerInterval.java | 10 ++++++++ .../server/coordinator/duty/HandleClones.java | 24 +++++++++---------- .../loading/SegmentReplicaCountMap.java | 4 ++++ 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java index c07805f9f2ce..cd1312077f20 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java @@ -558,8 +558,8 @@ private List makeHistoricalManagementDuties() new MarkOvershadowedSegmentsAsUnused(deleteSegments), new MarkEternityTombstonesAsUnused(deleteSegments), new BalanceSegments(config.getCoordinatorPeriod()), - new CollectLoadQueueStats(), - new HandleClones() + new HandleClones(), + new CollectLoadQueueStats() ); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java b/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java index a9767b8f7682..90c253a27fde 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java @@ -26,7 +26,9 @@ import org.joda.time.Interval; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; /** * Maintains a count of segments for each datasource and interval. 
@@ -38,17 +40,20 @@ public class SegmentCountsPerInterval private final Map> datasourceIntervalToSegmentCount = new HashMap<>(); private final Object2IntMap intervalToTotalSegmentCount = new Object2IntOpenHashMap<>(); private final Object2IntMap datasourceToTotalSegmentCount = new Object2IntOpenHashMap<>(); + private final Set segments = new HashSet<>(); public void addSegment(DataSegment segment) { updateCountInInterval(segment, 1); totalSegmentBytes += segment.getSize(); + segments.add(segment); } public void removeSegment(DataSegment segment) { updateCountInInterval(segment, -1); totalSegmentBytes -= segment.getSize(); + segments.remove(segment); } public int getTotalSegmentCount() @@ -61,6 +66,11 @@ public long getTotalSegmentBytes() return totalSegmentBytes; } + public Set getSegments() + { + return segments; + } + public Object2IntMap getDatasourceToTotalSegmentCount() { return datasourceToTotalSegmentCount; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java index 346e6a2c5f9a..55d15dbf295d 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java @@ -19,17 +19,17 @@ public class HandleClones implements CoordinatorDuty @Override public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { - Map cloneServers = params.getCoordinatorDynamicConfig().getCloneServers(); + final Map cloneServers = params.getCoordinatorDynamicConfig().getCloneServers(); // TODO: clean up - Map historicalMap = params.getDruidCluster() - .getHistoricals() - .values() - .stream() - .flatMap(Collection::stream) - .collect(Collectors.toMap( - serverHolder -> serverHolder.getServer().getHost(), - serverHolder -> serverHolder - )); + final Map historicalMap = params.getDruidCluster() + .getHistoricals() + .values() + .stream() + 
.flatMap(Collection::stream) + .collect(Collectors.toMap( + serverHolder -> serverHolder.getServer().getHost(), + serverHolder -> serverHolder + )); for (Map.Entry entry : cloneServers.entrySet()) { String sourceHistoricalName = entry.getKey(); @@ -38,14 +38,14 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) String targetHistorical = entry.getValue(); ServerHolder targetServer = historicalMap.get(targetHistorical); - for (DataSegment segment : sourceServer.getServedSegments()) { + for (DataSegment segment : sourceServer.getProjectedSegments().getSegments()) { if (!targetServer.getServedSegments().contains(segment)) { log.info("Cloning load of [%s] from [%s] to [%s]", segment, sourceServer, targetServer); targetServer.getPeon().loadSegment(segment, SegmentAction.LOAD, null); } } - for (DataSegment segment : targetServer.getServedSegments()) { + for (DataSegment segment : targetServer.getProjectedSegments().getSegments()) { if (!sourceServer.getServedSegments().contains(segment)) { log.info("Cloning drop of [%s] from [%s] to [%s]", segment, sourceServer, targetServer); targetServer.getPeon().dropSegment(segment, null); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java index 3d3e34072fba..31842fcf6a18 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java @@ -49,6 +49,10 @@ private void initReplicaCounts(DruidCluster cluster) cluster.getHistoricals().forEach( (tier, historicals) -> historicals.forEach( serverHolder -> { + if (serverHolder.isUnmanaged()) { + // Don't manage + return; + } // Add segments already loaded on this server for (DataSegment segment : serverHolder.getServedSegments()) { computeIfAbsent(segment.getId(), 
tier).incrementLoaded(); From f1a4ebc26d69aa4d3298b647d20ecf947cc5873c Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 3 Apr 2025 11:45:50 +0530 Subject: [PATCH 05/24] Add test --- .../server/coordinator/DruidCoordinator.java | 4 +- .../balancer/TierSegmentBalancer.java | 7 +- ...Clones.java => HistoricalCloningDuty.java} | 21 ++++- .../druid/server/coordinator/stats/Stats.java | 4 + .../simulate/HistoricalCloningTest.java | 78 +++++++++++++++++++ 5 files changed, 105 insertions(+), 9 deletions(-) rename server/src/main/java/org/apache/druid/server/coordinator/duty/{HandleClones.java => HistoricalCloningDuty.java} (73%) create mode 100644 server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java index cd1312077f20..b720b7fbf709 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java @@ -69,7 +69,7 @@ import org.apache.druid.server.coordinator.duty.CoordinatorDuty; import org.apache.druid.server.coordinator.duty.CoordinatorDutyGroup; import org.apache.druid.server.coordinator.duty.DutyGroupStatus; -import org.apache.druid.server.coordinator.duty.HandleClones; +import org.apache.druid.server.coordinator.duty.HistoricalCloningDuty; import org.apache.druid.server.coordinator.duty.KillAuditLog; import org.apache.druid.server.coordinator.duty.KillCompactionConfig; import org.apache.druid.server.coordinator.duty.KillDatasourceMetadata; @@ -558,7 +558,7 @@ private List makeHistoricalManagementDuties() new MarkOvershadowedSegmentsAsUnused(deleteSegments), new MarkEternityTombstonesAsUnused(deleteSegments), new BalanceSegments(config.getCoordinatorPeriod()), - new HandleClones(), + new HistoricalCloningDuty(), new CollectLoadQueueStats() ); } 
diff --git a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java index 39c60a75abac..6df273be57ef 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java @@ -75,10 +75,11 @@ public TierSegmentBalancer( this.runStats = params.getCoordinatorStats(); Map> partitions = - servers.stream().collect(Collectors.partitioningBy(ServerHolder::isDecommissioning)); + servers.stream() + .filter(s -> !s.isUnmanaged()) + .collect(Collectors.partitioningBy(ServerHolder::isDecommissioning)); this.decommissioningServers = partitions.get(true); - this.activeServers = // TODO: cleanup - partitions.get(false).stream().collect(Collectors.partitioningBy(ServerHolder::isUnmanaged)).get(false); + this.activeServers = partitions.get(false); this.movingSegmentCount = activeServers.stream().mapToInt(ServerHolder::getNumMovingSegments).sum(); this.maxSegmentsToMove = maxSegmentsToMove; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java similarity index 73% rename from server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java rename to server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java index 55d15dbf295d..dce2feb77144 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/HandleClones.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java @@ -4,6 +4,10 @@ import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; import org.apache.druid.server.coordinator.ServerHolder; import org.apache.druid.server.coordinator.loading.SegmentAction; +import 
org.apache.druid.server.coordinator.stats.CoordinatorRunStats; +import org.apache.druid.server.coordinator.stats.Dimension; +import org.apache.druid.server.coordinator.stats.RowKey; +import org.apache.druid.server.coordinator.stats.Stats; import org.apache.druid.timeline.DataSegment; import javax.annotation.Nullable; @@ -11,15 +15,16 @@ import java.util.Map; import java.util.stream.Collectors; -public class HandleClones implements CoordinatorDuty +public class HistoricalCloningDuty implements CoordinatorDuty { - private static final Logger log = new Logger(HandleClones.class); + private static final Logger log = new Logger(HistoricalCloningDuty.class); @Nullable @Override public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { final Map cloneServers = params.getCoordinatorDynamicConfig().getCloneServers(); + final CoordinatorRunStats stats = params.getCoordinatorStats(); // TODO: clean up final Map historicalMap = params.getDruidCluster() .getHistoricals() @@ -40,15 +45,23 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) for (DataSegment segment : sourceServer.getProjectedSegments().getSegments()) { if (!targetServer.getServedSegments().contains(segment)) { - log.info("Cloning load of [%s] from [%s] to [%s]", segment, sourceServer, targetServer); targetServer.getPeon().loadSegment(segment, SegmentAction.LOAD, null); + stats.add( + Stats.CoordinatorRun.CLONE_LOAD, + RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), + 1L + ); } } for (DataSegment segment : targetServer.getProjectedSegments().getSegments()) { if (!sourceServer.getServedSegments().contains(segment)) { - log.info("Cloning drop of [%s] from [%s] to [%s]", segment, sourceServer, targetServer); targetServer.getPeon().dropSegment(segment, null); + stats.add( + Stats.CoordinatorRun.CLONE_DROP, + RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), + 1L + ); } } } diff --git 
a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java index 0bc3b609bd38..9108de7d2977 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java @@ -137,6 +137,10 @@ public static class CoordinatorRun = CoordinatorStat.toDebugAndEmit("dutyRunTime", "coordinator/time"); public static final CoordinatorStat GROUP_RUN_TIME = CoordinatorStat.toDebugAndEmit("groupRunTime", "coordinator/global/time"); + public static final CoordinatorStat CLONE_LOAD + = CoordinatorStat.toDebugAndEmit("cloneLoad", "coordinator/clone/load"); + public static final CoordinatorStat CLONE_DROP + = CoordinatorStat.toDebugAndEmit("cloneDrop", "coordinator/clone/drop"); } public static class Kill diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java new file mode 100644 index 000000000000..67902909b3f2 --- /dev/null +++ b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java @@ -0,0 +1,78 @@ +package org.apache.druid.server.coordinator.simulate; + +import org.apache.druid.client.DruidServer; +import org.apache.druid.segment.TestDataSource; +import org.apache.druid.server.coordinator.CoordinatorDynamicConfig; +import org.apache.druid.server.coordinator.stats.Stats; +import org.apache.druid.timeline.DataSegment; +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class HistoricalCloningTest extends CoordinatorSimulationBaseTest +{ + private DruidServer historicalT11; + private DruidServer historicalT12; + + private final String datasource = TestDataSource.WIKI; + private final List segments = 
Segments.WIKI_10X1D; + + @Override + public void setUp() + { + // Setup historicals for 2 tiers, size 10 GB each + historicalT11 = createHistorical(1, Tier.T1, 10_000); + historicalT12 = createHistorical(2, Tier.T1, 10_000); + } + + @Test + public void testCloningHistorical() + { + final CoordinatorSimulation sim = + CoordinatorSimulation.builder() + .withSegments(segments) + .withServers(historicalT11, historicalT12) + .withRules(datasource, Load.on(Tier.T1, 1).forever()) + .withDynamicConfig( + withCloneServers( + Map.of( + historicalT11.getHost(), historicalT12.getHost() + ) + )) + .build(); + + startSimulation(sim); + runCoordinatorCycle(); + + verifyValue(Metric.ASSIGNED_COUNT, 10L); + verifyValue( + Stats.CoordinatorRun.CLONE_LOAD.getMetricName(), + Map.of("server", historicalT12.getName()), + 10L + ); + + loadQueuedSegments(); + Assert.assertEquals(10, historicalT11.getTotalSegments()); + Assert.assertEquals(10, historicalT12.getTotalSegments()); + } + + /** + * Creates a dynamic config with unlimited load queue, balancing disabled and + * the given {@code replicationThrottleLimit}. 
+ */ + private CoordinatorDynamicConfig withCloneServers(Map cloneServers) + { + final Set unmanagedServers = new HashSet<>(cloneServers.values()); + + return CoordinatorDynamicConfig.builder() + .withSmartSegmentLoading(true) + .withCloneServers(cloneServers) + .withUnmanagedNodes(unmanagedServers) + .withTurboLoadingNodes(unmanagedServers) + .build(); + } +} From 616946aa7f4da55f316c2eaaa5967d8a2a7afe56 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 3 Apr 2025 21:43:20 +0530 Subject: [PATCH 06/24] Fix tests --- .../coordinator/loading/HttpLoadQueuePeon.java | 1 - .../coordinator/simulate/HistoricalCloningTest.java | 13 +++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java index c4b722976bd1..38eeb42413e7 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java @@ -209,7 +209,6 @@ private void doSegmentManagement() while (newRequests.size() < batchSize && queuedSegmentIterator.hasNext()) { final SegmentHolder holder = queuedSegmentIterator.next(); final DataSegment segment = holder.getSegment(); - log.info("HTTP PEON OPERATION TO [%s]: [%s][%s]", serverId, loadingMode, segment); if (holder.hasRequestTimedOut()) { onRequestFailed(holder, SegmentChangeStatus.failed("timed out")); queuedSegmentIterator.remove(); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java index 67902909b3f2..f6e1dd84ac82 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java +++ 
b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java @@ -47,6 +47,7 @@ public void testCloningHistorical() startSimulation(sim); runCoordinatorCycle(); + loadQueuedSegments(); verifyValue(Metric.ASSIGNED_COUNT, 10L); verifyValue( @@ -55,6 +56,18 @@ public void testCloningHistorical() 10L ); + runCoordinatorCycle(); + verifyValue( + Metric.SUCCESS_ACTIONS, + Map.of("server", historicalT11.getName(), "description", "LOAD: NORMAL"), + 10L + ); + verifyValue( + Metric.SUCCESS_ACTIONS, + Map.of("server", historicalT12.getName(), "description", "LOAD: TURBO"), + 10L + ); + loadQueuedSegments(); Assert.assertEquals(10, historicalT11.getTotalSegments()); Assert.assertEquals(10, historicalT12.getTotalSegments()); From 06b0b2b13b257424e8e0cbe552e8321d5fbeb1b2 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 3 Apr 2025 21:44:16 +0530 Subject: [PATCH 07/24] Clean up peon test --- .../loading/HttpLoadQueuePeon.java | 3 +- .../loading/HttpLoadQueuePeonTest.java | 33 +++++++++---------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java index 38eeb42413e7..e4fa849beb18 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeon.java @@ -150,8 +150,7 @@ public HttpLoadQueuePeon( this.serverCapabilities = fetchSegmentLoadingCapabilities(); } - @VisibleForTesting - SegmentLoadingCapabilities fetchSegmentLoadingCapabilities() + private SegmentLoadingCapabilities fetchSegmentLoadingCapabilities() { try { final URL segmentLoadingCapabilitiesURL = new URL( diff --git a/server/src/test/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeonTest.java 
b/server/src/test/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeonTest.java index fba98bff3aec..cb4a8f1d7f3e 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeonTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/loading/HttpLoadQueuePeonTest.java @@ -73,10 +73,12 @@ public class HttpLoadQueuePeonTest private TestHttpClient httpClient; private HttpLoadQueuePeon httpLoadQueuePeon; + private SegmentLoadingCapabilities segmentLoadingCapabilities; @Before public void setUp() { + segmentLoadingCapabilities = new SegmentLoadingCapabilities(1, 3); httpClient = new TestHttpClient(); httpLoadQueuePeon = new HttpLoadQueuePeon( "http://dummy:4000", @@ -90,14 +92,7 @@ public void setUp() true ), httpClient.callbackExecutor - ) - { - @Override - SegmentLoadingCapabilities fetchSegmentLoadingCapabilities() - { - return new SegmentLoadingCapabilities(1, 3); - } - }; + ); httpLoadQueuePeon.start(); } @@ -344,14 +339,7 @@ public void testBatchSize() true ), httpClient.callbackExecutor - ) - { - @Override - SegmentLoadingCapabilities fetchSegmentLoadingCapabilities() - { - return new SegmentLoadingCapabilities(1, 3); - } - }; + ); Assert.assertEquals(1, httpLoadQueuePeon.calculateBatchSize(SegmentLoadingMode.NORMAL)); Assert.assertEquals(3, httpLoadQueuePeon.calculateBatchSize(SegmentLoadingMode.TURBO)); @@ -362,7 +350,7 @@ private LoadPeonCallback markSegmentProcessed(DataSegment segment) return success -> httpClient.processedSegments.add(segment); } - private static class TestHttpClient implements HttpClient, DataSegmentChangeHandler + private class TestHttpClient implements HttpClient, DataSegmentChangeHandler { final BlockingExecutorService processingExecutor = new BlockingExecutorService("HttpLoadQueuePeonTest-%s"); final BlockingExecutorService callbackExecutor = new BlockingExecutorService("HttpLoadQueuePeonTest-cb"); @@ -379,6 +367,7 @@ public ListenableFuture go( } @Override + 
@SuppressWarnings("unchecked") public ListenableFuture go( Request request, HttpResponseHandler httpResponseHandler, @@ -388,7 +377,17 @@ public ListenableFuture go( HttpResponse httpResponse = new DefaultHttpResponse(HttpVersion.HTTP_1_1, HttpResponseStatus.OK); httpResponse.setContent(ChannelBuffers.buffer(0)); httpResponseHandler.handleResponse(httpResponse, null); + try { + if (request.getUrl().toString().contains("/loadCapabilities")) { + return (ListenableFuture) Futures.immediateFuture( + new ByteArrayInputStream( + MAPPER.writerFor(SegmentLoadingCapabilities.class) + .writeValueAsBytes(segmentLoadingCapabilities) + ) + ); + } + List changeRequests = MAPPER.readValue( request.getContent().array(), HttpLoadQueuePeon.REQUEST_ENTITY_TYPE_REF From fc51d99269746e370aaefd6f94745bf2dd3bf9f7 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 3 Apr 2025 21:44:34 +0530 Subject: [PATCH 08/24] Fix checkstyle --- .../duty/HistoricalCloningDuty.java | 23 +++++++++++++++++++ .../loading/StrategicSegmentAssigner.java | 2 +- .../simulate/HistoricalCloningTest.java | 19 +++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java index dce2feb77144..c0f6f0d4cac3 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.druid.server.coordinator.duty; import org.apache.druid.java.util.common.logger.Logger; @@ -24,6 +43,10 @@ public class HistoricalCloningDuty implements CoordinatorDuty public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { final Map cloneServers = params.getCoordinatorDynamicConfig().getCloneServers(); + if (cloneServers.isEmpty()) { + return params; + } + final CoordinatorRunStats stats = params.getCoordinatorStats(); // TODO: clean up final Map historicalMap = params.getDruidCluster() diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java index 2a5825f25bd6..6c912b8734b8 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java @@ -446,7 +446,7 @@ private int dropReplicas( for (ServerHolder server : eligibleServers) { if (server.isDecommissioning()) { eligibleDyingServers.add(server); - } else if (!server.isUnmanaged()){ + } else if (!server.isUnmanaged()) { eligibleLiveServers.add(server); } } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java index f6e1dd84ac82..b86184ac9a7b 100644 --- 
a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.apache.druid.server.coordinator.simulate; import org.apache.druid.client.DruidServer; From 8b5cd0b445aa6dd127d38400767a8c53c004db77 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 3 Apr 2025 22:44:21 +0530 Subject: [PATCH 09/24] Add more tests --- .../coordinator/SegmentCountsPerInterval.java | 2 +- .../duty/HistoricalCloningDuty.java | 28 ++- .../loading/SegmentReplicaCountMap.java | 2 +- .../simulate/HistoricalCloningTest.java | 191 +++++++++++++++++- 4 files changed, 206 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java b/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java index 90c253a27fde..115c5496a192 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java @@ -31,7 +31,7 @@ import java.util.Set; /** - * Maintains a count of segments for each datasource and interval. + * Maintains a set of segments, along with count of segments for each datasource and interval. 
*/ public class SegmentCountsPerInterval { diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java index c0f6f0d4cac3..18cf9b00280a 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java @@ -20,6 +20,7 @@ package org.apache.druid.server.coordinator.duty; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.server.coordinator.CoordinatorDynamicConfig; import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; import org.apache.druid.server.coordinator.ServerHolder; import org.apache.druid.server.coordinator.loading.SegmentAction; @@ -29,21 +30,26 @@ import org.apache.druid.server.coordinator.stats.Stats; import org.apache.druid.timeline.DataSegment; -import javax.annotation.Nullable; import java.util.Collection; import java.util.Map; import java.util.stream.Collectors; +/** + * Handles cloning of historicals. Given the historical-to-historical clone mappings, based on + * {@link CoordinatorDynamicConfig#getCloneServers()}, copies any segment load or unload requests from the source + * historical to the target historical. + */ public class HistoricalCloningDuty implements CoordinatorDuty { private static final Logger log = new Logger(HistoricalCloningDuty.class); - @Nullable @Override public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { final Map cloneServers = params.getCoordinatorDynamicConfig().getCloneServers(); + if (cloneServers.isEmpty()) { + // No servers to be cloned.
return params; } @@ -63,11 +69,21 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) String sourceHistoricalName = entry.getKey(); ServerHolder sourceServer = historicalMap.get(sourceHistoricalName); - String targetHistorical = entry.getValue(); - ServerHolder targetServer = historicalMap.get(targetHistorical); + String targetHistoricalName = entry.getValue(); + ServerHolder targetServer = historicalMap.get(targetHistoricalName); + + if (sourceServer == null) { + log.info("Could not find source historical [%s]", sourceHistoricalName); + continue; + } + + if (targetServer == null) { + log.info("Could not find target historical [%s]", targetHistoricalName); + continue; + } for (DataSegment segment : sourceServer.getProjectedSegments().getSegments()) { - if (!targetServer.getServedSegments().contains(segment)) { + if (!targetServer.getProjectedSegments().getSegments().contains(segment)) { targetServer.getPeon().loadSegment(segment, SegmentAction.LOAD, null); stats.add( Stats.CoordinatorRun.CLONE_LOAD, @@ -78,7 +94,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) } for (DataSegment segment : targetServer.getProjectedSegments().getSegments()) { - if (!sourceServer.getServedSegments().contains(segment)) { + if (!sourceServer.getProjectedSegments().getSegments().contains(segment)) { targetServer.getPeon().dropSegment(segment, null); stats.add( Stats.CoordinatorRun.CLONE_DROP, diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java index 31842fcf6a18..86a4b9a8a4ae 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java @@ -50,7 +50,7 @@ private void initReplicaCounts(DruidCluster cluster) (tier, historicals) -> 
historicals.forEach( serverHolder -> { if (serverHolder.isUnmanaged()) { - // Don't manage + // Don't count segments on unmanaged historicals towards replica counts. return; } // Add segments already loaded on this server diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java index b86184ac9a7b..031e59f2f678 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java @@ -34,26 +34,29 @@ public class HistoricalCloningTest extends CoordinatorSimulationBaseTest { + private static final long SIZE_1TB = 1_000_000; + private DruidServer historicalT11; private DruidServer historicalT12; + private DruidServer historicalT13; private final String datasource = TestDataSource.WIKI; - private final List segments = Segments.WIKI_10X1D; @Override public void setUp() { // Setup historicals for 2 tiers, size 10 GB each - historicalT11 = createHistorical(1, Tier.T1, 10_000); - historicalT12 = createHistorical(2, Tier.T1, 10_000); + historicalT11 = createHistorical(1, Tier.T1, SIZE_1TB); + historicalT12 = createHistorical(2, Tier.T1, SIZE_1TB); + historicalT13 = createHistorical(3, Tier.T1, SIZE_1TB); } @Test - public void testCloningHistorical() + public void testSimpleCloning() { final CoordinatorSimulation sim = CoordinatorSimulation.builder() - .withSegments(segments) + .withSegments(Segments.WIKI_10X1D) .withServers(historicalT11, historicalT12) .withRules(datasource, Load.on(Tier.T1, 1).forever()) .withDynamicConfig( @@ -61,7 +64,8 @@ public void testCloningHistorical() Map.of( historicalT11.getHost(), historicalT12.getHost() ) - )) + ).build() + ) .build(); startSimulation(sim); @@ -76,6 +80,8 @@ public void testCloningHistorical() ); runCoordinatorCycle(); + loadQueuedSegments(); + 
verifyValue( Metric.SUCCESS_ACTIONS, Map.of("server", historicalT11.getName(), "description", "LOAD: NORMAL"), @@ -87,16 +93,184 @@ public void testCloningHistorical() 10L ); + Assert.assertEquals(10, historicalT11.getTotalSegments()); + Assert.assertEquals(10, historicalT12.getTotalSegments()); + } + + @Test + public void testAddingNewHistorical() + { + final CoordinatorSimulation sim = + CoordinatorSimulation.builder() + .withSegments(Segments.WIKI_10X1D) + .withServers(historicalT11, historicalT12) + .withRules(datasource, Load.on(Tier.T1, 1).forever()) + .withDynamicConfig( + withCloneServers( + Map.of( + historicalT11.getHost(), historicalT12.getHost() + ) + ).build() + ) + .build(); + + // Run 1: Current state is a historical and clone already in sync. + Segments.WIKI_10X1D.forEach(segment -> { + historicalT11.addDataSegment(segment); + historicalT12.addDataSegment(segment); + }); + + startSimulation(sim); + + runCoordinatorCycle(); + loadQueuedSegments(); + + // Confirm number of segments. + Assert.assertEquals(10, historicalT11.getTotalSegments()); + Assert.assertEquals(10, historicalT12.getTotalSegments()); + + // Add a new historical. + final DruidServer newHistorical = createHistorical(3, Tier.T1, 10_000); + addServer(newHistorical); + + // Run 2: Let the coordinator balance segments. 
+ runCoordinatorCycle(); + loadQueuedSegments(); + + // Check that segments have been distributed to the new historical and have also been dropped by the clone + Assert.assertEquals(5, historicalT11.getTotalSegments()); + Assert.assertEquals(5, historicalT12.getTotalSegments()); + Assert.assertEquals(5, newHistorical.getTotalSegments()); + verifyValue( + Stats.CoordinatorRun.CLONE_DROP.getMetricName(), + Map.of("server", historicalT12.getName()), + 5L + ); + } + + @Test + public void testCloningServerDisappearsAndRelaunched() + { + final CoordinatorSimulation sim = + CoordinatorSimulation.builder() + .withSegments(Segments.WIKI_10X1D) + .withServers(historicalT11, historicalT12) + .withRules(datasource, Load.on(Tier.T1, 2).forever()) + .withDynamicConfig( + withCloneServers( + Map.of( + historicalT11.getHost(), historicalT12.getHost() + ) + ).build() + ) + .build(); + + startSimulation(sim); + + // Run 1: All segments are loaded. + runCoordinatorCycle(); + loadQueuedSegments(); + Assert.assertEquals(10, historicalT11.getTotalSegments()); + Assert.assertEquals(10, historicalT12.getTotalSegments()); + + // Target server disappears, loses loaded segments. + removeServer(historicalT12); + Segments.WIKI_10X1D.forEach(segment -> historicalT12.removeDataSegment(segment.getId())); + + // Run 2: No change in source historical. + runCoordinatorCycle(); + loadQueuedSegments(); + + Assert.assertEquals(10, historicalT11.getTotalSegments()); + Assert.assertEquals(0, historicalT12.getTotalSegments()); + + // Server readded + addServer(historicalT12); + + // Run 3: Segments recloned. 
+ runCoordinatorCycle(); + loadQueuedSegments(); + + Assert.assertEquals(10, historicalT11.getTotalSegments()); + Assert.assertEquals(10, historicalT12.getTotalSegments()); + verifyValue( + Stats.CoordinatorRun.CLONE_LOAD.getMetricName(), + Map.of("server", historicalT12.getName()), + 10L + ); + + runCoordinatorCycle(); loadQueuedSegments(); + + verifyValue( + Metric.SUCCESS_ACTIONS, + Map.of("server", historicalT12.getName(), "description", "LOAD: TURBO"), + 10L + ); + Assert.assertEquals(10, historicalT11.getTotalSegments()); Assert.assertEquals(10, historicalT12.getTotalSegments()); } + @Test + public void testClonedServerDoesNotFollowReplicationLimit() + { + final CoordinatorSimulation sim = + CoordinatorSimulation.builder() + .withSegments(Segments.WIKI_10X100D) + .withServers(historicalT11) + .withRules(datasource, Load.on(Tier.T1, 1).forever()) + .withDynamicConfig( + withCloneServers( + Map.of( + historicalT11.getHost(), historicalT12.getHost() + ) + ).withReplicationThrottleLimit(10).build() + ) + .build(); + + startSimulation(sim); + Segments.WIKI_10X100D.forEach( + segment -> historicalT11.addDataSegment(segment) + ); + + // Run 1: All segments are loaded on the source historical + runCoordinatorCycle(); + loadQueuedSegments(); + Assert.assertEquals(1000, historicalT11.getTotalSegments()); + Assert.assertEquals(0, historicalT12.getTotalSegments()); + + // Clone server now added. 
+ addServer(historicalT12); + + // Run 2: Assigns all segments to the cloned historical + runCoordinatorCycle(); + loadQueuedSegments(); + + Assert.assertEquals(1000, historicalT11.getTotalSegments()); + Assert.assertEquals(1000, historicalT12.getTotalSegments()); + + verifyValue( + Stats.CoordinatorRun.CLONE_LOAD.getMetricName(), + Map.of("server", historicalT12.getName()), + 1000L + ); + + runCoordinatorCycle(); + loadQueuedSegments(); + + verifyValue( + Metric.SUCCESS_ACTIONS, + Map.of("server", historicalT12.getName(), "description", "LOAD: TURBO"), + 1000L + ); + } + /** * Creates a dynamic config with unlimited load queue, balancing disabled and * the given {@code replicationThrottleLimit}. */ - private CoordinatorDynamicConfig withCloneServers(Map cloneServers) + private CoordinatorDynamicConfig.Builder withCloneServers(Map cloneServers) { final Set unmanagedServers = new HashSet<>(cloneServers.values()); @@ -104,7 +278,6 @@ private CoordinatorDynamicConfig withCloneServers(Map cloneServe .withSmartSegmentLoading(true) .withCloneServers(cloneServers) .withUnmanagedNodes(unmanagedServers) - .withTurboLoadingNodes(unmanagedServers) - .build(); + .withTurboLoadingNodes(unmanagedServers); } } From 5a912610ff30806e4ac8246d27bb515b6fc4e785 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Fri, 4 Apr 2025 12:31:16 +0530 Subject: [PATCH 10/24] More tests --- .../duty/HistoricalCloningDuty.java | 29 +++++--- .../simulate/HistoricalCloningTest.java | 72 ++++++++++++------- 2 files changed, 68 insertions(+), 33 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java index 18cf9b00280a..d88343556e35 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java @@ -47,14 +47,14 @@ public 
class HistoricalCloningDuty implements CoordinatorDuty public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { final Map cloneServers = params.getCoordinatorDynamicConfig().getCloneServers(); + final CoordinatorRunStats stats = params.getCoordinatorStats(); if (cloneServers.isEmpty()) { // No servers to be cloned. return params; } - final CoordinatorRunStats stats = params.getCoordinatorStats(); - // TODO: clean up + // Create a map of host to historical. final Map historicalMap = params.getDruidCluster() .getHistoricals() .values() @@ -66,22 +66,33 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) )); for (Map.Entry entry : cloneServers.entrySet()) { - String sourceHistoricalName = entry.getKey(); - ServerHolder sourceServer = historicalMap.get(sourceHistoricalName); + log.debug("Handling cloning for mapping: [%s]", entry); - String targetHistoricalName = entry.getValue(); - ServerHolder targetServer = historicalMap.get(targetHistoricalName); + final String sourceHistoricalName = entry.getKey(); + final ServerHolder sourceServer = historicalMap.get(sourceHistoricalName); if (sourceServer == null) { - log.info("Could not find source historical [%s]", sourceHistoricalName); + log.warn( + "Could not find source historical [%s]. Skipping over clone mapping [%s].", + sourceHistoricalName, + entry + ); continue; } + final String targetHistoricalName = entry.getValue(); + final ServerHolder targetServer = historicalMap.get(targetHistoricalName); + if (targetServer == null) { - log.info("Could not find target historical [%s]", targetHistoricalName); + log.warn( + "Could not find target historical [%s]. Skipping over clone mapping [%s].", + targetHistoricalName, + entry + ); continue; } + // Load any segments missing in the clone target. 
for (DataSegment segment : sourceServer.getProjectedSegments().getSegments()) { if (!targetServer.getProjectedSegments().getSegments().contains(segment)) { targetServer.getPeon().loadSegment(segment, SegmentAction.LOAD, null); @@ -93,6 +104,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) } } + // Drop any segments missing from the clone source. for (DataSegment segment : targetServer.getProjectedSegments().getSegments()) { if (!sourceServer.getProjectedSegments().getSegments().contains(segment)) { targetServer.getPeon().dropSegment(segment, null); @@ -104,6 +116,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) } } } + return params; } } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java index 031e59f2f678..d6d3789a07d3 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java @@ -23,12 +23,10 @@ import org.apache.druid.segment.TestDataSource; import org.apache.druid.server.coordinator.CoordinatorDynamicConfig; import org.apache.druid.server.coordinator.stats.Stats; -import org.apache.druid.timeline.DataSegment; import org.junit.Assert; import org.junit.Test; import java.util.HashSet; -import java.util.List; import java.util.Map; import java.util.Set; @@ -66,11 +64,11 @@ public void testSimpleCloning() ) ).build() ) + .withImmediateSegmentLoading(true) .build(); startSimulation(sim); runCoordinatorCycle(); - loadQueuedSegments(); verifyValue(Metric.ASSIGNED_COUNT, 10L); verifyValue( @@ -78,10 +76,6 @@ public void testSimpleCloning() Map.of("server", historicalT12.getName()), 10L ); - - runCoordinatorCycle(); - loadQueuedSegments(); - verifyValue( Metric.SUCCESS_ACTIONS, Map.of("server", 
historicalT11.getName(), "description", "LOAD: NORMAL"), @@ -112,6 +106,7 @@ public void testAddingNewHistorical() ) ).build() ) + .withImmediateSegmentLoading(true) .build(); // Run 1: Current state is a historical and clone already in sync. @@ -123,7 +118,6 @@ public void testAddingNewHistorical() startSimulation(sim); runCoordinatorCycle(); - loadQueuedSegments(); // Confirm number of segments. Assert.assertEquals(10, historicalT11.getTotalSegments()); @@ -135,7 +129,6 @@ public void testAddingNewHistorical() // Run 2: Let the coordinator balance segments. runCoordinatorCycle(); - loadQueuedSegments(); // Check that segments have been distributed to the new historical and have also been dropped by the clone Assert.assertEquals(5, historicalT11.getTotalSegments()); @@ -163,13 +156,13 @@ public void testCloningServerDisappearsAndRelaunched() ) ).build() ) + .withImmediateSegmentLoading(true) .build(); startSimulation(sim); // Run 1: All segments are loaded. runCoordinatorCycle(); - loadQueuedSegments(); Assert.assertEquals(10, historicalT11.getTotalSegments()); Assert.assertEquals(10, historicalT12.getTotalSegments()); @@ -179,7 +172,6 @@ public void testCloningServerDisappearsAndRelaunched() // Run 2: No change in source historical. runCoordinatorCycle(); - loadQueuedSegments(); Assert.assertEquals(10, historicalT11.getTotalSegments()); Assert.assertEquals(0, historicalT12.getTotalSegments()); @@ -189,7 +181,6 @@ public void testCloningServerDisappearsAndRelaunched() // Run 3: Segments recloned. 
runCoordinatorCycle(); - loadQueuedSegments(); Assert.assertEquals(10, historicalT11.getTotalSegments()); Assert.assertEquals(10, historicalT12.getTotalSegments()); @@ -198,10 +189,6 @@ public void testCloningServerDisappearsAndRelaunched() Map.of("server", historicalT12.getName()), 10L ); - - runCoordinatorCycle(); - loadQueuedSegments(); - verifyValue( Metric.SUCCESS_ACTIONS, Map.of("server", historicalT12.getName(), "description", "LOAD: TURBO"), @@ -227,16 +214,14 @@ public void testClonedServerDoesNotFollowReplicationLimit() ) ).withReplicationThrottleLimit(10).build() ) + .withImmediateSegmentLoading(true) .build(); + Segments.WIKI_10X100D.forEach(segment -> historicalT11.addDataSegment(segment)); startSimulation(sim); - Segments.WIKI_10X100D.forEach( - segment -> historicalT11.addDataSegment(segment) - ); // Run 1: All segments are loaded on the source historical runCoordinatorCycle(); - loadQueuedSegments(); Assert.assertEquals(1000, historicalT11.getTotalSegments()); Assert.assertEquals(0, historicalT12.getTotalSegments()); @@ -245,7 +230,6 @@ public void testClonedServerDoesNotFollowReplicationLimit() // Run 2: Assigns all segments to the cloned historical runCoordinatorCycle(); - loadQueuedSegments(); Assert.assertEquals(1000, historicalT11.getTotalSegments()); Assert.assertEquals(1000, historicalT12.getTotalSegments()); @@ -256,9 +240,6 @@ public void testClonedServerDoesNotFollowReplicationLimit() 1000L ); - runCoordinatorCycle(); - loadQueuedSegments(); - verifyValue( Metric.SUCCESS_ACTIONS, Map.of("server", historicalT12.getName(), "description", "LOAD: TURBO"), @@ -266,6 +247,47 @@ public void testClonedServerDoesNotFollowReplicationLimit() ); } + @Test + public void testCloningHistoricalWithReplicationLimit() + { + final CoordinatorSimulation sim = + CoordinatorSimulation.builder() + .withSegments(Segments.WIKI_10X1D) + .withServers(historicalT11, historicalT12, historicalT13) + .withRules(datasource, Load.on(Tier.T1, 2).forever()) + 
.withImmediateSegmentLoading(true) + .withDynamicConfig( + withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) + .withReplicationThrottleLimit(2) + .withMaxSegmentsToMove(0) + .build() + ) + .withImmediateSegmentLoading(true) + .build(); + Segments.WIKI_10X1D.forEach(historicalT13::addDataSegment); + startSimulation(sim); + + // Check that only replication count segments are loaded each run and that the cloning server copies it. + while (historicalT11.getTotalSegments() < Segments.WIKI_10X1D.size()) { + runCoordinatorCycle(); + + // Check that all segments are cloned. + Assert.assertEquals(historicalT11.getTotalSegments(), historicalT12.getTotalSegments()); + + // Check that the replication throttling is respected. + verifyValue(Metric.ASSIGNED_COUNT, 2L); + verifyValue( + Stats.CoordinatorRun.CLONE_LOAD.getMetricName(), + Map.of("server", historicalT12.getName()), + 2L + ); + } + + Assert.assertEquals(10, historicalT11.getTotalSegments()); + Assert.assertEquals(10, historicalT12.getTotalSegments()); + Assert.assertEquals(10, historicalT13.getTotalSegments()); + } + /** * Creates a dynamic config with unlimited load queue, balancing disabled and * the given {@code replicationThrottleLimit}. 
@@ -275,7 +297,7 @@ private CoordinatorDynamicConfig.Builder withCloneServers(Map cl final Set unmanagedServers = new HashSet<>(cloneServers.values()); return CoordinatorDynamicConfig.builder() - .withSmartSegmentLoading(true) + .withSmartSegmentLoading(false) .withCloneServers(cloneServers) .withUnmanagedNodes(unmanagedServers) .withTurboLoadingNodes(unmanagedServers); From 9fafbfb873b0fb1eeb48755820d7d6353af64276 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Fri, 4 Apr 2025 13:33:32 +0530 Subject: [PATCH 11/24] Clean up --- .../server/coordinator/DruidCoordinator.java | 4 +- .../server/coordinator/ServerHolder.java | 3 + ...CloningDuty.java => CloneHistoricals.java} | 8 +- .../loading/StrategicSegmentAssigner.java | 1 + .../druid/server/coordinator/stats/Stats.java | 10 ++- .../simulate/HistoricalCloningTest.java | 83 ++++++++----------- 6 files changed, 51 insertions(+), 58 deletions(-) rename server/src/main/java/org/apache/druid/server/coordinator/duty/{HistoricalCloningDuty.java => CloneHistoricals.java} (95%) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java index d5aadc249cbc..13173366e6fd 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java @@ -69,7 +69,7 @@ import org.apache.druid.server.coordinator.duty.CoordinatorDuty; import org.apache.druid.server.coordinator.duty.CoordinatorDutyGroup; import org.apache.druid.server.coordinator.duty.DutyGroupStatus; -import org.apache.druid.server.coordinator.duty.HistoricalCloningDuty; +import org.apache.druid.server.coordinator.duty.CloneHistoricals; import org.apache.druid.server.coordinator.duty.KillAuditLog; import org.apache.druid.server.coordinator.duty.KillCompactionConfig; import org.apache.druid.server.coordinator.duty.KillDatasourceMetadata; @@ -559,7 +559,7 
@@ private List makeHistoricalManagementDuties() new MarkOvershadowedSegmentsAsUnused(deleteSegments), new MarkEternityTombstonesAsUnused(deleteSegments), new BalanceSegments(config.getCoordinatorPeriod()), - new HistoricalCloningDuty(), + new CloneHistoricals(), new CollectLoadQueueStats() ); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java index d6b4d73816d8..8e68c9d270e2 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java @@ -227,6 +227,9 @@ public boolean isDecommissioning() return isDecommissioning; } + /** + * Returns true if this server is unmanaged and should not participate in segment assignment, drop or balancing. + */ public boolean isUnmanaged() { return isUnmanaged; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java similarity index 95% rename from server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java rename to server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java index d88343556e35..125ffd2252c4 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/HistoricalCloningDuty.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java @@ -39,9 +39,9 @@ * {@link CoordinatorDynamicConfig#getCloneServers()}, copies any segments load or unload requests from the source * historical to the target historical. 
*/ -public class HistoricalCloningDuty implements CoordinatorDuty +public class CloneHistoricals implements CoordinatorDuty { - private static final Logger log = new Logger(HistoricalCloningDuty.class); + private static final Logger log = new Logger(CloneHistoricals.class); @Override public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) @@ -97,7 +97,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) if (!targetServer.getProjectedSegments().getSegments().contains(segment)) { targetServer.getPeon().loadSegment(segment, SegmentAction.LOAD, null); stats.add( - Stats.CoordinatorRun.CLONE_LOAD, + Stats.Segments.CLONE_LOAD, RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), 1L ); @@ -109,7 +109,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) if (!sourceServer.getProjectedSegments().getSegments().contains(segment)) { targetServer.getPeon().dropSegment(segment, null); stats.add( - Stats.CoordinatorRun.CLONE_DROP, + Stats.Segments.CLONE_DROP, RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), 1L ); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java index 6c912b8734b8..75febd060934 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java @@ -447,6 +447,7 @@ private int dropReplicas( if (server.isDecommissioning()) { eligibleDyingServers.add(server); } else if (!server.isUnmanaged()) { + // Do not assign or drop segments if the server is unmanaged eligibleLiveServers.add(server); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java index 
9108de7d2977..39af81ec7c34 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java @@ -65,6 +65,12 @@ public static class Segments // Values computed in a run public static final CoordinatorStat REPLICATION_THROTTLE_LIMIT = CoordinatorStat.toDebugOnly("replicationThrottleLimit"); + + // Cloned segments in a run + public static final CoordinatorStat CLONE_LOAD + = CoordinatorStat.toDebugAndEmit("cloneLoad", "segment/clone/assigned/count"); + public static final CoordinatorStat CLONE_DROP + = CoordinatorStat.toDebugAndEmit("cloneDrop", "segment/clone/dropped/count"); } public static class SegmentQueue @@ -137,10 +143,6 @@ public static class CoordinatorRun = CoordinatorStat.toDebugAndEmit("dutyRunTime", "coordinator/time"); public static final CoordinatorStat GROUP_RUN_TIME = CoordinatorStat.toDebugAndEmit("groupRunTime", "coordinator/global/time"); - public static final CoordinatorStat CLONE_LOAD - = CoordinatorStat.toDebugAndEmit("cloneLoad", "coordinator/clone/load"); - public static final CoordinatorStat CLONE_DROP - = CoordinatorStat.toDebugAndEmit("cloneDrop", "coordinator/clone/drop"); } public static class Kill diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java index d6d3789a07d3..f8232f7e4de8 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java @@ -26,7 +26,6 @@ import org.junit.Assert; import org.junit.Test; -import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -58,11 +57,11 @@ public void testSimpleCloning() .withServers(historicalT11, historicalT12) .withRules(datasource, Load.on(Tier.T1, 1).forever()) .withDynamicConfig( - 
withCloneServers( - Map.of( - historicalT11.getHost(), historicalT12.getHost() - ) - ).build() + CoordinatorDynamicConfig.builder() + .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) + .withUnmanagedNodes(Set.of(historicalT12.getHost())) + .withSmartSegmentLoading(false) + .build() ) .withImmediateSegmentLoading(true) .build(); @@ -72,7 +71,7 @@ public void testSimpleCloning() verifyValue(Metric.ASSIGNED_COUNT, 10L); verifyValue( - Stats.CoordinatorRun.CLONE_LOAD.getMetricName(), + Stats.Segments.CLONE_LOAD.getMetricName(), Map.of("server", historicalT12.getName()), 10L ); @@ -83,7 +82,7 @@ public void testSimpleCloning() ); verifyValue( Metric.SUCCESS_ACTIONS, - Map.of("server", historicalT12.getName(), "description", "LOAD: TURBO"), + Map.of("server", historicalT12.getName(), "description", "LOAD: NORMAL"), 10L ); @@ -100,11 +99,11 @@ public void testAddingNewHistorical() .withServers(historicalT11, historicalT12) .withRules(datasource, Load.on(Tier.T1, 1).forever()) .withDynamicConfig( - withCloneServers( - Map.of( - historicalT11.getHost(), historicalT12.getHost() - ) - ).build() + CoordinatorDynamicConfig.builder() + .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) + .withUnmanagedNodes(Set.of(historicalT12.getHost())) + .withSmartSegmentLoading(false) + .build() ) .withImmediateSegmentLoading(true) .build(); @@ -135,7 +134,7 @@ public void testAddingNewHistorical() Assert.assertEquals(5, historicalT12.getTotalSegments()); Assert.assertEquals(5, newHistorical.getTotalSegments()); verifyValue( - Stats.CoordinatorRun.CLONE_DROP.getMetricName(), + Stats.Segments.CLONE_DROP.getMetricName(), Map.of("server", historicalT12.getName()), 5L ); @@ -150,11 +149,11 @@ public void testCloningServerDisappearsAndRelaunched() .withServers(historicalT11, historicalT12) .withRules(datasource, Load.on(Tier.T1, 2).forever()) .withDynamicConfig( - withCloneServers( - Map.of( - historicalT11.getHost(), historicalT12.getHost() - 
) - ).build() + CoordinatorDynamicConfig.builder() + .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) + .withUnmanagedNodes(Set.of(historicalT12.getHost())) + .withSmartSegmentLoading(false) + .build() ) .withImmediateSegmentLoading(true) .build(); @@ -185,13 +184,13 @@ public void testCloningServerDisappearsAndRelaunched() Assert.assertEquals(10, historicalT11.getTotalSegments()); Assert.assertEquals(10, historicalT12.getTotalSegments()); verifyValue( - Stats.CoordinatorRun.CLONE_LOAD.getMetricName(), + Stats.Segments.CLONE_LOAD.getMetricName(), Map.of("server", historicalT12.getName()), 10L ); verifyValue( Metric.SUCCESS_ACTIONS, - Map.of("server", historicalT12.getName(), "description", "LOAD: TURBO"), + Map.of("server", historicalT12.getName(), "description", "LOAD: NORMAL"), 10L ); @@ -208,11 +207,11 @@ public void testClonedServerDoesNotFollowReplicationLimit() .withServers(historicalT11) .withRules(datasource, Load.on(Tier.T1, 1).forever()) .withDynamicConfig( - withCloneServers( - Map.of( - historicalT11.getHost(), historicalT12.getHost() - ) - ).withReplicationThrottleLimit(10).build() + CoordinatorDynamicConfig.builder() + .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) + .withUnmanagedNodes(Set.of(historicalT12.getHost())) + .withSmartSegmentLoading(false) + .build() ) .withImmediateSegmentLoading(true) .build(); @@ -235,14 +234,14 @@ public void testClonedServerDoesNotFollowReplicationLimit() Assert.assertEquals(1000, historicalT12.getTotalSegments()); verifyValue( - Stats.CoordinatorRun.CLONE_LOAD.getMetricName(), + Stats.Segments.CLONE_LOAD.getMetricName(), Map.of("server", historicalT12.getName()), 1000L ); verifyValue( Metric.SUCCESS_ACTIONS, - Map.of("server", historicalT12.getName(), "description", "LOAD: TURBO"), + Map.of("server", historicalT12.getName(), "description", "LOAD: NORMAL"), 1000L ); } @@ -257,10 +256,13 @@ public void testCloningHistoricalWithReplicationLimit() 
.withRules(datasource, Load.on(Tier.T1, 2).forever()) .withImmediateSegmentLoading(true) .withDynamicConfig( - withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) - .withReplicationThrottleLimit(2) - .withMaxSegmentsToMove(0) - .build() + CoordinatorDynamicConfig.builder() + .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) + .withUnmanagedNodes(Set.of(historicalT12.getHost())) + .withSmartSegmentLoading(false) + .withReplicationThrottleLimit(2) + .withMaxSegmentsToMove(0) + .build() ) .withImmediateSegmentLoading(true) .build(); @@ -277,7 +279,7 @@ public void testCloningHistoricalWithReplicationLimit() // Check that the replication throttling is respected. verifyValue(Metric.ASSIGNED_COUNT, 2L); verifyValue( - Stats.CoordinatorRun.CLONE_LOAD.getMetricName(), + Stats.Segments.CLONE_LOAD.getMetricName(), Map.of("server", historicalT12.getName()), 2L ); @@ -287,19 +289,4 @@ public void testCloningHistoricalWithReplicationLimit() Assert.assertEquals(10, historicalT12.getTotalSegments()); Assert.assertEquals(10, historicalT13.getTotalSegments()); } - - /** - * Creates a dynamic config with unlimited load queue, balancing disabled and - * the given {@code replicationThrottleLimit}. 
- */ - private CoordinatorDynamicConfig.Builder withCloneServers(Map cloneServers) - { - final Set unmanagedServers = new HashSet<>(cloneServers.values()); - - return CoordinatorDynamicConfig.builder() - .withSmartSegmentLoading(false) - .withCloneServers(cloneServers) - .withUnmanagedNodes(unmanagedServers) - .withTurboLoadingNodes(unmanagedServers); - } } From 0ff4287016a7dcc94b70eb7c5075783ed9e7534a Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Fri, 4 Apr 2025 13:50:42 +0530 Subject: [PATCH 12/24] Refactor projected segments --- .../server/coordinator/DruidCoordinator.java | 2 +- .../coordinator/SegmentCountsPerInterval.java | 12 +------ .../server/coordinator/ServerHolder.java | 31 ++++++++++++++----- .../balancer/CostBalancerStrategy.java | 2 +- .../balancer/SegmentToMoveCalculator.java | 14 ++++----- .../balancer/TierSegmentBalancer.java | 2 +- .../coordinator/duty/CloneHistoricals.java | 8 ++--- 7 files changed, 38 insertions(+), 33 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java index 13173366e6fd..c01c6c3099fe 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java @@ -63,13 +63,13 @@ import org.apache.druid.server.coordinator.config.DruidCoordinatorConfig; import org.apache.druid.server.coordinator.config.KillUnusedSegmentsConfig; import org.apache.druid.server.coordinator.duty.BalanceSegments; +import org.apache.druid.server.coordinator.duty.CloneHistoricals; import org.apache.druid.server.coordinator.duty.CompactSegments; import org.apache.druid.server.coordinator.duty.CoordinatorCustomDutyGroup; import org.apache.druid.server.coordinator.duty.CoordinatorCustomDutyGroups; import org.apache.druid.server.coordinator.duty.CoordinatorDuty; import 
org.apache.druid.server.coordinator.duty.CoordinatorDutyGroup; import org.apache.druid.server.coordinator.duty.DutyGroupStatus; -import org.apache.druid.server.coordinator.duty.CloneHistoricals; import org.apache.druid.server.coordinator.duty.KillAuditLog; import org.apache.druid.server.coordinator.duty.KillCompactionConfig; import org.apache.druid.server.coordinator.duty.KillDatasourceMetadata; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java b/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java index 115c5496a192..a9767b8f7682 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/SegmentCountsPerInterval.java @@ -26,12 +26,10 @@ import org.joda.time.Interval; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; -import java.util.Set; /** - * Maintains a set of segments, along with count of segments for each datasource and interval. + * Maintains a count of segments for each datasource and interval. 
*/ public class SegmentCountsPerInterval { @@ -40,20 +38,17 @@ public class SegmentCountsPerInterval private final Map> datasourceIntervalToSegmentCount = new HashMap<>(); private final Object2IntMap intervalToTotalSegmentCount = new Object2IntOpenHashMap<>(); private final Object2IntMap datasourceToTotalSegmentCount = new Object2IntOpenHashMap<>(); - private final Set segments = new HashSet<>(); public void addSegment(DataSegment segment) { updateCountInInterval(segment, 1); totalSegmentBytes += segment.getSize(); - segments.add(segment); } public void removeSegment(DataSegment segment) { updateCountInInterval(segment, -1); totalSegmentBytes -= segment.getSize(); - segments.remove(segment); } public int getTotalSegmentCount() @@ -66,11 +61,6 @@ public long getTotalSegmentBytes() return totalSegmentBytes; } - public Set getSegments() - { - return segments; - } - public Object2IntMap getDatasourceToTotalSegmentCount() { return datasourceToTotalSegmentCount; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java index 8e68c9d270e2..561b29fd9827 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java @@ -32,9 +32,11 @@ import java.util.Collection; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; /** @@ -74,7 +76,7 @@ public class ServerHolder implements Comparable */ private final Map queuedSegments = new HashMap<>(); - private final SegmentCountsPerInterval projectedSegments = new SegmentCountsPerInterval(); + private final SegmentCountsPerInterval projectedSegmentCounts = new SegmentCountsPerInterval(); public ServerHolder(ImmutableDruidServer server, LoadQueuePeon peon) { 
@@ -142,7 +144,7 @@ private void initializeQueuedSegments( ) { for (DataSegment segment : server.iterateAllSegments()) { - projectedSegments.addSegment(segment); + projectedSegmentCounts.addSegment(segment); } final List expiredSegments = new ArrayList<>(); @@ -287,11 +289,24 @@ public Map getQueuedSegments() } /** - * Segments that are expected to be loaded on this server once all the + * Counts for segments that are expected to be loaded on this server once all the * operations in progress have completed. */ - public SegmentCountsPerInterval getProjectedSegments() + public SegmentCountsPerInterval getProjectedSegmentCounts() { + return projectedSegmentCounts; + } + + public Set getProjectedSegments() + { + final Set projectedSegments = new HashSet<>(getServedSegments()); + queuedSegments.forEach((segment, action) -> { + if (action.isLoad()) { + projectedSegments.add(segment); + } else { + projectedSegments.remove(segment); + } + }); return projectedSegments; } @@ -416,10 +431,10 @@ private void addToQueuedSegments(DataSegment segment, SegmentAction action) // Add to projected if load is started, remove from projected if drop has started if (action.isLoad()) { - projectedSegments.addSegment(segment); + projectedSegmentCounts.addSegment(segment); sizeOfLoadingSegments += segment.getSize(); } else { - projectedSegments.removeSegment(segment); + projectedSegmentCounts.removeSegment(segment); if (action == SegmentAction.DROP) { sizeOfDroppingSegments += segment.getSize(); } @@ -433,10 +448,10 @@ private void removeFromQueuedSegments(DataSegment segment, SegmentAction action) queuedSegments.remove(segment); if (action.isLoad()) { - projectedSegments.removeSegment(segment); + projectedSegmentCounts.removeSegment(segment); sizeOfLoadingSegments -= segment.getSize(); } else { - projectedSegments.addSegment(segment); + projectedSegmentCounts.addSegment(segment); if (action == SegmentAction.DROP) { sizeOfDroppingSegments -= segment.getSize(); } diff --git 
a/server/src/main/java/org/apache/druid/server/coordinator/balancer/CostBalancerStrategy.java b/server/src/main/java/org/apache/druid/server/coordinator/balancer/CostBalancerStrategy.java index 96a6ccccf5cb..d9c883bf6c53 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/balancer/CostBalancerStrategy.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/balancer/CostBalancerStrategy.java @@ -284,7 +284,7 @@ protected double computePlacementCost(DataSegment proposalSegment, ServerHolder // Compute number of segments in each interval final Object2IntOpenHashMap intervalToSegmentCount = new Object2IntOpenHashMap<>(); - final SegmentCountsPerInterval projectedSegments = server.getProjectedSegments(); + final SegmentCountsPerInterval projectedSegments = server.getProjectedSegmentCounts(); projectedSegments.getIntervalToTotalSegmentCount().object2IntEntrySet().forEach(entry -> { final Interval interval = entry.getKey(); if (costComputeInterval.overlaps(interval)) { diff --git a/server/src/main/java/org/apache/druid/server/coordinator/balancer/SegmentToMoveCalculator.java b/server/src/main/java/org/apache/druid/server/coordinator/balancer/SegmentToMoveCalculator.java index 6a1c6199911c..23db0ac22956 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/balancer/SegmentToMoveCalculator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/balancer/SegmentToMoveCalculator.java @@ -71,7 +71,7 @@ public static int computeNumSegmentsToMoveInTier( ) { final int totalSegments = historicals.stream().mapToInt( - server -> server.getProjectedSegments().getTotalSegmentCount() + server -> server.getProjectedSegmentCounts().getTotalSegmentCount() ).sum(); // Move at least some segments to ensure that the cluster is always balancing itself @@ -187,8 +187,8 @@ private static double getAverageSegmentSize(List servers) int totalSegmentCount = 0; long totalUsageBytes = 0; for (ServerHolder server : servers) { - 
totalSegmentCount += server.getProjectedSegments().getTotalSegmentCount(); - totalUsageBytes += server.getProjectedSegments().getTotalSegmentBytes(); + totalSegmentCount += server.getProjectedSegmentCounts().getTotalSegmentCount(); + totalUsageBytes += server.getProjectedSegmentCounts().getTotalSegmentBytes(); } if (totalSegmentCount <= 0 || totalUsageBytes <= 0) { @@ -209,7 +209,7 @@ static int computeSegmentsToMoveToBalanceCountsPerDatasource( { // Find all the datasources final Set datasources = servers.stream().flatMap( - s -> s.getProjectedSegments().getDatasourceToTotalSegmentCount().keySet().stream() + s -> s.getProjectedSegmentCounts().getDatasourceToTotalSegmentCount().keySet().stream() ).collect(Collectors.toSet()); if (datasources.isEmpty()) { return 0; @@ -220,7 +220,7 @@ static int computeSegmentsToMoveToBalanceCountsPerDatasource( final Object2IntMap datasourceToMinSegments = new Object2IntOpenHashMap<>(); for (ServerHolder server : servers) { final Object2IntMap datasourceToSegmentCount - = server.getProjectedSegments().getDatasourceToTotalSegmentCount(); + = server.getProjectedSegmentCounts().getDatasourceToTotalSegmentCount(); for (String datasource : datasources) { int count = datasourceToSegmentCount.getInt(datasource); datasourceToMaxSegments.mergeInt(datasource, count, Math::max); @@ -243,7 +243,7 @@ static int computeSegmentsToMoveToBalanceCountsPerDatasource( int minNumSegments = Integer.MAX_VALUE; int maxNumSegments = 0; for (ServerHolder server : servers) { - int countForSkewedDatasource = server.getProjectedSegments() + int countForSkewedDatasource = server.getProjectedSegmentCounts() .getDatasourceToTotalSegmentCount() .getInt(mostUnbalancedDatasource); @@ -276,7 +276,7 @@ private static int computeSegmentsToMoveToBalanceDiskUsage( long maxUsageBytes = 0; long minUsageBytes = Long.MAX_VALUE; for (ServerHolder server : servers) { - final SegmentCountsPerInterval projectedSegments = server.getProjectedSegments(); + final 
SegmentCountsPerInterval projectedSegments = server.getProjectedSegmentCounts(); // Track the maximum and minimum values long serverUsageBytes = projectedSegments.getTotalSegmentBytes(); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java index 6df273be57ef..180220eb5fe6 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java @@ -207,7 +207,7 @@ private int getNumDecommSegmentsToMove(int maxSegmentsToMove) return 0; } else { final int decommSegmentsToMove = decommissioningServers.stream().mapToInt( - server -> server.getProjectedSegments().getTotalSegmentCount() + server -> server.getProjectedSegmentCounts().getTotalSegmentCount() ).sum(); return Math.min(decommSegmentsToMove, maxSegmentsToMove); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java index 125ffd2252c4..9d958088f4fc 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java @@ -93,8 +93,8 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) } // Load any segments missing in the clone target. 
- for (DataSegment segment : sourceServer.getProjectedSegments().getSegments()) { - if (!targetServer.getProjectedSegments().getSegments().contains(segment)) { + for (DataSegment segment : sourceServer.getProjectedSegments()) { + if (!targetServer.getProjectedSegments().contains(segment)) { targetServer.getPeon().loadSegment(segment, SegmentAction.LOAD, null); stats.add( Stats.Segments.CLONE_LOAD, @@ -105,8 +105,8 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) } // Drop any segments missing from the clone source. - for (DataSegment segment : targetServer.getProjectedSegments().getSegments()) { - if (!sourceServer.getProjectedSegments().getSegments().contains(segment)) { + for (DataSegment segment : targetServer.getProjectedSegments()) { + if (!sourceServer.getProjectedSegments().contains(segment)) { targetServer.getPeon().dropSegment(segment, null); stats.add( Stats.Segments.CLONE_DROP, From 5d54864cd82f8f140e3ee3bfd45e1e16b5620d8e Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Fri, 4 Apr 2025 15:19:07 +0530 Subject: [PATCH 13/24] Refactor to use load queue manager --- .../DruidCoordinatorRuntimeParams.java | 20 ++++++++++--- .../coordinator/duty/CloneHistoricals.java | 28 +++++++++++-------- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java index 576b2155ac7e..43e76295438b 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java @@ -54,6 +54,7 @@ public class DruidCoordinatorRuntimeParams private final CoordinatorRunStats stats; private final BalancerStrategy balancerStrategy; private final Set broadcastDatasources; + private final SegmentLoadQueueManager loadQueueManager; private 
DruidCoordinatorRuntimeParams( DruidCluster druidCluster, @@ -65,7 +66,8 @@ private DruidCoordinatorRuntimeParams( SegmentLoadingConfig segmentLoadingConfig, CoordinatorRunStats stats, BalancerStrategy balancerStrategy, - Set broadcastDatasources + Set broadcastDatasources, + SegmentLoadQueueManager loadQueueManager ) { this.druidCluster = druidCluster; @@ -78,6 +80,7 @@ private DruidCoordinatorRuntimeParams( this.stats = stats; this.balancerStrategy = balancerStrategy; this.broadcastDatasources = broadcastDatasources; + this.loadQueueManager = loadQueueManager; } public DruidCluster getDruidCluster() @@ -102,6 +105,11 @@ public StrategicSegmentAssigner getSegmentAssigner() return segmentAssigner; } + public SegmentLoadQueueManager getLoadQueueManager() + { + return loadQueueManager; + } + public Map getUsedSegmentsTimelinesPerDataSource() { Preconditions.checkState(dataSourcesSnapshot != null, "dataSourcesSnapshot or usedSegments must be set"); @@ -186,7 +194,8 @@ public Builder buildFromExisting() segmentLoadingConfig, stats, balancerStrategy, - broadcastDatasources + broadcastDatasources, + loadQueueManager ); } @@ -221,7 +230,8 @@ private Builder( SegmentLoadingConfig segmentLoadingConfig, CoordinatorRunStats stats, BalancerStrategy balancerStrategy, - Set broadcastDatasources + Set broadcastDatasources, + SegmentLoadQueueManager loadQueueManager ) { this.druidCluster = cluster; @@ -234,6 +244,7 @@ private Builder( this.stats = stats; this.balancerStrategy = balancerStrategy; this.broadcastDatasources = broadcastDatasources; + this.loadQueueManager = loadQueueManager; } public DruidCoordinatorRuntimeParams build() @@ -254,7 +265,8 @@ public DruidCoordinatorRuntimeParams build() segmentLoadingConfig, stats, balancerStrategy, - broadcastDatasources + broadcastDatasources, + loadQueueManager ); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java 
b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java index 9d958088f4fc..cc23439ce6d1 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java @@ -24,6 +24,7 @@ import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; import org.apache.druid.server.coordinator.ServerHolder; import org.apache.druid.server.coordinator.loading.SegmentAction; +import org.apache.druid.server.coordinator.loading.SegmentLoadQueueManager; import org.apache.druid.server.coordinator.stats.CoordinatorRunStats; import org.apache.druid.server.coordinator.stats.Dimension; import org.apache.druid.server.coordinator.stats.RowKey; @@ -48,6 +49,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { final Map cloneServers = params.getCoordinatorDynamicConfig().getCloneServers(); final CoordinatorRunStats stats = params.getCoordinatorStats(); + final SegmentLoadQueueManager loadQueueManager = params.getLoadQueueManager(); if (cloneServers.isEmpty()) { // No servers to be cloned. @@ -95,24 +97,26 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) // Load any segments missing in the clone target. for (DataSegment segment : sourceServer.getProjectedSegments()) { if (!targetServer.getProjectedSegments().contains(segment)) { - targetServer.getPeon().loadSegment(segment, SegmentAction.LOAD, null); - stats.add( - Stats.Segments.CLONE_LOAD, - RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), - 1L - ); + if (loadQueueManager.loadSegment(segment, targetServer, SegmentAction.LOAD)) { + stats.add( + Stats.Segments.CLONE_LOAD, + RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), + 1L + ); + } } } // Drop any segments missing from the clone source. 
for (DataSegment segment : targetServer.getProjectedSegments()) { if (!sourceServer.getProjectedSegments().contains(segment)) { - targetServer.getPeon().dropSegment(segment, null); - stats.add( - Stats.Segments.CLONE_DROP, - RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), - 1L - ); + if (loadQueueManager.dropSegment(segment, targetServer)) { + stats.add( + Stats.Segments.CLONE_DROP, + RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), + 1L + ); + } } } } From f8d0b6f1c0f947195625359a8b1e9a4e2287bbcb Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 7 Apr 2025 10:53:28 +0530 Subject: [PATCH 14/24] Address review comments --- .../server/coordinator/DruidCoordinator.java | 2 +- .../DruidCoordinatorRuntimeParams.java | 19 +++-------- .../server/coordinator/ServerHolder.java | 3 ++ .../coordinator/duty/CloneHistoricals.java | 34 +++++++++++-------- .../druid/server/coordinator/stats/Stats.java | 4 +-- .../simulate/HistoricalCloningTest.java | 21 ++++++------ 6 files changed, 40 insertions(+), 43 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java index c01c6c3099fe..8174d829df1c 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java @@ -559,7 +559,7 @@ private List makeHistoricalManagementDuties() new MarkOvershadowedSegmentsAsUnused(deleteSegments), new MarkEternityTombstonesAsUnused(deleteSegments), new BalanceSegments(config.getCoordinatorPeriod()), - new CloneHistoricals(), + new CloneHistoricals(loadQueueManager), new CollectLoadQueueStats() ); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java index 43e76295438b..e04a92ae3c21 
100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java @@ -54,7 +54,6 @@ public class DruidCoordinatorRuntimeParams private final CoordinatorRunStats stats; private final BalancerStrategy balancerStrategy; private final Set broadcastDatasources; - private final SegmentLoadQueueManager loadQueueManager; private DruidCoordinatorRuntimeParams( DruidCluster druidCluster, @@ -66,8 +65,7 @@ private DruidCoordinatorRuntimeParams( SegmentLoadingConfig segmentLoadingConfig, CoordinatorRunStats stats, BalancerStrategy balancerStrategy, - Set broadcastDatasources, - SegmentLoadQueueManager loadQueueManager + Set broadcastDatasources ) { this.druidCluster = druidCluster; @@ -80,7 +78,6 @@ private DruidCoordinatorRuntimeParams( this.stats = stats; this.balancerStrategy = balancerStrategy; this.broadcastDatasources = broadcastDatasources; - this.loadQueueManager = loadQueueManager; } public DruidCluster getDruidCluster() @@ -105,11 +102,6 @@ public StrategicSegmentAssigner getSegmentAssigner() return segmentAssigner; } - public SegmentLoadQueueManager getLoadQueueManager() - { - return loadQueueManager; - } - public Map getUsedSegmentsTimelinesPerDataSource() { Preconditions.checkState(dataSourcesSnapshot != null, "dataSourcesSnapshot or usedSegments must be set"); @@ -194,8 +186,7 @@ public Builder buildFromExisting() segmentLoadingConfig, stats, balancerStrategy, - broadcastDatasources, - loadQueueManager + broadcastDatasources ); } @@ -230,8 +221,7 @@ private Builder( SegmentLoadingConfig segmentLoadingConfig, CoordinatorRunStats stats, BalancerStrategy balancerStrategy, - Set broadcastDatasources, - SegmentLoadQueueManager loadQueueManager + Set broadcastDatasources ) { this.druidCluster = cluster; @@ -265,8 +255,7 @@ public DruidCoordinatorRuntimeParams build() segmentLoadingConfig, stats, balancerStrategy, - 
broadcastDatasources, - loadQueueManager + broadcastDatasources ); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java index 561b29fd9827..5772cfea7143 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java @@ -297,6 +297,9 @@ public SegmentCountsPerInterval getProjectedSegmentCounts() return projectedSegmentCounts; } + /** + * Segments that are expected to be loaded on this server once all the operations in progress have completed. + */ public Set getProjectedSegments() { final Set projectedSegments = new HashSet<>(getServedSegments()); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java index cc23439ce6d1..09aa25edb007 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java @@ -33,6 +33,7 @@ import java.util.Collection; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; /** @@ -43,13 +44,18 @@ public class CloneHistoricals implements CoordinatorDuty { private static final Logger log = new Logger(CloneHistoricals.class); + private final SegmentLoadQueueManager loadQueueManager; + + public CloneHistoricals(SegmentLoadQueueManager loadQueueManager) + { + this.loadQueueManager = loadQueueManager; + } @Override public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { final Map cloneServers = params.getCoordinatorDynamicConfig().getCloneServers(); final CoordinatorRunStats stats = params.getCoordinatorStats(); - final SegmentLoadQueueManager loadQueueManager = params.getLoadQueueManager(); if (cloneServers.isEmpty()) { // No servers 
to be cloned. @@ -74,8 +80,8 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) final ServerHolder sourceServer = historicalMap.get(sourceHistoricalName); if (sourceServer == null) { - log.warn( - "Could not find source historical [%s]. Skipping over clone mapping [%s].", + log.error( + "Could not process clone mapping[%s] as source historical[%s] does not exist.", sourceHistoricalName, entry ); @@ -86,37 +92,35 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) final ServerHolder targetServer = historicalMap.get(targetHistoricalName); if (targetServer == null) { - log.warn( - "Could not find target historical [%s]. Skipping over clone mapping [%s].", + log.error( + "Could not process clone mapping[%s] as target historical[%s] does not exist.", targetHistoricalName, entry ); continue; } + Set sourceProjectedSegments = sourceServer.getProjectedSegments(); + Set targetProjectedSegments = targetServer.getProjectedSegments(); // Load any segments missing in the clone target. - for (DataSegment segment : sourceServer.getProjectedSegments()) { - if (!targetServer.getProjectedSegments().contains(segment)) { - if (loadQueueManager.loadSegment(segment, targetServer, SegmentAction.LOAD)) { + for (DataSegment segment : sourceProjectedSegments) { + if (!targetProjectedSegments.contains(segment) && loadQueueManager.loadSegment(segment, targetServer, SegmentAction.LOAD)) { stats.add( - Stats.Segments.CLONE_LOAD, + Stats.Segments.ASSIGNED_TO_CLONE, RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), 1L ); - } } } // Drop any segments missing from the clone source. 
- for (DataSegment segment : targetServer.getProjectedSegments()) { - if (!sourceServer.getProjectedSegments().contains(segment)) { - if (loadQueueManager.dropSegment(segment, targetServer)) { + for (DataSegment segment : targetProjectedSegments) { + if (!sourceProjectedSegments.contains(segment) && loadQueueManager.dropSegment(segment, targetServer)) { stats.add( - Stats.Segments.CLONE_DROP, + Stats.Segments.DROPPED_FROM_CLONE, RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), 1L ); - } } } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java index 39af81ec7c34..ec152488fc33 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java @@ -67,9 +67,9 @@ public static class Segments = CoordinatorStat.toDebugOnly("replicationThrottleLimit"); // Cloned segments in a run - public static final CoordinatorStat CLONE_LOAD + public static final CoordinatorStat ASSIGNED_TO_CLONE = CoordinatorStat.toDebugAndEmit("cloneLoad", "segment/clone/assigned/count"); - public static final CoordinatorStat CLONE_DROP + public static final CoordinatorStat DROPPED_FROM_CLONE = CoordinatorStat.toDebugAndEmit("cloneDrop", "segment/clone/dropped/count"); } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java index f8232f7e4de8..613a692cdbaf 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java @@ -42,7 +42,7 @@ public class HistoricalCloningTest extends CoordinatorSimulationBaseTest @Override public void setUp() { - // Setup historicals for 2 tiers, size 10 GB each + // 
Setup historicals for 1 tier, size 1 TB each historicalT11 = createHistorical(1, Tier.T1, SIZE_1TB); historicalT12 = createHistorical(2, Tier.T1, SIZE_1TB); historicalT13 = createHistorical(3, Tier.T1, SIZE_1TB); @@ -60,7 +60,7 @@ public void testSimpleCloning() CoordinatorDynamicConfig.builder() .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) .withUnmanagedNodes(Set.of(historicalT12.getHost())) - .withSmartSegmentLoading(false) + .withSmartSegmentLoading(true) .build() ) .withImmediateSegmentLoading(true) @@ -71,7 +71,7 @@ public void testSimpleCloning() verifyValue(Metric.ASSIGNED_COUNT, 10L); verifyValue( - Stats.Segments.CLONE_LOAD.getMetricName(), + Stats.Segments.ASSIGNED_TO_CLONE.getMetricName(), Map.of("server", historicalT12.getName()), 10L ); @@ -102,7 +102,7 @@ public void testAddingNewHistorical() CoordinatorDynamicConfig.builder() .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) .withUnmanagedNodes(Set.of(historicalT12.getHost())) - .withSmartSegmentLoading(false) + .withSmartSegmentLoading(true) .build() ) .withImmediateSegmentLoading(true) @@ -134,7 +134,7 @@ public void testAddingNewHistorical() Assert.assertEquals(5, historicalT12.getTotalSegments()); Assert.assertEquals(5, newHistorical.getTotalSegments()); verifyValue( - Stats.Segments.CLONE_DROP.getMetricName(), + Stats.Segments.DROPPED_FROM_CLONE.getMetricName(), Map.of("server", historicalT12.getName()), 5L ); @@ -152,7 +152,7 @@ public void testCloningServerDisappearsAndRelaunched() CoordinatorDynamicConfig.builder() .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) .withUnmanagedNodes(Set.of(historicalT12.getHost())) - .withSmartSegmentLoading(false) + .withSmartSegmentLoading(true) .build() ) .withImmediateSegmentLoading(true) @@ -184,7 +184,7 @@ public void testCloningServerDisappearsAndRelaunched() Assert.assertEquals(10, historicalT11.getTotalSegments()); Assert.assertEquals(10, 
historicalT12.getTotalSegments()); verifyValue( - Stats.Segments.CLONE_LOAD.getMetricName(), + Stats.Segments.ASSIGNED_TO_CLONE.getMetricName(), Map.of("server", historicalT12.getName()), 10L ); @@ -210,7 +210,8 @@ public void testClonedServerDoesNotFollowReplicationLimit() CoordinatorDynamicConfig.builder() .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) .withUnmanagedNodes(Set.of(historicalT12.getHost())) - .withSmartSegmentLoading(false) + .withSmartSegmentLoading(true) + .withReplicationThrottleLimit(2) .build() ) .withImmediateSegmentLoading(true) @@ -234,7 +235,7 @@ public void testClonedServerDoesNotFollowReplicationLimit() Assert.assertEquals(1000, historicalT12.getTotalSegments()); verifyValue( - Stats.Segments.CLONE_LOAD.getMetricName(), + Stats.Segments.ASSIGNED_TO_CLONE.getMetricName(), Map.of("server", historicalT12.getName()), 1000L ); @@ -279,7 +280,7 @@ public void testCloningHistoricalWithReplicationLimit() // Check that the replication throttling is respected. 
verifyValue(Metric.ASSIGNED_COUNT, 2L); verifyValue( - Stats.Segments.CLONE_LOAD.getMetricName(), + Stats.Segments.ASSIGNED_TO_CLONE.getMetricName(), Map.of("server", historicalT12.getName()), 2L ); From c9475c4715d8f7fb17805da23f18334f237f424d Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 7 Apr 2025 11:05:56 +0530 Subject: [PATCH 15/24] Remove unmanaged nodes --- .../coordinator/CoordinatorDynamicConfig.java | 20 ------------------- .../DruidCoordinatorRuntimeParams.java | 1 - .../coordinator/duty/CloneHistoricals.java | 4 ++-- .../duty/PrepareBalancerAndLoadQueues.java | 3 ++- .../simulate/HistoricalCloningTest.java | 6 ------ .../http/CoordinatorDynamicConfigTest.java | 2 -- 6 files changed, 4 insertions(+), 32 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 1c8bcb1a039a..d0a399895ca6 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -74,7 +74,6 @@ public class CoordinatorDynamicConfig private final Map validDebugDimensions; private final Set turboLoadingNodes; - private final Set unmanagedNodes; private final Map cloneServers; /** @@ -127,7 +126,6 @@ public CoordinatorDynamicConfig( @JsonProperty("smartSegmentLoading") @Nullable Boolean smartSegmentLoading, @JsonProperty("debugDimensions") @Nullable Map debugDimensions, @JsonProperty("turboLoadingNodes") @Nullable Set turboLoadingNodes, - @JsonProperty("unmanagedNodes") @Nullable Set unmanagedNodes, @JsonProperty("cloneServers") @Nullable Map cloneServers ) { @@ -173,7 +171,6 @@ public CoordinatorDynamicConfig( this.debugDimensions = debugDimensions; this.validDebugDimensions = validateDebugDimensions(debugDimensions); this.turboLoadingNodes = Configs.valueOrDefault(turboLoadingNodes, Set.of()); - 
this.unmanagedNodes = Configs.valueOrDefault(unmanagedNodes, Set.of()); this.cloneServers = Configs.valueOrDefault(cloneServers, Map.of()); } @@ -328,12 +325,6 @@ public boolean getReplicateAfterLoadTimeout() return replicateAfterLoadTimeout; } - @JsonProperty - public Set getUnmanagedNodes() - { - return unmanagedNodes; - } - @JsonProperty public Map getCloneServers() { @@ -482,7 +473,6 @@ public static class Builder private Boolean useRoundRobinSegmentAssignment; private Boolean smartSegmentLoading; private Set turboLoadingNodes; - private Set unmanagedNodes; private Map cloneServers; public Builder() @@ -508,7 +498,6 @@ public Builder( @JsonProperty("smartSegmentLoading") @Nullable Boolean smartSegmentLoading, @JsonProperty("debugDimensions") @Nullable Map debugDimensions, @JsonProperty("turboLoadingNodes") @Nullable Set turboLoadingNodes, - @JsonProperty("unmanagedNodes") @Nullable Set unmanagedNodes, @JsonProperty("cloneServers") @Nullable Map cloneServers ) { @@ -529,7 +518,6 @@ public Builder( this.smartSegmentLoading = smartSegmentLoading; this.debugDimensions = debugDimensions; this.turboLoadingNodes = turboLoadingNodes; - this.unmanagedNodes = unmanagedNodes; this.cloneServers = cloneServers; } @@ -623,12 +611,6 @@ public Builder withUseRoundRobinSegmentAssignment(boolean useRoundRobinSegmentAs return this; } - public Builder withUnmanagedNodes(Set unmanagedNodes) - { - this.unmanagedNodes = unmanagedNodes; - return this; - } - public Builder withCloneServers(Map cloneServers) { this.cloneServers = cloneServers; @@ -662,7 +644,6 @@ public CoordinatorDynamicConfig build() valueOrDefault(smartSegmentLoading, Defaults.SMART_SEGMENT_LOADING), debugDimensions, turboLoadingNodes, - unmanagedNodes, cloneServers ); } @@ -695,7 +676,6 @@ public CoordinatorDynamicConfig build(CoordinatorDynamicConfig defaults) valueOrDefault(smartSegmentLoading, defaults.isSmartSegmentLoading()), valueOrDefault(debugDimensions, defaults.getDebugDimensions()), 
valueOrDefault(turboLoadingNodes, defaults.getTurboLoadingNodes()), - valueOrDefault(unmanagedNodes, defaults.getUnmanagedNodes()), valueOrDefault(cloneServers, defaults.getCloneServers()) ); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java index e04a92ae3c21..576b2155ac7e 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java @@ -234,7 +234,6 @@ private Builder( this.stats = stats; this.balancerStrategy = balancerStrategy; this.broadcastDatasources = broadcastDatasources; - this.loadQueueManager = loadQueueManager; } public DruidCoordinatorRuntimeParams build() diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java index 09aa25edb007..d82380598307 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java @@ -100,8 +100,8 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) continue; } - Set sourceProjectedSegments = sourceServer.getProjectedSegments(); - Set targetProjectedSegments = targetServer.getProjectedSegments(); + final Set sourceProjectedSegments = sourceServer.getProjectedSegments(); + final Set targetProjectedSegments = targetServer.getProjectedSegments(); // Load any segments missing in the clone target. 
for (DataSegment segment : sourceProjectedSegments) { if (!targetProjectedSegments.contains(segment) && loadQueueManager.loadSegment(segment, targetServer, SegmentAction.LOAD)) { diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java index d62827df6709..a34614ee9446 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java @@ -38,6 +38,7 @@ import org.apache.druid.server.coordinator.stats.Stats; import org.apache.druid.timeline.DataSegment; +import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -152,7 +153,7 @@ private DruidCluster prepareCluster( ) { final Set decommissioningServers = dynamicConfig.getDecommissioningNodes(); - final Set unmanagedServers = dynamicConfig.getUnmanagedNodes(); + final Set unmanagedServers = new HashSet<>(dynamicConfig.getCloneServers().values()); final DruidCluster.Builder cluster = DruidCluster.builder(); for (ImmutableDruidServer server : currentServers) { cluster.add( diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java index 613a692cdbaf..4ae709a9ecdd 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java @@ -27,7 +27,6 @@ import org.junit.Test; import java.util.Map; -import java.util.Set; public class HistoricalCloningTest extends CoordinatorSimulationBaseTest { @@ -59,7 +58,6 @@ public void testSimpleCloning() .withDynamicConfig( CoordinatorDynamicConfig.builder() 
.withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) - .withUnmanagedNodes(Set.of(historicalT12.getHost())) .withSmartSegmentLoading(true) .build() ) @@ -101,7 +99,6 @@ public void testAddingNewHistorical() .withDynamicConfig( CoordinatorDynamicConfig.builder() .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) - .withUnmanagedNodes(Set.of(historicalT12.getHost())) .withSmartSegmentLoading(true) .build() ) @@ -151,7 +148,6 @@ public void testCloningServerDisappearsAndRelaunched() .withDynamicConfig( CoordinatorDynamicConfig.builder() .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) - .withUnmanagedNodes(Set.of(historicalT12.getHost())) .withSmartSegmentLoading(true) .build() ) @@ -209,7 +205,6 @@ public void testClonedServerDoesNotFollowReplicationLimit() .withDynamicConfig( CoordinatorDynamicConfig.builder() .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) - .withUnmanagedNodes(Set.of(historicalT12.getHost())) .withSmartSegmentLoading(true) .withReplicationThrottleLimit(2) .build() @@ -259,7 +254,6 @@ public void testCloningHistoricalWithReplicationLimit() .withDynamicConfig( CoordinatorDynamicConfig.builder() .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) - .withUnmanagedNodes(Set.of(historicalT12.getHost())) .withSmartSegmentLoading(false) .withReplicationThrottleLimit(2) .withMaxSegmentsToMove(0) diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index 30702e8af368..98324792ef7b 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -245,7 +245,6 @@ public void testConstructorWithNullsShouldKillUnusedSegmentsInAllDataSources() false, null, ImmutableSet.of("host1"), - 
null, null ); Assert.assertTrue(config.getSpecificDataSourcesToKillUnusedSegmentsIn().isEmpty()); @@ -272,7 +271,6 @@ public void testConstructorWithSpecificDataSourcesToKillShouldNotKillUnusedSegme false, null, ImmutableSet.of("host1"), - null, null ); Assert.assertEquals(ImmutableSet.of("test1"), config.getSpecificDataSourcesToKillUnusedSegmentsIn()); From 017975d5bbae304e1a710714c5e7231b868b74d9 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 7 Apr 2025 12:33:43 +0530 Subject: [PATCH 16/24] Add docs --- .../dynamic-configuration-api.md | 10 ++++--- docs/configuration/index.md | 1 + .../coordinator/duty/CloneHistoricals.java | 20 +++++++------- .../simulate/HistoricalCloningTest.java | 27 ++++++++++++++----- 4 files changed, 38 insertions(+), 20 deletions(-) diff --git a/docs/api-reference/dynamic-configuration-api.md b/docs/api-reference/dynamic-configuration-api.md index 971aa81d206a..cad61e4b88fa 100644 --- a/docs/api-reference/dynamic-configuration-api.md +++ b/docs/api-reference/dynamic-configuration-api.md @@ -106,7 +106,9 @@ Host: http://ROUTER_IP:ROUTER_PORT "useRoundRobinSegmentAssignment": true, "smartSegmentLoading": true, "debugDimensions": null, - "turboLoadingNodes": [] + "turboLoadingNodes": [], + "cloneServers": {} + } ``` @@ -174,7 +176,8 @@ curl "http://ROUTER_IP:ROUTER_PORT/druid/coordinator/v1/config" \ "replicateAfterLoadTimeout": false, "maxNonPrimaryReplicantsToLoad": 2147483647, "useRoundRobinSegmentAssignment": true, - "turboLoadingNodes": [] + "turboLoadingNodes": [], + "cloneServers": {} }' ``` @@ -206,7 +209,8 @@ Content-Length: 683 "replicateAfterLoadTimeout": false, "maxNonPrimaryReplicantsToLoad": 2147483647, "useRoundRobinSegmentAssignment": true, - "turboLoadingNodes": [] + "turboLoadingNodes": [], + "cloneServers": {} } ``` diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 91168d9471a1..8263b107c949 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -954,6 +954,7 @@ 
The following table shows the dynamic configuration properties for the Coordinat |`pauseCoordination`|Boolean flag for whether or not the Coordinator should execute its various duties of coordinating the cluster. Setting this to true essentially pauses all coordination work while allowing the API to remain up. Duties that are paused include all classes that implement the `CoordinatorDuty` interface. Such duties include: segment balancing, segment compaction, submitting kill tasks for unused segments (if enabled), logging of used segments in the cluster, marking of newly unused or overshadowed segments, matching and execution of load/drop rules for used segments, unloading segments that are no longer marked as used from Historical servers. An example of when an admin may want to pause coordination would be if they are doing deep storage maintenance on HDFS name nodes with downtime and don't want the Coordinator to be directing Historical nodes to hit the name node with API requests until maintenance is done and the deep store is declared healthy for use again.|false| |`replicateAfterLoadTimeout`|Boolean flag for whether or not additional replication is needed for segments that have failed to load due to the expiry of `druid.coordinator.load.timeout`. If this is set to true, the Coordinator will attempt to replicate the failed segment on a different historical server. This helps improve the segment availability if there are a few slow Historicals in the cluster. However, the slow Historical may still load the segment later and the Coordinator may issue drop requests if the segment is over-replicated.|false| |`turboLoadingNodes`| Experimental. List of Historical servers to place in turbo loading mode. These servers use a larger thread-pool to load segments faster but at the cost of query performance. 
For servers specified in `turboLoadingNodes`, `druid.coordinator.loadqueuepeon.http.batchSize` is ignored and the coordinator uses the value of the respective `numLoadingThreads` instead.
Please use this config with caution. All servers should eventually be removed from this list once the segment loading on the respective historicals is finished. |none| +|`cloneServers`| Experimental. Map of source to target Historicals to place into cloning mode. Target historicals are not involved in normal segment assignment or segment balancing. Instead, segment assignments to the source Historical are mirrored to the target Historical, so that the source Historical becomes an exact copy. Segments on the target Historical do not count towards the desired segment replication counts.
Please use this config with caution. All servers should eventually be removed from this list once the desried state on the respective historicals is achieved. |none| ##### Smart segment loading diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java index d82380598307..00cd1ed8ae55 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java @@ -105,22 +105,22 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) // Load any segments missing in the clone target. for (DataSegment segment : sourceProjectedSegments) { if (!targetProjectedSegments.contains(segment) && loadQueueManager.loadSegment(segment, targetServer, SegmentAction.LOAD)) { - stats.add( - Stats.Segments.ASSIGNED_TO_CLONE, - RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), - 1L - ); + stats.add( + Stats.Segments.ASSIGNED_TO_CLONE, + RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), + 1L + ); } } // Drop any segments missing from the clone source. 
for (DataSegment segment : targetProjectedSegments) { if (!sourceProjectedSegments.contains(segment) && loadQueueManager.dropSegment(segment, targetServer)) { - stats.add( - Stats.Segments.DROPPED_FROM_CLONE, - RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), - 1L - ); + stats.add( + Stats.Segments.DROPPED_FROM_CLONE, + RowKey.of(Dimension.SERVER, targetServer.getServer().getHost()), + 1L + ); } } } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java index 4ae709a9ecdd..69c6b8f6a74c 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java @@ -84,8 +84,12 @@ public void testSimpleCloning() 10L ); - Assert.assertEquals(10, historicalT11.getTotalSegments()); - Assert.assertEquals(10, historicalT12.getTotalSegments()); + Assert.assertEquals(Segments.WIKI_10X1D.size(), historicalT11.getTotalSegments()); + Assert.assertEquals(Segments.WIKI_10X1D.size(), historicalT12.getTotalSegments()); + Segments.WIKI_10X1D.forEach(segment -> { + Assert.assertEquals(segment, historicalT11.getSegment(segment.getId())); + Assert.assertEquals(segment, historicalT12.getSegment(segment.getId())); + }); } @Test @@ -190,8 +194,12 @@ public void testCloningServerDisappearsAndRelaunched() 10L ); - Assert.assertEquals(10, historicalT11.getTotalSegments()); - Assert.assertEquals(10, historicalT12.getTotalSegments()); + Assert.assertEquals(Segments.WIKI_10X1D.size(), historicalT11.getTotalSegments()); + Assert.assertEquals(Segments.WIKI_10X1D.size(), historicalT12.getTotalSegments()); + Segments.WIKI_10X1D.forEach(segment -> { + Assert.assertEquals(segment, historicalT11.getSegment(segment.getId())); + Assert.assertEquals(segment, historicalT12.getSegment(segment.getId())); + }); } @Test @@ 
-280,8 +288,13 @@ public void testCloningHistoricalWithReplicationLimit() ); } - Assert.assertEquals(10, historicalT11.getTotalSegments()); - Assert.assertEquals(10, historicalT12.getTotalSegments()); - Assert.assertEquals(10, historicalT13.getTotalSegments()); + Assert.assertEquals(Segments.WIKI_10X1D.size(), historicalT11.getTotalSegments()); + Assert.assertEquals(Segments.WIKI_10X1D.size(), historicalT12.getTotalSegments()); + Assert.assertEquals(Segments.WIKI_10X1D.size(), historicalT13.getTotalSegments()); + Segments.WIKI_10X1D.forEach(segment -> { + Assert.assertEquals(segment, historicalT11.getSegment(segment.getId())); + Assert.assertEquals(segment, historicalT12.getSegment(segment.getId())); + Assert.assertEquals(segment, historicalT13.getSegment(segment.getId())); + }); } } From c85767bf64de08ac04bb11dcdf54aae02b3e0ad6 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 7 Apr 2025 12:35:45 +0530 Subject: [PATCH 17/24] Add javadoc --- .../druid/server/coordinator/CoordinatorDynamicConfig.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index d0a399895ca6..4e5bfa78b85b 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -325,6 +325,12 @@ public boolean getReplicateAfterLoadTimeout() return replicateAfterLoadTimeout; } + /** + * Map of source to target Historicals to place into cloning mode. Target historicals are not involved in normal + * segment assignment or segment balancing. Instead, segment assignments to the source Historical are mirrored to + * the target Historical, so that the source Historical becomes an exact copy. Segments on the target Historical do + * not count towards the desired segment replication counts. 
+ */ @JsonProperty public Map getCloneServers() { From 6ed434dd481315e5ea0644b0da877528c54d3782 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 7 Apr 2025 12:44:35 +0530 Subject: [PATCH 18/24] Spellcheck --- docs/configuration/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 8263b107c949..778edfc5f6fc 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -954,7 +954,7 @@ The following table shows the dynamic configuration properties for the Coordinat |`pauseCoordination`|Boolean flag for whether or not the Coordinator should execute its various duties of coordinating the cluster. Setting this to true essentially pauses all coordination work while allowing the API to remain up. Duties that are paused include all classes that implement the `CoordinatorDuty` interface. Such duties include: segment balancing, segment compaction, submitting kill tasks for unused segments (if enabled), logging of used segments in the cluster, marking of newly unused or overshadowed segments, matching and execution of load/drop rules for used segments, unloading segments that are no longer marked as used from Historical servers. An example of when an admin may want to pause coordination would be if they are doing deep storage maintenance on HDFS name nodes with downtime and don't want the Coordinator to be directing Historical nodes to hit the name node with API requests until maintenance is done and the deep store is declared healthy for use again.|false| |`replicateAfterLoadTimeout`|Boolean flag for whether or not additional replication is needed for segments that have failed to load due to the expiry of `druid.coordinator.load.timeout`. If this is set to true, the Coordinator will attempt to replicate the failed segment on a different historical server. This helps improve the segment availability if there are a few slow Historicals in the cluster. 
However, the slow Historical may still load the segment later and the Coordinator may issue drop requests if the segment is over-replicated.|false| |`turboLoadingNodes`| Experimental. List of Historical servers to place in turbo loading mode. These servers use a larger thread-pool to load segments faster but at the cost of query performance. For servers specified in `turboLoadingNodes`, `druid.coordinator.loadqueuepeon.http.batchSize` is ignored and the coordinator uses the value of the respective `numLoadingThreads` instead.
Please use this config with caution. All servers should eventually be removed from this list once the segment loading on the respective historicals is finished. |none| -|`cloneServers`| Experimental. Map of source to target Historicals to place into cloning mode. Target historicals are not involved in normal segment assignment or segment balancing. Instead, segment assignments to the source Historical are mirrored to the target Historical, so that the source Historical becomes an exact copy. Segments on the target Historical do not count towards the desired segment replication counts.
Please use this config with caution. All servers should eventually be removed from this list once the desried state on the respective historicals is achieved. |none| +|`cloneServers`| Experimental. Map of source to target Historicals to place into cloning mode. Target historicals are not involved in normal segment assignment or segment balancing. Instead, segment assignments to the source Historical are mirrored to the target Historical, so that the source Historical becomes an exact copy. Segments on the target Historical do not count towards the desired segment replication counts.
Please use this config with caution. All servers should eventually be removed from this list once the desired state on the respective historicals is achieved. |none| ##### Smart segment loading From f69a527a465aca2c3574b75b933f4c0e22dac31b Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 7 Apr 2025 15:38:37 +0530 Subject: [PATCH 19/24] Update javadoc --- docs/configuration/index.md | 2 +- .../druid/server/coordinator/CoordinatorDynamicConfig.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 778edfc5f6fc..c7a7dddbfe11 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -954,7 +954,7 @@ The following table shows the dynamic configuration properties for the Coordinat |`pauseCoordination`|Boolean flag for whether or not the Coordinator should execute its various duties of coordinating the cluster. Setting this to true essentially pauses all coordination work while allowing the API to remain up. Duties that are paused include all classes that implement the `CoordinatorDuty` interface. Such duties include: segment balancing, segment compaction, submitting kill tasks for unused segments (if enabled), logging of used segments in the cluster, marking of newly unused or overshadowed segments, matching and execution of load/drop rules for used segments, unloading segments that are no longer marked as used from Historical servers. An example of when an admin may want to pause coordination would be if they are doing deep storage maintenance on HDFS name nodes with downtime and don't want the Coordinator to be directing Historical nodes to hit the name node with API requests until maintenance is done and the deep store is declared healthy for use again.|false| |`replicateAfterLoadTimeout`|Boolean flag for whether or not additional replication is needed for segments that have failed to load due to the expiry of `druid.coordinator.load.timeout`. 
If this is set to true, the Coordinator will attempt to replicate the failed segment on a different historical server. This helps improve the segment availability if there are a few slow Historicals in the cluster. However, the slow Historical may still load the segment later and the Coordinator may issue drop requests if the segment is over-replicated.|false| |`turboLoadingNodes`| Experimental. List of Historical servers to place in turbo loading mode. These servers use a larger thread-pool to load segments faster but at the cost of query performance. For servers specified in `turboLoadingNodes`, `druid.coordinator.loadqueuepeon.http.batchSize` is ignored and the coordinator uses the value of the respective `numLoadingThreads` instead.
Please use this config with caution. All servers should eventually be removed from this list once the segment loading on the respective historicals is finished. |none| -|`cloneServers`| Experimental. Map of source to target Historicals to place into cloning mode. Target historicals are not involved in normal segment assignment or segment balancing. Instead, segment assignments to the source Historical are mirrored to the target Historical, so that the source Historical becomes an exact copy. Segments on the target Historical do not count towards the desired segment replication counts.
Please use this config with caution. All servers should eventually be removed from this list once the desired state on the respective historicals is achieved. |none| +|`cloneServers`| Experimental. Map of source to target Historicals to place into cloning mode. Target historicals are not involved in normal segment assignment or segment balancing. Instead, segment assignments to the source Historical are mirrored to the target Historical, so that the target Historical becomes an exact copy. Segments on the target Historical do not count towards the desired segment replication counts.
Please use this config with caution. All servers should eventually be removed from this list once the desired state on the respective historicals is achieved. |none| ##### Smart segment loading diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 4e5bfa78b85b..3a11981f9bfd 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -328,7 +328,7 @@ public boolean getReplicateAfterLoadTimeout() /** * Map of source to target Historicals to place into cloning mode. Target historicals are not involved in normal * segment assignment or segment balancing. Instead, segment assignments to the source Historical are mirrored to - * the target Historical, so that the source Historical becomes an exact copy. Segments on the target Historical do + * the target Historical, so that the target Historical becomes an exact copy. Segments on the target Historical do * not count towards the desired segment replication counts. 
*/ @JsonProperty From f034ee2ac9fa65d9184b96c745f036a36424d3bb Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 8 Apr 2025 10:29:40 +0530 Subject: [PATCH 20/24] Address review comments --- docs/configuration/index.md | 3 ++- .../coordinator/CoordinatorDynamicConfig.java | 10 ++++++---- .../coordinator/duty/CloneHistoricals.java | 19 ++++--------------- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index c7a7dddbfe11..97ad3b3021d6 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -954,7 +954,8 @@ The following table shows the dynamic configuration properties for the Coordinat |`pauseCoordination`|Boolean flag for whether or not the Coordinator should execute its various duties of coordinating the cluster. Setting this to true essentially pauses all coordination work while allowing the API to remain up. Duties that are paused include all classes that implement the `CoordinatorDuty` interface. Such duties include: segment balancing, segment compaction, submitting kill tasks for unused segments (if enabled), logging of used segments in the cluster, marking of newly unused or overshadowed segments, matching and execution of load/drop rules for used segments, unloading segments that are no longer marked as used from Historical servers. An example of when an admin may want to pause coordination would be if they are doing deep storage maintenance on HDFS name nodes with downtime and don't want the Coordinator to be directing Historical nodes to hit the name node with API requests until maintenance is done and the deep store is declared healthy for use again.|false| |`replicateAfterLoadTimeout`|Boolean flag for whether or not additional replication is needed for segments that have failed to load due to the expiry of `druid.coordinator.load.timeout`. 
If this is set to true, the Coordinator will attempt to replicate the failed segment on a different historical server. This helps improve the segment availability if there are a few slow Historicals in the cluster. However, the slow Historical may still load the segment later and the Coordinator may issue drop requests if the segment is over-replicated.|false| |`turboLoadingNodes`| Experimental. List of Historical servers to place in turbo loading mode. These servers use a larger thread-pool to load segments faster but at the cost of query performance. For servers specified in `turboLoadingNodes`, `druid.coordinator.loadqueuepeon.http.batchSize` is ignored and the coordinator uses the value of the respective `numLoadingThreads` instead.
Please use this config with caution. All servers should eventually be removed from this list once the segment loading on the respective historicals is finished. |none| -|`cloneServers`| Experimental. Map of source to target Historicals to place into cloning mode. Target historicals are not involved in normal segment assignment or segment balancing. Instead, segment assignments to the source Historical are mirrored to the target Historical, so that the target Historical becomes an exact copy. Segments on the target Historical do not count towards the desired segment replication counts.
Please use this config with caution. All servers should eventually be removed from this list once the desired state on the respective historicals is achieved. |none| + +|`cloneServers`| Experimental. Map from source Historical server to target Historical server which should be made a clone of the source. The target Historical does not participate in regular segment assignment or balancing. Instead, the Coordinator mirrors any segment assignment made to the source Historical onto the target Historical, so that the target becomes an exact copy of the source. Segments on the target Historical do not count towards replica counts either. If the source disappears, the target remains in the last known state of the source server until removed from the cloneServers.
Use this config with caution. All servers should eventually be removed from this list once the desired state on the respective Historicals is achieved. |none| ##### Smart segment loading diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 3a11981f9bfd..8d807fdbbcfc 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -326,10 +326,12 @@ public boolean getReplicateAfterLoadTimeout() } /** - * Map of source to target Historicals to place into cloning mode. Target historicals are not involved in normal - * segment assignment or segment balancing. Instead, segment assignments to the source Historical are mirrored to - * the target Historical, so that the target Historical becomes an exact copy. Segments on the target Historical do - * not count towards the desired segment replication counts. + * Map from source Historical server to target Historical server which should be made a clone of the source. The + * target Historical does not participate in regular segment assignment or balancing. Instead, the Coordinator + * mirrors any segment assignment made to the source Historical onto the target Historical, so that the target + * becomes an exact copy of the source. Segments on the target Historical do not count towards replica counts either. + * If the source disappears, the target remains in the last known state of the source server until removed from the + * cloneServers. 
*/ @JsonProperty public Map getCloneServers() diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java index 00cd1ed8ae55..d2159658f85f 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java @@ -74,28 +74,17 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) )); for (Map.Entry entry : cloneServers.entrySet()) { - log.debug("Handling cloning for mapping: [%s]", entry); - final String sourceHistoricalName = entry.getKey(); final ServerHolder sourceServer = historicalMap.get(sourceHistoricalName); - if (sourceServer == null) { - log.error( - "Could not process clone mapping[%s] as source historical[%s] does not exist.", - sourceHistoricalName, - entry - ); - continue; - } - final String targetHistoricalName = entry.getValue(); final ServerHolder targetServer = historicalMap.get(targetHistoricalName); - if (targetServer == null) { + if (sourceServer == null || targetServer == null) { log.error( - "Could not process clone mapping[%s] as target historical[%s] does not exist.", - targetHistoricalName, - entry + "Could not process clone mapping[%s] as historical[%s] does not exist.", + entry, + (sourceServer == null ? 
sourceHistoricalName : targetHistoricalName) ); continue; } From e4fe4a542948cb2b8e2148680eb222ab8a84903a Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 8 Apr 2025 10:55:14 +0530 Subject: [PATCH 21/24] Change mapping --- docs/configuration/index.md | 3 +-- .../server/coordinator/CoordinatorDynamicConfig.java | 11 +++++------ .../server/coordinator/duty/CloneHistoricals.java | 8 ++++---- .../duty/PrepareBalancerAndLoadQueues.java | 2 +- .../coordinator/simulate/HistoricalCloningTest.java | 10 +++++----- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 97ad3b3021d6..18472b152f77 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -954,8 +954,7 @@ The following table shows the dynamic configuration properties for the Coordinat |`pauseCoordination`|Boolean flag for whether or not the Coordinator should execute its various duties of coordinating the cluster. Setting this to true essentially pauses all coordination work while allowing the API to remain up. Duties that are paused include all classes that implement the `CoordinatorDuty` interface. Such duties include: segment balancing, segment compaction, submitting kill tasks for unused segments (if enabled), logging of used segments in the cluster, marking of newly unused or overshadowed segments, matching and execution of load/drop rules for used segments, unloading segments that are no longer marked as used from Historical servers. 
An example of when an admin may want to pause coordination would be if they are doing deep storage maintenance on HDFS name nodes with downtime and don't want the Coordinator to be directing Historical nodes to hit the name node with API requests until maintenance is done and the deep store is declared healthy for use again.|false| |`replicateAfterLoadTimeout`|Boolean flag for whether or not additional replication is needed for segments that have failed to load due to the expiry of `druid.coordinator.load.timeout`. If this is set to true, the Coordinator will attempt to replicate the failed segment on a different historical server. This helps improve the segment availability if there are a few slow Historicals in the cluster. However, the slow Historical may still load the segment later and the Coordinator may issue drop requests if the segment is over-replicated.|false| |`turboLoadingNodes`| Experimental. List of Historical servers to place in turbo loading mode. These servers use a larger thread-pool to load segments faster but at the cost of query performance. For servers specified in `turboLoadingNodes`, `druid.coordinator.loadqueuepeon.http.batchSize` is ignored and the coordinator uses the value of the respective `numLoadingThreads` instead.
Please use this config with caution. All servers should eventually be removed from this list once the segment loading on the respective historicals is finished. |none| - -|`cloneServers`| Experimental. Map from source Historical server to target Historical server which should be made a clone of the source. The target Historical does not participate in regular segment assignment or balancing. Instead, the Coordinator mirrors any segment assignment made to the source Historical onto the target Historical, so that the target becomes an exact copy of the source. Segments on the target Historical do not count towards replica counts either. If the source disappears, the target remains in the last known state of the source server until removed from the cloneServers.
Use this config with caution. All servers should eventually be removed from this list once the desired state on the respective Historicals is achieved. |none| +|`cloneServers`| Experimental. Map from target Historical server to source Historical server which should be cloned by the target. The target Historical does not participate in regular segment assignment or balancing. Instead, the Coordinator mirrors any segment assignment made to the source Historical onto the target Historical, so that the target becomes an exact copy of the source. Segments on the target Historical do not count towards replica counts either. If the source disappears, the target remains in the last known state of the source server until removed from the cloneServers.
Use this config with caution. All servers should eventually be removed from this list once the desired state on the respective Historicals is achieved. |none| ##### Smart segment loading diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 8d807fdbbcfc..8a219484acf1 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -326,12 +326,11 @@ public boolean getReplicateAfterLoadTimeout() } /** - * Map from source Historical server to target Historical server which should be made a clone of the source. The - * target Historical does not participate in regular segment assignment or balancing. Instead, the Coordinator - * mirrors any segment assignment made to the source Historical onto the target Historical, so that the target - * becomes an exact copy of the source. Segments on the target Historical do not count towards replica counts either. - * If the source disappears, the target remains in the last known state of the source server until removed from the - * cloneServers. + * Map from target Historical server to source Historical server which should be cloned by the target. The target + * Historical does not participate in regular segment assignment or balancing. Instead, the Coordinator mirrors any + * segment assignment made to the source Historical onto the target Historical, so that the target becomes an exact + * copy of the source. Segments on the target Historical do not count towards replica counts either. If the source + * disappears, the target remains in the last known state of the source server until removed from the cloneServers. 
*/ @JsonProperty public Map getCloneServers() diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java index d2159658f85f..25534f1ddfbd 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CloneHistoricals.java @@ -74,12 +74,12 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) )); for (Map.Entry entry : cloneServers.entrySet()) { - final String sourceHistoricalName = entry.getKey(); - final ServerHolder sourceServer = historicalMap.get(sourceHistoricalName); - - final String targetHistoricalName = entry.getValue(); + final String targetHistoricalName = entry.getKey(); final ServerHolder targetServer = historicalMap.get(targetHistoricalName); + final String sourceHistoricalName = entry.getValue(); + final ServerHolder sourceServer = historicalMap.get(sourceHistoricalName); + if (sourceServer == null || targetServer == null) { log.error( "Could not process clone mapping[%s] as historical[%s] does not exist.", diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java index a34614ee9446..4baf40e16df9 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java @@ -153,7 +153,7 @@ private DruidCluster prepareCluster( ) { final Set decommissioningServers = dynamicConfig.getDecommissioningNodes(); - final Set unmanagedServers = new HashSet<>(dynamicConfig.getCloneServers().values()); + final Set unmanagedServers = new HashSet<>(dynamicConfig.getCloneServers().keySet()); final DruidCluster.Builder cluster = 
DruidCluster.builder(); for (ImmutableDruidServer server : currentServers) { cluster.add( diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java index 69c6b8f6a74c..60be0e1496df 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/simulate/HistoricalCloningTest.java @@ -57,7 +57,7 @@ public void testSimpleCloning() .withRules(datasource, Load.on(Tier.T1, 1).forever()) .withDynamicConfig( CoordinatorDynamicConfig.builder() - .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) + .withCloneServers(Map.of(historicalT12.getHost(), historicalT11.getHost())) .withSmartSegmentLoading(true) .build() ) @@ -102,7 +102,7 @@ public void testAddingNewHistorical() .withRules(datasource, Load.on(Tier.T1, 1).forever()) .withDynamicConfig( CoordinatorDynamicConfig.builder() - .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) + .withCloneServers(Map.of(historicalT12.getHost(), historicalT11.getHost())) .withSmartSegmentLoading(true) .build() ) @@ -151,7 +151,7 @@ public void testCloningServerDisappearsAndRelaunched() .withRules(datasource, Load.on(Tier.T1, 2).forever()) .withDynamicConfig( CoordinatorDynamicConfig.builder() - .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) + .withCloneServers(Map.of(historicalT12.getHost(), historicalT11.getHost())) .withSmartSegmentLoading(true) .build() ) @@ -212,7 +212,7 @@ public void testClonedServerDoesNotFollowReplicationLimit() .withRules(datasource, Load.on(Tier.T1, 1).forever()) .withDynamicConfig( CoordinatorDynamicConfig.builder() - .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) + .withCloneServers(Map.of(historicalT12.getHost(), historicalT11.getHost())) 
.withSmartSegmentLoading(true) .withReplicationThrottleLimit(2) .build() @@ -261,7 +261,7 @@ public void testCloningHistoricalWithReplicationLimit() .withImmediateSegmentLoading(true) .withDynamicConfig( CoordinatorDynamicConfig.builder() - .withCloneServers(Map.of(historicalT11.getHost(), historicalT12.getHost())) + .withCloneServers(Map.of(historicalT12.getHost(), historicalT11.getHost())) .withSmartSegmentLoading(false) .withReplicationThrottleLimit(2) .withMaxSegmentsToMove(0) From 727d1dd6a68e02337e6b50d9f6588ae7fd57a179 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 8 Apr 2025 11:49:51 +0530 Subject: [PATCH 22/24] Refactor DruidCluster --- .../server/coordinator/DruidCluster.java | 38 +++++++++++++++++++ .../server/coordinator/ServerHolder.java | 2 + .../balancer/TierSegmentBalancer.java | 4 +- .../coordinator/duty/BalanceSegments.java | 4 +- .../MarkOvershadowedSegmentsAsUnused.java | 2 +- .../duty/PrepareBalancerAndLoadQueues.java | 11 +++++- .../duty/UnloadUnusedSegments.java | 2 +- .../loading/RoundRobinServerSelector.java | 2 +- .../loading/SegmentReplicaCountMap.java | 6 +-- .../loading/StrategicSegmentAssigner.java | 9 ++--- .../druid/server/coordinator/stats/Stats.java | 2 + 11 files changed, 63 insertions(+), 19 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCluster.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCluster.java index da0b8e8b2e70..6fc6fe2dc95f 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCluster.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCluster.java @@ -33,6 +33,7 @@ import java.util.Map; import java.util.NavigableSet; import java.util.Set; +import java.util.stream.Collectors; /** * Contains a representation of the current state of the cluster by tier. 
@@ -44,8 +45,10 @@ public class DruidCluster private final Set realtimes; private final Map> historicals; + private final Map> managedHistoricals; private final Set brokers; private final List allServers; + private final List allManagedServers; private DruidCluster( Set realtimes, @@ -58,8 +61,17 @@ private DruidCluster( historicals, holders -> CollectionUtils.newTreeSet(Comparator.naturalOrder(), holders) ); + this.managedHistoricals = CollectionUtils.mapValues( + historicals, + holders -> CollectionUtils.newTreeSet(Comparator.naturalOrder(), + holders.stream() + .filter(serverHolder -> !serverHolder.isUnmanaged()) + .collect(Collectors.toList()) + ) + ); this.brokers = Collections.unmodifiableSet(brokers); this.allServers = initAllServers(); + this.allManagedServers = initAllManagedServers(); } public Set getRealtimes() @@ -72,6 +84,11 @@ public Map> getHistoricals() return historicals; } + public Map> getManagedHistoricals() + { + return managedHistoricals; + } + public Set getBrokers() { return brokers; @@ -92,6 +109,11 @@ public List getAllServers() return allServers; } + public List getAllManagedServers() + { + return allManagedServers; + } + private List initAllServers() { final int historicalSize = historicals.values().stream().mapToInt(Collection::size).sum(); @@ -104,6 +126,22 @@ private List initAllServers() return allServers; } + private List initAllManagedServers() + { + final int historicalSize = historicals.values().stream().mapToInt(Collection::size).sum(); + final int realtimeSize = realtimes.size(); + final List allManagedServers = new ArrayList<>(historicalSize + realtimeSize); + + historicals.values() + .stream() + .flatMap(Collection::stream) + .filter(serverHolder -> !serverHolder.isUnmanaged()) + .forEach(allManagedServers::add); + allManagedServers.addAll(brokers); + allManagedServers.addAll(realtimes); + return allManagedServers; + } + public boolean isEmpty() { return historicals.isEmpty() && realtimes.isEmpty() && brokers.isEmpty(); diff 
--git a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java index 5772cfea7143..c7846d56f046 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java @@ -105,6 +105,8 @@ public ServerHolder( * @param server Underlying Druid server * @param peon Load queue peon for this server * @param isDecommissioning Whether the server is decommissioning + * @param isUnmanaged Whether this server is unmanaged and should not participate in segment assignment, + * drop or balancing. * @param maxSegmentsInLoadQueue Max number of segments that can be present in * the load queue at any point. If this is 0, the * load queue can have an unlimited number of segments. diff --git a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java index 180220eb5fe6..9643a547c991 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java @@ -75,9 +75,7 @@ public TierSegmentBalancer( this.runStats = params.getCoordinatorStats(); Map> partitions = - servers.stream() - .filter(s -> !s.isUnmanaged()) - .collect(Collectors.partitioningBy(ServerHolder::isDecommissioning)); + servers.stream().collect(Collectors.partitioningBy(ServerHolder::isDecommissioning)); this.decommissioningServers = partitions.get(true); this.activeServers = partitions.get(false); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java index c27f95a002de..e3791c9dc233 100644 --- 
a/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java @@ -60,7 +60,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) return params; } - params.getDruidCluster().getHistoricals().forEach( + params.getDruidCluster().getManagedHistoricals().forEach( (tier, servers) -> new TierSegmentBalancer(tier, servers, maxSegmentsToMove, params).run() ); @@ -113,7 +113,7 @@ private Pair getNumHistoricalsAndSegments(DruidCluster cluster int numHistoricals = 0; int numSegments = 0; - for (Set historicals : cluster.getHistoricals().values()) { + for (Set historicals : cluster.getManagedHistoricals().values()) { for (ServerHolder historical : historicals) { ++numHistoricals; numSegments += historical.getServer().getNumSegments() + historical.getNumQueuedSegments(); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java index 596f9df7017f..dba11d2c2901 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java @@ -83,7 +83,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) final DruidCluster cluster = params.getDruidCluster(); final Map timelines = new HashMap<>(); - cluster.getHistoricals().values().forEach( + cluster.getManagedHistoricals().values().forEach( historicals -> historicals.forEach( historical -> addSegmentsFromServer(historical, timelines) ) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java index 
4baf40e16df9..cac81701be9f 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java @@ -176,7 +176,16 @@ private void collectHistoricalStats(DruidCluster cluster, CoordinatorRunStats st RowKey rowKey = RowKey.of(Dimension.TIER, tier); stats.add(Stats.Tier.HISTORICAL_COUNT, rowKey, historicals.size()); - long totalCapacity = historicals.stream().mapToLong(ServerHolder::getMaxSize).sum(); + long totalCapacity = 0; + long cloneCount = 0; + for (ServerHolder holder : historicals) { + if (holder.isUnmanaged()) { + cloneCount += 1; + } else { + totalCapacity += holder.getMaxSize(); + } + } + stats.add(Stats.Tier.CLONE_COUNT, rowKey, cloneCount); stats.add(Stats.Tier.TOTAL_CAPACITY, rowKey, totalCapacity); }); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegments.java index 2bd9fd295481..761e3383ede1 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegments.java @@ -62,7 +62,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) broadcastStatusByDatasource.put(broadcastDatasource, true); } - final List allServers = params.getDruidCluster().getAllServers(); + final List allServers = params.getDruidCluster().getAllManagedServers(); int numCancelledLoads = allServers.stream().mapToInt( server -> cancelLoadOfUnusedSegments(server, broadcastStatusByDatasource, params) ).sum(); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/RoundRobinServerSelector.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/RoundRobinServerSelector.java index 1f9307cae56e..cc007449b578 100644 --- 
a/server/src/main/java/org/apache/druid/server/coordinator/loading/RoundRobinServerSelector.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/RoundRobinServerSelector.java @@ -51,7 +51,7 @@ public class RoundRobinServerSelector public RoundRobinServerSelector(DruidCluster cluster) { - cluster.getHistoricals().forEach( + cluster.getManagedHistoricals().forEach( (tier, servers) -> tierToServers.put(tier, new CircularServerList(servers)) ); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java index 86a4b9a8a4ae..241759f8b951 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentReplicaCountMap.java @@ -46,13 +46,9 @@ static SegmentReplicaCountMap create(DruidCluster cluster) private void initReplicaCounts(DruidCluster cluster) { - cluster.getHistoricals().forEach( + cluster.getManagedHistoricals().forEach( (tier, historicals) -> historicals.forEach( serverHolder -> { - if (serverHolder.isUnmanaged()) { - // Don't count segments on unmanaged historicals towards replica counts. 
- return; - } // Add segments already loaded on this server for (DataSegment segment : serverHolder.getServedSegments()) { computeIfAbsent(segment.getId(), tier).incrementLoaded(); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java index 75febd060934..6049d914a134 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java @@ -88,7 +88,7 @@ public StrategicSegmentAssigner( this.useRoundRobinAssignment = loadingConfig.isUseRoundRobinSegmentAssignment(); this.serverSelector = useRoundRobinAssignment ? new RoundRobinServerSelector(cluster) : null; - cluster.getHistoricals().forEach( + cluster.getManagedHistoricals().forEach( (tier, historicals) -> tierToHistoricalCount.put(tier, historicals.size()) ); } @@ -326,7 +326,7 @@ private void reportTierCapacityStats(DataSegment segment, int requiredReplicas, public void broadcastSegment(DataSegment segment) { final Object2IntOpenHashMap tierToRequiredReplicas = new Object2IntOpenHashMap<>(); - for (ServerHolder server : cluster.getAllServers()) { + for (ServerHolder server : cluster.getAllManagedServers()) { // Ignore servers which are not broadcast targets if (!server.getServer().getType().isSegmentBroadcastTarget()) { continue; @@ -446,8 +446,7 @@ private int dropReplicas( for (ServerHolder server : eligibleServers) { if (server.isDecommissioning()) { eligibleDyingServers.add(server); - } else if (!server.isUnmanaged()) { - // Do not assign or drop segments if the server is unmanaged + } else { eligibleLiveServers.add(server); } } @@ -578,7 +577,7 @@ private static ReplicationThrottler createReplicationThrottler( { final Map tierToLoadingReplicaCount = new HashMap<>(); - cluster.getHistoricals().forEach( + 
cluster.getManagedHistoricals().forEach( (tier, historicals) -> { int numLoadingReplicas = historicals.stream().mapToInt(ServerHolder::getNumLoadingReplicas).sum(); tierToLoadingReplicaCount.put(tier, numLoadingReplicas); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java index ec152488fc33..f13eaded408e 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java @@ -104,6 +104,8 @@ public static class Tier = CoordinatorStat.toDebugAndEmit("maxRepFactor", "tier/replication/factor"); public static final CoordinatorStat HISTORICAL_COUNT = CoordinatorStat.toDebugAndEmit("numHistorical", "tier/historical/count"); + public static final CoordinatorStat CLONE_COUNT + = CoordinatorStat.toDebugAndEmit("numHistorical", "tier/historical/clone/count"); } public static class Compaction From a60602f4e0d5c41edc13ce87a3112e1410d523e1 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 8 Apr 2025 12:16:09 +0530 Subject: [PATCH 23/24] Fix spelling --- docs/configuration/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 18472b152f77..328ab3508251 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -954,7 +954,7 @@ The following table shows the dynamic configuration properties for the Coordinat |`pauseCoordination`|Boolean flag for whether or not the Coordinator should execute its various duties of coordinating the cluster. Setting this to true essentially pauses all coordination work while allowing the API to remain up. Duties that are paused include all classes that implement the `CoordinatorDuty` interface. 
Such duties include: segment balancing, segment compaction, submitting kill tasks for unused segments (if enabled), logging of used segments in the cluster, marking of newly unused or overshadowed segments, matching and execution of load/drop rules for used segments, unloading segments that are no longer marked as used from Historical servers. An example of when an admin may want to pause coordination would be if they are doing deep storage maintenance on HDFS name nodes with downtime and don't want the Coordinator to be directing Historical nodes to hit the name node with API requests until maintenance is done and the deep store is declared healthy for use again.|false| |`replicateAfterLoadTimeout`|Boolean flag for whether or not additional replication is needed for segments that have failed to load due to the expiry of `druid.coordinator.load.timeout`. If this is set to true, the Coordinator will attempt to replicate the failed segment on a different historical server. This helps improve the segment availability if there are a few slow Historicals in the cluster. However, the slow Historical may still load the segment later and the Coordinator may issue drop requests if the segment is over-replicated.|false| |`turboLoadingNodes`| Experimental. List of Historical servers to place in turbo loading mode. These servers use a larger thread-pool to load segments faster but at the cost of query performance. For servers specified in `turboLoadingNodes`, `druid.coordinator.loadqueuepeon.http.batchSize` is ignored and the coordinator uses the value of the respective `numLoadingThreads` instead.
Please use this config with caution. All servers should eventually be removed from this list once the segment loading on the respective historicals is finished. |none| -|`cloneServers`| Experimental. Map from target Historical server to source Historical server which should be cloned by the target. The target Historical does not participate in regular segment assignment or balancing. Instead, the Coordinator mirrors any segment assignment made to the source Historical onto the target Historical, so that the target becomes an exact copy of the source. Segments on the target Historical do not count towards replica counts either. If the source disappears, the target remains in the last known state of the source server until removed from the cloneServers.
Use this config with caution. All servers should eventually be removed from this list once the desired state on the respective Historicals is achieved. |none| +|`cloneServers`| Experimental. Map from target Historical server to source Historical server which should be cloned by the target. The target Historical does not participate in regular segment assignment or balancing. Instead, the Coordinator mirrors any segment assignment made to the source Historical onto the target Historical, so that the target becomes an exact copy of the source. Segments on the target Historical do not count towards replica counts either. If the source disappears, the target remains in the last known state of the source server until removed from the configuration.
Use this config with caution. All servers should eventually be removed from this list once the desired state on the respective Historicals is achieved. |none| ##### Smart segment loading From e21d563a246ebbc59d4f39921f88bb4fccba2b0c Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 8 Apr 2025 20:45:02 +0530 Subject: [PATCH 24/24] Refactor --- .../server/coordinator/DruidCluster.java | 50 +++++++------------ .../server/coordinator/ServerHolder.java | 1 - .../duty/PrepareBalancerAndLoadQueues.java | 2 +- .../loading/StrategicSegmentAssigner.java | 2 +- .../druid/server/coordinator/stats/Stats.java | 2 +- .../server/coordinator/DruidClusterTest.java | 4 +- 6 files changed, 23 insertions(+), 38 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCluster.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCluster.java index 6fc6fe2dc95f..5e3bcfd31de5 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCluster.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCluster.java @@ -47,7 +47,6 @@ public class DruidCluster private final Map> historicals; private final Map> managedHistoricals; private final Set brokers; - private final List allServers; private final List allManagedServers; private DruidCluster( @@ -63,14 +62,15 @@ private DruidCluster( ); this.managedHistoricals = CollectionUtils.mapValues( historicals, - holders -> CollectionUtils.newTreeSet(Comparator.naturalOrder(), - holders.stream() - .filter(serverHolder -> !serverHolder.isUnmanaged()) - .collect(Collectors.toList()) - ) + holders -> { + List managedServers = holders.stream() + .filter(serverHolder -> !serverHolder.isUnmanaged()) + .collect(Collectors.toList()); + + return CollectionUtils.newTreeSet(Comparator.naturalOrder(), managedServers); + } ); this.brokers = Collections.unmodifiableSet(brokers); - this.allServers = initAllServers(); this.allManagedServers = initAllManagedServers(); } @@ -79,11 
+79,18 @@ public Set getRealtimes() return realtimes; } + /** + * Return all historicals. + */ public Map> getHistoricals() { return historicals; } + /** + * Returns all managed historicals. Managed historicals are historicals which can participate in segment assignment, + * drop or balancing. + */ public Map> getManagedHistoricals() { return managedHistoricals; @@ -99,14 +106,9 @@ public Iterable getTierNames() return historicals.keySet(); } - public NavigableSet getHistoricalsByTier(String tier) + public NavigableSet getManagedHistoricalsByTier(String tier) { - return historicals.get(tier); - } - - public List getAllServers() - { - return allServers; + return managedHistoricals.get(tier); } public List getAllManagedServers() @@ -114,29 +116,13 @@ public List getAllManagedServers() return allManagedServers; } - private List initAllServers() - { - final int historicalSize = historicals.values().stream().mapToInt(Collection::size).sum(); - final int realtimeSize = realtimes.size(); - final List allServers = new ArrayList<>(historicalSize + realtimeSize); - - historicals.values().forEach(allServers::addAll); - allServers.addAll(brokers); - allServers.addAll(realtimes); - return allServers; - } - private List initAllManagedServers() { - final int historicalSize = historicals.values().stream().mapToInt(Collection::size).sum(); + final int historicalSize = managedHistoricals.values().stream().mapToInt(Collection::size).sum(); final int realtimeSize = realtimes.size(); final List allManagedServers = new ArrayList<>(historicalSize + realtimeSize); - historicals.values() - .stream() - .flatMap(Collection::stream) - .filter(serverHolder -> !serverHolder.isUnmanaged()) - .forEach(allManagedServers::add); + managedHistoricals.values().forEach(allManagedServers::addAll); allManagedServers.addAll(brokers); allManagedServers.addAll(realtimes); return allManagedServers; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java 
b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java index c7846d56f046..b4a924dbf2c9 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java @@ -264,7 +264,6 @@ public long getAvailableSize() public boolean canLoadSegment(DataSegment segment) { return !isDecommissioning - && !isUnmanaged && !hasSegmentLoaded(segment.getId()) && getActionOnSegment(segment) == null && totalAssignmentsInRun < maxAssignmentsInRun diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java index cac81701be9f..8e99d15c07ef 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java @@ -120,7 +120,7 @@ private void cancelLoadsOnDecommissioningServers(DruidCluster cluster) { final AtomicInteger cancelledCount = new AtomicInteger(0); final List decommissioningServers - = cluster.getAllServers().stream() + = cluster.getAllManagedServers().stream() .filter(ServerHolder::isDecommissioning) .collect(Collectors.toList()); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java index 6049d914a134..654fe42b220b 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java @@ -275,7 +275,7 @@ private int updateReplicasInTier( } final SegmentStatusInTier segmentStatus = - new SegmentStatusInTier(segment, cluster.getHistoricalsByTier(tier)); + new SegmentStatusInTier(segment, 
cluster.getManagedHistoricalsByTier(tier)); // Cancel all moves in this tier if it does not need to have replicas if (shouldCancelMoves) { diff --git a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java index f13eaded408e..0571245c9853 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java @@ -105,7 +105,7 @@ public static class Tier public static final CoordinatorStat HISTORICAL_COUNT = CoordinatorStat.toDebugAndEmit("numHistorical", "tier/historical/count"); public static final CoordinatorStat CLONE_COUNT - = CoordinatorStat.toDebugAndEmit("numHistorical", "tier/historical/clone/count"); + = CoordinatorStat.toDebugAndEmit("numClones", "tier/historical/clone/count"); } public static class Compaction diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidClusterTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidClusterTest.java index 17a4de1d73fb..d47cf2fb7419 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidClusterTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidClusterTest.java @@ -98,7 +98,7 @@ public void testAdd() } @Test - public void testGetAllServers() + public void testGetAllManagedServers() { clusterBuilder.add(NEW_REALTIME); clusterBuilder.add(NEW_HISTORICAL); @@ -107,7 +107,7 @@ public void testGetAllServers() final Set expectedRealtimes = cluster.getRealtimes(); final Map> expectedHistoricals = cluster.getHistoricals(); - final Collection allServers = cluster.getAllServers(); + final Collection allServers = cluster.getAllManagedServers(); Assert.assertEquals(4, allServers.size()); Assert.assertTrue(allServers.containsAll(cluster.getRealtimes())); Assert.assertTrue(