From 950b03f5c37416272ac144835ee76fb9a70915ae Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 27 Feb 2019 15:44:54 -0800 Subject: [PATCH 01/14] rename maintenance mode to decommission --- docs/content/configuration/index.md | 8 +- .../coordinator/CoordinatorDynamicConfig.java | 89 ++++++++++--------- .../server/coordinator/DruidCoordinator.java | 4 +- .../server/coordinator/ServerHolder.java | 16 ++-- .../helper/DruidCoordinatorBalancer.java | 32 +++---- .../rules/BroadcastDistributionRule.java | 2 +- .../server/coordinator/rules/LoadRule.java | 10 +-- .../DruidCoordinatorBalancerTest.java | 34 +++---- .../rules/BroadcastDistributionRuleTest.java | 38 ++++---- .../coordinator/rules/LoadRuleTest.java | 24 ++--- .../http/CoordinatorDynamicConfigTest.java | 30 +++---- 11 files changed, 144 insertions(+), 143 deletions(-) diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index 22dd1aea59cd..bcac1e73590c 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -783,8 +783,8 @@ A sample Coordinator dynamic config JSON object is shown below: "replicationThrottleLimit": 10, "emitBalancingStats": false, "killDataSourceWhitelist": ["wikipedia", "testDatasource"], - "historicalNodesInMaintenance": ["localhost:8182", "localhost:8282"], - "nodesInMaintenancePriority": 7 + "decommissionNodes": ["localhost:8182", "localhost:8282"], + "decommissionPriority": 7 } ``` @@ -804,8 +804,8 @@ Issuing a GET request at the same URL will return the spec that is currently in |`killAllDataSources`|Send kill tasks for ALL dataSources if property `druid.coordinator.kill.on` is true. If this is set to true then `killDataSourceWhitelist` must not be specified or be empty list.|false| |`killPendingSegmentsSkipList`|List of dataSources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. 
This can be a list of comma-separated dataSources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. This parameter could be used to speed up segments loading process, especially if there are "slow" nodes in the cluster (with low loading speed) or if too much segments scheduled to be replicated to some particular node (faster loading could be preferred to better segments distribution). Desired value depends on segments loading speed, acceptable replication time and number of nodes. Value 1000 could be a start point for a rather big cluster. Default value is 0 (loading queue is unbounded) |0| -|`historicalNodesInMaintenance`| List of Historical nodes in maintenance mode. Coordinator doesn't assign new segments on those nodes and moves segments from the nodes according to a specified priority.|none| -|`nodesInMaintenancePriority`| Priority of segments from servers in maintenance. Coordinator takes ceil(maxSegmentsToMove * (priority / 10)) from servers in maitenance during balancing phase, i.e.:
0 - no segments from servers in maintenance will be processed during balancing
5 - 50% segments from servers in maintenance
10 - 100% segments from servers in maintenance
By leveraging the priority an operator can prevent general nodes from overload or decrease maitenance time instead.|7| +|`decommissionNodes`| List of 'decommissioned' historical nodes. The Coordinator doesn't assign new segments to these nodes and moves segments from the nodes at the rate specified by `decommissionPriority`.|none| +|`decommissionPriority`| Priority of how many 'move' operations will be spent towards 'decommissioning' servers by moving segments from them to non-decommissioned servers, instead of 'balancing' segments between servers. Coordinator takes ceil(maxSegmentsToMove * (priority / 10)) from decommissioned servers during balancing phase, i.e.:
0 - no segments from decommissioned servers will be processed during balancing
5 - 50% segments from decommissioned servers
10 - 100% segments from decommissioned servers
By leveraging the priority an operator can prevent general nodes from overload or decrease decommissioning time instead.|7| To view the audit history of Coordinator dynamic config issue a GET request to the URL - diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index de034af5dfc2..f01dcdb69e4e 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -56,8 +56,8 @@ public class CoordinatorDynamicConfig private final boolean emitBalancingStats; private final boolean killAllDataSources; private final Set killableDataSources; - private final Set historicalNodesInMaintenance; - private final int nodesInMaintenancePriority; + private final Set decommissionNodes; + private final int decommissionPriority; // The pending segments of the dataSources in this list are not killed. 
private final Set protectedPendingSegmentDatasources; @@ -88,8 +88,8 @@ public CoordinatorDynamicConfig( @JsonProperty("killAllDataSources") boolean killAllDataSources, @JsonProperty("killPendingSegmentsSkipList") Object protectedPendingSegmentDatasources, @JsonProperty("maxSegmentsInNodeLoadingQueue") int maxSegmentsInNodeLoadingQueue, - @JsonProperty("historicalNodesInMaintenance") Object historicalNodesInMaintenance, - @JsonProperty("nodesInMaintenancePriority") int nodesInMaintenancePriority + @JsonProperty("decommissionNodes") Object decommissionNodes, + @JsonProperty("decommissionPriority") int decommissionPriority ) { this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting; @@ -104,12 +104,12 @@ public CoordinatorDynamicConfig( this.killableDataSources = parseJsonStringOrArray(killableDataSources); this.protectedPendingSegmentDatasources = parseJsonStringOrArray(protectedPendingSegmentDatasources); this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; - this.historicalNodesInMaintenance = parseJsonStringOrArray(historicalNodesInMaintenance); + this.decommissionNodes = parseJsonStringOrArray(decommissionNodes); Preconditions.checkArgument( - nodesInMaintenancePriority >= 0 && nodesInMaintenancePriority <= 10, - "nodesInMaintenancePriority should be in range [0, 10]" + decommissionPriority >= 0 && decommissionPriority <= 10, + "decommissionPriority should be in range [0, 10]" ); - this.nodesInMaintenancePriority = nodesInMaintenancePriority; + this.decommissionPriority = decommissionPriority; if (this.killAllDataSources && !this.killableDataSources.isEmpty()) { throw new IAE("can't have killAllDataSources and non-empty killDataSourceWhitelist"); @@ -231,32 +231,33 @@ public int getMaxSegmentsInNodeLoadingQueue() } /** - * Historical nodes list in maintenance mode. Coordinator doesn't assign new segments on those nodes and moves + * List of historical nodes to 'decommission'. 
Coordinator doesn't assign new segments on those nodes and moves * segments from those nodes according to a specified priority. * * @return list of host:port entries */ @JsonProperty - public Set getHistoricalNodesInMaintenance() + public Set getDecommissionNodes() { - return historicalNodesInMaintenance; + return decommissionNodes; } /** - * Priority of segments from servers in maintenance. Coordinator takes ceil(maxSegmentsToMove * (priority / 10)) - * from servers in maitenance during balancing phase, i.e.: - * 0 - no segments from servers in maintenance will be processed during balancing - * 5 - 50% segments from servers in maintenance - * 10 - 100% segments from servers in maintenance - * By leveraging the priority an operator can prevent general nodes from overload or decrease maitenance time + * Priority of how many 'move' operations will be spent towards 'decommissioning' servers by moving segments from + * them to non-decommissioned servers, instead of 'balancing' segments between servers. + * Coordinator takes ceil(maxSegmentsToMove * (priority / 10)) from decommissioned servers during balancing phase: + * 0 - no segments from decommissioned servers will be processed during balancing + * 5 - 50% segments from decommissioned servers + * 10 - 100% segments from decommissioned servers + * By leveraging the priority an operator can prevent general nodes from overload or decrease 'decommissioning' time * instead. 
* * @return number in range [0, 10] */ @JsonProperty - public int getNodesInMaintenancePriority() + public int getDecommissionPriority() { - return nodesInMaintenancePriority; + return decommissionPriority; } @Override @@ -275,8 +276,8 @@ public String toString() ", killDataSourceWhitelist=" + killableDataSources + ", protectedPendingSegmentDatasources=" + protectedPendingSegmentDatasources + ", maxSegmentsInNodeLoadingQueue=" + maxSegmentsInNodeLoadingQueue + - ", historicalNodesInMaintenance=" + historicalNodesInMaintenance + - ", nodesInMaintenancePriority=" + nodesInMaintenancePriority + + ", decommissionNodes=" + decommissionNodes + + ", decommissionPriority=" + decommissionPriority + '}'; } @@ -328,10 +329,10 @@ public boolean equals(Object o) if (!Objects.equals(protectedPendingSegmentDatasources, that.protectedPendingSegmentDatasources)) { return false; } - if (!Objects.equals(historicalNodesInMaintenance, that.historicalNodesInMaintenance)) { + if (!Objects.equals(decommissionNodes, that.decommissionNodes)) { return false; } - return nodesInMaintenancePriority == that.nodesInMaintenancePriority; + return decommissionPriority == that.decommissionPriority; } @Override @@ -350,8 +351,8 @@ public int hashCode() maxSegmentsInNodeLoadingQueue, killableDataSources, protectedPendingSegmentDatasources, - historicalNodesInMaintenance, - nodesInMaintenancePriority + decommissionNodes, + decommissionPriority ); } @@ -372,7 +373,7 @@ public static class Builder private static final boolean DEFAULT_EMIT_BALANCING_STATS = false; private static final boolean DEFAULT_KILL_ALL_DATA_SOURCES = false; private static final int DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE = 0; - private static final int DEFAULT_MAINTENANCE_MODE_SEGMENTS_PRIORITY = 7; + private static final int DEFAULT_DECOMMISSIONING_PRIORITY = 7; private Long millisToWaitBeforeDeleting; private Long mergeBytesLimit; @@ -386,8 +387,8 @@ public static class Builder private Boolean killAllDataSources; private Object 
killPendingSegmentsSkipList; private Integer maxSegmentsInNodeLoadingQueue; - private Object maintenanceList; - private Integer maintenanceModeSegmentsPriority; + private Object decommissionNodes; + private Integer decommissionPriority; public Builder() { @@ -407,8 +408,8 @@ public Builder( @JsonProperty("killAllDataSources") @Nullable Boolean killAllDataSources, @JsonProperty("killPendingSegmentsSkipList") @Nullable Object killPendingSegmentsSkipList, @JsonProperty("maxSegmentsInNodeLoadingQueue") @Nullable Integer maxSegmentsInNodeLoadingQueue, - @JsonProperty("historicalNodesInMaintenance") @Nullable Object maintenanceList, - @JsonProperty("nodesInMaintenancePriority") @Nullable Integer maintenanceModeSegmentsPriority + @JsonProperty("decommissionNodes") @Nullable Object decommissionNodes, + @JsonProperty("decommissionPriority") @Nullable Integer decommissionPriority ) { this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting; @@ -423,8 +424,8 @@ public Builder( this.killableDataSources = killableDataSources; this.killPendingSegmentsSkipList = killPendingSegmentsSkipList; this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; - this.maintenanceList = maintenanceList; - this.maintenanceModeSegmentsPriority = maintenanceModeSegmentsPriority; + this.decommissionNodes = decommissionNodes; + this.decommissionPriority = decommissionPriority; } public Builder withMillisToWaitBeforeDeleting(long millisToWaitBeforeDeleting) @@ -493,15 +494,15 @@ public Builder withMaxSegmentsInNodeLoadingQueue(int maxSegmentsInNodeLoadingQue return this; } - public Builder withMaintenanceList(Set list) + public Builder withDecommissionNodes(Set decommissioned) { - this.maintenanceList = list; + this.decommissionNodes = decommissioned; return this; } - public Builder withMaintenanceModeSegmentsPriority(Integer priority) + public Builder withDecommissionPriority(Integer priority) { - this.maintenanceModeSegmentsPriority = priority; + this.decommissionPriority = priority; 
return this; } @@ -522,10 +523,10 @@ public CoordinatorDynamicConfig build() maxSegmentsInNodeLoadingQueue == null ? DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE : maxSegmentsInNodeLoadingQueue, - maintenanceList, - maintenanceModeSegmentsPriority == null - ? DEFAULT_MAINTENANCE_MODE_SEGMENTS_PRIORITY - : maintenanceModeSegmentsPriority + decommissionNodes, + decommissionPriority == null + ? DEFAULT_DECOMMISSIONING_PRIORITY + : decommissionPriority ); } @@ -548,10 +549,10 @@ public CoordinatorDynamicConfig build(CoordinatorDynamicConfig defaults) maxSegmentsInNodeLoadingQueue == null ? defaults.getMaxSegmentsInNodeLoadingQueue() : maxSegmentsInNodeLoadingQueue, - maintenanceList == null ? defaults.getHistoricalNodesInMaintenance() : maintenanceList, - maintenanceModeSegmentsPriority == null - ? defaults.getNodesInMaintenancePriority() - : maintenanceModeSegmentsPriority + decommissionNodes == null ? defaults.getDecommissionNodes() : decommissionNodes, + decommissionPriority == null + ? defaults.getDecommissionPriority() + : decommissionPriority ); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java index b92effc8651f..9653ca6f2881 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java @@ -694,7 +694,7 @@ public CoordinatorHistoricalManagerRunnable(final int startingLeaderCounter) } // Find all historical servers, group them by subType and sort by ascending usage - Set nodesInMaintenance = params.getCoordinatorDynamicConfig().getHistoricalNodesInMaintenance(); + Set decommissioned = params.getCoordinatorDynamicConfig().getDecommissionNodes(); final DruidCluster cluster = new DruidCluster(); for (ImmutableDruidServer server : servers) { if (!loadManagementPeons.containsKey(server.getName())) { @@ -709,7 +709,7 @@ public 
CoordinatorHistoricalManagerRunnable(final int startingLeaderCounter) new ServerHolder( server, loadManagementPeons.get(server.getName()), - nodesInMaintenance.contains(server.getHost()) + decommissioned.contains(server.getHost()) ) ); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java index c7d7a86c825c..c9fc4c188a86 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java @@ -32,18 +32,18 @@ public class ServerHolder implements Comparable private static final Logger log = new Logger(ServerHolder.class); private final ImmutableDruidServer server; private final LoadQueuePeon peon; - private final boolean inMaintenance; + private final boolean isDecommissioned; public ServerHolder(ImmutableDruidServer server, LoadQueuePeon peon) { this(server, peon, false); } - public ServerHolder(ImmutableDruidServer server, LoadQueuePeon peon, boolean inMaintenance) + public ServerHolder(ImmutableDruidServer server, LoadQueuePeon peon, boolean isDecommissioned) { this.server = server; this.peon = peon; - this.inMaintenance = inMaintenance; + this.isDecommissioned = isDecommissioned; } public ImmutableDruidServer getServer() @@ -82,14 +82,14 @@ public double getPercentUsed() } /** - * Historical nodes can be placed in maintenance mode, which instructs Coordinator to move segments from them - * according to a specified priority. The mechanism allows to drain segments from nodes which are planned for + * Historical nodes can be 'decommissioned', which instructs Coordinator to move segments from them + * according to a specified priority. The mechanism allows draining segments from nodes which are planned for * replacement. 
- * @return true if the node is in maitenance mode + * @return true if the node is being decommissioned */ - public boolean isInMaintenance() + public boolean isDecommissioned() { - return inMaintenance; + return isDecommissioned; } public long getAvailableSize() diff --git a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java index 10499ac807a4..99cd0289d0c3 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java @@ -107,25 +107,25 @@ private void balanceTier( } /* - Take as much segments from maintenance servers as priority allows and find the best location for them on + Take as many segments from decommissioned servers as priority allows and find the best location for them on available servers. After that, balance segments within available servers pool. */ Map> partitions = - servers.stream().collect(Collectors.partitioningBy(ServerHolder::isInMaintenance)); - final List maintenanceServers = partitions.get(true); + servers.stream().collect(Collectors.partitioningBy(ServerHolder::isDecommissioned)); + final List decommissionedServers = partitions.get(true); final List availableServers = partitions.get(false); log.info( - "Found %d servers in maintenance, %d available servers servers", - maintenanceServers.size(), + "Found %d decommissioned servers, %d available servers", + decommissionedServers.size(), availableServers.size() ); - if (maintenanceServers.isEmpty()) { + if (decommissionedServers.isEmpty()) { if (availableServers.size() <= 1) { log.info("[%s]: %d available servers servers found. Cannot balance.", tier, availableServers.size()); } } else if (availableServers.isEmpty()) { - log.info("[%s]: no available servers servers found during maintenance. 
Cannot balance.", tier); + log.info("[%s]: no available servers found during decommissioning. Cannot balance.", tier); } int numSegments = 0; @@ -139,18 +139,18 @@ private void balanceTier( } final int maxSegmentsToMove = Math.min(params.getCoordinatorDynamicConfig().getMaxSegmentsToMove(), numSegments); - int priority = params.getCoordinatorDynamicConfig().getNodesInMaintenancePriority(); - int maxMaintenanceSegmentsToMove = (int) Math.ceil(maxSegmentsToMove * priority / 10.0); - log.info("Processing %d segments from servers in maintenance mode", maxMaintenanceSegmentsToMove); - Pair maintenanceResult = - balanceServers(params, maintenanceServers, availableServers, maxMaintenanceSegmentsToMove); - int maxGeneralSegmentsToMove = maxSegmentsToMove - maintenanceResult.lhs; - log.info("Processing %d segments from servers in general mode", maxGeneralSegmentsToMove); + int priority = params.getCoordinatorDynamicConfig().getDecommissionPriority(); + int maxDecommissionedSegmentsToMove = (int) Math.ceil(maxSegmentsToMove * priority / 10.0); + log.info("Processing %d segments from decommissioned servers", maxDecommissionedSegmentsToMove); + Pair decommissionedResult = + balanceServers(params, decommissionedServers, availableServers, maxDecommissionedSegmentsToMove); + int maxGeneralSegmentsToMove = maxSegmentsToMove - decommissionedResult.lhs; + log.info("Processing %d segments for balancing", maxGeneralSegmentsToMove); Pair generalResult = balanceServers(params, availableServers, availableServers, maxGeneralSegmentsToMove); - int moved = generalResult.lhs + maintenanceResult.lhs; - int unmoved = generalResult.rhs + maintenanceResult.rhs; + int moved = generalResult.lhs + decommissionedResult.lhs; + int unmoved = generalResult.rhs + decommissionedResult.rhs; if (unmoved == maxSegmentsToMove) { // Cluster should be alive and constantly adjusting log.info("No good moves found in tier [%s]", tier); diff --git 
a/server/src/main/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRule.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRule.java index b28f569e55b4..b2d031b36408 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRule.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRule.java @@ -46,7 +46,7 @@ public CoordinatorStats run(DruidCoordinator coordinator, DruidCoordinatorRuntim } else { params.getDruidCluster().getAllServers().forEach( eachHolder -> { - if (!eachHolder.isInMaintenance() + if (!eachHolder.isDecommissioned() && colocatedDataSources.stream() .anyMatch(source -> eachHolder.getServer().getDataSource(source) != null)) { loadServerHolders.add(eachHolder); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/LoadRule.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/LoadRule.java index 3de93cfdaf99..fd8f6dab0d66 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/LoadRule.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/LoadRule.java @@ -153,8 +153,8 @@ private static List getFilteredHolders( log.makeAlert("Tier[%s] has no servers! 
Check your cluster configuration!", tier).emit(); return Collections.emptyList(); } - Predicate isNotInMaintenance = s -> !s.isInMaintenance(); - return queue.stream().filter(isNotInMaintenance.and(predicate)).collect(Collectors.toList()); + Predicate isNotDecommissioned = s -> !s.isDecommissioned(); + return queue.stream().filter(isNotDecommissioned.and(predicate)).collect(Collectors.toList()); } /** @@ -385,12 +385,12 @@ private static int dropForTier( Map> holders = holdersInTier.stream() .filter(s -> s.isServingSegment(segment)) .collect(Collectors.partitioningBy( - ServerHolder::isInMaintenance, + ServerHolder::isDecommissioned, Collectors.toCollection(TreeSet::new) )); - TreeSet maintenanceServers = holders.get(true); + TreeSet decommissionedServers = holders.get(true); TreeSet availableServers = holders.get(false); - int left = dropSegmentFromServers(balancerStrategy, segment, maintenanceServers, numToDrop); + int left = dropSegmentFromServers(balancerStrategy, segment, decommissionedServers, numToDrop); if (left > 0) { left = dropSegmentFromServers(balancerStrategy, segment, availableServers, left); } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java index 67521e397e00..282719862665 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java @@ -201,14 +201,14 @@ public void testMoveToEmptyServerBalancer() /** * Server 1 has 2 segments. - * Server 2 (maintenance) has 2 segments. + * Server 2 (decommissioned) has 2 segments. * Server 3 is empty. - * Maintenance has priority 7. + * Decommission has priority 7. * Max segments to move is 3. * 2 (of 2) segments should be moved from Server 2 and 1 (of 2) from Server 1. 
*/ @Test - public void testMoveMaintenancePriority() + public void testMoveDecommissionPriority() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, Arrays.asList(segment1, segment2)); mockDruidServer(druidServer2, "2", "normal", 30L, 100L, Arrays.asList(segment3, segment4)); @@ -239,8 +239,8 @@ public void testMoveMaintenancePriority() .withDynamicConfigs( CoordinatorDynamicConfig.builder() .withMaxSegmentsToMove(3) - .withMaintenanceModeSegmentsPriority(6) - .build() // ceil(3 * 0.6) = 2 segments from servers in maintenance + .withDecommissionPriority(6) + .build() // ceil(3 * 0.6) = 2 segments from decommissioned servers ) .withBalancerStrategy(strategy) .build(); @@ -251,7 +251,7 @@ public void testMoveMaintenancePriority() } @Test - public void testZeroMaintenancePriority() + public void testZeroDecommissionPriority() { DruidCoordinatorRuntimeParams params = setupParamsForMaintenancePriority(0); params = new DruidCoordinatorBalancerTester(coordinator).run(params); @@ -260,7 +260,7 @@ public void testZeroMaintenancePriority() } @Test - public void testMaxMaintenancePriority() + public void testMaxDecommissionPriority() { DruidCoordinatorRuntimeParams params = setupParamsForMaintenancePriority(10); params = new DruidCoordinatorBalancerTester(coordinator).run(params); @@ -269,10 +269,10 @@ public void testMaxMaintenancePriority() } /** - * Should balance segments as usual (ignoring priority) with empty maintenanceList. + * Should balance segments as usual (ignoring priority) with empty decommissionedList. 
*/ @Test - public void testMoveMaintenancePriorityWithNoMaintenance() + public void testMoveDecommissionPriorityWithNoDecommission() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, Arrays.asList(segment1, segment2)); mockDruidServer(druidServer2, "2", "normal", 0L, 100L, Arrays.asList(segment3, segment4)); @@ -300,7 +300,7 @@ public void testMoveMaintenancePriorityWithNoMaintenance() ImmutableList.of(false, false, false) ) .withDynamicConfigs( - CoordinatorDynamicConfig.builder().withMaxSegmentsToMove(3).withMaintenanceModeSegmentsPriority(9).build() + CoordinatorDynamicConfig.builder().withMaxSegmentsToMove(3).withDecommissionPriority(9).build() ) .withBalancerStrategy(strategy) .build(); @@ -311,10 +311,10 @@ public void testMoveMaintenancePriorityWithNoMaintenance() } /** - * Shouldn't move segments to a server in maintenance mode. + * Shouldn't move segments to a decommissioned server. */ @Test - public void testMoveToServerInMaintenance() + public void testMoveToDecommissionedServer() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, segments); mockDruidServer(druidServer2, "2", "normal", 0L, 100L, Collections.emptyList()); @@ -347,7 +347,7 @@ public void testMoveToServerInMaintenance() } @Test - public void testMoveFromServerInMaintenance() + public void testMoveFromDecommissionedServer() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, segments); mockDruidServer(druidServer2, "2", "normal", 0L, 100L, Collections.emptyList()); @@ -512,7 +512,7 @@ private DruidCoordinatorRuntimeParams.Builder defaultRuntimeParamsBuilder( private DruidCoordinatorRuntimeParams.Builder defaultRuntimeParamsBuilder( List druidServers, List peons, - List maintenance + List decommissioned ) { return DruidCoordinatorRuntimeParams @@ -524,7 +524,7 @@ private DruidCoordinatorRuntimeParams.Builder defaultRuntimeParamsBuilder( "normal", IntStream .range(0, druidServers.size()) - .mapToObj(i -> new ServerHolder(druidServers.get(i), peons.get(i), 
maintenance.get(i))) + .mapToObj(i -> new ServerHolder(druidServers.get(i), peons.get(i), decommissioned.get(i))) .collect(Collectors.toSet()) ) ) @@ -632,7 +632,7 @@ private DruidCoordinatorRuntimeParams setupParamsForMaintenancePriority(int prio mockCoordinator(coordinator); - // either maintenance servers list or general ones (ie servers list is [2] or [1, 3]) + // either decommissioned servers list or general ones (ie servers list is [2] or [1, 3]) BalancerStrategy strategy = EasyMock.createMock(BalancerStrategy.class); EasyMock.expect(strategy.pickSegmentToMove(ImmutableList.of(new ServerHolder(druidServer2, peon2, true)))) .andReturn(new BalancerSegmentHolder(druidServer2, segment2)); @@ -651,7 +651,7 @@ private DruidCoordinatorRuntimeParams setupParamsForMaintenancePriority(int prio .withDynamicConfigs( CoordinatorDynamicConfig.builder() .withMaxSegmentsToMove(1) - .withMaintenanceModeSegmentsPriority(priority) + .withDecommissionPriority(priority) .build() ) .withBalancerStrategy(strategy) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java index 359fb6895cce..f4149393b982 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java @@ -59,8 +59,8 @@ public class BroadcastDistributionRuleTest private DataSegment smallSegment; private DruidCluster secondCluster; private ServerHolder generalServer; - private ServerHolder maintenanceServer2; - private ServerHolder maintenanceServer1; + private ServerHolder decommissionedServer2; + private ServerHolder decommissionedServer1; @Before public void setUp() @@ -214,9 +214,9 @@ public void setUp() new LoadQueuePeonTester() ); - maintenanceServer1 = new ServerHolder( + decommissionedServer1 = new ServerHolder( 
new DruidServer( - "maintenance1", + "decommissioned1", "host2", null, 100, @@ -229,9 +229,9 @@ public void setUp() true ); - maintenanceServer2 = new ServerHolder( + decommissionedServer2 = new ServerHolder( new DruidServer( - "maintenance2", + "decommissioned2", "host3", null, 100, @@ -268,8 +268,8 @@ public void setUp() "tier1", Stream.of( generalServer, - maintenanceServer1, - maintenanceServer2 + decommissionedServer1, + decommissionedServer2 ).collect(Collectors.toCollection(() -> new TreeSet<>(Collections.reverseOrder()))) ) ); @@ -315,19 +315,19 @@ public void testBroadcastToSingleDataSource() /** * Servers: - * name | segments - * -------------+-------------- - * general | large segment - * maintenance1 | small segment - * maintenance2 | large segment + * name | segments + * ----------------+-------------- + * general | large segment + * decommissioned1 | small segment + * decommissioned2 | large segment * * After running the rule for the small segment: - * general | large & small segments - * maintenance1 | - * maintenance2 | large segment + * general | large & small segments + * decommissioned1 | + * decommissioned2 | large segment */ @Test - public void testBroadcastWithMaintenance() + public void testBroadcastDecommissioned() { final ForeverBroadcastDistributionRule rule = new ForeverBroadcastDistributionRule(ImmutableList.of("large_source")); @@ -349,8 +349,8 @@ public void testBroadcastWithMaintenance() assertEquals(false, stats.hasPerTierStats()); assertEquals(1, generalServer.getPeon().getSegmentsToLoad().size()); - assertEquals(1, maintenanceServer1.getPeon().getSegmentsToDrop().size()); - assertEquals(0, maintenanceServer2.getPeon().getSegmentsToLoad().size()); + assertEquals(1, decommissionedServer1.getPeon().getSegmentsToDrop().size()); + assertEquals(0, decommissionedServer2.getPeon().getSegmentsToLoad().size()); } @Test diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java 
b/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java index 9a9bcb170a75..d1193814404e 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java @@ -687,11 +687,11 @@ public void testMaxLoadingQueueSize() } /** - * 2 servers in different tiers, the first is in maitenance mode. - * Should not load a segment to the server in maintenance mode. + * 2 servers in different tiers, the first is decommissioned. + * Should not load a segment to the server that is decommissioned. */ @Test - public void testLoadDuringMaitenance() + public void testLoadDecommissioned() { final LoadQueuePeon mockPeon1 = createEmptyPeon(); final LoadQueuePeon mockPeon2 = createOneCallPeonMock(); @@ -737,8 +737,8 @@ public void testLoadDuringMaitenance() } /** - * 2 tiers, 2 servers each, 1 server of the second tier is in maintenance. - * Should not load a segment to the server in maintenance mode. + * 2 tiers, 2 servers each, 1 server of the second tier is decommissioned. + * Should not load a segment to the server that is decommissioned. */ @Test public void testLoadReplicaDuringMaitenance() @@ -796,11 +796,11 @@ public void testLoadReplicaDuringMaitenance() } /** - * 2 servers with a segment, one server in maintenance mode. + * 2 servers with a segment, one server decommissioned. * Should drop a segment from both. */ @Test - public void testDropDuringMaintenance() + public void testDropDuringDecommission() { final LoadQueuePeon mockPeon = createEmptyPeon(); mockPeon.dropSegment(EasyMock.anyObject(), EasyMock.anyObject()); @@ -859,12 +859,12 @@ public void testDropDuringMaintenance() /** * 3 servers hosting 3 replicas of the segment. - * 1 servers is in maitenance. + * 1 server is decommissioned. * 1 replica is redundant. - * Should drop from the server in maintenance. + * Should drop from the decommissioned server. 
*/ @Test - public void testRedundantReplicaDropDuringMaintenance() + public void testRedundantReplicaDropDuringDecommissioned() { final LoadQueuePeon mockPeon1 = new LoadQueuePeonTester(); final LoadQueuePeon mockPeon2 = new LoadQueuePeonTester(); @@ -1019,12 +1019,12 @@ private static LoadQueuePeon createOneCallPeonMock() return mockPeon2; } - private static ServerHolder createServerHolder(String tier, LoadQueuePeon mockPeon1, boolean maintenance) + private static ServerHolder createServerHolder(String tier, LoadQueuePeon mockPeon1, boolean decommission) { return new ServerHolder( createServer(tier).toImmutableDruidServer(), mockPeon1, - maintenance + decommission ); } } diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index e0979252012c..6d413585cb81 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -50,8 +50,8 @@ public void testSerde() throws Exception + " \"emitBalancingStats\": true,\n" + " \"killDataSourceWhitelist\": [\"test1\",\"test2\"],\n" + " \"maxSegmentsInNodeLoadingQueue\": 1,\n" - + " \"historicalNodesInMaintenance\": [\"host1\", \"host2\"],\n" - + " \"nodesInMaintenancePriority\": 9\n" + + " \"decommissionNodes\": [\"host1\", \"host2\"],\n" + + " \"decommissionPriority\": 9\n" + "}\n"; CoordinatorDynamicConfig actual = mapper.readValue( @@ -63,19 +63,19 @@ public void testSerde() throws Exception ), CoordinatorDynamicConfig.class ); - ImmutableSet maintenance = ImmutableSet.of("host1", "host2"); + ImmutableSet decommissioned = ImmutableSet.of("host1", "host2"); ImmutableSet whitelist = ImmutableSet.of("test1", "test2"); - assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, maintenance, 9); + assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, 
decommissioned, 9); - actual = CoordinatorDynamicConfig.builder().withMaintenanceList(ImmutableSet.of("host1")).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissionNodes(ImmutableSet.of("host1")).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 9); - actual = CoordinatorDynamicConfig.builder().withMaintenanceModeSegmentsPriority(5).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissionPriority(5).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 5); } @Test - public void testMaintenanceParametersBackwardCompatibility() throws Exception + public void testDecommissionParametersBackwardCompatibility() throws Exception { String jsonStr = "{\n" + " \"millisToWaitBeforeDeleting\": 1,\n" @@ -99,14 +99,14 @@ public void testMaintenanceParametersBackwardCompatibility() throws Exception ), CoordinatorDynamicConfig.class ); - ImmutableSet maintenance = ImmutableSet.of(); + ImmutableSet decommissioned = ImmutableSet.of(); ImmutableSet whitelist = ImmutableSet.of("test1", "test2"); - assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, maintenance, 0); + assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, decommissioned, 0); - actual = CoordinatorDynamicConfig.builder().withMaintenanceList(ImmutableSet.of("host1")).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissionNodes(ImmutableSet.of("host1")).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 0); - actual = CoordinatorDynamicConfig.builder().withMaintenanceModeSegmentsPriority(5).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissionPriority(5).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 5); } @@ -257,8 +257,8 @@ private void assertConfig( Set 
expectedKillableDatasources, boolean expectedKillAllDataSources, int expectedMaxSegmentsInNodeLoadingQueue, - Set maintenanceList, - int maintenancePriority + Set decommissioned, + int decommissionPriority ) { Assert.assertEquals(expectedMillisToWaitBeforeDeleting, config.getMillisToWaitBeforeDeleting()); @@ -272,7 +272,7 @@ private void assertConfig( Assert.assertEquals(expectedKillableDatasources, config.getKillableDataSources()); Assert.assertEquals(expectedKillAllDataSources, config.isKillAllDataSources()); Assert.assertEquals(expectedMaxSegmentsInNodeLoadingQueue, config.getMaxSegmentsInNodeLoadingQueue()); - Assert.assertEquals(maintenanceList, config.getHistoricalNodesInMaintenance()); - Assert.assertEquals(maintenancePriority, config.getNodesInMaintenancePriority()); + Assert.assertEquals(decommissioned, config.getDecommissionNodes()); + Assert.assertEquals(decommissionPriority, config.getDecommissionPriority()); } } From ab31ac52293f7a9924893fc75ac4150d4708c411 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 28 Feb 2019 13:59:34 -0800 Subject: [PATCH 02/14] review changes --- docs/content/configuration/index.md | 8 +- .../coordinator/CoordinatorDynamicConfig.java | 91 ++++++++++--------- .../server/coordinator/DruidCoordinator.java | 4 +- .../server/coordinator/ServerHolder.java | 16 ++-- .../helper/DruidCoordinatorBalancer.java | 56 ++++++------ .../rules/BroadcastDistributionRule.java | 2 +- .../server/coordinator/rules/LoadRule.java | 14 +-- .../DruidCoordinatorBalancerTest.java | 40 ++++---- .../rules/BroadcastDistributionRuleTest.java | 38 ++++---- .../coordinator/rules/LoadRuleTest.java | 20 ++-- .../http/CoordinatorDynamicConfigTest.java | 24 ++--- 11 files changed, 158 insertions(+), 155 deletions(-) diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index bcac1e73590c..d31572224db6 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -783,8 +783,8 @@ A sample 
Coordinator dynamic config JSON object is shown below: "replicationThrottleLimit": 10, "emitBalancingStats": false, "killDataSourceWhitelist": ["wikipedia", "testDatasource"], - "decommissionNodes": ["localhost:8182", "localhost:8282"], - "decommissionPriority": 7 + "decommissioningNodes": ["localhost:8182", "localhost:8282"], + "decommissioningVelocity": 7 } ``` @@ -804,8 +804,8 @@ Issuing a GET request at the same URL will return the spec that is currently in |`killAllDataSources`|Send kill tasks for ALL dataSources if property `druid.coordinator.kill.on` is true. If this is set to true then `killDataSourceWhitelist` must not be specified or be empty list.|false| |`killPendingSegmentsSkipList`|List of dataSources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. This parameter could be used to speed up segments loading process, especially if there are "slow" nodes in the cluster (with low loading speed) or if too much segments scheduled to be replicated to some particular node (faster loading could be preferred to better segments distribution). Desired value depends on segments loading speed, acceptable replication time and number of nodes. Value 1000 could be a start point for a rather big cluster. Default value is 0 (loading queue is unbounded) |0| -|`decommissionNodes`| List of 'decommissioned' historical nodes. The Coordinator doesn't assign new segments to these nodes and moves segments from the nodes at the rate specified by `decommissionPriority`.|none| -|`decommissionPriority`| Priority of how many 'move' operations will be spent towards 'decommissioning' servers by moving segments from them to non-decommissioned servers, instead of 'balancing' segments between servers. 
Coordinator takes ceil(maxSegmentsToMove * (priority / 10)) from servers in maitenance during balancing phase, i.e.:
0 - no segments from decommissioned servers will be processed during balancing
5 - 50% segments from decommissioned servers
10 - 100% segments from decommissioned servers
By leveraging the priority an operator can prevent general nodes from overload or decrease decommissioning time instead.|7| +|`decommissioningNodes`| List of 'decommissioning' historical nodes. The Coordinator doesn't assign new segments to these nodes and moves segments from the nodes at the rate specified by `decommissioningVelocity`.|none| +|`decommissioningVelocity`| Decommissioning velocity indicates what proportion of balancer 'move' operations out of `maxSegmentsToMove` total will be spent towards 'decommissioning' servers by moving their segments to active servers, instead of normal 'balancing' moves. Coordinator takes ceil(maxSegmentsToMove * (velocity / 10)) from servers in maintenance during balancing phase, i.e.:
0 - no segments from decommissioning servers will be processed during balancing
5 - 50% segments from decommissioning servers
10 - 100% segments from decommissioning servers
By leveraging the velocity an operator can prevent general nodes from overload or decrease decommissioning time instead.|7| To view the audit history of Coordinator dynamic config issue a GET request to the URL - diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index f01dcdb69e4e..e1fc74541247 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -56,8 +56,8 @@ public class CoordinatorDynamicConfig private final boolean emitBalancingStats; private final boolean killAllDataSources; private final Set killableDataSources; - private final Set decommissionNodes; - private final int decommissionPriority; + private final Set decommissioningNodes; + private final int decommissioningVelocity; // The pending segments of the dataSources in this list are not killed. 
private final Set protectedPendingSegmentDatasources; @@ -88,8 +88,8 @@ public CoordinatorDynamicConfig( @JsonProperty("killAllDataSources") boolean killAllDataSources, @JsonProperty("killPendingSegmentsSkipList") Object protectedPendingSegmentDatasources, @JsonProperty("maxSegmentsInNodeLoadingQueue") int maxSegmentsInNodeLoadingQueue, - @JsonProperty("decommissionNodes") Object decommissionNodes, - @JsonProperty("decommissionPriority") int decommissionPriority + @JsonProperty("decommissioningNodes") Object decommissioningNodes, + @JsonProperty("decommissioningVelocity") int decommissioningVelocity ) { this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting; @@ -104,12 +104,12 @@ public CoordinatorDynamicConfig( this.killableDataSources = parseJsonStringOrArray(killableDataSources); this.protectedPendingSegmentDatasources = parseJsonStringOrArray(protectedPendingSegmentDatasources); this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; - this.decommissionNodes = parseJsonStringOrArray(decommissionNodes); + this.decommissioningNodes = parseJsonStringOrArray(decommissioningNodes); Preconditions.checkArgument( - decommissionPriority >= 0 && decommissionPriority <= 10, - "decommissionPriority should be in range [0, 10]" + decommissioningVelocity >= 0 && decommissioningVelocity <= 10, + "decommissioningVelocity should be in range [0, 10]" ); - this.decommissionPriority = decommissionPriority; + this.decommissioningVelocity = decommissioningVelocity; if (this.killAllDataSources && !this.killableDataSources.isEmpty()) { throw new IAE("can't have killAllDataSources and non-empty killDataSourceWhitelist"); @@ -232,32 +232,33 @@ public int getMaxSegmentsInNodeLoadingQueue() /** * List of historical nodes to 'decommission'. Coordinator doesn't assign new segments on those nodes and moves - * segments from those nodes according to a specified priority. + * segments from those nodes according to a specified velocity. 
* * @return list of host:port entries */ @JsonProperty - public Set getDecommissionNodes() + public Set getDecommissioningNodes() { - return decommissionNodes; + return decommissioningNodes; } /** - * Priority of how many of 'move' operations will be spent towards 'decommissioning' servers by moving segments from - * them to non-decommissioned servers, instead of 'balancing' segments between servers. - * Coordinator takes ceil(maxSegmentsToMove * (priority / 10)) from servers in maitenance during balancing phase: - * 0 - no segments from decommissioned servers will be processed during balancing - * 5 - 50% segments from decommissioned servers - * 10 - 100% segments from decommissioned servers - * By leveraging the priority an operator can prevent general nodes from overload or decrease 'decommissioning' time + * Decommissioning velocity indicates what proportion of balancer 'move' operations out of + * {@link CoordinatorDynamicConfig#getMaxSegmentsToMove()} total will be spent towards 'decommissioning' servers + * by moving their segments to active servers, instead of normal 'balancing' segments between servers. + * Coordinator takes ceil(maxSegmentsToMove * (velocity / 10)) from servers in maintenance during balancing phase: + * 0 - no segments from decommissioning servers will be processed during balancing + * 5 - 50% segments from decommissioning servers + * 10 - 100% segments from decommissioning servers + * By leveraging the velocity an operator can prevent general nodes from overload or decrease 'decommissioning' time * instead. 
* * @return number in range [0, 10] */ @JsonProperty - public int getDecommissionPriority() + public int getDecommissioningVelocity() { - return decommissionPriority; + return decommissioningVelocity; } @Override @@ -276,8 +277,8 @@ public String toString() ", killDataSourceWhitelist=" + killableDataSources + ", protectedPendingSegmentDatasources=" + protectedPendingSegmentDatasources + ", maxSegmentsInNodeLoadingQueue=" + maxSegmentsInNodeLoadingQueue + - ", decommissionNodes=" + decommissionNodes + - ", decommissionPriority=" + decommissionPriority + + ", decommissioningNodes=" + decommissioningNodes + + ", decommissioningVelocity=" + decommissioningVelocity + '}'; } @@ -329,10 +330,10 @@ public boolean equals(Object o) if (!Objects.equals(protectedPendingSegmentDatasources, that.protectedPendingSegmentDatasources)) { return false; } - if (!Objects.equals(decommissionNodes, that.decommissionNodes)) { + if (!Objects.equals(decommissioningNodes, that.decommissioningNodes)) { return false; } - return decommissionPriority == that.decommissionPriority; + return decommissioningVelocity == that.decommissioningVelocity; } @Override @@ -351,8 +352,8 @@ public int hashCode() maxSegmentsInNodeLoadingQueue, killableDataSources, protectedPendingSegmentDatasources, - decommissionNodes, - decommissionPriority + decommissioningNodes, + decommissioningVelocity ); } @@ -373,7 +374,7 @@ public static class Builder private static final boolean DEFAULT_EMIT_BALANCING_STATS = false; private static final boolean DEFAULT_KILL_ALL_DATA_SOURCES = false; private static final int DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE = 0; - private static final int DEFAULT_DECOMMISSIONING_PRIORITY = 7; + private static final int DEFAULT_DECOMMISSIONING_VELOCITY = 7; private Long millisToWaitBeforeDeleting; private Long mergeBytesLimit; @@ -387,8 +388,8 @@ public static class Builder private Boolean killAllDataSources; private Object killPendingSegmentsSkipList; private Integer 
maxSegmentsInNodeLoadingQueue; - private Object decommissionNodes; - private Integer decommissionPriority; + private Object decommissioningNodes; + private Integer decommissioningVelocity; public Builder() { @@ -408,8 +409,8 @@ public Builder( @JsonProperty("killAllDataSources") @Nullable Boolean killAllDataSources, @JsonProperty("killPendingSegmentsSkipList") @Nullable Object killPendingSegmentsSkipList, @JsonProperty("maxSegmentsInNodeLoadingQueue") @Nullable Integer maxSegmentsInNodeLoadingQueue, - @JsonProperty("decommissionNodes") @Nullable Object decommissionNodes, - @JsonProperty("decommissionPriority") @Nullable Integer decommissionPriority + @JsonProperty("decommissioningNodes") @Nullable Object decommissioningNodes, + @JsonProperty("decommissioningVelocity") @Nullable Integer decommissioningVelocity ) { this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting; @@ -424,8 +425,8 @@ public Builder( this.killableDataSources = killableDataSources; this.killPendingSegmentsSkipList = killPendingSegmentsSkipList; this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; - this.decommissionNodes = decommissionNodes; - this.decommissionPriority = decommissionPriority; + this.decommissioningNodes = decommissioningNodes; + this.decommissioningVelocity = decommissioningVelocity; } public Builder withMillisToWaitBeforeDeleting(long millisToWaitBeforeDeleting) @@ -494,15 +495,15 @@ public Builder withMaxSegmentsInNodeLoadingQueue(int maxSegmentsInNodeLoadingQue return this; } - public Builder withDecommissionNodes(Set decommissioned) + public Builder withDecommissionNodes(Set decommissioning) { - this.decommissionNodes = decommissioned; + this.decommissioningNodes = decommissioning; return this; } - public Builder withDecommissionPriority(Integer priority) + public Builder withDecommissionVelocity(Integer velocity) { - this.decommissionPriority = priority; + this.decommissioningVelocity = velocity; return this; } @@ -523,10 +524,10 @@ public 
CoordinatorDynamicConfig build() maxSegmentsInNodeLoadingQueue == null ? DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE : maxSegmentsInNodeLoadingQueue, - decommissionNodes, - decommissionPriority == null - ? DEFAULT_DECOMMISSIONING_PRIORITY - : decommissionPriority + decommissioningNodes, + decommissioningVelocity == null + ? DEFAULT_DECOMMISSIONING_VELOCITY + : decommissioningVelocity ); } @@ -549,10 +550,10 @@ public CoordinatorDynamicConfig build(CoordinatorDynamicConfig defaults) maxSegmentsInNodeLoadingQueue == null ? defaults.getMaxSegmentsInNodeLoadingQueue() : maxSegmentsInNodeLoadingQueue, - decommissionNodes == null ? defaults.getDecommissionNodes() : decommissionNodes, - decommissionPriority == null - ? defaults.getDecommissionPriority() - : decommissionPriority + decommissioningNodes == null ? defaults.getDecommissioningNodes() : decommissioningNodes, + decommissioningVelocity == null + ? defaults.getDecommissioningVelocity() + : decommissioningVelocity ); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java index 9653ca6f2881..c20ae0c5d514 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java @@ -694,7 +694,7 @@ public CoordinatorHistoricalManagerRunnable(final int startingLeaderCounter) } // Find all historical servers, group them by subType and sort by ascending usage - Set decommissioned = params.getCoordinatorDynamicConfig().getDecommissionNodes(); + Set decommissioningServers = params.getCoordinatorDynamicConfig().getDecommissioningNodes(); final DruidCluster cluster = new DruidCluster(); for (ImmutableDruidServer server : servers) { if (!loadManagementPeons.containsKey(server.getName())) { @@ -709,7 +709,7 @@ public CoordinatorHistoricalManagerRunnable(final int startingLeaderCounter) new ServerHolder( 
server, loadManagementPeons.get(server.getName()), - decommissioned.contains(server.getHost()) + decommissioningServers.contains(server.getHost()) ) ); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java index c9fc4c188a86..86d824a96510 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java @@ -32,18 +32,18 @@ public class ServerHolder implements Comparable private static final Logger log = new Logger(ServerHolder.class); private final ImmutableDruidServer server; private final LoadQueuePeon peon; - private final boolean isDecommissioned; + private final boolean isDecommissioning; public ServerHolder(ImmutableDruidServer server, LoadQueuePeon peon) { this(server, peon, false); } - public ServerHolder(ImmutableDruidServer server, LoadQueuePeon peon, boolean isDecommissioned) + public ServerHolder(ImmutableDruidServer server, LoadQueuePeon peon, boolean isDecommissioning) { this.server = server; this.peon = peon; - this.isDecommissioned = isDecommissioned; + this.isDecommissioning = isDecommissioning; } public ImmutableDruidServer getServer() @@ -83,13 +83,13 @@ public double getPercentUsed() /** * Historical nodes can be 'decommissioned', which instructs Coordinator to move segments from them - * according to a specified priority. The mechanism allows draining segments from nodes which are planned for - * replacement. - * @return true if the node is being decommissioned + * according to a specified velocity which diverts normal balancer moves for this purpose. The mechanism allows + * draining segments from nodes which are planned for replacement. 
+ * @return true if the node is decommissioning */ - public boolean isDecommissioned() + public boolean isDecommissioning() { - return isDecommissioned; + return isDecommissioning; } public long getAvailableSize() diff --git a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java index 99cd0289d0c3..b0feb4239a4f 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java @@ -95,37 +95,38 @@ private void balanceTier( { if (params.getAvailableSegments().size() == 0) { - log.info("Metadata segments are not available. Cannot balance."); + log.warn("Metadata segments are not available. Cannot balance."); return; } currentlyMovingSegments.computeIfAbsent(tier, t -> new ConcurrentHashMap<>()); if (!currentlyMovingSegments.get(tier).isEmpty()) { reduceLifetimes(tier); - log.info("[%s]: Still waiting on %,d segments to be moved", tier, currentlyMovingSegments.get(tier).size()); + log.info( + "[%s]: Still waiting on %,d segments to be moved. Skipping balance.", + tier, + currentlyMovingSegments.get(tier).size() + ); return; } /* - Take as many segments from decommissioned servers as priority allows and find the best location for them on - available servers. After that, balance segments within available servers pool. + Take as many segments from decommissioning servers as velocity allows and find the best location for them on + active servers. After that, balance segments within active servers pool. 
*/ Map> partitions = - servers.stream().collect(Collectors.partitioningBy(ServerHolder::isDecommissioned)); - final List decommssionedServers = partitions.get(true); - final List availableServers = partitions.get(false); + servers.stream().collect(Collectors.partitioningBy(ServerHolder::isDecommissioning)); + final List decommissioningServers = partitions.get(true); + final List activeServers = partitions.get(false); log.info( - "Found %d decomissioned servers, %d available servers servers", - decommssionedServers.size(), - availableServers.size() + "Found %d active servers, %d decommissioning servers", + activeServers.size(), + decommissioningServers.size() ); - if (decommssionedServers.isEmpty()) { - if (availableServers.size() <= 1) { - log.info("[%s]: %d available servers servers found. Cannot balance.", tier, availableServers.size()); - } - } else if (availableServers.isEmpty()) { - log.info("[%s]: no available servers servers found during decommissioning. Cannot balance.", tier); + if ((decommissioningServers.isEmpty() && activeServers.size() <= 1) || activeServers.isEmpty()) { + log.warn("[%s]: insufficient active servers. Cannot balance.", tier); + return; } int numSegments = 0; @@ -134,23 +135,24 @@ private void balanceTier( } if (numSegments == 0) { - log.info("No segments found. Cannot balance."); + log.info("No segments found. 
Cannot balance."); return; } final int maxSegmentsToMove = Math.min(params.getCoordinatorDynamicConfig().getMaxSegmentsToMove(), numSegments); - int priority = params.getCoordinatorDynamicConfig().getDecommissionPriority(); - int maxDecommissionedSegmentsToMove = (int) Math.ceil(maxSegmentsToMove * priority / 10.0); - log.info("Processing %d segments from decommissioned servers", maxDecommissionedSegmentsToMove); - Pair decommissionedResult = - balanceServers(params, decommssionedServers, availableServers, maxDecommissionedSegmentsToMove); - int maxGeneralSegmentsToMove = maxSegmentsToMove - decommissionedResult.lhs; - log.info("Processing %d segments for balancing", maxGeneralSegmentsToMove); + int decommissioningVelocity = params.getCoordinatorDynamicConfig().getDecommissioningVelocity(); + int maxSegmentsToMoveFromDecommissioningNodes = (int) Math.ceil(maxSegmentsToMove * decommissioningVelocity / 10.0); + log.info("Processing %d segments for moving from decommissioning servers", maxSegmentsToMoveFromDecommissioningNodes); + Pair decommissioningResult = + balanceServers(params, decommissioningServers, activeServers, maxSegmentsToMoveFromDecommissioningNodes); + + int maxGeneralSegmentsToMove = maxSegmentsToMove - decommissioningResult.lhs; + log.info("Processing %d segments for balancing between active servers", maxGeneralSegmentsToMove); Pair generalResult = - balanceServers(params, availableServers, availableServers, maxGeneralSegmentsToMove); + balanceServers(params, activeServers, activeServers, maxGeneralSegmentsToMove); - int moved = generalResult.lhs + decommissionedResult.lhs; - int unmoved = generalResult.rhs + decommissionedResult.rhs; + int moved = generalResult.lhs + decommissioningResult.lhs; + int unmoved = generalResult.rhs + decommissioningResult.rhs; if (unmoved == maxSegmentsToMove) { // Cluster should be alive and constantly adjusting log.info("No good moves found in tier [%s]", tier); diff --git 
a/server/src/main/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRule.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRule.java index b2d031b36408..658171236adc 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRule.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRule.java @@ -46,7 +46,7 @@ public CoordinatorStats run(DruidCoordinator coordinator, DruidCoordinatorRuntim } else { params.getDruidCluster().getAllServers().forEach( eachHolder -> { - if (!eachHolder.isDecommissioned() + if (!eachHolder.isDecommissioning() && colocatedDataSources.stream() .anyMatch(source -> eachHolder.getServer().getDataSource(source) != null)) { loadServerHolders.add(eachHolder); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/LoadRule.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/LoadRule.java index fd8f6dab0d66..1de3479fe30e 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/LoadRule.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/LoadRule.java @@ -153,8 +153,8 @@ private static List getFilteredHolders( log.makeAlert("Tier[%s] has no servers! 
Check your cluster configuration!", tier).emit(); return Collections.emptyList(); } - Predicate isNotDecommissioned = s -> !s.isDecommissioned(); - return queue.stream().filter(isNotDecommissioned.and(predicate)).collect(Collectors.toList()); + Predicate isActive = s -> !s.isDecommissioning(); + return queue.stream().filter(isActive.and(predicate)).collect(Collectors.toList()); } /** @@ -385,14 +385,14 @@ private static int dropForTier( Map> holders = holdersInTier.stream() .filter(s -> s.isServingSegment(segment)) .collect(Collectors.partitioningBy( - ServerHolder::isDecommissioned, + ServerHolder::isDecommissioning, Collectors.toCollection(TreeSet::new) )); - TreeSet decommissionedServers = holders.get(true); - TreeSet availableServers = holders.get(false); - int left = dropSegmentFromServers(balancerStrategy, segment, decommissionedServers, numToDrop); + TreeSet decommissioningServers = holders.get(true); + TreeSet activeServers = holders.get(false); + int left = dropSegmentFromServers(balancerStrategy, segment, decommissioningServers, numToDrop); if (left > 0) { - left = dropSegmentFromServers(balancerStrategy, segment, availableServers, left); + left = dropSegmentFromServers(balancerStrategy, segment, activeServers, left); } if (left != 0) { log.warn("Wtf, holder was null? I have no servers serving [%s]?", segment.getId()); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java index 282719862665..83f5dc57e3bb 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java @@ -201,14 +201,14 @@ public void testMoveToEmptyServerBalancer() /** * Server 1 has 2 segments. - * Server 2 (decommissioned) has 2 segments. + * Server 2 (decommissioning) has 2 segments. * Server 3 is empty. 
- * Decommission has priority 7. + * Decommissioning has velocity 7. * Max segments to move is 3. * 2 (of 2) segments should be moved from Server 2 and 1 (of 2) from Server 1. */ @Test - public void testMoveDecommissionPriority() + public void testMoveDecommissioningVelocity() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, Arrays.asList(segment1, segment2)); mockDruidServer(druidServer2, "2", "normal", 30L, 100L, Arrays.asList(segment3, segment4)); @@ -239,8 +239,8 @@ public void testMoveDecommissionPriority() .withDynamicConfigs( CoordinatorDynamicConfig.builder() .withMaxSegmentsToMove(3) - .withDecommissionPriority(6) - .build() // ceil(3 * 0.6) = 2 segments from decommissioned servers + .withDecommissionVelocity(6) + .build() // ceil(3 * 0.6) = 2 segments from decommissioning servers ) .withBalancerStrategy(strategy) .build(); @@ -251,28 +251,28 @@ public void testMoveDecommissionPriority() } @Test - public void testZeroDecommissionPriority() + public void testZeroDecommissioningVelocity() { - DruidCoordinatorRuntimeParams params = setupParamsForMaintenancePriority(0); + DruidCoordinatorRuntimeParams params = setupParamsForDecommissioningVelocity(0); params = new DruidCoordinatorBalancerTester(coordinator).run(params); Assert.assertEquals(1L, params.getCoordinatorStats().getTieredStat("movedCount", "normal")); Assert.assertThat(peon3.getSegmentsToLoad(), is(equalTo(ImmutableSet.of(segment1)))); } @Test - public void testMaxDecommissionPriority() + public void testMaxDecommissioningVelocity() { - DruidCoordinatorRuntimeParams params = setupParamsForMaintenancePriority(10); + DruidCoordinatorRuntimeParams params = setupParamsForDecommissioningVelocity(10); params = new DruidCoordinatorBalancerTester(coordinator).run(params); Assert.assertEquals(1L, params.getCoordinatorStats().getTieredStat("movedCount", "normal")); Assert.assertThat(peon3.getSegmentsToLoad(), is(equalTo(ImmutableSet.of(segment2)))); } /** - * Should balance segments as usual (ignoring 
priority) with empty decommissionedList. + * Should balance segments as usual (ignoring velocity) with empty decommissioningList. */ @Test - public void testMoveDecommissionPriorityWithNoDecommission() + public void testMoveDecommissioningVelocityWithNoDecommissioning() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, Arrays.asList(segment1, segment2)); mockDruidServer(druidServer2, "2", "normal", 0L, 100L, Arrays.asList(segment3, segment4)); @@ -300,7 +300,7 @@ public void testMoveDecommissionPriorityWithNoDecommission() ImmutableList.of(false, false, false) ) .withDynamicConfigs( - CoordinatorDynamicConfig.builder().withMaxSegmentsToMove(3).withDecommissionPriority(9).build() + CoordinatorDynamicConfig.builder().withMaxSegmentsToMove(3).withDecommissionVelocity(9).build() ) .withBalancerStrategy(strategy) .build(); @@ -311,10 +311,10 @@ public void testMoveDecommissionPriorityWithNoDecommission() } /** - * Shouldn't move segments to a decommissioned server. + * Shouldn't move segments to a decommissioning server. 
*/ @Test - public void testMoveToDecommissionedServer() + public void testMoveToDecommissioningServer() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, segments); mockDruidServer(druidServer2, "2", "normal", 0L, 100L, Collections.emptyList()); @@ -347,7 +347,7 @@ public void testMoveToDecommissionedServer() } @Test - public void testMoveFromDecommissionedServer() + public void testMoveFromDecommissioningServer() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, segments); mockDruidServer(druidServer2, "2", "normal", 0L, 100L, Collections.emptyList()); @@ -512,7 +512,7 @@ private DruidCoordinatorRuntimeParams.Builder defaultRuntimeParamsBuilder( private DruidCoordinatorRuntimeParams.Builder defaultRuntimeParamsBuilder( List druidServers, List peons, - List decommissioned + List decommissioning ) { return DruidCoordinatorRuntimeParams @@ -524,7 +524,7 @@ private DruidCoordinatorRuntimeParams.Builder defaultRuntimeParamsBuilder( "normal", IntStream .range(0, druidServers.size()) - .mapToObj(i -> new ServerHolder(druidServers.get(i), peons.get(i), decommissioned.get(i))) + .mapToObj(i -> new ServerHolder(druidServers.get(i), peons.get(i), decommissioning.get(i))) .collect(Collectors.toSet()) ) ) @@ -622,7 +622,7 @@ public void emitStats(String tier, CoordinatorStats stats, List se } } - private DruidCoordinatorRuntimeParams setupParamsForMaintenancePriority(int priority) + private DruidCoordinatorRuntimeParams setupParamsForDecommissioningVelocity(int velocity) { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, Arrays.asList(segment1, segment3)); mockDruidServer(druidServer2, "2", "normal", 30L, 100L, Arrays.asList(segment2, segment3)); @@ -632,7 +632,7 @@ private DruidCoordinatorRuntimeParams setupParamsForMaintenancePriority(int prio mockCoordinator(coordinator); - // either decommissioned servers list or general ones (ie servers list is [2] or [1, 3]) + // either decommissioning servers list or general ones (ie servers list is [2] or [1, 3]) 
BalancerStrategy strategy = EasyMock.createMock(BalancerStrategy.class); EasyMock.expect(strategy.pickSegmentToMove(ImmutableList.of(new ServerHolder(druidServer2, peon2, true)))) .andReturn(new BalancerSegmentHolder(druidServer2, segment2)); @@ -651,7 +651,7 @@ private DruidCoordinatorRuntimeParams setupParamsForMaintenancePriority(int prio .withDynamicConfigs( CoordinatorDynamicConfig.builder() .withMaxSegmentsToMove(1) - .withDecommissionPriority(priority) + .withDecommissionVelocity(velocity) .build() ) .withBalancerStrategy(strategy) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java index f4149393b982..58613979b468 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java @@ -59,8 +59,8 @@ public class BroadcastDistributionRuleTest private DataSegment smallSegment; private DruidCluster secondCluster; private ServerHolder generalServer; - private ServerHolder decommissionedServer2; - private ServerHolder decommissionedServer1; + private ServerHolder decommissioningServer1; + private ServerHolder decommissioningServer2; @Before public void setUp() @@ -214,9 +214,9 @@ public void setUp() new LoadQueuePeonTester() ); - decommissionedServer1 = new ServerHolder( + decommissioningServer1 = new ServerHolder( new DruidServer( - "decommissioned1", + "decommissioning1", "host2", null, 100, @@ -229,9 +229,9 @@ public void setUp() true ); - decommissionedServer2 = new ServerHolder( + decommissioningServer2 = new ServerHolder( new DruidServer( - "decommissioned2", + "decommissioning2", "host3", null, 100, @@ -268,8 +268,8 @@ public void setUp() "tier1", Stream.of( generalServer, - decommissionedServer1, - decommissionedServer2 + decommissioningServer1, + 
decommissioningServer2 ).collect(Collectors.toCollection(() -> new TreeSet<>(Collections.reverseOrder()))) ) ); @@ -315,19 +315,19 @@ public void testBroadcastToSingleDataSource() /** * Servers: - * name | segments - * ----------------+-------------- - * general | large segment - * decommissioned1 | small segment - * decommissioned2 | large segment + * name | segments + * -----------------+-------------- + * general | large segment + * decommissioning1 | small segment + * decommissioning2 | large segment * * After running the rule for the small segment: - * general | large & small segments - * decommissioned1 | - * decommissioned2 | large segment + * general | large & small segments + * decommissioning1 | + * decommissionint2 | large segment */ @Test - public void testBroadcastDecommissioned() + public void testBroadcastDecommissioning() { final ForeverBroadcastDistributionRule rule = new ForeverBroadcastDistributionRule(ImmutableList.of("large_source")); @@ -349,8 +349,8 @@ public void testBroadcastDecommissioned() assertEquals(false, stats.hasPerTierStats()); assertEquals(1, generalServer.getPeon().getSegmentsToLoad().size()); - assertEquals(1, decommissionedServer1.getPeon().getSegmentsToDrop().size()); - assertEquals(0, decommissionedServer2.getPeon().getSegmentsToLoad().size()); + assertEquals(1, decommissioningServer1.getPeon().getSegmentsToDrop().size()); + assertEquals(0, decommissioningServer2.getPeon().getSegmentsToLoad().size()); } @Test diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java index d1193814404e..a8437c12f272 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java @@ -687,11 +687,11 @@ public void testMaxLoadingQueueSize() } /** - * 2 servers in different tiers, the first is decommissioned. 
- * Should not load a segment to the server that is decommissioned + * 2 servers in different tiers, the first is decommissioning. + * Should not load a segment to the server that is decommissioning */ @Test - public void testLoadDecommissioned() + public void testLoadDecommissioning() { final LoadQueuePeon mockPeon1 = createEmptyPeon(); final LoadQueuePeon mockPeon2 = createOneCallPeonMock(); @@ -737,8 +737,8 @@ public void testLoadDecommissioned() } /** - * 2 tiers, 2 servers each, 1 server of the second tier is decommissioned. - * Should not load a segment to the server that is decommssioned. + * 2 tiers, 2 servers each, 1 server of the second tier is decommissioning. + * Should not load a segment to the server that is decommssioning. */ @Test public void testLoadReplicaDuringMaitenance() @@ -796,11 +796,11 @@ public void testLoadReplicaDuringMaitenance() } /** - * 2 servers with a segment, one server decommissioned. + * 2 servers with a segment, one server decommissioning. * Should drop a segment from both. */ @Test - public void testDropDuringDecommssion() + public void testDropDuringDecommissioning() { final LoadQueuePeon mockPeon = createEmptyPeon(); mockPeon.dropSegment(EasyMock.anyObject(), EasyMock.anyObject()); @@ -859,12 +859,12 @@ public void testDropDuringDecommssion() /** * 3 servers hosting 3 replicas of the segment. - * 1 servers is decommissioned. + * 1 servers is decommissioning. * 1 replica is redundant. - * Should drop from the decommissioned server. + * Should drop from the decommissioning server. 
*/ @Test - public void testRedundantReplicaDropDuringDecommissioned() + public void testRedundantReplicaDropDuringDecommissioning() { final LoadQueuePeon mockPeon1 = new LoadQueuePeonTester(); final LoadQueuePeon mockPeon2 = new LoadQueuePeonTester(); diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index 6d413585cb81..b20f9915a72e 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -50,8 +50,8 @@ public void testSerde() throws Exception + " \"emitBalancingStats\": true,\n" + " \"killDataSourceWhitelist\": [\"test1\",\"test2\"],\n" + " \"maxSegmentsInNodeLoadingQueue\": 1,\n" - + " \"decommissionNodes\": [\"host1\", \"host2\"],\n" - + " \"decommissionPriority\": 9\n" + + " \"decommissioningNodes\": [\"host1\", \"host2\"],\n" + + " \"decommissioningVelocity\": 9\n" + "}\n"; CoordinatorDynamicConfig actual = mapper.readValue( @@ -63,19 +63,19 @@ public void testSerde() throws Exception ), CoordinatorDynamicConfig.class ); - ImmutableSet decommissioned = ImmutableSet.of("host1", "host2"); + ImmutableSet decommissioning = ImmutableSet.of("host1", "host2"); ImmutableSet whitelist = ImmutableSet.of("test1", "test2"); - assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, decommissioned, 9); + assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, decommissioning, 9); actual = CoordinatorDynamicConfig.builder().withDecommissionNodes(ImmutableSet.of("host1")).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 9); - actual = CoordinatorDynamicConfig.builder().withDecommissionPriority(5).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissionVelocity(5).build(actual); assertConfig(actual, 1, 1, 1, 
1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 5); } @Test - public void testDecommissionParametersBackwardCompatibility() throws Exception + public void testDecommissioningParametersBackwardCompatibility() throws Exception { String jsonStr = "{\n" + " \"millisToWaitBeforeDeleting\": 1,\n" @@ -99,14 +99,14 @@ public void testDecommissionParametersBackwardCompatibility() throws Exception ), CoordinatorDynamicConfig.class ); - ImmutableSet decommissioned = ImmutableSet.of(); + ImmutableSet decommissioning = ImmutableSet.of(); ImmutableSet whitelist = ImmutableSet.of("test1", "test2"); - assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, decommissioned, 0); + assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, decommissioning, 0); actual = CoordinatorDynamicConfig.builder().withDecommissionNodes(ImmutableSet.of("host1")).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 0); - actual = CoordinatorDynamicConfig.builder().withDecommissionPriority(5).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissionVelocity(5).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 5); } @@ -257,7 +257,7 @@ private void assertConfig( Set expectedKillableDatasources, boolean expectedKillAllDataSources, int expectedMaxSegmentsInNodeLoadingQueue, - Set decommissioned, + Set decommissioning, int decommissionPriority ) { @@ -272,7 +272,7 @@ private void assertConfig( Assert.assertEquals(expectedKillableDatasources, config.getKillableDataSources()); Assert.assertEquals(expectedKillAllDataSources, config.isKillAllDataSources()); Assert.assertEquals(expectedMaxSegmentsInNodeLoadingQueue, config.getMaxSegmentsInNodeLoadingQueue()); - Assert.assertEquals(decommissioned, config.getDecommissionNodes()); - Assert.assertEquals(decommissionPriority, config.getDecommissionPriority()); + 
Assert.assertEquals(decommissioning, config.getDecommissioningNodes()); + Assert.assertEquals(decommissionPriority, config.getDecommissioningVelocity()); } } From 85f6ef6aa5ea236d7d0be11c7dc652a66f36a151 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 28 Feb 2019 14:34:42 -0800 Subject: [PATCH 03/14] missed one --- .../server/coordinator/CoordinatorDynamicConfig.java | 4 ++-- .../server/coordinator/DruidCoordinatorBalancerTest.java | 6 +++--- .../druid/server/http/CoordinatorDynamicConfigTest.java | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index e1fc74541247..6fd7e32a7a77 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -495,13 +495,13 @@ public Builder withMaxSegmentsInNodeLoadingQueue(int maxSegmentsInNodeLoadingQue return this; } - public Builder withDecommissionNodes(Set decommissioning) + public Builder withDecommissioningNodes(Set decommissioning) { this.decommissioningNodes = decommissioning; return this; } - public Builder withDecommissionVelocity(Integer velocity) + public Builder withDecommissioningVelocity(Integer velocity) { this.decommissioningVelocity = velocity; return this; diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java index 83f5dc57e3bb..b0e89e0b835c 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java @@ -239,7 +239,7 @@ public void testMoveDecommissioningVelocity() .withDynamicConfigs( 
CoordinatorDynamicConfig.builder() .withMaxSegmentsToMove(3) - .withDecommissionVelocity(6) + .withDecommissioningVelocity(6) .build() // ceil(3 * 0.6) = 2 segments from decommissioning servers ) .withBalancerStrategy(strategy) @@ -300,7 +300,7 @@ public void testMoveDecommissioningVelocityWithNoDecommissioning() ImmutableList.of(false, false, false) ) .withDynamicConfigs( - CoordinatorDynamicConfig.builder().withMaxSegmentsToMove(3).withDecommissionVelocity(9).build() + CoordinatorDynamicConfig.builder().withMaxSegmentsToMove(3).withDecommissioningVelocity(9).build() ) .withBalancerStrategy(strategy) .build(); @@ -651,7 +651,7 @@ private DruidCoordinatorRuntimeParams setupParamsForDecommissioningVelocity(int .withDynamicConfigs( CoordinatorDynamicConfig.builder() .withMaxSegmentsToMove(1) - .withDecommissionVelocity(velocity) + .withDecommissioningVelocity(velocity) .build() ) .withBalancerStrategy(strategy) diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index b20f9915a72e..473e5339a965 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -67,10 +67,10 @@ public void testSerde() throws Exception ImmutableSet whitelist = ImmutableSet.of("test1", "test2"); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, decommissioning, 9); - actual = CoordinatorDynamicConfig.builder().withDecommissionNodes(ImmutableSet.of("host1")).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissioningNodes(ImmutableSet.of("host1")).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 9); - actual = CoordinatorDynamicConfig.builder().withDecommissionVelocity(5).build(actual); + actual = 
CoordinatorDynamicConfig.builder().withDecommissioningVelocity(5).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 5); } @@ -103,10 +103,10 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio ImmutableSet whitelist = ImmutableSet.of("test1", "test2"); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, decommissioning, 0); - actual = CoordinatorDynamicConfig.builder().withDecommissionNodes(ImmutableSet.of("host1")).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissioningNodes(ImmutableSet.of("host1")).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 0); - actual = CoordinatorDynamicConfig.builder().withDecommissionVelocity(5).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissioningVelocity(5).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 5); } From 5fd6ffefc7468f8d5d1c067c34fc9496ddb6263d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 28 Feb 2019 15:18:19 -0800 Subject: [PATCH 04/14] fix straggler, add doc about decommissioning stalling if no active servers --- docs/content/configuration/index.md | 2 +- .../druid/server/coordinator/CoordinatorDynamicConfig.java | 2 +- .../druid/server/http/CoordinatorDynamicConfigTest.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index d31572224db6..9ba652c3487c 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -805,7 +805,7 @@ Issuing a GET request at the same URL will return the spec that is currently in |`killPendingSegmentsSkipList`|List of dataSources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. 
This can be a list of comma-separated dataSources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. This parameter could be used to speed up segments loading process, especially if there are "slow" nodes in the cluster (with low loading speed) or if too much segments scheduled to be replicated to some particular node (faster loading could be preferred to better segments distribution). Desired value depends on segments loading speed, acceptable replication time and number of nodes. Value 1000 could be a start point for a rather big cluster. Default value is 0 (loading queue is unbounded) |0| |`decommissioningNodes`| List of 'decommissioning' historical nodes. The Coordinator doesn't assign new segments to these nodes and moves segments from the nodes at the rate specified by `decommissioningVelocity`.|none| -|`decommissioningVelocity`| Decommissioning velocity indicates what proportion of balancer 'move' operations out of `maxSegmentsToMove` total will be spent towards 'decommissioning' servers by moving their segments to active servers, instead of normal 'balancing' moves. Coordinator takes ceil(maxSegmentsToMove * (velocity / 10)) from servers in maitenance during balancing phase, i.e.:
0 - no segments from decommissioning servers will be processed during balancing
5 - 50% segments from decommissioning servers
10 - 100% segments from decommissioning servers
By leveraging the velocity an operator can prevent general nodes from overload or decrease decommissioning time instead.|7| +|`decommissioningVelocity`| Decommissioning velocity indicates what proportion of balancer 'move' operations out of `maxSegmentsToMove` total will be spent towards 'decommissioning' servers by moving their segments to active servers, instead of normal 'balancing' moves. Coordinator takes ceil(maxSegmentsToMove * (velocity / 10)) from servers in maitenance during balancing phase, i.e.:
0 - no segments from decommissioning servers will be processed during balancing
5 - 50% segments from decommissioning servers
10 - 100% segments from decommissioning servers
By leveraging the velocity an operator can prevent general nodes from overload or decrease decommissioning time instead. Decommissioning can become stalled if there are no available active servers to place the segments.|7| To view the audit history of Coordinator dynamic config issue a GET request to the URL - diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 6fd7e32a7a77..96c9656fad0b 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -251,7 +251,7 @@ public Set getDecommissioningNodes() * 5 - 50% segments from decommissioning servers * 10 - 100% segments from decommissioning servers * By leveraging the velocity an operator can prevent general nodes from overload or decrease 'decommissioning' time - * instead. + * instead. Decommissioning can become stalled if there are no available active servers to place the segments. 
* * @return number in range [0, 10] */ diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index 473e5339a965..748ae78c962a 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -258,7 +258,7 @@ private void assertConfig( boolean expectedKillAllDataSources, int expectedMaxSegmentsInNodeLoadingQueue, Set decommissioning, - int decommissionPriority + int decommissioningVelocity ) { Assert.assertEquals(expectedMillisToWaitBeforeDeleting, config.getMillisToWaitBeforeDeleting()); @@ -273,6 +273,6 @@ private void assertConfig( Assert.assertEquals(expectedKillAllDataSources, config.isKillAllDataSources()); Assert.assertEquals(expectedMaxSegmentsInNodeLoadingQueue, config.getMaxSegmentsInNodeLoadingQueue()); Assert.assertEquals(decommissioning, config.getDecommissioningNodes()); - Assert.assertEquals(decommissionPriority, config.getDecommissioningVelocity()); + Assert.assertEquals(decommissioningVelocity, config.getDecommissioningVelocity()); } } From 756ac62d9925acf917193cc4f12a0e4c023aea75 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 4 Mar 2019 15:24:47 -0800 Subject: [PATCH 05/14] fix missed typo, docs --- docs/content/configuration/index.md | 4 ++-- .../druid/server/coordinator/CoordinatorDynamicConfig.java | 5 +++-- .../coordinator/rules/BroadcastDistributionRuleTest.java | 4 ++-- .../apache/druid/server/coordinator/rules/LoadRuleTest.java | 6 +++--- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index f30f0fb3dc91..2b435e7f9800 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -804,8 +804,8 @@ Issuing a GET request at the same URL will return the spec that is 
currently in |`killAllDataSources`|Send kill tasks for ALL dataSources if property `druid.coordinator.kill.on` is true. If this is set to true then `killDataSourceWhitelist` must not be specified or be empty list.|false| |`killPendingSegmentsSkipList`|List of dataSources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. This parameter could be used to speed up segments loading process, especially if there are "slow" nodes in the cluster (with low loading speed) or if too much segments scheduled to be replicated to some particular node (faster loading could be preferred to better segments distribution). Desired value depends on segments loading speed, acceptable replication time and number of nodes. Value 1000 could be a start point for a rather big cluster. Default value is 0 (loading queue is unbounded) |0| -|`decommissioningNodes`| List of 'decommissioning' historical servers. The Coordinator doesn't assign new segments to these servers and moves segments from the servers at the rate specified by `decommissioningVelocity`.|none| -|`decommissioningVelocity`| Decommissioning velocity indicates what proportion of balancer 'move' operations out of `maxSegmentsToMove` total will be spent towards 'decommissioning' servers by moving their segments to active servers, instead of normal 'balancing' moves. Coordinator takes ceil(maxSegmentsToMove * (velocity / 10)) from servers in maitenance during balancing phase, i.e.:
0 - no segments from decommissioning servers will be processed during balancing
5 - 50% segments from decommissioning servers
10 - 100% segments from decommissioning servers
By leveraging the velocity an operator can prevent general servers from overload or decrease decommissioning time instead. Decommissioning can become stalled if there are no available active servers to place the segments.|7| +|`decommissioningNodes`| List of 'decommissioning' historical servers. The Coordinator doesn't assign new segments to these servers and moves segments away from the 'decommissioning' servers at the maximum rate specified by `decommissioningVelocity`.|none| +|`decommissioningVelocity`| Decommissioning velocity indicates what proportion of balancer 'move' operations out of `maxSegmentsToMove` total will be spent towards 'decommissioning' servers by moving their segments to active servers, instead of normal 'balancing' moves. Coordinator takes ceil(maxSegmentsToMove * (velocity / 10)) from decommissioning servers during balancing phase, i.e.:
0 - no segments from decommissioning servers will be processed during balancing
5 - 50% segments from decommissioning servers
10 - 100% segments from decommissioning servers
By leveraging the velocity an operator can prevent general servers from overload or decrease decommissioning time instead. Decommissioning can become stalled if there are no available active servers to place the segments.|7| To view the audit history of Coordinator dynamic config issue a GET request to the URL - diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 96c9656fad0b..b7ce50f28e3e 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -232,7 +232,8 @@ public int getMaxSegmentsInNodeLoadingQueue() /** * List of historical nodes to 'decommission'. Coordinator doesn't assign new segments on those nodes and moves - * segments from those nodes according to a specified velocity. + * segments away from the 'decommissioning' servers at the maximum rate specified by + * {@link CoordinatorDynamicConfig#getDecommissioningVelocity}. * * @return list of host:port entries */ @@ -246,7 +247,7 @@ public Set getDecommissioningNodes() * Decommissioning velocity indicates what proportion of balancer 'move' operations out of * {@link CoordinatorDynamicConfig#getMaxSegmentsToMove()} total will be spent towards 'decommissioning' servers * by moving their segments to active servers, instead of normal 'balancing' segments between servers. 
- * Coordinator takes ceil(maxSegmentsToMove * (velocity / 10)) from servers in maitenance during balancing phase: + * Coordinator takes ceil(maxSegmentsToMove * (velocity / 10)) from decommissioning servers during balancing phase: * 0 - no segments from decommissioning servers will be processed during balancing * 5 - 50% segments from decommissioning servers * 10 - 100% segments from decommissioning servers diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java index 58613979b468..e4dc5c9f516e 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java @@ -317,12 +317,12 @@ public void testBroadcastToSingleDataSource() * Servers: * name | segments * -----------------+-------------- - * general | large segment + * active | large segment * decommissioning1 | small segment * decommissioning2 | large segment * * After running the rule for the small segment: - * general | large & small segments + * active | large & small segments * decommissioning1 | * decommissionint2 | large segment */ diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java index a8437c12f272..4e44de56380c 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java @@ -741,7 +741,7 @@ public void testLoadDecommissioning() * Should not load a segment to the server that is decommssioning. 
*/ @Test - public void testLoadReplicaDuringMaitenance() + public void testLoadReplicaDuringDecomissioning() { EasyMock.expect(throttler.canCreateReplicant(EasyMock.anyString())).andReturn(true).anyTimes(); @@ -1019,12 +1019,12 @@ private static LoadQueuePeon createOneCallPeonMock() return mockPeon2; } - private static ServerHolder createServerHolder(String tier, LoadQueuePeon mockPeon1, boolean decommission) + private static ServerHolder createServerHolder(String tier, LoadQueuePeon mockPeon1, boolean isDecommissioning) { return new ServerHolder( createServer(tier).toImmutableDruidServer(), mockPeon1, - decommission + isDecommissioning ); } } From 335a84a359d9b9fa5553b4ed3392406de39a446c Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 4 Mar 2019 16:15:54 -0800 Subject: [PATCH 06/14] refine docs --- docs/content/configuration/index.md | 2 +- .../coordinator/CoordinatorDynamicConfig.java | 21 +++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index 2b435e7f9800..8f5c453ba1b1 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -805,7 +805,7 @@ Issuing a GET request at the same URL will return the spec that is currently in |`killPendingSegmentsSkipList`|List of dataSources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. This parameter could be used to speed up segments loading process, especially if there are "slow" nodes in the cluster (with low loading speed) or if too much segments scheduled to be replicated to some particular node (faster loading could be preferred to better segments distribution). 
Desired value depends on segments loading speed, acceptable replication time and number of nodes. Value 1000 could be a start point for a rather big cluster. Default value is 0 (loading queue is unbounded) |0| |`decommissioningNodes`| List of 'decommissioning' historical servers. The Coordinator doesn't assign new segments to these servers and moves segments away from the 'decommissioning' servers at the maximum rate specified by `decommissioningVelocity`.|none| -|`decommissioningVelocity`| Decommissioning velocity indicates what proportion of balancer 'move' operations out of `maxSegmentsToMove` total will be spent towards 'decommissioning' servers by moving their segments to active servers, instead of normal 'balancing' moves. Coordinator takes ceil(maxSegmentsToMove * (velocity / 10)) from decommissioning servers during balancing phase, i.e.:
0 - no segments from decommissioning servers will be processed during balancing
5 - 50% segments from decommissioning servers
10 - 100% segments from decommissioning servers
By leveraging the velocity an operator can prevent general servers from overload or decrease decommissioning time instead. Decommissioning can become stalled if there are no available active servers to place the segments.|7| +|`decommissioningVelocity`| Decommissioning velocity determines the maximum number of segments that may be moved away from 'decommissioning' servers non-decommissioning (that is, active) servers during one Coordinator's run, relative to the total maximum segment movements allowed during one Coordinator's run (which, in turn, is determined by the maxSegmentsToMove configuration). Specifically, the maximum is ceil(maxSegmentsToMove * (velocity / 10)). For example, if decommissioningVelocity is 0 no segments will be moved away from 'decommissioning' servers. If decommissioningVelocity is 5 no more than ceil(maxSegmentsToMove * 0.5) segments may be moved away from 'decommissioning' servers. By leveraging the velocity an operator can prevent active servers from overload by prioritizing balancing, or decrease decommissioning time instead. Decommissioning can become stalled if there are no available active servers to place the segments. 
The value should be between 0 and 10.|7| To view the audit history of Coordinator dynamic config issue a GET request to the URL - diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index b7ce50f28e3e..0af352f61aa3 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -244,16 +244,19 @@ public Set getDecommissioningNodes() } /** - * Decommissioning velocity indicates what proportion of balancer 'move' operations out of - * {@link CoordinatorDynamicConfig#getMaxSegmentsToMove()} total will be spent towards 'decommissioning' servers - * by moving their segments to active servers, instead of normal 'balancing' segments between servers. - * Coordinator takes ceil(maxSegmentsToMove * (velocity / 10)) from decommissioning servers during balancing phase: - * 0 - no segments from decommissioning servers will be processed during balancing - * 5 - 50% segments from decommissioning servers - * 10 - 100% segments from decommissioning servers - * By leveraging the velocity an operator can prevent general nodes from overload or decrease 'decommissioning' time - * instead. Decommissioning can become stalled if there are no available active servers to place the segments. + * Decommissioning velocity determines the maximum number of segments that may be moved away from 'decommissioning' + * servers non-decommissioning (that is, active) servers during one Coordinator's run, relative to the total maximum + * segment movements allowed during one Coordinator's run (which, in turn, is determined by the + * {@link CoordinatorDynamicConfig#getMaxSegmentsToMove()}). + * Specifically, the maximum is ceil(maxSegmentsToMove * (velocity / 10)). 
* + * For example, + * if decommissioningVelocity is 0 no segments will be moved away from 'decommissioning' servers. + * If decommissioningVelocity is 5 no more than ceil(maxSegmentsToMove * 0.5) segments may be moved away from 'decommissioning' servers. + * + * By leveraging the velocity an operator can prevent active servers from overload by prioritizing balancing, + * or decrease decommissioning time instead. Decommissioning can become stalled if there are no available active + * servers to place the segments. The value should be between 0 and 10. * @return number in range [0, 10] */ @JsonProperty From fc5416179726d86f58e1df82f9c7c93288fe4dad Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 5 Mar 2019 16:09:30 -0800 Subject: [PATCH 07/14] doc changes, replace generals --- docs/content/configuration/index.md | 2 +- .../coordinator/CoordinatorDynamicConfig.java | 21 ++++++++++--------- .../helper/DruidCoordinatorBalancer.java | 1 + .../DruidCoordinatorBalancerTest.java | 2 +- .../rules/BroadcastDistributionRuleTest.java | 10 ++++----- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index 8f5c453ba1b1..08325808fbe1 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -805,7 +805,7 @@ Issuing a GET request at the same URL will return the spec that is currently in |`killPendingSegmentsSkipList`|List of dataSources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. 
This parameter could be used to speed up segments loading process, especially if there are "slow" nodes in the cluster (with low loading speed) or if too much segments scheduled to be replicated to some particular node (faster loading could be preferred to better segments distribution). Desired value depends on segments loading speed, acceptable replication time and number of nodes. Value 1000 could be a start point for a rather big cluster. Default value is 0 (loading queue is unbounded) |0| |`decommissioningNodes`| List of 'decommissioning' historical servers. The Coordinator doesn't assign new segments to these servers and moves segments away from the 'decommissioning' servers at the maximum rate specified by `decommissioningVelocity`.|none| -|`decommissioningVelocity`| Decommissioning velocity determines the maximum number of segments that may be moved away from 'decommissioning' servers non-decommissioning (that is, active) servers during one Coordinator's run, relative to the total maximum segment movements allowed during one Coordinator's run (which, in turn, is determined by the maxSegmentsToMove configuration). Specifically, the maximum is ceil(maxSegmentsToMove * (velocity / 10)). For example, if decommissioningVelocity is 0 no segments will be moved away from 'decommissioning' servers. If decommissioningVelocity is 5 no more than ceil(maxSegmentsToMove * 0.5) segments may be moved away from 'decommissioning' servers. By leveraging the velocity an operator can prevent active servers from overload by prioritizing balancing, or decrease decommissioning time instead. Decommissioning can become stalled if there are no available active servers to place the segments. The value should be between 0 and 10.|7| +|`decommissioningVelocity`| Decommissioning velocity determines the maximum number of segments that may be moved away from 'decommissioning' servers to non-decommissioning (that is, active) servers during one Coordinator's run. 
This value is relative to the total maximum segment movements allowed during one run which is determined by the `maxSegmentsToMove` configuration. Specifically, the maximum is `ceil(maxSegmentsToMove * (velocity / 10))`. For example, if `decommissioningVelocity` is 5, no more than `ceil(maxSegmentsToMove * 0.5)` segments may be moved away from 'decommissioning' servers. If `decommissioningVelocity` is 0, segments will neither be moved from _or to_ 'decommissioning' servers, effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or assignment by load rules. Decommissioning can also become stalled if there are no available active servers to place the segments. By leveraging the velocity an operator can prevent active servers from overload by prioritizing balancing, or decrease decommissioning time instead. The value should be between 0 and 10.|7| To view the audit history of Coordinator dynamic config issue a GET request to the URL - diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 0af352f61aa3..c9a52905dc4a 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -245,18 +245,19 @@ public Set getDecommissioningNodes() /** * Decommissioning velocity determines the maximum number of segments that may be moved away from 'decommissioning' - * servers non-decommissioning (that is, active) servers during one Coordinator's run, relative to the total maximum - * segment movements allowed during one Coordinator's run (which, in turn, is determined by the - * {@link CoordinatorDynamicConfig#getMaxSegmentsToMove()}). - * Specifically, the maximum is ceil(maxSegmentsToMove * (velocity / 10)). 
+ * servers to non-decommissioning (that is, active) servers during one Coordinator's run. This value is relative to + * the total maximum segment movements allowed during one run which is determined by + * `{@link CoordinatorDynamicConfig#getMaxSegmentsToMove()}. * - * For example, - * if decommissioningVelocity is 0 no segments will be moved away from 'decommissioning' servers. - * If decommissioningVelocity is 5 no more than ceil(maxSegmentsToMove * 0.5) segments may be moved away from 'decommissioning' servers. + * Specifically, the maximum is `ceil(maxSegmentsToMove * (velocity / 10))`. For example, if `decommissioningVelocity` + * is 5, no more than `ceil(maxSegmentsToMove * 0.5)` segments may be moved away from 'decommissioning' servers. + * + * If `decommissioningVelocity` is 0, segments will neither be moved from _or to_ 'decommissioning' servers, + * effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or assignment by + * load rules. Decommissioning can also become stalled if there are no available active servers to place the segments. + * By leveraging the velocity an operator can prevent active servers from overload by prioritizing balancing, or + * decrease decommissioning time instead. The value should be between 0 and 10. * - * By leveraging the velocity an operator can prevent active servers from overload by prioritizing balancing, - * or decrease decommissioning time instead. Decommissioning can become stalled if there are no available active - * servers to place the segments. The value should be between 0 and 10. 
* @return number in range [0, 10] */ @JsonProperty diff --git a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java index b0feb4239a4f..3261804e9bb5 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java @@ -96,6 +96,7 @@ private void balanceTier( if (params.getAvailableSegments().size() == 0) { log.warn("Metadata segments are not available. Cannot balance."); + // skip emit 0,0 stats return; } currentlyMovingSegments.computeIfAbsent(tier, t -> new ConcurrentHashMap<>()); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java index b0e89e0b835c..e86985792a0f 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java @@ -632,7 +632,7 @@ private DruidCoordinatorRuntimeParams setupParamsForDecommissioningVelocity(int mockCoordinator(coordinator); - // either decommissioning servers list or general ones (ie servers list is [2] or [1, 3]) + // either decommissioning servers list or acitve ones (ie servers list is [2] or [1, 3]) BalancerStrategy strategy = EasyMock.createMock(BalancerStrategy.class); EasyMock.expect(strategy.pickSegmentToMove(ImmutableList.of(new ServerHolder(druidServer2, peon2, true)))) .andReturn(new BalancerSegmentHolder(druidServer2, segment2)); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java index e4dc5c9f516e..83398d7f6dc4 
100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java @@ -58,7 +58,7 @@ public class BroadcastDistributionRuleTest private final List largeSegments2 = new ArrayList<>(); private DataSegment smallSegment; private DruidCluster secondCluster; - private ServerHolder generalServer; + private ServerHolder activeServer; private ServerHolder decommissioningServer1; private ServerHolder decommissioningServer2; @@ -200,9 +200,9 @@ public void setUp() ) ); - generalServer = new ServerHolder( + activeServer = new ServerHolder( new DruidServer( - "general", + "active", "host1", null, 100, @@ -267,7 +267,7 @@ public void setUp() ImmutableMap.of( "tier1", Stream.of( - generalServer, + activeServer, decommissioningServer1, decommissioningServer2 ).collect(Collectors.toCollection(() -> new TreeSet<>(Collections.reverseOrder()))) @@ -348,7 +348,7 @@ public void testBroadcastDecommissioning() assertEquals(1L, stats.getGlobalStat(LoadRule.ASSIGNED_COUNT)); assertEquals(false, stats.hasPerTierStats()); - assertEquals(1, generalServer.getPeon().getSegmentsToLoad().size()); + assertEquals(1, activeServer.getPeon().getSegmentsToLoad().size()); assertEquals(1, decommissioningServer1.getPeon().getSegmentsToDrop().size()); assertEquals(0, decommissioningServer2.getPeon().getSegmentsToLoad().size()); } From ce2555a02d42a6c297aff262ae43dfe65f199240 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 5 Mar 2019 16:31:21 -0800 Subject: [PATCH 08/14] add explicit comment to mention suppressed stats for balanceTier --- .../server/coordinator/helper/DruidCoordinatorBalancer.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java 
index 3261804e9bb5..64335f5ffb74 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java @@ -96,7 +96,7 @@ private void balanceTier( if (params.getAvailableSegments().size() == 0) { log.warn("Metadata segments are not available. Cannot balance."); - // skip emit 0,0 stats + // suppress emit zero stats return; } currentlyMovingSegments.computeIfAbsent(tier, t -> new ConcurrentHashMap<>()); @@ -108,6 +108,7 @@ private void balanceTier( tier, currentlyMovingSegments.get(tier).size() ); + // suppress emit zero stats return; } @@ -127,6 +128,7 @@ private void balanceTier( if ((decommissioningServers.isEmpty() && activeServers.size() <= 1) || activeServers.isEmpty()) { log.warn("[%s]: insufficient active servers. Cannot balance.", tier); + // suppress emit zero stats return; } @@ -137,6 +139,7 @@ private void balanceTier( if (numSegments == 0) { log.info("No segments found. 
Cannot balance."); + // suppress emit zero stats return; } From e9b12428bb4de92214ccb20240aa62cc24d92109 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 6 Mar 2019 15:25:01 -0800 Subject: [PATCH 09/14] rename decommissioningVelocity to decommissioningMaxSegmentsToMovePercent and update docs --- docs/content/configuration/index.md | 6 +- .../coordinator/CoordinatorDynamicConfig.java | 72 +++++++++---------- .../server/coordinator/ServerHolder.java | 7 +- .../helper/DruidCoordinatorBalancer.java | 15 ++-- .../DruidCoordinatorBalancerTest.java | 24 +++---- .../http/CoordinatorDynamicConfigTest.java | 10 +-- 6 files changed, 68 insertions(+), 66 deletions(-) diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index 08325808fbe1..2a40bdfa173f 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -784,7 +784,7 @@ A sample Coordinator dynamic config JSON object is shown below: "emitBalancingStats": false, "killDataSourceWhitelist": ["wikipedia", "testDatasource"], "decommissioningNodes": ["localhost:8182", "localhost:8282"], - "decommissioningVelocity": 7 + "decommissioningMaxSegmentsToMovePercent": 7 } ``` @@ -804,8 +804,8 @@ Issuing a GET request at the same URL will return the spec that is currently in |`killAllDataSources`|Send kill tasks for ALL dataSources if property `druid.coordinator.kill.on` is true. If this is set to true then `killDataSourceWhitelist` must not be specified or be empty list.|false| |`killPendingSegmentsSkipList`|List of dataSources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. 
This parameter could be used to speed up segments loading process, especially if there are "slow" nodes in the cluster (with low loading speed) or if too much segments scheduled to be replicated to some particular node (faster loading could be preferred to better segments distribution). Desired value depends on segments loading speed, acceptable replication time and number of nodes. Value 1000 could be a start point for a rather big cluster. Default value is 0 (loading queue is unbounded) |0| -|`decommissioningNodes`| List of 'decommissioning' historical servers. The Coordinator doesn't assign new segments to these servers and moves segments away from the 'decommissioning' servers at the maximum rate specified by `decommissioningVelocity`.|none| -|`decommissioningVelocity`| Decommissioning velocity determines the maximum number of segments that may be moved away from 'decommissioning' servers to non-decommissioning (that is, active) servers during one Coordinator's run. This value is relative to the total maximum segment movements allowed during one run which is determined by the `maxSegmentsToMove` configuration. Specifically, the maximum is `ceil(maxSegmentsToMove * (velocity / 10))`. For example, if `decommissioningVelocity` is 5, no more than `ceil(maxSegmentsToMove * 0.5)` segments may be moved away from 'decommissioning' servers. If `decommissioningVelocity` is 0, segments will neither be moved from _or to_ 'decommissioning' servers, effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or assignment by load rules. Decommissioning can also become stalled if there are no available active servers to place the segments. By leveraging the velocity an operator can prevent active servers from overload by prioritizing balancing, or decrease decommissioning time instead. The value should be between 0 and 10.|7| +|`decommissioningNodes`| List of historical servers to 'decommission'. 
Coordinator will not assign new segments to 'decommissioning' servers, and segments will be moved away from them to be placed on 'active' servers at the maximum rate specified by `decommissioningMaxSegmentsToMovePercent`.|none| +|`decommissioningMaxSegmentsToMovePercent`| The maximum number of segments that may be moved away from 'decommissioning' servers to non-decommissioning (that is, active) servers during one Coordinator's run. This value is relative to the total maximum segment movements allowed during one run which is determined by `maxSegmentsToMove`. If `decommissioningMaxSegmentsToMovePercent` is 0, segments will neither be moved from _or to_ 'decommissioning' servers, effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or assignment by load rules. Decommissioning can also become stalled if there are no available active servers to place the segments. By leveraging decommissioning percent, an operator can prevent active servers from overload by prioritizing balancing, or decrease decommissioning time instead. The value should be between 0 and 100.|70| To view the audit history of Coordinator dynamic config issue a GET request to the URL - diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index c9a52905dc4a..3b51221e3e41 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -57,7 +57,7 @@ public class CoordinatorDynamicConfig private final boolean killAllDataSources; private final Set killableDataSources; private final Set decommissioningNodes; - private final int decommissioningVelocity; + private final int decommissioningMaxSegmentsToMovePercent; // The pending segments of the dataSources in this list are not killed. 
private final Set protectedPendingSegmentDatasources; @@ -89,7 +89,7 @@ public CoordinatorDynamicConfig( @JsonProperty("killPendingSegmentsSkipList") Object protectedPendingSegmentDatasources, @JsonProperty("maxSegmentsInNodeLoadingQueue") int maxSegmentsInNodeLoadingQueue, @JsonProperty("decommissioningNodes") Object decommissioningNodes, - @JsonProperty("decommissioningVelocity") int decommissioningVelocity + @JsonProperty("decommissioningMaxSegmentsToMovePercent") int decommissioningMaxSegmentsToMovePercent ) { this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting; @@ -106,10 +106,10 @@ public CoordinatorDynamicConfig( this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; this.decommissioningNodes = parseJsonStringOrArray(decommissioningNodes); Preconditions.checkArgument( - decommissioningVelocity >= 0 && decommissioningVelocity <= 10, - "decommissioningVelocity should be in range [0, 10]" + decommissioningMaxSegmentsToMovePercent >= 0 && decommissioningMaxSegmentsToMovePercent <= 10, + "decommissioningMaxSegmentsToMovePercent should be in range [0, 10]" ); - this.decommissioningVelocity = decommissioningVelocity; + this.decommissioningMaxSegmentsToMovePercent = decommissioningMaxSegmentsToMovePercent; if (this.killAllDataSources && !this.killableDataSources.isEmpty()) { throw new IAE("can't have killAllDataSources and non-empty killDataSourceWhitelist"); @@ -231,9 +231,9 @@ public int getMaxSegmentsInNodeLoadingQueue() } /** - * List of historical nodes to 'decommission'. Coordinator doesn't assign new segments on those nodes and moves - * segments away from the 'decommissioning' servers at the maximum rate specified by - * {@link CoordinatorDynamicConfig#getDecommissioningVelocity}. + * List of historical servers to 'decommission'. 
Coordinator will not assign new segments to 'decommissioning' servers, + * and segments will be moved away from them to be placed on 'active' servers at the maximum rate specified by + * {@link CoordinatorDynamicConfig#getDecommissioningMaxSegmentsToMovePercent}. * * @return list of host:port entries */ @@ -244,26 +244,22 @@ public Set getDecommissioningNodes() } /** - * Decommissioning velocity determines the maximum number of segments that may be moved away from 'decommissioning' - * servers to non-decommissioning (that is, active) servers during one Coordinator's run. This value is relative to - * the total maximum segment movements allowed during one run which is determined by - * `{@link CoordinatorDynamicConfig#getMaxSegmentsToMove()}. + * The maximum number of segments that may be moved away from 'decommissioning' servers to non-decommissioning + * (that is, active) servers during one Coordinator's run. This value is relative to the total maximum segment + * movements allowed during one run which is determined by {@link CoordinatorDynamicConfig#getMaxSegmentsToMove()}. + * - * Specifically, the maximum is `ceil(maxSegmentsToMove * (velocity / 10))`. For example, if `decommissioningVelocity` - * is 5, no more than `ceil(maxSegmentsToMove * 0.5)` segments may be moved away from 'decommissioning' servers. + * If `decommissioningMaxSegmentsToMovePercent` is 0, segments will neither be moved from _or to_ 'decommissioning' + * servers, effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or + * assignment by load rules. Decommissioning can also become stalled if there are no available active servers to place + * the segments. By leveraging decommissioning percent, an operator can prevent active servers from overload by + * prioritizing balancing, or decrease decommissioning time instead. The value should be between 0 and 100. 
* - * If `decommissioningVelocity` is 0, segments will neither be moved from _or to_ 'decommissioning' servers, - * effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or assignment by - * load rules. Decommissioning can also become stalled if there are no available active servers to place the segments. - * By leveraging the velocity an operator can prevent active servers from overload by prioritizing balancing, or - * decrease decommissioning time instead. The value should be between 0 and 10. - * - * @return number in range [0, 10] + * @return number in range [0, 100] */ @JsonProperty - public int getDecommissioningVelocity() + public int getDecommissioningMaxSegmentsToMovePercent() { - return decommissioningVelocity; + return decommissioningMaxSegmentsToMovePercent; } @Override @@ -283,7 +279,7 @@ public String toString() ", protectedPendingSegmentDatasources=" + protectedPendingSegmentDatasources + ", maxSegmentsInNodeLoadingQueue=" + maxSegmentsInNodeLoadingQueue + ", decommissioningNodes=" + decommissioningNodes + - ", decommissioningVelocity=" + decommissioningVelocity + + ", decommissioningMaxSegmentsToMovePercent=" + decommissioningMaxSegmentsToMovePercent + '}'; } @@ -338,7 +334,7 @@ public boolean equals(Object o) if (!Objects.equals(decommissioningNodes, that.decommissioningNodes)) { return false; } - return decommissioningVelocity == that.decommissioningVelocity; + return decommissioningMaxSegmentsToMovePercent == that.decommissioningMaxSegmentsToMovePercent; } @Override @@ -358,7 +354,7 @@ public int hashCode() killableDataSources, protectedPendingSegmentDatasources, decommissioningNodes, - decommissioningVelocity + decommissioningMaxSegmentsToMovePercent ); } @@ -379,7 +375,7 @@ public static class Builder private static final boolean DEFAULT_EMIT_BALANCING_STATS = false; private static final boolean DEFAULT_KILL_ALL_DATA_SOURCES = false; private static final int DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE = 
0; - private static final int DEFAULT_DECOMMISSIONING_VELOCITY = 7; + private static final int DEFAULT_DECOMMISSIONING_MAX_SEGMENTS_TO_MOVE_PERCENT = 70; private Long millisToWaitBeforeDeleting; private Long mergeBytesLimit; @@ -394,7 +390,7 @@ public static class Builder private Object killPendingSegmentsSkipList; private Integer maxSegmentsInNodeLoadingQueue; private Object decommissioningNodes; - private Integer decommissioningVelocity; + private Integer decommissioningMaxSegmentsToMovePercent; public Builder() { @@ -415,7 +411,7 @@ public Builder( @JsonProperty("killPendingSegmentsSkipList") @Nullable Object killPendingSegmentsSkipList, @JsonProperty("maxSegmentsInNodeLoadingQueue") @Nullable Integer maxSegmentsInNodeLoadingQueue, @JsonProperty("decommissioningNodes") @Nullable Object decommissioningNodes, - @JsonProperty("decommissioningVelocity") @Nullable Integer decommissioningVelocity + @JsonProperty("decommissioningMaxSegmentsToMovePercent") @Nullable Integer decommissioningMaxSegmentsToMovePercent ) { this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting; @@ -431,7 +427,7 @@ public Builder( this.killPendingSegmentsSkipList = killPendingSegmentsSkipList; this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; this.decommissioningNodes = decommissioningNodes; - this.decommissioningVelocity = decommissioningVelocity; + this.decommissioningMaxSegmentsToMovePercent = decommissioningMaxSegmentsToMovePercent; } public Builder withMillisToWaitBeforeDeleting(long millisToWaitBeforeDeleting) @@ -506,9 +502,9 @@ public Builder withDecommissioningNodes(Set decommissioning) return this; } - public Builder withDecommissioningVelocity(Integer velocity) + public Builder withDecommissioningMaxSegmentsToMovePercent(Integer percent) { - this.decommissioningVelocity = velocity; + this.decommissioningMaxSegmentsToMovePercent = percent; return this; } @@ -530,9 +526,9 @@ public CoordinatorDynamicConfig build() ? 
DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE : maxSegmentsInNodeLoadingQueue, decommissioningNodes, - decommissioningVelocity == null - ? DEFAULT_DECOMMISSIONING_VELOCITY - : decommissioningVelocity + decommissioningMaxSegmentsToMovePercent == null + ? DEFAULT_DECOMMISSIONING_MAX_SEGMENTS_TO_MOVE_PERCENT + : decommissioningMaxSegmentsToMovePercent ); } @@ -556,9 +552,9 @@ public CoordinatorDynamicConfig build(CoordinatorDynamicConfig defaults) ? defaults.getMaxSegmentsInNodeLoadingQueue() : maxSegmentsInNodeLoadingQueue, decommissioningNodes == null ? defaults.getDecommissioningNodes() : decommissioningNodes, - decommissioningVelocity == null - ? defaults.getDecommissioningVelocity() - : decommissioningVelocity + decommissioningMaxSegmentsToMovePercent == null + ? defaults.getDecommissioningMaxSegmentsToMovePercent() + : decommissioningMaxSegmentsToMovePercent ); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java index 86d824a96510..9acd28a311f6 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java @@ -82,9 +82,10 @@ public double getPercentUsed() } /** - * Historical nodes can be 'decommissioned', which instructs Coordinator to move segments from them - * according to a specified velocity which diverts normal balancer moves for this purpose. The mechanism allows - * draining segments from nodes which are planned for replacement. + * Historical nodes can be 'decommissioned', which instructs Coordinator to move segments from them according to + * the percent of move operations diverted from normal balancer moves for this purpose by + * {@link CoordinatorDynamicConfig#getDecommissioningMaxSegmentsToMovePercent()}. The mechanism allows draining + * segments from nodes which are planned for replacement. 
* @return true if the node is decommissioning */ public boolean isDecommissioning() diff --git a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java index 64335f5ffb74..9171d6aa5b51 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java @@ -113,8 +113,8 @@ private void balanceTier( } /* - Take as many segments from decommissioning servers as velocity allows and find the best location for them on - active servers. After that, balance segments within active servers pool. + Take as many segments from decommissioning servers as decommissioningMaxSegmentsToMovePercent allows and find + the best location for them on active servers. After that, balance segments within active servers pool. */ Map> partitions = servers.stream().collect(Collectors.partitioningBy(ServerHolder::isDecommissioning)); @@ -144,9 +144,14 @@ private void balanceTier( } final int maxSegmentsToMove = Math.min(params.getCoordinatorDynamicConfig().getMaxSegmentsToMove(), numSegments); - int decommissioningVelocity = params.getCoordinatorDynamicConfig().getDecommissioningVelocity(); - int maxSegmentsToMoveFromDecommissioningNodes = (int) Math.ceil(maxSegmentsToMove * decommissioningVelocity / 10.0); - log.info("Processing %d segments for moving from decommissioning servers", maxSegmentsToMoveFromDecommissioningNodes); + int decommissioningMaxSegmentsToMovePercent = + params.getCoordinatorDynamicConfig().getDecommissioningMaxSegmentsToMovePercent(); + int maxSegmentsToMoveFromDecommissioningNodes = + (int) Math.ceil(maxSegmentsToMove * (decommissioningMaxSegmentsToMovePercent / 100.0)); + log.info( + "Processing %d segments for moving from decommissioning servers", + maxSegmentsToMoveFromDecommissioningNodes + ); Pair 
decommissioningResult = balanceServers(params, decommissioningServers, activeServers, maxSegmentsToMoveFromDecommissioningNodes); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java index e86985792a0f..c9da046040c9 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java @@ -203,12 +203,12 @@ public void testMoveToEmptyServerBalancer() * Server 1 has 2 segments. * Server 2 (decommissioning) has 2 segments. * Server 3 is empty. - * Decommissioning has velocity 7. + * Decommissioning percent is 70. * Max segments to move is 3. * 2 (of 2) segments should be moved from Server 2 and 1 (of 2) from Server 1. */ @Test - public void testMoveDecommissioningVelocity() + public void testMoveDecommissioningMaxSegmentsToMovePercent() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, Arrays.asList(segment1, segment2)); mockDruidServer(druidServer2, "2", "normal", 30L, 100L, Arrays.asList(segment3, segment4)); @@ -239,7 +239,7 @@ public void testMoveDecommissioningVelocity() .withDynamicConfigs( CoordinatorDynamicConfig.builder() .withMaxSegmentsToMove(3) - .withDecommissioningVelocity(6) + .withDecommissioningMaxSegmentsToMovePercent(6) .build() // ceil(3 * 0.6) = 2 segments from decommissioning servers ) .withBalancerStrategy(strategy) @@ -251,28 +251,28 @@ public void testMoveDecommissioningVelocity() } @Test - public void testZeroDecommissioningVelocity() + public void testZeroDecommissioningMaxSegmentsToMovePercent() { - DruidCoordinatorRuntimeParams params = setupParamsForDecommissioningVelocity(0); + DruidCoordinatorRuntimeParams params = setupParamsForDecommissioningMaxSegmentsToMovePercent(0); params = new DruidCoordinatorBalancerTester(coordinator).run(params); 
Assert.assertEquals(1L, params.getCoordinatorStats().getTieredStat("movedCount", "normal")); Assert.assertThat(peon3.getSegmentsToLoad(), is(equalTo(ImmutableSet.of(segment1)))); } @Test - public void testMaxDecommissioningVelocity() + public void testMaxDecommissioningMaxSegmentsToMovePercent() { - DruidCoordinatorRuntimeParams params = setupParamsForDecommissioningVelocity(10); + DruidCoordinatorRuntimeParams params = setupParamsForDecommissioningMaxSegmentsToMovePercent(10); params = new DruidCoordinatorBalancerTester(coordinator).run(params); Assert.assertEquals(1L, params.getCoordinatorStats().getTieredStat("movedCount", "normal")); Assert.assertThat(peon3.getSegmentsToLoad(), is(equalTo(ImmutableSet.of(segment2)))); } /** - * Should balance segments as usual (ignoring velocity) with empty decommissioningList. + * Should balance segments as usual (ignoring percent) with empty decommissioningList. */ @Test - public void testMoveDecommissioningVelocityWithNoDecommissioning() + public void testMoveDecommissioningMaxSegmentsToMovePercentWithNoDecommissioning() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, Arrays.asList(segment1, segment2)); mockDruidServer(druidServer2, "2", "normal", 0L, 100L, Arrays.asList(segment3, segment4)); @@ -300,7 +300,7 @@ public void testMoveDecommissioningVelocityWithNoDecommissioning() ImmutableList.of(false, false, false) ) .withDynamicConfigs( - CoordinatorDynamicConfig.builder().withMaxSegmentsToMove(3).withDecommissioningVelocity(9).build() + CoordinatorDynamicConfig.builder().withMaxSegmentsToMove(3).withDecommissioningMaxSegmentsToMovePercent(9).build() ) .withBalancerStrategy(strategy) .build(); @@ -622,7 +622,7 @@ public void emitStats(String tier, CoordinatorStats stats, List se } } - private DruidCoordinatorRuntimeParams setupParamsForDecommissioningVelocity(int velocity) + private DruidCoordinatorRuntimeParams setupParamsForDecommissioningMaxSegmentsToMovePercent(int percent) { mockDruidServer(druidServer1, "1", 
"normal", 30L, 100L, Arrays.asList(segment1, segment3)); mockDruidServer(druidServer2, "2", "normal", 30L, 100L, Arrays.asList(segment2, segment3)); @@ -651,7 +651,7 @@ private DruidCoordinatorRuntimeParams setupParamsForDecommissioningVelocity(int .withDynamicConfigs( CoordinatorDynamicConfig.builder() .withMaxSegmentsToMove(1) - .withDecommissioningVelocity(velocity) + .withDecommissioningMaxSegmentsToMovePercent(percent) .build() ) .withBalancerStrategy(strategy) diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index 748ae78c962a..cafa7a66e66a 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -51,7 +51,7 @@ public void testSerde() throws Exception + " \"killDataSourceWhitelist\": [\"test1\",\"test2\"],\n" + " \"maxSegmentsInNodeLoadingQueue\": 1,\n" + " \"decommissioningNodes\": [\"host1\", \"host2\"],\n" - + " \"decommissioningVelocity\": 9\n" + + " \"decommissioningMaxSegmentsToMovePercent\": 9\n" + "}\n"; CoordinatorDynamicConfig actual = mapper.readValue( @@ -70,7 +70,7 @@ public void testSerde() throws Exception actual = CoordinatorDynamicConfig.builder().withDecommissioningNodes(ImmutableSet.of("host1")).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 9); - actual = CoordinatorDynamicConfig.builder().withDecommissioningVelocity(5).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissioningMaxSegmentsToMovePercent(5).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 5); } @@ -106,7 +106,7 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio actual = 
CoordinatorDynamicConfig.builder().withDecommissioningNodes(ImmutableSet.of("host1")).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 0); - actual = CoordinatorDynamicConfig.builder().withDecommissioningVelocity(5).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissioningMaxSegmentsToMovePercent(5).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 5); } @@ -258,7 +258,7 @@ private void assertConfig( boolean expectedKillAllDataSources, int expectedMaxSegmentsInNodeLoadingQueue, Set decommissioning, - int decommissioningVelocity + int decommissioningMaxSegmentsToMovePercent ) { Assert.assertEquals(expectedMillisToWaitBeforeDeleting, config.getMillisToWaitBeforeDeleting()); @@ -273,6 +273,6 @@ private void assertConfig( Assert.assertEquals(expectedKillAllDataSources, config.isKillAllDataSources()); Assert.assertEquals(expectedMaxSegmentsInNodeLoadingQueue, config.getMaxSegmentsInNodeLoadingQueue()); Assert.assertEquals(decommissioning, config.getDecommissioningNodes()); - Assert.assertEquals(decommissioningVelocity, config.getDecommissioningVelocity()); + Assert.assertEquals(decommissioningMaxSegmentsToMovePercent, config.getDecommissioningMaxSegmentsToMovePercent()); } } From 1da2cb8ab0ac4bb1f8ca722d4c456c12f40b4b67 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 6 Mar 2019 16:17:12 -0800 Subject: [PATCH 10/14] fix precondition check --- .../druid/server/coordinator/CoordinatorDynamicConfig.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index 3b51221e3e41..e1da3bcbb968 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ 
b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -106,8 +106,8 @@ public CoordinatorDynamicConfig( this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; this.decommissioningNodes = parseJsonStringOrArray(decommissioningNodes); Preconditions.checkArgument( - decommissioningMaxSegmentsToMovePercent >= 0 && decommissioningMaxSegmentsToMovePercent <= 10, - "decommissioningMaxSegmentsToMovePercent should be in range [0, 10]" + decommissioningMaxSegmentsToMovePercent >= 0 && decommissioningMaxSegmentsToMovePercent <= 100, + "decommissioningMaxSegmentsToMovePercent should be in range [0, 100]" ); this.decommissioningMaxSegmentsToMovePercent = decommissioningMaxSegmentsToMovePercent; From 5abb5da96fb7aebbfdc2715c5fc006d09135bb7a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 6 Mar 2019 16:51:01 -0800 Subject: [PATCH 11/14] decommissioningMaxPercentOfMaxSegmentsToMove --- docs/content/configuration/index.md | 6 +-- .../coordinator/CoordinatorDynamicConfig.java | 44 +++++++++---------- .../server/coordinator/ServerHolder.java | 2 +- .../helper/DruidCoordinatorBalancer.java | 8 ++-- .../DruidCoordinatorBalancerTest.java | 20 ++++----- .../http/CoordinatorDynamicConfigTest.java | 10 ++--- 6 files changed, 45 insertions(+), 45 deletions(-) diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index 2a40bdfa173f..e5570597dcd1 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -784,7 +784,7 @@ A sample Coordinator dynamic config JSON object is shown below: "emitBalancingStats": false, "killDataSourceWhitelist": ["wikipedia", "testDatasource"], "decommissioningNodes": ["localhost:8182", "localhost:8282"], - "decommissioningMaxSegmentsToMovePercent": 7 + "decommissioningMaxPercentOfMaxSegmentsToMove": 7 } ``` @@ -804,8 +804,8 @@ Issuing a GET request at the same URL will return the spec that is currently in |`killAllDataSources`|Send kill tasks 
for ALL dataSources if property `druid.coordinator.kill.on` is true. If this is set to true then `killDataSourceWhitelist` must not be specified or be empty list.|false| |`killPendingSegmentsSkipList`|List of dataSources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. This parameter could be used to speed up segments loading process, especially if there are "slow" nodes in the cluster (with low loading speed) or if too much segments scheduled to be replicated to some particular node (faster loading could be preferred to better segments distribution). Desired value depends on segments loading speed, acceptable replication time and number of nodes. Value 1000 could be a start point for a rather big cluster. Default value is 0 (loading queue is unbounded) |0| -|`decommissioningNodes`| List of historical servers to 'decommission'. Coordinator will not assign new segments to 'decomissioning' servers, and segments will be moved away from them to be placed on 'active' servers at the maximum rate specified by `decommissioningMaxSegmentsToMovePercent`.|none| -|`decommissioningMaxSegmentsToMovePercent`| The maximum number of segments that may be moved away from 'decommissioning' servers to non-decommissioning (that is, active) servers during one Coordinator's run. This value is relative to the total maximum segment movements allowed during one run which is determined by `maxSegmentsToMove`. If `decommissioningMaxSegmentsToMovePercent` is 0, segments will neither be moved from _or to_ 'decommissioning' servers, effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or assignment by load rules. 
Decommissioning can also become stalled if there are no available active servers to place the segments. By leveraging decommissioning percent, an operator can prevent active servers from overload by prioritizing balancing, or decrease decommissioning time instead. The value should be between 0 and 100.|70| +|`decommissioningNodes`| List of historical servers to 'decommission'. Coordinator will not assign new segments to 'decomissioning' servers, and segments will be moved away from them to be placed on 'active' servers at the maximum rate specified by `decommissioningMaxPercentOfMaxSegmentsToMove`.|none| +|`decommissioningMaxPercentOfMaxSegmentsToMove`| The maximum number of segments that may be moved away from 'decommissioning' servers to non-decommissioning (that is, active) servers during one Coordinator's run. This value is relative to the total maximum segment movements allowed during one run which is determined by `maxSegmentsToMove`. If `decommissioningMaxPercentOfMaxSegmentsToMove` is 0, segments will neither be moved from _or to_ 'decommissioning' servers, effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or assignment by load rules. Decommissioning can also become stalled if there are no available active servers to place the segments. By leveraging decommissioning percent, an operator can prevent active servers from overload by prioritizing balancing, or decrease decommissioning time instead. 
The value should be between 0 and 100.|70| To view the audit history of Coordinator dynamic config issue a GET request to the URL - diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index e1da3bcbb968..c60c042bad61 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -57,7 +57,7 @@ public class CoordinatorDynamicConfig private final boolean killAllDataSources; private final Set killableDataSources; private final Set decommissioningNodes; - private final int decommissioningMaxSegmentsToMovePercent; + private final int decommissioningMaxPercentOfMaxSegmentsToMove; // The pending segments of the dataSources in this list are not killed. private final Set protectedPendingSegmentDatasources; @@ -89,7 +89,7 @@ public CoordinatorDynamicConfig( @JsonProperty("killPendingSegmentsSkipList") Object protectedPendingSegmentDatasources, @JsonProperty("maxSegmentsInNodeLoadingQueue") int maxSegmentsInNodeLoadingQueue, @JsonProperty("decommissioningNodes") Object decommissioningNodes, - @JsonProperty("decommissioningMaxSegmentsToMovePercent") int decommissioningMaxSegmentsToMovePercent + @JsonProperty("decommissioningMaxPercentOfMaxSegmentsToMove") int decommissioningMaxPercentOfMaxSegmentsToMove ) { this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting; @@ -106,10 +106,10 @@ public CoordinatorDynamicConfig( this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; this.decommissioningNodes = parseJsonStringOrArray(decommissioningNodes); Preconditions.checkArgument( - decommissioningMaxSegmentsToMovePercent >= 0 && decommissioningMaxSegmentsToMovePercent <= 100, - "decommissioningMaxSegmentsToMovePercent should be in range [0, 100]" + decommissioningMaxPercentOfMaxSegmentsToMove >= 0 && 
decommissioningMaxPercentOfMaxSegmentsToMove <= 100, + "decommissioningMaxPercentOfMaxSegmentsToMove should be in range [0, 100]" ); - this.decommissioningMaxSegmentsToMovePercent = decommissioningMaxSegmentsToMovePercent; + this.decommissioningMaxPercentOfMaxSegmentsToMove = decommissioningMaxPercentOfMaxSegmentsToMove; if (this.killAllDataSources && !this.killableDataSources.isEmpty()) { throw new IAE("can't have killAllDataSources and non-empty killDataSourceWhitelist"); @@ -233,7 +233,7 @@ public int getMaxSegmentsInNodeLoadingQueue() /** * List of historical servers to 'decommission'. Coordinator will not assign new segments to 'decomissioning' servers, * and segments will be moved away from them to be placed on 'active' servers at the maximum rate specified by - * {@link CoordinatorDynamicConfig#getDecommissioningMaxSegmentsToMovePercent}. + * {@link CoordinatorDynamicConfig#getDecommissioningMaxPercentOfMaxSegmentsToMove}. * * @return list of host:port entries */ @@ -248,7 +248,7 @@ public Set getDecommissioningNodes() * (that is, active) servers during one Coordinator's run. This value is relative to the total maximum segment * movements allowed during one run which is determined by `{@link CoordinatorDynamicConfig#getMaxSegmentsToMove()}. * - * If `decommissioningMaxSegmentsToMovePercent` is 0, segments will neither be moved from _or to_ 'decommissioning' + * If `decommissioningMaxPercentOfMaxSegmentsToMove` is 0, segments will neither be moved from _or to_ 'decommissioning' * servers, effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or * assignment by load rules. Decommissioning can also become stalled if there are no available active servers to place * the segments. 
By leveraging decommissioning percent, an operator can prevent active servers from overload by @@ -257,9 +257,9 @@ public Set getDecommissioningNodes() * @return number in range [0, 100] */ @JsonProperty - public int getDecommissioningMaxSegmentsToMovePercent() + public int getDecommissioningMaxPercentOfMaxSegmentsToMove() { - return decommissioningMaxSegmentsToMovePercent; + return decommissioningMaxPercentOfMaxSegmentsToMove; } @Override @@ -279,7 +279,7 @@ public String toString() ", protectedPendingSegmentDatasources=" + protectedPendingSegmentDatasources + ", maxSegmentsInNodeLoadingQueue=" + maxSegmentsInNodeLoadingQueue + ", decommissioningNodes=" + decommissioningNodes + - ", decommissioningMaxSegmentsToMovePercent=" + decommissioningMaxSegmentsToMovePercent + + ", decommissioningMaxPercentOfMaxSegmentsToMove=" + decommissioningMaxPercentOfMaxSegmentsToMove + '}'; } @@ -334,7 +334,7 @@ public boolean equals(Object o) if (!Objects.equals(decommissioningNodes, that.decommissioningNodes)) { return false; } - return decommissioningMaxSegmentsToMovePercent == that.decommissioningMaxSegmentsToMovePercent; + return decommissioningMaxPercentOfMaxSegmentsToMove == that.decommissioningMaxPercentOfMaxSegmentsToMove; } @Override @@ -354,7 +354,7 @@ public int hashCode() killableDataSources, protectedPendingSegmentDatasources, decommissioningNodes, - decommissioningMaxSegmentsToMovePercent + decommissioningMaxPercentOfMaxSegmentsToMove ); } @@ -390,7 +390,7 @@ public static class Builder private Object killPendingSegmentsSkipList; private Integer maxSegmentsInNodeLoadingQueue; private Object decommissioningNodes; - private Integer decommissioningMaxSegmentsToMovePercent; + private Integer decommissioningMaxPercentOfMaxSegmentsToMove; public Builder() { @@ -411,7 +411,7 @@ public Builder( @JsonProperty("killPendingSegmentsSkipList") @Nullable Object killPendingSegmentsSkipList, @JsonProperty("maxSegmentsInNodeLoadingQueue") @Nullable Integer 
maxSegmentsInNodeLoadingQueue, @JsonProperty("decommissioningNodes") @Nullable Object decommissioningNodes, - @JsonProperty("decommissioningMaxSegmentsToMovePercent") @Nullable Integer decommissioningMaxSegmentsToMovePercent + @JsonProperty("decommissioningMaxPercentOfMaxSegmentsToMove") @Nullable Integer decommissioningMaxPercentOfMaxSegmentsToMove ) { this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting; @@ -427,7 +427,7 @@ public Builder( this.killPendingSegmentsSkipList = killPendingSegmentsSkipList; this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue; this.decommissioningNodes = decommissioningNodes; - this.decommissioningMaxSegmentsToMovePercent = decommissioningMaxSegmentsToMovePercent; + this.decommissioningMaxPercentOfMaxSegmentsToMove = decommissioningMaxPercentOfMaxSegmentsToMove; } public Builder withMillisToWaitBeforeDeleting(long millisToWaitBeforeDeleting) @@ -502,9 +502,9 @@ public Builder withDecommissioningNodes(Set decommissioning) return this; } - public Builder withDecommissioningMaxSegmentsToMovePercent(Integer percent) + public Builder withDecommissioningMaxPercentOfMaxSegmentsToMove(Integer percent) { - this.decommissioningMaxSegmentsToMovePercent = percent; + this.decommissioningMaxPercentOfMaxSegmentsToMove = percent; return this; } @@ -526,9 +526,9 @@ public CoordinatorDynamicConfig build() ? DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE : maxSegmentsInNodeLoadingQueue, decommissioningNodes, - decommissioningMaxSegmentsToMovePercent == null + decommissioningMaxPercentOfMaxSegmentsToMove == null ? DEFAULT_DECOMMISSIONING_MAX_SEGMENTS_TO_MOVE_PERCENT - : decommissioningMaxSegmentsToMovePercent + : decommissioningMaxPercentOfMaxSegmentsToMove ); } @@ -552,9 +552,9 @@ public CoordinatorDynamicConfig build(CoordinatorDynamicConfig defaults) ? defaults.getMaxSegmentsInNodeLoadingQueue() : maxSegmentsInNodeLoadingQueue, decommissioningNodes == null ? 
defaults.getDecommissioningNodes() : decommissioningNodes, - decommissioningMaxSegmentsToMovePercent == null - ? defaults.getDecommissioningMaxSegmentsToMovePercent() - : decommissioningMaxSegmentsToMovePercent + decommissioningMaxPercentOfMaxSegmentsToMove == null + ? defaults.getDecommissioningMaxPercentOfMaxSegmentsToMove() + : decommissioningMaxPercentOfMaxSegmentsToMove ); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java index 9acd28a311f6..ba96566a4dfd 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java @@ -84,7 +84,7 @@ public double getPercentUsed() /** * Historical nodes can be 'decommissioned', which instructs Coordinator to move segments from them according to * the percent of move operations diverted from normal balancer moves for this purpose by - * {@link CoordinatorDynamicConfig#getDecommissioningMaxSegmentsToMovePercent()}. The mechanism allows draining + * {@link CoordinatorDynamicConfig#getDecommissioningMaxPercentOfMaxSegmentsToMove()}. The mechanism allows draining * segments from nodes which are planned for replacement. 
* @return true if the node is decommissioning */ diff --git a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java index 9171d6aa5b51..cf8d7253191f 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java @@ -113,7 +113,7 @@ private void balanceTier( } /* - Take as many segments from decommissioning servers as decommissioningMaxSegmentsToMovePercent allows and find + Take as many segments from decommissioning servers as decommissioningMaxPercentOfMaxSegmentsToMove allows and find the best location for them on active servers. After that, balance segments within active servers pool. */ Map> partitions = @@ -144,10 +144,10 @@ private void balanceTier( } final int maxSegmentsToMove = Math.min(params.getCoordinatorDynamicConfig().getMaxSegmentsToMove(), numSegments); - int decommissioningMaxSegmentsToMovePercent = - params.getCoordinatorDynamicConfig().getDecommissioningMaxSegmentsToMovePercent(); + int decommissioningMaxPercentOfMaxSegmentsToMove = + params.getCoordinatorDynamicConfig().getDecommissioningMaxPercentOfMaxSegmentsToMove(); int maxSegmentsToMoveFromDecommissioningNodes = - (int) Math.ceil(maxSegmentsToMove * (decommissioningMaxSegmentsToMovePercent / 100.0)); + (int) Math.ceil(maxSegmentsToMove * (decommissioningMaxPercentOfMaxSegmentsToMove / 100.0)); log.info( "Processing %d segments for moving from decommissioning servers", maxSegmentsToMoveFromDecommissioningNodes diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java index c9da046040c9..af6cb51c8be5 100644 --- 
a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java @@ -208,7 +208,7 @@ public void testMoveToEmptyServerBalancer() * 2 (of 2) segments should be moved from Server 2 and 1 (of 2) from Server 1. */ @Test - public void testMoveDecommissioningMaxSegmentsToMovePercent() + public void testMoveDecommissioningMaxPercentOfMaxSegmentsToMove() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, Arrays.asList(segment1, segment2)); mockDruidServer(druidServer2, "2", "normal", 30L, 100L, Arrays.asList(segment3, segment4)); @@ -239,7 +239,7 @@ public void testMoveDecommissioningMaxSegmentsToMovePercent() .withDynamicConfigs( CoordinatorDynamicConfig.builder() .withMaxSegmentsToMove(3) - .withDecommissioningMaxSegmentsToMovePercent(6) + .withDecommissioningMaxPercentOfMaxSegmentsToMove(6) .build() // ceil(3 * 0.6) = 2 segments from decommissioning servers ) .withBalancerStrategy(strategy) @@ -251,18 +251,18 @@ public void testMoveDecommissioningMaxSegmentsToMovePercent() } @Test - public void testZeroDecommissioningMaxSegmentsToMovePercent() + public void testZeroDecommissioningMaxPercentOfMaxSegmentsToMove() { - DruidCoordinatorRuntimeParams params = setupParamsForDecommissioningMaxSegmentsToMovePercent(0); + DruidCoordinatorRuntimeParams params = setupParamsForDecommissioningMaxPercentOfMaxSegmentsToMove(0); params = new DruidCoordinatorBalancerTester(coordinator).run(params); Assert.assertEquals(1L, params.getCoordinatorStats().getTieredStat("movedCount", "normal")); Assert.assertThat(peon3.getSegmentsToLoad(), is(equalTo(ImmutableSet.of(segment1)))); } @Test - public void testMaxDecommissioningMaxSegmentsToMovePercent() + public void testMaxDecommissioningMaxPercentOfMaxSegmentsToMove() { - DruidCoordinatorRuntimeParams params = setupParamsForDecommissioningMaxSegmentsToMovePercent(10); + DruidCoordinatorRuntimeParams params = 
setupParamsForDecommissioningMaxPercentOfMaxSegmentsToMove(10); params = new DruidCoordinatorBalancerTester(coordinator).run(params); Assert.assertEquals(1L, params.getCoordinatorStats().getTieredStat("movedCount", "normal")); Assert.assertThat(peon3.getSegmentsToLoad(), is(equalTo(ImmutableSet.of(segment2)))); @@ -272,7 +272,7 @@ public void testMaxDecommissioningMaxSegmentsToMovePercent() * Should balance segments as usual (ignoring percent) with empty decommissioningList. */ @Test - public void testMoveDecommissioningMaxSegmentsToMovePercentWithNoDecommissioning() + public void testMoveDecommissioningMaxPercentOfMaxSegmentsToMoveWithNoDecommissioning() { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, Arrays.asList(segment1, segment2)); mockDruidServer(druidServer2, "2", "normal", 0L, 100L, Arrays.asList(segment3, segment4)); @@ -300,7 +300,7 @@ public void testMoveDecommissioningMaxSegmentsToMovePercentWithNoDecommissioning ImmutableList.of(false, false, false) ) .withDynamicConfigs( - CoordinatorDynamicConfig.builder().withMaxSegmentsToMove(3).withDecommissioningMaxSegmentsToMovePercent(9).build() + CoordinatorDynamicConfig.builder().withMaxSegmentsToMove(3).withDecommissioningMaxPercentOfMaxSegmentsToMove(9).build() ) .withBalancerStrategy(strategy) .build(); @@ -622,7 +622,7 @@ public void emitStats(String tier, CoordinatorStats stats, List se } } - private DruidCoordinatorRuntimeParams setupParamsForDecommissioningMaxSegmentsToMovePercent(int percent) + private DruidCoordinatorRuntimeParams setupParamsForDecommissioningMaxPercentOfMaxSegmentsToMove(int percent) { mockDruidServer(druidServer1, "1", "normal", 30L, 100L, Arrays.asList(segment1, segment3)); mockDruidServer(druidServer2, "2", "normal", 30L, 100L, Arrays.asList(segment2, segment3)); @@ -651,7 +651,7 @@ private DruidCoordinatorRuntimeParams setupParamsForDecommissioningMaxSegmentsTo .withDynamicConfigs( CoordinatorDynamicConfig.builder() .withMaxSegmentsToMove(1) - 
.withDecommissioningMaxSegmentsToMovePercent(percent) + .withDecommissioningMaxPercentOfMaxSegmentsToMove(percent) .build() ) .withBalancerStrategy(strategy) diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index cafa7a66e66a..5ac8a01f7227 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -51,7 +51,7 @@ public void testSerde() throws Exception + " \"killDataSourceWhitelist\": [\"test1\",\"test2\"],\n" + " \"maxSegmentsInNodeLoadingQueue\": 1,\n" + " \"decommissioningNodes\": [\"host1\", \"host2\"],\n" - + " \"decommissioningMaxSegmentsToMovePercent\": 9\n" + + " \"decommissioningMaxPercentOfMaxSegmentsToMove\": 9\n" + "}\n"; CoordinatorDynamicConfig actual = mapper.readValue( @@ -70,7 +70,7 @@ public void testSerde() throws Exception actual = CoordinatorDynamicConfig.builder().withDecommissioningNodes(ImmutableSet.of("host1")).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 9); - actual = CoordinatorDynamicConfig.builder().withDecommissioningMaxSegmentsToMovePercent(5).build(actual); + actual = CoordinatorDynamicConfig.builder().withDecommissioningMaxPercentOfMaxSegmentsToMove(5).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 5); } @@ -106,7 +106,7 @@ public void testDecommissioningParametersBackwardCompatibility() throws Exceptio actual = CoordinatorDynamicConfig.builder().withDecommissioningNodes(ImmutableSet.of("host1")).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 0); - actual = CoordinatorDynamicConfig.builder().withDecommissioningMaxSegmentsToMovePercent(5).build(actual); + actual = 
CoordinatorDynamicConfig.builder().withDecommissioningMaxPercentOfMaxSegmentsToMove(5).build(actual); assertConfig(actual, 1, 1, 1, 1, 1, 1, 2, true, whitelist, false, 1, ImmutableSet.of("host1"), 5); } @@ -258,7 +258,7 @@ private void assertConfig( boolean expectedKillAllDataSources, int expectedMaxSegmentsInNodeLoadingQueue, Set decommissioning, - int decommissioningMaxSegmentsToMovePercent + int decommissioningMaxPercentOfMaxSegmentsToMove ) { Assert.assertEquals(expectedMillisToWaitBeforeDeleting, config.getMillisToWaitBeforeDeleting()); @@ -273,6 +273,6 @@ private void assertConfig( Assert.assertEquals(expectedKillAllDataSources, config.isKillAllDataSources()); Assert.assertEquals(expectedMaxSegmentsInNodeLoadingQueue, config.getMaxSegmentsInNodeLoadingQueue()); Assert.assertEquals(decommissioning, config.getDecommissioningNodes()); - Assert.assertEquals(decommissioningMaxSegmentsToMovePercent, config.getDecommissioningMaxSegmentsToMovePercent()); + Assert.assertEquals(decommissioningMaxPercentOfMaxSegmentsToMove, config.getDecommissioningMaxPercentOfMaxSegmentsToMove()); } } From 4834037c0fbdc325a81b9967736ed555a23e47cb Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 6 Mar 2019 18:45:22 -0800 Subject: [PATCH 12/14] fix test --- .../apache/druid/server/http/CoordinatorDynamicConfigTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java index 5ac8a01f7227..af97906f9cc7 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorDynamicConfigTest.java @@ -217,7 +217,7 @@ public void testBuilderDefaults() { CoordinatorDynamicConfig defaultConfig = CoordinatorDynamicConfig.builder().build(); ImmutableSet emptyList = ImmutableSet.of(); - assertConfig(defaultConfig, 
900000, 524288000, 100, 5, 15, 10, 1, false, emptyList, false, 0, emptyList, 7); + assertConfig(defaultConfig, 900000, 524288000, 100, 5, 15, 10, 1, false, emptyList, false, 0, emptyList, 70); } @Test From 4b5bd4c3dec0ae78e03dde140ac03d1fd02c731f Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 6 Mar 2019 19:56:01 -0800 Subject: [PATCH 13/14] fix test --- .../server/coordinator/DruidCoordinatorBalancerTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java index af6cb51c8be5..c5e5a3f11c86 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java @@ -203,7 +203,7 @@ public void testMoveToEmptyServerBalancer() * Server 1 has 2 segments. * Server 2 (decommissioning) has 2 segments. * Server 3 is empty. - * Decommissioning percent is 70. + * Decommissioning percent is 60. * Max segments to move is 3. * 2 (of 2) segments should be moved from Server 2 and 1 (of 2) from Server 1. 
*/ @@ -239,7 +239,7 @@ public void testMoveDecommissioningMaxPercentOfMaxSegmentsToMove() .withDynamicConfigs( CoordinatorDynamicConfig.builder() .withMaxSegmentsToMove(3) - .withDecommissioningMaxPercentOfMaxSegmentsToMove(6) + .withDecommissioningMaxPercentOfMaxSegmentsToMove(60) .build() // ceil(3 * 0.6) = 2 segments from decommissioning servers ) .withBalancerStrategy(strategy) From be4daba40e5cc7571d8789503e4d2f2c472332bc Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 7 Mar 2019 16:35:49 -0800 Subject: [PATCH 14/14] fixes --- docs/content/configuration/index.md | 8 +++--- .../coordinator/CoordinatorDynamicConfig.java | 26 +++++++++++-------- .../DruidCoordinatorBalancerTest.java | 2 +- .../coordinator/rules/LoadRuleTest.java | 2 +- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index e5570597dcd1..89008af0aaa7 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -784,7 +784,7 @@ A sample Coordinator dynamic config JSON object is shown below: "emitBalancingStats": false, "killDataSourceWhitelist": ["wikipedia", "testDatasource"], "decommissioningNodes": ["localhost:8182", "localhost:8282"], - "decommissioningMaxPercentOfMaxSegmentsToMove": 7 + "decommissioningMaxPercentOfMaxSegmentsToMove": 70 } ``` @@ -798,14 +798,14 @@ Issuing a GET request at the same URL will return the spec that is currently in |`maxSegmentsToMove`|The maximum number of segments that can be moved at any given time.|5| |`replicantLifetime`|The maximum number of Coordinator runs for a segment to be replicated before we start alerting.|15| |`replicationThrottleLimit`|The maximum number of segments that can be replicated at one time.|10| -|`balancerComputeThreads`|Thread pool size for computing moving cost of segments in segment balancing. 
Consider increasing this if you have a lot of segments and moving segment starts to get stuck.|1| +|`balancerComputeThreads`|Thread pool size for computing moving cost of segments in segment balancing. Consider increasing this if you have a lot of segments and moving segments starts to get stuck.|1| |`emitBalancingStats`|Boolean flag for whether or not we should emit balancing stats. This is an expensive operation.|false| |`killDataSourceWhitelist`|List of dataSources for which kill tasks are sent if property `druid.coordinator.kill.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none| |`killAllDataSources`|Send kill tasks for ALL dataSources if property `druid.coordinator.kill.on` is true. If this is set to true then `killDataSourceWhitelist` must not be specified or be empty list.|false| |`killPendingSegmentsSkipList`|List of dataSources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. This parameter could be used to speed up segments loading process, especially if there are "slow" nodes in the cluster (with low loading speed) or if too much segments scheduled to be replicated to some particular node (faster loading could be preferred to better segments distribution). Desired value depends on segments loading speed, acceptable replication time and number of nodes. Value 1000 could be a start point for a rather big cluster. Default value is 0 (loading queue is unbounded) |0| -|`decommissioningNodes`| List of historical servers to 'decommission'. 
Coordinator will not assign new segments to 'decomissioning' servers, and segments will be moved away from them to be placed on 'active' servers at the maximum rate specified by `decommissioningMaxPercentOfMaxSegmentsToMove`.|none| -|`decommissioningMaxPercentOfMaxSegmentsToMove`| The maximum number of segments that may be moved away from 'decommissioning' servers to non-decommissioning (that is, active) servers during one Coordinator's run. This value is relative to the total maximum segment movements allowed during one run which is determined by `maxSegmentsToMove`. If `decommissioningMaxPercentOfMaxSegmentsToMove` is 0, segments will neither be moved from _or to_ 'decommissioning' servers, effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or assignment by load rules. Decommissioning can also become stalled if there are no available active servers to place the segments. By leveraging decommissioning percent, an operator can prevent active servers from overload by prioritizing balancing, or decrease decommissioning time instead. The value should be between 0 and 100.|70| +|`decommissioningNodes`| List of historical servers to 'decommission'. Coordinator will not assign new segments to 'decommissioning' servers, and segments will be moved away from them to be placed on non-decommissioning servers at the maximum rate specified by `decommissioningMaxPercentOfMaxSegmentsToMove`.|none| +|`decommissioningMaxPercentOfMaxSegmentsToMove`| The maximum number of segments that may be moved away from 'decommissioning' servers to non-decommissioning (that is, active) servers during one Coordinator run. This value is relative to the total maximum segment movements allowed during one run which is determined by `maxSegmentsToMove`. 
If `decommissioningMaxPercentOfMaxSegmentsToMove` is 0, segments will neither be moved from _or to_ 'decommissioning' servers, effectively putting them in a sort of "maintenance" mode that will not participate in balancing or assignment by load rules. Decommissioning can also become stalled if there are no available active servers to place the segments. By leveraging the maximum percent of decommissioning segment movements, an operator can prevent active servers from overload by prioritizing balancing, or decrease decommissioning time instead. The value should be between 0 and 100.|70| To view the audit history of Coordinator dynamic config issue a GET request to the URL - diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java index c60c042bad61..14bf3395add3 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java @@ -28,6 +28,8 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import javax.validation.constraints.Max; +import javax.validation.constraints.Min; import java.util.Collection; import java.util.HashSet; import java.util.Objects; @@ -231,8 +233,8 @@ public int getMaxSegmentsInNodeLoadingQueue() } /** - * List of historical servers to 'decommission'. Coordinator will not assign new segments to 'decomissioning' servers, - * and segments will be moved away from them to be placed on 'active' servers at the maximum rate specified by + * List of historical servers to 'decommission'. Coordinator will not assign new segments to 'decommissioning' servers, + * and segments will be moved away from them to be placed on non-decommissioning servers at the maximum rate specified by * {@link CoordinatorDynamicConfig#getDecommissioningMaxPercentOfMaxSegmentsToMove}. 
* * @return list of host:port entries @@ -241,21 +243,23 @@ public int getMaxSegmentsInNodeLoadingQueue() public Set getDecommissioningNodes() { return decommissioningNodes; + } /** - * The maximum number of segments that may be moved away from 'decommissioning' servers to non-decommissioning - * (that is, active) servers during one Coordinator's run. This value is relative to the total maximum segment - * movements allowed during one run which is determined by `{@link CoordinatorDynamicConfig#getMaxSegmentsToMove()}. - * - * If `decommissioningMaxPercentOfMaxSegmentsToMove` is 0, segments will neither be moved from _or to_ 'decommissioning' - * servers, effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or - * assignment by load rules. Decommissioning can also become stalled if there are no available active servers to place - * the segments. By leveraging decommissioning percent, an operator can prevent active servers from overload by - * prioritizing balancing, or decrease decommissioning time instead. The value should be between 0 and 100. + * The percent of {@link CoordinatorDynamicConfig#getMaxSegmentsToMove()} that determines the maximum number of + * segments that may be moved away from 'decommissioning' servers (specified by + * {@link CoordinatorDynamicConfig#getDecommissioningNodes()}) to non-decommissioning servers during one Coordinator + * balancer run. If this value is 0, segments will neither be moved from or to 'decommissioning' servers, effectively + * putting them in a sort of "maintenance" mode that will not participate in balancing or assignment by load rules. + * Decommissioning can also become stalled if there are no available active servers to place the segments. By + * adjusting this value, an operator can prevent active servers from overload by prioritizing balancing, or + * decrease decommissioning time instead. 
* * @return number in range [0, 100] */ + @Min(0) + @Max(100) @JsonProperty public int getDecommissioningMaxPercentOfMaxSegmentsToMove() { diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java index c5e5a3f11c86..dbd3048e5396 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorBalancerTest.java @@ -269,7 +269,7 @@ public void testMaxDecommissioningMaxPercentOfMaxSegmentsToMove() } /** - * Should balance segments as usual (ignoring percent) with empty decommissioningList. + * Should balance segments as usual (ignoring percent) with empty decommissioningNodes. */ @Test public void testMoveDecommissioningMaxPercentOfMaxSegmentsToMoveWithNoDecommissioning() diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java index 4e44de56380c..a8793b2a6474 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java @@ -741,7 +741,7 @@ public void testLoadDecommissioning() * Should not load a segment to the server that is decommssioning. */ @Test - public void testLoadReplicaDuringDecomissioning() + public void testLoadReplicaDuringDecommissioning() { EasyMock.expect(throttler.canCreateReplicant(EasyMock.anyString())).andReturn(true).anyTimes();