From d48e7f7ec1b0f462f543b82bc569d6ac1e32c6ae Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Thu, 5 Sep 2019 11:51:40 +0800 Subject: [PATCH 01/19] HDDS-1577. Add default pipeline placement policy implementation. (#1366) (cherry picked from commit b640a5f6d53830aee4b9c2a7d17bf57c987962cd) --- .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 5 + .../src/main/resources/ozone-default.xml | 7 + .../hadoop/hdds/scm/node/NodeManager.java | 14 + .../hdds/scm/node/NodeStateManager.java | 9 + .../hadoop/hdds/scm/node/SCMNodeManager.java | 19 + .../hdds/scm/node/states/Node2ObjectsMap.java | 4 +- .../scm/node/states/Node2PipelineMap.java | 12 +- .../scm/pipeline/PipelinePlacementPolicy.java | 338 ++++++++++++++++++ .../hdds/scm/container/MockNodeManager.java | 36 +- .../pipeline/TestPipelinePlacementPolicy.java | 197 ++++++++++ .../testutils/ReplicationNodeManagerMock.java | 16 + 11 files changed, 654 insertions(+), 3 deletions(-) create mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java create mode 100644 hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index d0ac0667d5e8..9fa71add43f9 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -292,6 +292,11 @@ public final class ScmConfigKeys { public static final String OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT = "ozone.scm.pipeline.owner.container.count"; public static final int OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT = 3; + // Pipeline placement policy: + // the max number of pipelines can a single datanode be engaged in. + public static final String OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT = + "ozone.scm.datanode.max.pipeline.engagement"; + public static final int OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT = 5; public static final String OZONE_SCM_KEY_VALUE_CONTAINER_DELETION_CHOOSING_POLICY = diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index ecc25e93cb60..ba465c57255f 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -788,6 +788,13 @@ Number of containers per owner in a pipeline. + + ozone.scm.datanode.max.pipeline.engagement + 5 + OZONE, SCM, PIPELINE + Max number of pipelines per datanode can be engaged in. 
+ + ozone.scm.container.size 5GB diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java index fd8bb87ceb12..37562fe9f293 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java @@ -19,6 +19,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; @@ -117,6 +118,13 @@ public interface NodeManager extends StorageContainerNodeProtocol, */ Set getPipelines(DatanodeDetails datanodeDetails); + /** + * Get the count of pipelines a datanodes is associated with. + * @param datanodeDetails DatanodeDetails + * @return The number of pipelines + */ + int getPipelinesCount(DatanodeDetails datanodeDetails); + /** * Add pipeline information in the NodeManager. * @param pipeline - Pipeline to be added @@ -199,4 +207,10 @@ void processNodeReport(DatanodeDetails datanodeDetails, * @return the given datanode, or empty list if none found */ List getNodesByAddress(String address); + + /** + * Get cluster map as in network topology for this node manager. + * @return cluster map + */ + NetworkTopology getClusterNetworkTopologyMap(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java index 954cb0e8ea46..9d2a9f224cd2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java @@ -283,6 +283,15 @@ public void addPipeline(Pipeline pipeline) { node2PipelineMap.addPipeline(pipeline); } + /** + * Get the count of pipelines associated to single datanode. + * @param datanodeDetails single datanode + * @return number of pipelines associated with it + */ + public int getPipelinesCount(DatanodeDetails datanodeDetails) { + return node2PipelineMap.getPipelinesCount(datanodeDetails.getUuid()); + } + /** * Get information about the node. * diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index d84b75b7e65b..46534fbf5fe5 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -511,6 +511,16 @@ public Set getPipelines(DatanodeDetails datanodeDetails) { return nodeStateManager.getPipelineByDnID(datanodeDetails.getUuid()); } + /** + * Get the count of pipelines a datanodes is associated with. + * @param datanodeDetails DatanodeDetails + * @return The number of pipelines + */ + @Override + public int getPipelinesCount(DatanodeDetails datanodeDetails) { + return nodeStateManager.getPipelinesCount(datanodeDetails); + } + /** * Add pipeline information in the NodeManager. 
* @@ -645,6 +655,15 @@ public List getNodesByAddress(String address) { return results; } + /** + * Get cluster map as in network topology for this node manager. + * @return cluster map + */ + @Override + public NetworkTopology getClusterNetworkTopologyMap() { + return clusterMap; + } + private String nodeResolve(String hostname) { List hosts = new ArrayList<>(1); hosts.add(hostname); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ObjectsMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ObjectsMap.java index 37525b0076e8..57a377d998f4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ObjectsMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ObjectsMap.java @@ -67,6 +67,7 @@ public boolean isKnownDatanode(UUID datanodeID) { * @param datanodeID -- Datanode UUID * @param containerIDs - List of ContainerIDs. */ + @VisibleForTesting public void insertNewDatanode(UUID datanodeID, Set containerIDs) throws SCMException { Preconditions.checkNotNull(containerIDs); @@ -83,7 +84,8 @@ public void insertNewDatanode(UUID datanodeID, Set containerIDs) * * @param datanodeID - Datanode ID. */ - void removeDatanode(UUID datanodeID) { + @VisibleForTesting + public void removeDatanode(UUID datanodeID) { Preconditions.checkNotNull(datanodeID); dn2ObjectMap.computeIfPresent(datanodeID, (k, v) -> null); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java index f8633f9fcbcd..714188dbf78e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java @@ -42,7 +42,7 @@ public Node2PipelineMap() { } /** - * Returns null if there no pipelines associated with this datanode ID. + * Returns null if there are no pipelines associated with this datanode ID. * * @param datanode - UUID * @return Set of pipelines or Null. @@ -51,6 +51,16 @@ public Set getPipelines(UUID datanode) { return getObjects(datanode); } + /** + * Return 0 if there are no pipelines associated with this datanode ID. + * @param datanode - UUID + * @return Number of pipelines or 0. + */ + public int getPipelinesCount(UUID datanode) { + Set pipelines = getObjects(datanode); + return pipelines == null ? 0 : pipelines.size(); + } + /** * Adds a pipeline entry to a given dataNode in the map. * diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java new file mode 100644 index 000000000000..cb9954da2964 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -0,0 +1,338 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.pipeline; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMCommonPolicy; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.net.NetworkTopology; +import org.apache.hadoop.hdds.scm.net.Node; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.stream.Collectors; + +/** + * Pipeline placement policy that choose datanodes based on load balancing + * and network topology to supply pipeline creation. + *
+ * 1. get a list of healthy nodes + * 2. filter out nodes that are not too heavily engaged in other pipelines + * 3. Choose an anchor node among the viable nodes. + * 4. Choose other nodes around the anchor node based on network topology + */ +public final class PipelinePlacementPolicy extends SCMCommonPolicy { + @VisibleForTesting + static final Logger LOG = + LoggerFactory.getLogger(PipelinePlacementPolicy.class); + private final NodeManager nodeManager; + private final Configuration conf; + private final int heavyNodeCriteria; + + /** + * Constructs a pipeline placement with considering network topology, + * load balancing and rack awareness. + * + * @param nodeManager Node Manager + * @param conf Configuration + */ + public PipelinePlacementPolicy( + final NodeManager nodeManager, final Configuration conf) { + super(nodeManager, conf); + this.nodeManager = nodeManager; + this.conf = conf; + heavyNodeCriteria = conf.getInt( + ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, + ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT); + } + + /** + * Returns true if this node meets the criteria. + * + * @param datanodeDetails DatanodeDetails + * @return true if we have enough space. + */ + @VisibleForTesting + boolean meetCriteria(DatanodeDetails datanodeDetails, long heavyNodeLimit) { + return (nodeManager.getPipelinesCount(datanodeDetails) <= heavyNodeLimit); + } + + /** + * Filter out viable nodes based on + * 1. nodes that are healthy + * 2. nodes that are not too heavily engaged in other pipelines + * + * @param excludedNodes - excluded nodes + * @param nodesRequired - number of datanodes required. + * @return a list of viable nodes + * @throws SCMException when viable nodes are not enough in numbers + */ + List filterViableNodes( + List excludedNodes, int nodesRequired) + throws SCMException { + // get nodes in HEALTHY state + List healthyNodes = + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); + if (excludedNodes != null) { + healthyNodes.removeAll(excludedNodes); + } + String msg; + if (healthyNodes.size() == 0) { + msg = "No healthy node found to allocate pipeline."; + LOG.error(msg); + throw new SCMException(msg, SCMException.ResultCodes + .FAILED_TO_FIND_HEALTHY_NODES); + } + + if (healthyNodes.size() < nodesRequired) { + msg = String.format("Not enough healthy nodes to allocate pipeline. %d " + + " datanodes required. Found %d", + nodesRequired, healthyNodes.size()); + LOG.error(msg); + throw new SCMException(msg, + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + + // filter nodes that meet the size and pipeline engagement criteria. + // Pipeline placement doesn't take node space left into account. + List healthyList = healthyNodes.stream().filter(d -> + meetCriteria(d, heavyNodeCriteria)).collect(Collectors.toList()); + + if (healthyList.size() < nodesRequired) { + msg = String.format("Unable to find enough nodes that meet " + + "the criteria that cannot engage in more than %d pipelines." + + " Nodes required: %d Found: %d", + heavyNodeCriteria, nodesRequired, healthyList.size()); + LOG.error(msg); + throw new SCMException(msg, + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + return healthyList; + } + + /** + * Pipeline placement choose datanodes to join the pipeline. + * + * @param excludedNodes - excluded nodes + * @param favoredNodes - list of nodes preferred. + * @param nodesRequired - number of datanodes required. + * @param sizeRequired - size required for the container or block. 
+ * @return a list of chosen datanodeDetails + * @throws SCMException when chosen nodes are not enough in numbers + */ + @Override + public List chooseDatanodes( + List excludedNodes, List favoredNodes, + int nodesRequired, final long sizeRequired) throws SCMException { + // get a list of viable nodes based on criteria + List healthyNodes = + filterViableNodes(excludedNodes, nodesRequired); + + List results = new ArrayList<>(); + + // Randomly picks nodes when all nodes are equal. + // This happens when network topology is absent or + // all nodes are on the same rack. + if (checkAllNodesAreEqual(nodeManager.getClusterNetworkTopologyMap())) { + LOG.info("All nodes are considered equal. Now randomly pick nodes. " + + "Required nodes: {}", nodesRequired); + results = super.getResultSet(nodesRequired, healthyNodes); + if (results.size() < nodesRequired) { + LOG.error("Unable to find the required number of healthy nodes that " + + "meet the criteria. Required nodes: {}, Found nodes: {}", + nodesRequired, results.size()); + throw new SCMException("Unable to find required number of nodes.", + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + return results; + } + + // Since nodes are widely distributed, the results should be selected + // base on distance in topology, rack awareness and load balancing. + List exclude = new ArrayList<>(); + exclude.addAll(excludedNodes); + // First choose an anchor nodes randomly + DatanodeDetails anchor = chooseNode(healthyNodes); + if (anchor == null) { + LOG.error("Unable to find the first healthy nodes that " + + "meet the criteria. Required nodes: {}, Found nodes: {}", + nodesRequired, results.size()); + throw new SCMException("Unable to find required number of nodes.", + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + + results.add(anchor); + exclude.add(anchor); + nodesRequired--; + + // Choose the second node on different racks from anchor. + DatanodeDetails nodeOnDifferentRack = chooseNodeBasedOnRackAwareness( + healthyNodes, excludedNodes, + nodeManager.getClusterNetworkTopologyMap(), anchor); + if (nodeOnDifferentRack == null) { + LOG.error("Unable to find nodes on different racks that " + + "meet the criteria. Required nodes: {}, Found nodes: {}", + nodesRequired, results.size()); + throw new SCMException("Unable to find required number of nodes.", + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + + results.add(nodeOnDifferentRack); + exclude.add(nodeOnDifferentRack); + nodesRequired--; + + // Then choose nodes close to anchor based on network topology + for (int x = 0; x < nodesRequired; x++) { + // invoke the choose function defined in the derived classes. + DatanodeDetails pick = chooseNodeFromNetworkTopology( + nodeManager.getClusterNetworkTopologyMap(), anchor, exclude); + if (pick != null) { + results.add(pick); + // exclude the picked node for next time + exclude.add(pick); + } + } + + if (results.size() < nodesRequired) { + LOG.error("Unable to find the required number of healthy nodes that " + + "meet the criteria. Required nodes: {}, Found nodes: {}", + nodesRequired, results.size()); + throw new SCMException("Unable to find required number of nodes.", + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + return results; + } + + /** + * Find a node from the healthy list and return it after removing it from the + * list that we are operating on. + * + * @param healthyNodes - Set of healthy nodes we can choose from. 
+ * @return chosen datanodDetails + */ + @Override + public DatanodeDetails chooseNode( + List healthyNodes) { + int firstNodeNdx = getRand().nextInt(healthyNodes.size()); + int secondNodeNdx = getRand().nextInt(healthyNodes.size()); + + DatanodeDetails datanodeDetails; + // There is a possibility that both numbers will be same. + // if that is so, we just return the node. + if (firstNodeNdx == secondNodeNdx) { + datanodeDetails = healthyNodes.get(firstNodeNdx); + } else { + DatanodeDetails firstNodeDetails = healthyNodes.get(firstNodeNdx); + DatanodeDetails secondNodeDetails = healthyNodes.get(secondNodeNdx); + SCMNodeMetric firstNodeMetric = + nodeManager.getNodeStat(firstNodeDetails); + SCMNodeMetric secondNodeMetric = + nodeManager.getNodeStat(secondNodeDetails); + datanodeDetails = firstNodeMetric.isGreater(secondNodeMetric.get()) + ? firstNodeDetails : secondNodeDetails; + } + // the pick is decided and it should be removed from candidates. + healthyNodes.remove(datanodeDetails); + return datanodeDetails; + } + + /** + * Choose node on different racks as anchor is on based on rack awareness. + * If a node on different racks cannot be found, then return a random node. + * @param healthyNodes healthy nodes + * @param excludedNodes excluded nodes + * @param networkTopology network topology + * @param anchor anchor node + * @return a node on different rack + */ + @VisibleForTesting + protected DatanodeDetails chooseNodeBasedOnRackAwareness( + List healthyNodes, List excludedNodes, + NetworkTopology networkTopology, DatanodeDetails anchor) { + Preconditions.checkArgument(networkTopology != null); + if (checkAllNodesAreEqual(networkTopology)) { + return null; + } + + for (DatanodeDetails node : healthyNodes) { + if (excludedNodes.contains(node) + || networkTopology.isSameParent(anchor, node)) { + continue; + } else { + // the pick is decided and it should be removed from candidates. + healthyNodes.remove(node); + return node; + } + } + return null; + } + + /** + * Check if all nodes are equal in topology. + * They are equal when network topology is absent or there are on + * the same rack. + * @param topology network topology + * @return true when all nodes are equal + */ + private boolean checkAllNodesAreEqual(NetworkTopology topology) { + if (topology == null) { + return true; + } + return (topology.getNumOfNodes(topology.getMaxLevel() - 1) == 1); + } + + /** + * Choose node based on network topology. 
+ * @param networkTopology network topology + * @param anchor anchor datanode to start with + * @param excludedNodes excluded datanodes + * @return chosen datanode + */ + @VisibleForTesting + protected DatanodeDetails chooseNodeFromNetworkTopology( + NetworkTopology networkTopology, DatanodeDetails anchor, + List excludedNodes) { + Preconditions.checkArgument(networkTopology != null); + + Collection excluded = new ArrayList<>(); + if (excludedNodes != null && excludedNodes.size() != 0) { + excluded.addAll(excludedNodes); + } + excluded.add(anchor); + + Node pick = networkTopology.chooseRandom( + anchor.getNetworkLocation(), excluded); + DatanodeDetails pickedNode = (DatanodeDetails) pick; + // exclude the picked node for next time + if (excludedNodes != null) { + excludedNodes.add(pickedNode); + } + return pickedNode; + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java index 06dc67535720..bca4189072d2 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java @@ -17,10 +17,12 @@ package org.apache.hadoop.hdds.scm.container; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.PipelineReportsProto; import org.apache.hadoop.hdds.scm.net.NetConstants; import org.apache.hadoop.hdds.scm.net.NetworkTopology; +import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl; import org.apache.hadoop.hdds.scm.net.Node; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; @@ -86,7 +88,7 @@ public class MockNodeManager implements NodeManager { private final SCMNodeStat aggregateStat; private boolean safemode; private final Map> commandMap; - private final Node2PipelineMap node2PipelineMap; + private Node2PipelineMap node2PipelineMap; private final Node2ContainerMap node2ContainerMap; private NetworkTopology clusterMap; private ConcurrentMap> dnsToUuidMap; @@ -100,6 +102,7 @@ public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { this.node2ContainerMap = new Node2ContainerMap(); this.dnsToUuidMap = new ConcurrentHashMap<>(); aggregateStat = new SCMNodeStat(); + clusterMap = new NetworkTopologyImpl(new Configuration()); if (initializeFakeNodes) { for (int x = 0; x < nodeCount; x++) { DatanodeDetails dd = MockDatanodeDetails.randomDatanodeDetails(); @@ -250,6 +253,16 @@ public Set getPipelines(DatanodeDetails dnId) { return node2PipelineMap.getPipelines(dnId.getUuid()); } + /** + * Get the count of pipelines a datanodes is associated with. + * @param datanodeDetails DatanodeDetails + * @return The number of pipelines + */ + @Override + public int getPipelinesCount(DatanodeDetails datanodeDetails) { + return node2PipelineMap.getPipelinesCount(datanodeDetails.getUuid()); + } + /** * Add pipeline information in the NodeManager. * @param pipeline - Pipeline to be added @@ -259,6 +272,22 @@ public void addPipeline(Pipeline pipeline) { node2PipelineMap.addPipeline(pipeline); } + /** + * Get the entire Node2PipelineMap. + * @return Node2PipelineMap + */ + public Node2PipelineMap getNode2PipelineMap() { + return node2PipelineMap; + } + + /** + * Set the Node2PipelineMap. 
+ * @param node2PipelineMap Node2PipelineMap + */ + public void setNode2PipelineMap(Node2PipelineMap node2PipelineMap) { + this.node2PipelineMap = node2PipelineMap; + } + /** * Remove a pipeline information from the NodeManager. * @param pipeline - Pipeline to be removed @@ -517,6 +546,11 @@ public List getNodesByAddress(String address) { return results; } + @Override + public NetworkTopology getClusterNetworkTopologyMap() { + return clusterMap; + } + public void setNetworkTopology(NetworkTopology topology) { this.clusterMap = topology; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java new file mode 100644 index 000000000000..2e0d0b179c64 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.pipeline; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.container.MockNodeManager; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.net.*; +import org.apache.hadoop.hdds.scm.node.states.Node2PipelineMap; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.*; +import java.util.stream.Collectors; + +/** + * Test for PipelinePlacementPolicy. + */ +public class TestPipelinePlacementPolicy { + private MockNodeManager nodeManager; + private PipelinePlacementPolicy placementPolicy; + private static final int PIPELINE_PLACEMENT_MAX_NODES_COUNT = 10; + + @Before + public void init() throws Exception { + nodeManager = new MockNodeManager(true, + PIPELINE_PLACEMENT_MAX_NODES_COUNT); + placementPolicy = + new PipelinePlacementPolicy(nodeManager, new OzoneConfiguration()); + } + + @Test + public void testChooseNodeBasedOnNetworkTopology() { + List healthyNodes = + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); + DatanodeDetails anchor = placementPolicy.chooseNode(healthyNodes); + // anchor should be removed from healthyNodes after being chosen. + Assert.assertFalse(healthyNodes.contains(anchor)); + + List excludedNodes = + new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT); + DatanodeDetails nextNode = placementPolicy.chooseNodeFromNetworkTopology( + nodeManager.getClusterNetworkTopologyMap(), anchor, excludedNodes); + // excludedNodes should contain nextNode after being chosen. + Assert.assertTrue(excludedNodes.contains(nextNode)); + // nextNode should not be the same as anchor. 
+ Assert.assertTrue(anchor.getUuid() != nextNode.getUuid()); + } + + @Test + public void testChooseNodeBasedOnRackAwareness() { + List healthyNodes = overWriteLocationInNodes( + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY)); + DatanodeDetails anchor = placementPolicy.chooseNode(healthyNodes); + NetworkTopology topologyWithDifRacks = + createNetworkTopologyOnDifRacks(); + DatanodeDetails nextNode = placementPolicy.chooseNodeBasedOnRackAwareness( + healthyNodes, new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + topologyWithDifRacks, anchor); + Assert.assertFalse(topologyWithDifRacks.isSameParent(anchor, nextNode)); + } + + private final static Node[] NODES = new NodeImpl[] { + new NodeImpl("h1", "/r1", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h2", "/r1", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h3", "/r1", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h4", "/r1", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h5", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h6", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h7", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h8", "/r2", NetConstants.NODE_COST_DEFAULT), + }; + + + private NetworkTopology createNetworkTopologyOnDifRacks() { + NetworkTopology topology = new NetworkTopologyImpl(new Configuration()); + for (Node n : NODES) { + topology.add(n); + } + return topology; + } + + private List overWriteLocationInNodes( + List datanodes) { + List results = new ArrayList<>(datanodes.size()); + for (int i = 0; i < datanodes.size(); i++) { + DatanodeDetails datanode = datanodes.get(i); + DatanodeDetails result = DatanodeDetails.newBuilder() + .setUuid(datanode.getUuidString()) + .setHostName(datanode.getHostName()) + .setIpAddress(datanode.getIpAddress()) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.STANDALONE)) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.RATIS)) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.REST)) + .setNetworkLocation(NODES[i].getNetworkLocation()).build(); + results.add(result); + } + return results; + } + + @Test + public void testHeavyNodeShouldBeExcluded() throws SCMException{ + List healthyNodes = + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); + int nodesRequired = healthyNodes.size()/2; + // only minority of healthy NODES are heavily engaged in pipelines. + int minorityHeavy = healthyNodes.size()/2 - 1; + List pickedNodes1 = placementPolicy.chooseDatanodes( + new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + nodesRequired, 0); + // modify node to pipeline mapping. + insertHeavyNodesIntoNodeManager(healthyNodes, minorityHeavy); + // NODES should be sufficient. + Assert.assertEquals(nodesRequired, pickedNodes1.size()); + // make sure pipeline placement policy won't select duplicated NODES. + Assert.assertTrue(checkDuplicateNodesUUID(pickedNodes1)); + + // majority of healthy NODES are heavily engaged in pipelines. + int majorityHeavy = healthyNodes.size()/2 + 2; + insertHeavyNodesIntoNodeManager(healthyNodes, majorityHeavy); + boolean thrown = false; + List pickedNodes2 = null; + try { + pickedNodes2 = placementPolicy.chooseDatanodes( + new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + nodesRequired, 0); + } catch (SCMException e) { + Assert.assertFalse(thrown); + thrown = true; + } + // NODES should NOT be sufficient and exception should be thrown. 
+ Assert.assertNull(pickedNodes2); + Assert.assertTrue(thrown); + } + + private boolean checkDuplicateNodesUUID(List nodes) { + HashSet uuids = nodes.stream(). + map(DatanodeDetails::getUuid). + collect(Collectors.toCollection(HashSet::new)); + return uuids.size() == nodes.size(); + } + + private Set mockPipelineIDs(int count) { + Set pipelineIDs = new HashSet<>(count); + for (int i = 0; i < count; i++) { + pipelineIDs.add(PipelineID.randomId()); + } + return pipelineIDs; + } + + private void insertHeavyNodesIntoNodeManager( + List nodes, int heavyNodeCount) throws SCMException{ + if (nodes == null) { + throw new SCMException("", + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + + int considerHeavyCount = + ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT + 1; + + Node2PipelineMap mockMap = new Node2PipelineMap(); + for (DatanodeDetails node : nodes) { + // mock heavy node + if (heavyNodeCount > 0) { + mockMap.insertNewDatanode( + node.getUuid(), mockPipelineIDs(considerHeavyCount)); + heavyNodeCount--; + } else { + mockMap.insertNewDatanode(node.getUuid(), mockPipelineIDs(1)); + } + } + nodeManager.setNode2PipelineMap(mockMap); + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java index 0ecff3f541a7..7e8ec52bdf9a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.PipelineReportsProto; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; @@ -165,6 +166,16 @@ public Set getPipelines(DatanodeDetails dnId) { throw new UnsupportedOperationException("Not yet implemented"); } + /** + * Get the count of pipelines a datanodes is associated with. + * @param dnId DatanodeDetails + * @return The number of pipelines + */ + @Override + public int getPipelinesCount(DatanodeDetails dnId) { + throw new UnsupportedOperationException("Not yet implemented"); + } + /** * Add pipeline information in the NodeManager. * @param pipeline - Pipeline to be added @@ -327,4 +338,9 @@ public DatanodeDetails getNodeByUuid(String address) { public List getNodesByAddress(String address) { return new LinkedList<>(); } + + @Override + public NetworkTopology getClusterNetworkTopologyMap() { + return null; + } } From 260938ec5d89d79c1154c30779692fb60d116e07 Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Tue, 10 Sep 2019 20:15:51 +0800 Subject: [PATCH 02/19] HDDS-1571. Create an interface for pipeline placement policy to support network topologies. 
(#1395) (cherry picked from commit 753fc6703a39154ed6013e44dbae572391748906) --- ...cementPolicy.java => PlacementPolicy.java} | 12 +++--- .../placement/algorithms/package-info.java | 21 ---------- .../src/main/resources/ozone-default.xml | 6 ++- ...icy.java => SCMCommonPlacementPolicy.java} | 23 +++++----- .../scm/container/ReplicationManager.java | 13 +++--- .../ContainerPlacementPolicyFactory.java | 18 ++++---- .../SCMContainerPlacementCapacity.java | 4 +- .../SCMContainerPlacementRackAware.java | 12 +++--- .../SCMContainerPlacementRandom.java | 6 ++- .../scm/pipeline/PipelinePlacementPolicy.java | 42 +++++++++++-------- .../scm/pipeline/RatisPipelineProvider.java | 15 ++++--- .../scm/server/StorageContainerManager.java | 4 +- .../scm/container/TestReplicationManager.java | 7 ++-- .../TestContainerPlacementFactory.java | 7 ++-- .../hdds/scm/node/TestContainerPlacement.java | 5 +-- .../scm/safemode/TestSafeModeHandler.java | 5 +-- .../hadoop/ozone/TestContainerOperations.java | 7 +--- .../TestContainerStateMachineIdempotency.java | 5 +-- .../ozone/dn/scrubber/TestDataScrubber.java | 4 +- .../ozone/scm/TestContainerSmallFile.java | 4 +- .../TestGetCommittedBlockLengthAndPutKey.java | 5 +-- 21 files changed, 106 insertions(+), 119 deletions(-) rename hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/{container/placement/algorithms/ContainerPlacementPolicy.java => PlacementPolicy.java} (80%) delete mode 100644 hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/package-info.java rename hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/{container/placement/algorithms/SCMCommonPolicy.java => SCMCommonPlacementPolicy.java} (90%) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicy.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PlacementPolicy.java similarity index 80% rename from hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicy.java rename to hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PlacementPolicy.java index 52ce7964b676..f6a0e8bf7eb0 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicy.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PlacementPolicy.java @@ -15,7 +15,7 @@ * the License. */ -package org.apache.hadoop.hdds.scm.container.placement.algorithms; +package org.apache.hadoop.hdds.scm; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -23,14 +23,14 @@ import java.util.List; /** - * A ContainerPlacementPolicy support choosing datanodes to build replication - * pipeline with specified constraints. + * A PlacementPolicy support choosing datanodes to build + * pipelines or containers with specified constraints. */ -public interface ContainerPlacementPolicy { +public interface PlacementPolicy { /** - * Given the replication factor and size required, return set of datanodes - * that satisfy the nodes and size requirement. + * Given an initial set of datanodes and the size required, + * return set of datanodes that satisfy the nodes and size requirement. * * @param excludedNodes - list of nodes to be excluded. * @param favoredNodes - list of nodes preferred. 
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/package-info.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/package-info.java deleted file mode 100644 index dac4752fe66f..000000000000 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/package-info.java +++ /dev/null @@ -1,21 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.scm.container.placement.algorithms; -/** - Contains container placement policy interface definition. - **/ \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index ba465c57255f..4144500cdd99 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -776,9 +776,11 @@ OZONE, MANAGEMENT - The full name of class which implements org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy. + The full name of class which implements + org.apache.hadoop.hdds.scm.PlacementPolicy. The class decides which datanode will be used to host the container replica. If not set, - org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementRandom will be used as default value. + org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementRandom will be used as default + value. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java similarity index 90% rename from hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java rename to hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java index 77cdd83f7938..25457f72bc8c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java @@ -15,7 +15,7 @@ * the License. 
*/ -package org.apache.hadoop.hdds.scm.container.placement.algorithms; +package org.apache.hadoop.hdds.scm; import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; @@ -33,25 +33,25 @@ import java.util.stream.Collectors; /** - * SCM CommonPolicy implements a set of invariants which are common - * for all container placement policies, acts as the repository of helper + * This policy implements a set of invariants which are common + * for all basic placement policies, acts as the repository of helper * functions which are common to placement policies. */ -public abstract class SCMCommonPolicy implements ContainerPlacementPolicy { +public abstract class SCMCommonPlacementPolicy implements PlacementPolicy { @VisibleForTesting static final Logger LOG = - LoggerFactory.getLogger(SCMCommonPolicy.class); + LoggerFactory.getLogger(SCMCommonPlacementPolicy.class); private final NodeManager nodeManager; private final Random rand; private final Configuration conf; /** - * Constructs SCM Common Policy Class. + * Constructor. * * @param nodeManager NodeManager * @param conf Configuration class. */ - public SCMCommonPolicy(NodeManager nodeManager, Configuration conf) { + public SCMCommonPlacementPolicy(NodeManager nodeManager, Configuration conf) { this.nodeManager = nodeManager; this.rand = new Random(); this.conf = conf; @@ -85,7 +85,7 @@ public Configuration getConf() { } /** - * Given the replication factor and size required, return set of datanodes + * Given size required, return set of datanodes * that satisfy the nodes and size requirement. *
* Here are some invariants of container placement. @@ -149,7 +149,7 @@ public List chooseDatanodes( * @param datanodeDetails DatanodeDetails * @return true if we have enough space. */ - boolean hasEnoughSpace(DatanodeDetails datanodeDetails, + public boolean hasEnoughSpace(DatanodeDetails datanodeDetails, long sizeRequired) { SCMNodeMetric nodeMetric = nodeManager.getNodeStat(datanodeDetails); return (nodeMetric != null) && (nodeMetric.get() != null) @@ -164,7 +164,7 @@ boolean hasEnoughSpace(DatanodeDetails datanodeDetails, * @param nodesRequired - Nodes Required * @param healthyNodes - List of Nodes in the result set. * @return List of Datanodes that can be used for placement. - * @throws SCMException + * @throws SCMException SCMException */ public List getResultSet( int nodesRequired, List healthyNodes) @@ -190,8 +190,7 @@ public List getResultSet( /** * Choose a datanode according to the policy, this function is implemented - * by the actual policy class. For example, PlacementCapacity or - * PlacementRandom. + * by the actual policy class. * * @param healthyNodes - Set of healthy nodes we can choose from. * @return DatanodeDetails diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java index 9c1d9ad1b503..251d94315864 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java @@ -38,8 +38,9 @@ import org.apache.hadoop.hdds.conf.ConfigType; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.metrics2.MetricsCollector; @@ -85,7 +86,7 @@ public class ReplicationManager implements MetricsSource { * PlacementPolicy which is used to identify where a container * should be replicated. */ - private final ContainerPlacementPolicy containerPlacement; + private final PlacementPolicy containerPlacement; /** * EventPublisher to fire Replicate and Delete container events. 
@@ -131,12 +132,12 @@ public class ReplicationManager implements MetricsSource { * * @param conf OzoneConfiguration * @param containerManager ContainerManager - * @param containerPlacement ContainerPlacementPolicy + * @param containerPlacement PlacementPolicy * @param eventPublisher EventPublisher */ public ReplicationManager(final ReplicationManagerConfiguration conf, final ContainerManager containerManager, - final ContainerPlacementPolicy containerPlacement, + final PlacementPolicy containerPlacement, final EventPublisher eventPublisher, final LockManager lockManager) { this.containerManager = containerManager; @@ -476,7 +477,7 @@ private void forceCloseContainer(final ContainerInfo container, /** * If the given container is under replicated, identify a new set of - * datanode(s) to replicate the container using ContainerPlacementPolicy + * datanode(s) to replicate the container using PlacementPolicy * and send replicate container command to the identified datanode(s). * * @param container ContainerInfo diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java index 18ec2c385b0c..adaeb87fc9c0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdds.scm.container.placement.algorithms; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetworkTopology; @@ -34,22 +35,23 @@ public final class ContainerPlacementPolicyFactory { private static final Logger LOG = LoggerFactory.getLogger(ContainerPlacementPolicyFactory.class); - private static final Class + private static final Class OZONE_SCM_CONTAINER_PLACEMENT_IMPL_DEFAULT = SCMContainerPlacementRandom.class; private ContainerPlacementPolicyFactory() { } - public static ContainerPlacementPolicy getPolicy(Configuration conf, - final NodeManager nodeManager, NetworkTopology clusterMap, - final boolean fallback, SCMContainerPlacementMetrics metrics) - throws SCMException{ - final Class placementClass = conf + + public static PlacementPolicy getPolicy(Configuration conf, + final NodeManager nodeManager, NetworkTopology clusterMap, + final boolean fallback, SCMContainerPlacementMetrics metrics) + throws SCMException{ + final Class placementClass = conf .getClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, OZONE_SCM_CONTAINER_PLACEMENT_IMPL_DEFAULT, - ContainerPlacementPolicy.class); - Constructor constructor; + PlacementPolicy.class); + Constructor constructor; try { constructor = placementClass.getDeclaredConstructor(NodeManager.class, Configuration.class, NetworkTopology.class, boolean.class, diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java index 85d281cf6dc2..19093448b927 100644 --- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java @@ -21,6 +21,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetworkTopology; @@ -65,7 +66,8 @@ * little or no work and the cluster will achieve a balanced distribution * over time. */ -public final class SCMContainerPlacementCapacity extends SCMCommonPolicy { +public final class SCMContainerPlacementCapacity + extends SCMCommonPlacementPolicy { @VisibleForTesting static final Logger LOG = LoggerFactory.getLogger(SCMContainerPlacementCapacity.class); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java index 6d49459b739f..8933fe953a7f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java @@ -21,6 +21,7 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetConstants; import org.apache.hadoop.hdds.scm.net.NetworkTopology; @@ -45,7 +46,8 @@ * recommend to use this if the network topology has more layers. *
*/ -public final class SCMContainerPlacementRackAware extends SCMCommonPolicy { +public final class SCMContainerPlacementRackAware + extends SCMCommonPlacementPolicy { @VisibleForTesting static final Logger LOG = LoggerFactory.getLogger(SCMContainerPlacementRackAware.class); @@ -271,11 +273,9 @@ private Node chooseNode(List excludedNodes, Node affinityNode, throw new SCMException("No satisfied datanode to meet the" + " excludedNodes and affinityNode constrains.", null); } - if (hasEnoughSpace((DatanodeDetails)node, sizeRequired)) { - if (LOG.isDebugEnabled()) { - LOG.debug("Datanode {} is chosen for container. Required size is {}", - node.toString(), sizeRequired); - } + if (super.hasEnoughSpace((DatanodeDetails)node, sizeRequired)) { + LOG.debug("Datanode {} is chosen. Required size is {}", + node.toString(), sizeRequired); metrics.incrDatanodeChooseSuccessCount(); if (isFallbacked) { metrics.incrDatanodeChooseFallbackCount(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java index 6b1a5c8c6cb1..ce5d10d4e517 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java @@ -19,6 +19,8 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.node.NodeManager; @@ -37,8 +39,8 @@ * Balancer will need to support containers as a feature before this class * can be practically used. */ -public final class SCMContainerPlacementRandom extends SCMCommonPolicy - implements ContainerPlacementPolicy { +public final class SCMContainerPlacementRandom extends SCMCommonPlacementPolicy + implements PlacementPolicy { @VisibleForTesting static final Logger LOG = LoggerFactory.getLogger(SCMContainerPlacementRandom.class); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java index cb9954da2964..1983ed606dff 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -24,7 +24,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMCommonPolicy; +import org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetworkTopology; @@ -47,7 +47,7 @@ * 3. Choose an anchor node among the viable nodes. * 4. 
Choose other nodes around the anchor node based on network topology */ -public final class PipelinePlacementPolicy extends SCMCommonPolicy { +public final class PipelinePlacementPolicy extends SCMCommonPlacementPolicy { @VisibleForTesting static final Logger LOG = LoggerFactory.getLogger(PipelinePlacementPolicy.class); @@ -150,33 +150,41 @@ List filterViableNodes( public List chooseDatanodes( List excludedNodes, List favoredNodes, int nodesRequired, final long sizeRequired) throws SCMException { - // get a list of viable nodes based on criteria + // Get a list of viable nodes based on criteria + // and make sure excludedNodes are excluded from list. List healthyNodes = filterViableNodes(excludedNodes, nodesRequired); - - List results = new ArrayList<>(); - + // Randomly picks nodes when all nodes are equal. // This happens when network topology is absent or // all nodes are on the same rack. if (checkAllNodesAreEqual(nodeManager.getClusterNetworkTopologyMap())) { LOG.info("All nodes are considered equal. Now randomly pick nodes. " + "Required nodes: {}", nodesRequired); - results = super.getResultSet(nodesRequired, healthyNodes); - if (results.size() < nodesRequired) { - LOG.error("Unable to find the required number of healthy nodes that " + - "meet the criteria. Required nodes: {}, Found nodes: {}", - nodesRequired, results.size()); - throw new SCMException("Unable to find required number of nodes.", - SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); - } - return results; + return super.getResultSet(nodesRequired, healthyNodes); + } else { + // Since topology and rack awareness are available, picks nodes + // based on them. + return this.getResultSet(nodesRequired, healthyNodes); } + } + /** + * Get result set based on the pipeline placement algorithm which considers + * network topology and rack awareness. + * @param nodesRequired - Nodes Required + * @param healthyNodes - List of Nodes in the result set. + * @return a list of datanodes + * @throws SCMException SCMException + */ + @Override + public List getResultSet( + int nodesRequired, List healthyNodes) + throws SCMException { + List results = new ArrayList<>(nodesRequired); // Since nodes are widely distributed, the results should be selected // base on distance in topology, rack awareness and load balancing. List exclude = new ArrayList<>(); - exclude.addAll(excludedNodes); // First choose an anchor nodes randomly DatanodeDetails anchor = chooseNode(healthyNodes); if (anchor == null) { @@ -193,7 +201,7 @@ public List chooseDatanodes( // Choose the second node on different racks from anchor. 
DatanodeDetails nodeOnDifferentRack = chooseNodeBasedOnRackAwareness( - healthyNodes, excludedNodes, + healthyNodes, exclude, nodeManager.getClusterNetworkTopologyMap(), anchor); if (nodeOnDifferentRack == null) { LOG.error("Unable to find nodes on different racks that " + diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java index 6b93192219b4..dacc4ca0585c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java @@ -24,11 +24,10 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.container.placement.algorithms - .ContainerPlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms .SCMContainerPlacementRandom; import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.pipeline.Pipeline.PipelineState; import org.apache.hadoop.hdds.server.events.EventPublisher; @@ -94,16 +93,16 @@ public class RatisPipelineProvider implements PipelineProvider { * @return SCM container placement policy implementation instance. */ @SuppressWarnings("unchecked") - // TODO: should we rename ContainerPlacementPolicy to PipelinePlacementPolicy? - private static ContainerPlacementPolicy createContainerPlacementPolicy( + // TODO: should we rename PlacementPolicy to PipelinePlacementPolicy? 
+ private static PlacementPolicy createContainerPlacementPolicy( final NodeManager nodeManager, final Configuration conf) { - Class implClass = - (Class) conf.getClass( + Class implClass = + (Class) conf.getClass( ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, SCMContainerPlacementRandom.class); try { - Constructor ctor = + Constructor ctor = implClass.getDeclaredConstructor(NodeManager.class, Configuration.class); return ctor.newInstance(nodeManager, conf); @@ -116,7 +115,7 @@ private static ContainerPlacementPolicy createContainerPlacementPolicy( // LOG.error("Unhandled exception occurred, Placement policy will not " + // "be functional."); throw new IllegalArgumentException("Unable to load " + - "ContainerPlacementPolicy", e); + "PlacementPolicy", e); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 2dfde78dc562..53adcf27e018 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -58,7 +58,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerReportHandler; import org.apache.hadoop.hdds.scm.container.IncrementalContainerReportHandler; import org.apache.hadoop.hdds.scm.container.SCMContainerManager; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.metrics.ContainerStat; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMMetrics; import org.apache.hadoop.hdds.scm.container.ReplicationManager; @@ -391,7 +391,7 @@ private void initializeSystemManagers(OzoneConfiguration conf, } placementMetrics = SCMContainerPlacementMetrics.create(); - ContainerPlacementPolicy containerPlacementPolicy = + PlacementPolicy containerPlacementPolicy = ContainerPlacementPolicyFactory.getPolicy(conf, scmNodeManager, clusterMap, true, placementMetrics); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java index dadb3093261b..87d76558d27c 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java @@ -27,8 +27,7 @@ import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMCommandProto; import org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration; -import org.apache.hadoop.hdds.scm.container.placement.algorithms - .ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.server.events.EventHandler; @@ -66,7 +65,7 @@ public class TestReplicationManager { private ReplicationManager replicationManager; private ContainerStateManager containerStateManager; - private ContainerPlacementPolicy containerPlacementPolicy; + private PlacementPolicy containerPlacementPolicy; private EventQueue eventQueue; private DatanodeCommandHandler datanodeCommandHandler; @@ -93,7 +92,7 @@ 
public void setup() throws IOException, InterruptedException { .thenAnswer(invocation -> containerStateManager .getContainerReplicas((ContainerID)invocation.getArguments()[0])); - containerPlacementPolicy = Mockito.mock(ContainerPlacementPolicy.class); + containerPlacementPolicy = Mockito.mock(PlacementPolicy.class); Mockito.when(containerPlacementPolicy.chooseDatanodes( Mockito.anyListOf(DatanodeDetails.class), diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java index b685ba903d9b..a454de2672a7 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; import org.apache.hadoop.hdds.scm.exceptions.SCMException; @@ -99,7 +100,7 @@ public void testRackAwarePolicy() throws IOException { when(nodeManager.getNodeStat(datanodes.get(4))) .thenReturn(new SCMNodeMetric(storageCapacity, 70L, 30L)); - ContainerPlacementPolicy policy = ContainerPlacementPolicyFactory + PlacementPolicy policy = ContainerPlacementPolicyFactory .getPolicy(conf, nodeManager, cluster, true, SCMContainerPlacementMetrics.create()); @@ -117,7 +118,7 @@ public void testRackAwarePolicy() throws IOException { @Test public void testDefaultPolicy() throws IOException { - ContainerPlacementPolicy policy = ContainerPlacementPolicyFactory + PlacementPolicy policy = ContainerPlacementPolicyFactory .getPolicy(conf, null, null, true, null); Assert.assertSame(SCMContainerPlacementRandom.class, policy.getClass()); } @@ -125,7 +126,7 @@ public void testDefaultPolicy() throws IOException { /** * A dummy container placement implementation for test. 
*/ - public static class DummyImpl implements ContainerPlacementPolicy { + public static class DummyImpl implements PlacementPolicy { @Override public List chooseDatanodes( List excludedNodes, List favoredNodes, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java index 39058234a89f..88085467c5dd 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java @@ -27,8 +27,7 @@ import org.apache.hadoop.hdds.scm.XceiverClientManager; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.SCMContainerManager; -import org.apache.hadoop.hdds.scm.container.placement.algorithms - .ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms .SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -135,7 +134,7 @@ public void testContainerPlacementCapacity() throws IOException, conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); conf.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class); + SCMContainerPlacementCapacity.class, PlacementPolicy.class); SCMNodeManager nodeManager = createNodeManager(conf); SCMContainerManager containerManager = diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSafeModeHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSafeModeHandler.java index 5572e9aa1ef4..4ad3456e7ba8 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSafeModeHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSafeModeHandler.java @@ -25,8 +25,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ReplicationManager; import org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration; -import org.apache.hadoop.hdds.scm.container.placement.algorithms - .ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; import org.apache.hadoop.hdds.scm.pipeline.SCMPipelineManager; @@ -70,7 +69,7 @@ public void setup(boolean enabled) { .thenReturn(new HashSet<>()); replicationManager = new ReplicationManager( new ReplicationManagerConfiguration(), - containerManager, Mockito.mock(ContainerPlacementPolicy.class), + containerManager, Mockito.mock(PlacementPolicy.class), eventQueue, new LockManager(configuration)); scmPipelineManager = Mockito.mock(SCMPipelineManager.class); blockManager = Mockito.mock(BlockManagerImpl.class); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java index eadb520b7915..17c090d9a5a8 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java @@ -17,11 
+17,10 @@ */ package org.apache.hadoop.ozone; -import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.client.ContainerOperationClient; @@ -30,7 +29,6 @@ import org.junit.BeforeClass; import org.junit.Test; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE; import static org.junit.Assert.assertEquals; /** @@ -47,8 +45,7 @@ public class TestContainerOperations { public static void setup() throws Exception { ozoneConf = new OzoneConfiguration(); ozoneConf.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class); - ozoneConf.setStorageSize(OZONE_SCM_CONTAINER_SIZE, 5, StorageUnit.GB); + SCMContainerPlacementCapacity.class, PlacementPolicy.class); cluster = MiniOzoneCluster.newBuilder(ozoneConf).setNumDatanodes(3).build(); storageClient = new ContainerOperationClient(ozoneConf); cluster.waitForClusterToBeReady(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerStateMachineIdempotency.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerStateMachineIdempotency.java index 548f9b600d86..b0b3fbf73bbb 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerStateMachineIdempotency.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerStateMachineIdempotency.java @@ -29,8 +29,7 @@ import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; -import org.apache.hadoop.hdds.scm.container.placement.algorithms. - ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms. SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.scm.protocolPB. 
@@ -59,7 +58,7 @@ public class TestContainerStateMachineIdempotency { public static void init() throws Exception { ozoneConfig = new OzoneConfiguration(); ozoneConfig.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class); + SCMContainerPlacementCapacity.class, PlacementPolicy.class); cluster = MiniOzoneCluster.newBuilder(ozoneConfig).setNumDatanodes(3).build(); cluster.waitForClusterToBeReady(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scrubber/TestDataScrubber.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scrubber/TestDataScrubber.java index 108d20431d7a..dd29189d0e40 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scrubber/TestDataScrubber.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scrubber/TestDataScrubber.java @@ -30,7 +30,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerReplica; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB; import org.apache.hadoop.ozone.HddsDatanodeService; @@ -84,7 +84,7 @@ public static void init() throws Exception { ozoneConfig = new OzoneConfiguration(); ozoneConfig.set(HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL, "1s"); ozoneConfig.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class); + SCMContainerPlacementCapacity.class, PlacementPolicy.class); ozoneConfig.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false); cluster = MiniOzoneCluster.newBuilder(ozoneConfig).setNumDatanodes(1) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestContainerSmallFile.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestContainerSmallFile.java index 9d187ff7d561..3b6da67a338c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestContainerSmallFile.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestContainerSmallFile.java @@ -24,7 +24,7 @@ import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.protocolPB @@ -60,7 +60,7 @@ public class TestContainerSmallFile { public static void init() throws Exception { ozoneConfig = new OzoneConfiguration(); ozoneConfig.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class); + SCMContainerPlacementCapacity.class, PlacementPolicy.class); cluster = MiniOzoneCluster.newBuilder(ozoneConfig).setNumDatanodes(3) .build(); cluster.waitForClusterToBeReady(); diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestGetCommittedBlockLengthAndPutKey.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestGetCommittedBlockLengthAndPutKey.java index cdd3f8a87578..0cb9329fda92 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestGetCommittedBlockLengthAndPutKey.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestGetCommittedBlockLengthAndPutKey.java @@ -31,8 +31,7 @@ import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.container.common.helpers. StorageContainerException; -import org.apache.hadoop.hdds.scm.container.placement.algorithms. - ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms. SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.scm.protocolPB. @@ -65,7 +64,7 @@ public class TestGetCommittedBlockLengthAndPutKey { public static void init() throws Exception { ozoneConfig = new OzoneConfiguration(); ozoneConfig.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class); + SCMContainerPlacementCapacity.class, PlacementPolicy.class); cluster = MiniOzoneCluster.newBuilder(ozoneConfig).setNumDatanodes(3).build(); cluster.waitForClusterToBeReady(); From 7576f4d10adf6ec0cc5d2baf8ee9547b7018714a Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Fri, 13 Sep 2019 07:01:16 +0800 Subject: [PATCH 03/19] HDDS-2089: Add createPipeline CLI. (#1418) (cherry picked from commit 326b5acd4a63fe46821919322867f5daff30750c) --- .../apache/hadoop/ozone/audit/SCMAction.java | 1 + .../scm/pipeline/SimplePipelineProvider.java | 2 +- .../scm/server/SCMClientProtocolServer.java | 8 +-- .../pipeline/CreatePipelineSubcommand.java | 71 +++++++++++++++++++ .../scm/cli/pipeline/PipelineCommands.java | 1 + 5 files changed, 78 insertions(+), 5 deletions(-) create mode 100644 hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java index c3e9440425fd..fada2d8de8db 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java @@ -31,6 +31,7 @@ public enum SCMAction implements AuditAction { GET_CONTAINER, GET_CONTAINER_WITH_PIPELINE, LIST_CONTAINER, + CREATE_PIPELINE, LIST_PIPELINE, CLOSE_PIPELINE, ACTIVATE_PIPELINE, diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SimplePipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SimplePipelineProvider.java index 00cb7ae164b3..a772a972529f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SimplePipelineProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SimplePipelineProvider.java @@ -48,7 +48,7 @@ public Pipeline create(ReplicationFactor factor) throws IOException { String e = String .format("Cannot create pipeline of factor %d using %d nodes.", factor.getNumber(), dns.size()); - throw new IOException(e); + throw new InsufficientDatanodesException(e); } Collections.shuffle(dns); diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index b16f7a5f4adb..dad1622449dd 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -395,10 +395,10 @@ public void closeContainer(long containerID) throws IOException { public Pipeline createReplicationPipeline(HddsProtos.ReplicationType type, HddsProtos.ReplicationFactor factor, HddsProtos.NodePool nodePool) throws IOException { - // TODO: will be addressed in future patch. - // This is needed only for debugging purposes to make sure cluster is - // working correctly. - return null; + Pipeline result = scm.getPipelineManager().createPipeline(type, factor); + AUDIT.logWriteSuccess( + buildAuditMessageForSuccess(SCMAction.CREATE_PIPELINE, null)); + return result; } @Override diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java new file mode 100644 index 000000000000..edeb786726a9 --- /dev/null +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.cli.pipeline; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.cli.SCMCLI; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import picocli.CommandLine; + +import java.util.concurrent.Callable; + +/** + * Handler of createPipeline command. + */ +@CommandLine.Command( + name = "createPipeline", + description = "create pipeline", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class CreatePipelineSubcommand implements Callable { + @CommandLine.ParentCommand + private SCMCLI parent; + + @CommandLine.Option( + names = {"-t", "--replicationType"}, + description = "Replication type (STAND_ALONE, RATIS)", + defaultValue = "STAND_ALONE" + ) + private HddsProtos.ReplicationType type + = HddsProtos.ReplicationType.STAND_ALONE; + + @CommandLine.Option( + names = {"-f", "--replicationFactor"}, + description = "Replication factor (ONE, THREE)", + defaultValue = "ONE" + ) + private HddsProtos.ReplicationFactor factor + = HddsProtos.ReplicationFactor.ONE; + + @Override + public Void call() throws Exception { + if (type == HddsProtos.ReplicationType.CHAINED) { + throw new IllegalArgumentException(type.name() + + " is not supported yet."); + } + try (ScmClient scmClient = parent.createScmClient()) { + scmClient.createReplicationPipeline( + type, + factor, + HddsProtos.NodePool.getDefaultInstance()); + return null; + } + } +} \ No newline at end of file diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/PipelineCommands.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/PipelineCommands.java index 948a51a8eb58..0bdbc19fe44e 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/PipelineCommands.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/PipelineCommands.java @@ -37,6 +37,7 @@ ListPipelinesSubcommand.class, ActivatePipelineSubcommand.class, DeactivatePipelineSubcommand.class, + CreatePipelineSubcommand.class, ClosePipelineSubcommand.class }) public class PipelineCommands implements Callable { From 3749f40d28d441a68842a9e805a277c38a554a3f Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Tue, 29 Oct 2019 12:46:00 +0800 Subject: [PATCH 04/19] HDDS-1569 Support creating multiple pipelines with same datanode. Contributed by Li Cheng. 
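Reviewer note (illustrative, not part of the change): both limits introduced
below default to 0, i.e. disabled, so a deployment has to opt in explicitly.
A minimal sketch of enabling them on an OzoneConfiguration, using only the
keys added in this series; the chosen values are arbitrary examples:

    OzoneConfiguration conf = new OzoneConfiguration();
    // Cap each datanode at 5 pipelines; 0 (the default) disables the check.
    conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 5);
    // Cap how many Ratis pipelines may be OPEN in SCM; 0 means no cap.
    conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT, 3);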
This closes #28 --- .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 10 +- .../src/main/resources/ozone-default.xml | 15 +- .../hdds/scm/block/BlockManagerImpl.java | 5 + .../ContainerPlacementPolicyFactory.java | 8 +- .../scm/node/states/Node2PipelineMap.java | 2 +- .../pipeline/BackgroundPipelineCreator.java | 1 + .../scm/pipeline/PipelinePlacementPolicy.java | 89 +++++++++--- .../hdds/scm/pipeline/PipelineStateMap.java | 5 +- .../scm/pipeline/RatisPipelineProvider.java | 137 +++++++++++------- .../hdds/scm/pipeline/RatisPipelineUtils.java | 96 ++++++++++++ .../hdds/scm/pipeline/SCMPipelineManager.java | 13 +- .../hdds/scm/pipeline/SCMPipelineMetrics.java | 8 + .../safemode/HealthyPipelineSafeModeRule.java | 13 +- .../hdds/scm/node/TestDeadNodeHandler.java | 3 + .../pipeline/TestPipelinePlacementPolicy.java | 15 +- .../scm/pipeline/TestSCMPipelineManager.java | 17 ++- .../hadoop/fs/ozone/TestOzoneFsHAURLs.java | 3 + .../hdds/scm/pipeline/TestPipelineClose.java | 1 + .../TestRatisPipelineCreateAndDestroy.java | 24 ++- .../hdds/scm/pipeline/TestSCMRestart.java | 5 +- .../TestSCMSafeModeWithPipelineRules.java | 3 + .../apache/hadoop/ozone/MiniOzoneCluster.java | 12 ++ .../hadoop/ozone/MiniOzoneClusterImpl.java | 4 + .../client/rpc/Test2WayCommitInRatis.java | 1 + .../client/rpc/TestBlockOutputStream.java | 1 + .../TestBlockOutputStreamWithFailures.java | 7 +- .../ozone/client/rpc/TestCommitWatcher.java | 1 + .../rpc/TestContainerReplicationEndToEnd.java | 5 +- .../client/rpc/TestContainerStateMachine.java | 5 +- .../rpc/TestDeleteWithSlowFollower.java | 12 +- .../rpc/TestFailureHandlingByClient.java | 4 +- .../rpc/TestHybridPipelineOnDatanode.java | 3 +- .../ozone/client/rpc/TestKeyInputStream.java | 1 + .../TestMultiBlockWritesWithDnFailures.java | 8 +- .../TestOzoneClientRetriesOnException.java | 1 + .../rpc/TestOzoneRpcClientAbstract.java | 1 + .../ozone/client/rpc/TestWatchForCommit.java | 3 + .../TestCloseContainerByPipeline.java | 5 + .../hadoop/ozone/freon/TestDataValidate.java | 2 +- .../freon/TestFreonWithPipelineDestroy.java | 1 + ...estSCMContainerPlacementPolicyMetrics.java | 1 + .../hadoop/ozone/scm/node/TestQueryNode.java | 3 + 42 files changed, 419 insertions(+), 135 deletions(-) create mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 9fa71add43f9..15a47f348784 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -296,7 +296,15 @@ public final class ScmConfigKeys { // the max number of pipelines can a single datanode be engaged in. public static final String OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT = "ozone.scm.datanode.max.pipeline.engagement"; - public static final int OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT = 5; + // Setting to zero by default means this limit doesn't take effect. + public static final int OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT = 0; + + // Upper limit for how many pipelines can be created. + // Only for test purpose now. + public static final String OZONE_SCM_PIPELINE_NUMBER_LIMIT = + "ozone.scm.pipeline.number.limit"; + // Setting to zero by default means this limit doesn't take effect. 
+ public static final int OZONE_SCM_PIPELINE_NUMBER_LIMIT_DEFAULT = 0; public static final String OZONE_SCM_KEY_VALUE_CONTAINER_DELETION_CHOOSING_POLICY = diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 4144500cdd99..8bc2ea177734 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -791,10 +791,19 @@ - ozone.scm.datanode.max.pipeline.engagement - 5 + ozone.scm.datanode.max.pipeline.engagement + 0 + OZONE, SCM, PIPELINE + Max number of pipelines per datanode can be engaged in. + + + + ozone.scm.pipeline.number.limit + 0 OZONE, SCM, PIPELINE - Max number of pipelines per datanode can be engaged in. + Upper limit for how many pipelines can be OPEN in SCM. + 0 as default means there is no limit. Otherwise, the number is the limit + of max amount of pipelines which are OPEN. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java index b7a7525cbaca..cdc3878b3146 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java @@ -197,8 +197,13 @@ public AllocatedBlock allocateBlock(final long size, ReplicationType type, // TODO: #CLUTIL Remove creation logic when all replication types and // factors are handled by pipeline creator pipeline = pipelineManager.createPipeline(type, factor); + // wait until pipeline is ready pipelineManager.waitPipelineReady(pipeline.getId(), 0); + } catch (SCMException se) { + LOG.warn("Pipeline creation failed for type:{} factor:{}. " + + "Datanodes may be used up.", type, factor, se); + break; } catch (IOException e) { LOG.warn("Pipeline creation failed for type:{} factor:{}. 
Retrying " + "get pipelines call once.", type, factor, e); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java index adaeb87fc9c0..74431f9b05e8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java @@ -43,10 +43,10 @@ private ContainerPlacementPolicyFactory() { } - public static PlacementPolicy getPolicy(Configuration conf, - final NodeManager nodeManager, NetworkTopology clusterMap, - final boolean fallback, SCMContainerPlacementMetrics metrics) - throws SCMException{ + public static PlacementPolicy getPolicy( + Configuration conf, final NodeManager nodeManager, + NetworkTopology clusterMap, final boolean fallback, + SCMContainerPlacementMetrics metrics) throws SCMException{ final Class placementClass = conf .getClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, OZONE_SCM_CONTAINER_PLACEMENT_IMPL_DEFAULT, diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java index 714188dbf78e..18809ed4450a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java @@ -80,7 +80,7 @@ public synchronized void removePipeline(Pipeline pipeline) { dn2ObjectMap.computeIfPresent(dnId, (k, v) -> { v.remove(pipeline.getId()); - return v; + return v.isEmpty() ? 
null : v; }); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java index 30069876ab39..b663f2aa1bf2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java @@ -115,6 +115,7 @@ private void createPipelines() { if (scheduler.isClosed()) { break; } + pipelineManager.createPipeline(type, factor); } catch (IOException ioe) { break; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java index 1983ed606dff..23eb5745421f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -36,6 +36,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; /** @@ -52,6 +53,7 @@ public final class PipelinePlacementPolicy extends SCMCommonPlacementPolicy { static final Logger LOG = LoggerFactory.getLogger(PipelinePlacementPolicy.class); private final NodeManager nodeManager; + private final PipelineStateManager stateManager; private final Configuration conf; private final int heavyNodeCriteria; @@ -59,15 +61,17 @@ public final class PipelinePlacementPolicy extends SCMCommonPlacementPolicy { * Constructs a pipeline placement with considering network topology, * load balancing and rack awareness. * - * @param nodeManager Node Manager + * @param nodeManager NodeManager + * @param stateManager PipelineStateManager * @param conf Configuration */ - public PipelinePlacementPolicy( - final NodeManager nodeManager, final Configuration conf) { + public PipelinePlacementPolicy(final NodeManager nodeManager, + final PipelineStateManager stateManager, final Configuration conf) { super(nodeManager, conf); this.nodeManager = nodeManager; this.conf = conf; - heavyNodeCriteria = conf.getInt( + this.stateManager = stateManager; + this.heavyNodeCriteria = conf.getInt( ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT); } @@ -76,11 +80,46 @@ public PipelinePlacementPolicy( * Returns true if this node meets the criteria. * * @param datanodeDetails DatanodeDetails + * @param nodesRequired nodes required count * @return true if we have enough space. */ @VisibleForTesting - boolean meetCriteria(DatanodeDetails datanodeDetails, long heavyNodeLimit) { - return (nodeManager.getPipelinesCount(datanodeDetails) <= heavyNodeLimit); + boolean meetCriteria(DatanodeDetails datanodeDetails, int nodesRequired) { + if (heavyNodeCriteria == 0) { + // no limit applied. + return true; + } + // Datanodes from pipeline in some states can also be considered available + // for pipeline allocation. Thus the number of these pipeline shall be + // deducted from total heaviness calculation. 
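// Illustrative example (added for clarity, not in the original comment):
// with heavyNodeCriteria = 2, a datanode carrying one OPEN pipeline plus one
// CLOSED RATIS pipeline of the requested factor has an effective count of
// 2 - 1 = 1, which is still below the limit, so it remains viable.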
+ int pipelineNumDeductable = 0; + Set pipelines = nodeManager.getPipelines(datanodeDetails); + for (PipelineID pid : pipelines) { + Pipeline pipeline; + try { + pipeline = stateManager.getPipeline(pid); + } catch (PipelineNotFoundException e) { + LOG.error("Pipeline not found in pipeline state manager during" + + " pipeline creation. PipelineID: " + pid + + " exception: " + e.getMessage()); + continue; + } + if (pipeline != null && + pipeline.getFactor().getNumber() == nodesRequired && + pipeline.getType() == HddsProtos.ReplicationType.RATIS && + pipeline.getPipelineState() == Pipeline.PipelineState.CLOSED) { + pipelineNumDeductable++; + } + } + boolean meet = (nodeManager.getPipelinesCount(datanodeDetails) + - pipelineNumDeductable) < heavyNodeCriteria; + if (!meet) { + LOG.info("Pipeline Placement: can't place more pipeline on heavy " + + "datanode: " + datanodeDetails.getUuid().toString() + " Heaviness: " + + nodeManager.getPipelinesCount(datanodeDetails) + " limit: " + + heavyNodeCriteria); + } + return meet; } /** @@ -102,18 +141,19 @@ List filterViableNodes( if (excludedNodes != null) { healthyNodes.removeAll(excludedNodes); } + int initialHealthyNodesCount = healthyNodes.size(); String msg; - if (healthyNodes.size() == 0) { + if (initialHealthyNodesCount == 0) { msg = "No healthy node found to allocate pipeline."; LOG.error(msg); throw new SCMException(msg, SCMException.ResultCodes .FAILED_TO_FIND_HEALTHY_NODES); } - if (healthyNodes.size() < nodesRequired) { + if (initialHealthyNodesCount < nodesRequired) { msg = String.format("Not enough healthy nodes to allocate pipeline. %d " + " datanodes required. Found %d", - nodesRequired, healthyNodes.size()); + nodesRequired, initialHealthyNodesCount); LOG.error(msg); throw new SCMException(msg, SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); @@ -121,14 +161,17 @@ List filterViableNodes( // filter nodes that meet the size and pipeline engagement criteria. // Pipeline placement doesn't take node space left into account. - List healthyList = healthyNodes.stream().filter(d -> - meetCriteria(d, heavyNodeCriteria)).collect(Collectors.toList()); + List healthyList = healthyNodes.stream() + .filter(d -> meetCriteria(d, nodesRequired)).limit(nodesRequired) + .collect(Collectors.toList()); if (healthyList.size() < nodesRequired) { msg = String.format("Unable to find enough nodes that meet " + "the criteria that cannot engage in more than %d pipelines." + - " Nodes required: %d Found: %d", - heavyNodeCriteria, nodesRequired, healthyList.size()); + " Nodes required: %d Found: %d, healthy nodes count in " + + "NodeManager: %d.", + heavyNodeCriteria, nodesRequired, healthyList.size(), + initialHealthyNodesCount); LOG.error(msg); throw new SCMException(msg, SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); @@ -154,13 +197,11 @@ public List chooseDatanodes( // and make sure excludedNodes are excluded from list. List healthyNodes = filterViableNodes(excludedNodes, nodesRequired); - - // Randomly picks nodes when all nodes are equal. + + // Randomly picks nodes when all nodes are equal or factor is ONE. // This happens when network topology is absent or // all nodes are on the same rack. if (checkAllNodesAreEqual(nodeManager.getClusterNetworkTopologyMap())) { - LOG.info("All nodes are considered equal. Now randomly pick nodes. 
" + - "Required nodes: {}", nodesRequired); return super.getResultSet(nodesRequired, healthyNodes); } else { // Since topology and rack awareness are available, picks nodes @@ -188,8 +229,8 @@ public List getResultSet( // First choose an anchor nodes randomly DatanodeDetails anchor = chooseNode(healthyNodes); if (anchor == null) { - LOG.error("Unable to find the first healthy nodes that " + - "meet the criteria. Required nodes: {}, Found nodes: {}", + LOG.error("Pipeline Placement: Unable to find the first healthy nodes " + + "that meet the criteria. Required nodes: {}, Found nodes: {}", nodesRequired, results.size()); throw new SCMException("Unable to find required number of nodes.", SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); @@ -204,8 +245,8 @@ public List getResultSet( healthyNodes, exclude, nodeManager.getClusterNetworkTopologyMap(), anchor); if (nodeOnDifferentRack == null) { - LOG.error("Unable to find nodes on different racks that " + - "meet the criteria. Required nodes: {}, Found nodes: {}", + LOG.error("Pipeline Placement: Unable to find nodes on different racks " + + " that meet the criteria. Required nodes: {}, Found nodes: {}", nodesRequired, results.size()); throw new SCMException("Unable to find required number of nodes.", SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); @@ -228,9 +269,9 @@ public List getResultSet( } if (results.size() < nodesRequired) { - LOG.error("Unable to find the required number of healthy nodes that " + - "meet the criteria. Required nodes: {}, Found nodes: {}", - nodesRequired, results.size()); + LOG.error("Pipeline Placement: Unable to find the required number of " + + "healthy nodes that meet the criteria. Required nodes: {}, " + + "Found nodes: {}", nodesRequired, results.size()); throw new SCMException("Unable to find required number of nodes.", SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java index 443378cd1835..8e0f32de1599 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -52,8 +53,8 @@ class PipelineStateMap { PipelineStateMap() { // TODO: Use TreeMap for range operations? 
- pipelineMap = new HashMap<>(); - pipeline2container = new HashMap<>(); + pipelineMap = new ConcurrentHashMap<>(); + pipeline2container = new ConcurrentHashMap<>(); query2OpenPipelines = new HashMap<>(); initializeQueryMap(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java index dacc4ca0585c..23b02ed2528a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java @@ -20,14 +20,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.container.placement.algorithms - .SCMContainerPlacementRandom; import org.apache.hadoop.hdds.scm.events.SCMEvents; -import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.pipeline.Pipeline.PipelineState; import org.apache.hadoop.hdds.server.events.EventPublisher; @@ -38,8 +36,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -60,6 +56,9 @@ public class RatisPipelineProvider implements PipelineProvider { private final PipelineStateManager stateManager; private final Configuration conf; private final EventPublisher eventPublisher; + private final PipelinePlacementPolicy placementPolicy; + private int pipelineNumberLimit; + private int maxPipelinePerDatanode; // Set parallelism at 3, as now in Ratis we create 1 and 3 node pipelines. private final int parallelismForPool = 3; @@ -82,65 +81,93 @@ public class RatisPipelineProvider implements PipelineProvider { this.stateManager = stateManager; this.conf = conf; this.eventPublisher = eventPublisher; + this.placementPolicy = + new PipelinePlacementPolicy(nodeManager, stateManager, conf); + this.pipelineNumberLimit = conf.getInt( + ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT, + ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT_DEFAULT); + this.maxPipelinePerDatanode = conf.getInt( + ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, + ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT); } + private List pickNodesNeverUsed(ReplicationFactor factor) + throws SCMException { + Set dnsUsed = new HashSet<>(); + stateManager.getPipelines(ReplicationType.RATIS, factor) + .stream().filter( + p -> p.getPipelineState().equals(PipelineState.OPEN) || + p.getPipelineState().equals(PipelineState.DORMANT) || + p.getPipelineState().equals(PipelineState.ALLOCATED)) + .forEach(p -> dnsUsed.addAll(p.getNodes())); - /** - * Create pluggable container placement policy implementation instance. - * - * @param nodeManager - SCM node manager. - * @param conf - configuration. - * @return SCM container placement policy implementation instance. 
- */ - @SuppressWarnings("unchecked") - // TODO: should we rename PlacementPolicy to PipelinePlacementPolicy? - private static PlacementPolicy createContainerPlacementPolicy( - final NodeManager nodeManager, final Configuration conf) { - Class implClass = - (Class) conf.getClass( - ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementRandom.class); + // Get list of healthy nodes + List dns = nodeManager + .getNodes(HddsProtos.NodeState.HEALTHY) + .parallelStream() + .filter(dn -> !dnsUsed.contains(dn)) + .limit(factor.getNumber()) + .collect(Collectors.toList()); + if (dns.size() < factor.getNumber()) { + String e = String + .format("Cannot create pipeline of factor %d using %d nodes." + + " Used %d nodes. Healthy nodes %d", factor.getNumber(), + dns.size(), dnsUsed.size(), + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY).size()); + throw new SCMException(e, + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + return dns; + } - try { - Constructor ctor = - implClass.getDeclaredConstructor(NodeManager.class, - Configuration.class); - return ctor.newInstance(nodeManager, conf); - } catch (RuntimeException e) { - throw e; - } catch (InvocationTargetException e) { - throw new RuntimeException(implClass.getName() - + " could not be constructed.", e.getCause()); - } catch (Exception e) { -// LOG.error("Unhandled exception occurred, Placement policy will not " + -// "be functional."); - throw new IllegalArgumentException("Unable to load " + - "PlacementPolicy", e); + private boolean exceedPipelineNumberLimit(ReplicationFactor factor) { + if (factor != ReplicationFactor.THREE) { + // Only put limits for Factor THREE pipelines. + return false; + } + // Per datanode limit + if (maxPipelinePerDatanode > 0) { + return (stateManager.getPipelines(ReplicationType.RATIS, factor).size() - + stateManager.getPipelines(ReplicationType.RATIS, factor, + Pipeline.PipelineState.CLOSED).size()) > maxPipelinePerDatanode * + nodeManager.getNodeCount(HddsProtos.NodeState.HEALTHY) / + factor.getNumber(); + } + + // Global limit + if (pipelineNumberLimit > 0) { + return (stateManager.getPipelines(ReplicationType.RATIS, + ReplicationFactor.THREE).size() - stateManager.getPipelines( + ReplicationType.RATIS, ReplicationFactor.THREE, + Pipeline.PipelineState.CLOSED).size()) > + (pipelineNumberLimit - stateManager.getPipelines( + ReplicationType.RATIS, ReplicationFactor.ONE).size()); } + + return false; } @Override public Pipeline create(ReplicationFactor factor) throws IOException { - // Get set of datanodes already used for ratis pipeline - Set dnsUsed = new HashSet<>(); - stateManager.getPipelines(ReplicationType.RATIS, factor).stream().filter( - p -> p.getPipelineState().equals(PipelineState.OPEN) || - p.getPipelineState().equals(PipelineState.DORMANT) || - p.getPipelineState().equals(PipelineState.ALLOCATED)) - .forEach(p -> dnsUsed.addAll(p.getNodes())); + if (exceedPipelineNumberLimit(factor)) { + throw new SCMException("Ratis pipeline number meets the limit: " + + pipelineNumberLimit + " factor : " + + factor.getNumber(), + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } - // Get list of healthy nodes - List dns = - nodeManager.getNodes(NodeState.HEALTHY) - .parallelStream() - .filter(dn -> !dnsUsed.contains(dn)) - .limit(factor.getNumber()) - .collect(Collectors.toList()); - if (dns.size() < factor.getNumber()) { - String e = String - .format("Cannot create pipeline of factor %d using %d nodes.", - factor.getNumber(), dns.size()); - throw new 
InsufficientDatanodesException(e); + List dns; + + switch(factor) { + case ONE: + dns = pickNodesNeverUsed(ReplicationFactor.ONE); + break; + case THREE: + dns = placementPolicy.chooseDatanodes(null, + null, factor.getNumber(), 0); + break; + default: + throw new IllegalStateException("Unknown factor: " + factor.name()); } Pipeline pipeline = Pipeline.newBuilder() diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java new file mode 100644 index 000000000000..b8cdf061300c --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.pipeline; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.ratis.RatisHelper; +import org.apache.ratis.client.RaftClient; +import org.apache.ratis.grpc.GrpcTlsConfig; +import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.retry.RetryPolicy; +import org.apache.ratis.rpc.SupportedRpcType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Utility class for Ratis pipelines. Contains methods to create and destroy + * ratis pipelines. + */ +public final class RatisPipelineUtils { + + private static final Logger LOG = + LoggerFactory.getLogger(RatisPipelineUtils.class); + + private RatisPipelineUtils() { + } + /** + * Removes pipeline from SCM. Sends ratis command to destroy pipeline on all + * the datanodes. + * + * @param pipeline - Pipeline to be destroyed + * @param ozoneConf - Ozone configuration + * @param grpcTlsConfig + * @throws IOException + */ + public static void destroyPipeline(Pipeline pipeline, Configuration ozoneConf, + GrpcTlsConfig grpcTlsConfig) { + final RaftGroup group = RatisHelper.newRaftGroup(pipeline); + if (LOG.isDebugEnabled()) { + LOG.debug("destroying pipeline:{} with {}", pipeline.getId(), group); + } + for (DatanodeDetails dn : pipeline.getNodes()) { + try { + destroyPipeline(dn, pipeline.getId(), ozoneConf, grpcTlsConfig); + } catch (IOException e) { + LOG.warn("Pipeline destroy failed for pipeline={} dn={} exception={}", + pipeline.getId(), dn, e.getMessage()); + } + } + } + + /** + * Sends ratis command to destroy pipeline on the given datanode. 
+ * + * @param dn - Datanode on which pipeline needs to be destroyed + * @param pipelineID - ID of pipeline to be destroyed + * @param ozoneConf - Ozone configuration + * @param grpcTlsConfig - grpc tls configuration + * @throws IOException + */ + static void destroyPipeline(DatanodeDetails dn, PipelineID pipelineID, + Configuration ozoneConf, GrpcTlsConfig grpcTlsConfig) throws IOException { + final String rpcType = ozoneConf + .get(ScmConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_KEY, + ScmConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_DEFAULT); + final RetryPolicy retryPolicy = RatisHelper.createRetryPolicy(ozoneConf); + final RaftPeer p = RatisHelper.toRaftPeer(dn); + try(RaftClient client = RatisHelper + .newRaftClient(SupportedRpcType.valueOfIgnoreCase(rpcType), p, + retryPolicy, grpcTlsConfig, ozoneConf)) { + client.groupRemove(RaftGroupId.valueOf(pipelineID.getId()), + true, p.getId()); + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java index 32aa7b693ba6..035d60405c1b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java @@ -55,10 +55,6 @@ import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; -import static org.apache.hadoop.hdds.scm - .ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_DEFAULT; -import static org.apache.hadoop.hdds.scm - .ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_MB; import static org.apache.hadoop.ozone.OzoneConsts.SCM_PIPELINE_DB; /** @@ -109,8 +105,8 @@ protected SCMPipelineManager(Configuration conf, NodeManager nodeManager, scheduler = new Scheduler("RatisPipelineUtilsThread", false, 1); this.backgroundPipelineCreator = new BackgroundPipelineCreator(this, scheduler, conf); - int cacheSize = conf.getInt(OZONE_SCM_DB_CACHE_SIZE_MB, - OZONE_SCM_DB_CACHE_SIZE_DEFAULT); + int cacheSize = conf.getInt(ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_MB, + ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_DEFAULT); final File pipelineDBPath = getPipelineDBPath(conf); this.pipelineStore = MetadataStoreBuilder.newBuilder() @@ -176,10 +172,9 @@ public synchronized Pipeline createPipeline(ReplicationType type, metrics.createPerPipelineMetrics(pipeline); } return pipeline; - } catch (InsufficientDatanodesException idEx) { - throw idEx; } catch (IOException ex) { metrics.incNumPipelineCreationFailed(); + LOG.error("Pipeline creation failed.", ex); throw ex; } finally { lock.writeLock().unlock(); @@ -188,7 +183,7 @@ public synchronized Pipeline createPipeline(ReplicationType type, @Override public Pipeline createPipeline(ReplicationType type, ReplicationFactor factor, - List nodes) { + List nodes) { // This will mostly be used to create dummy pipeline for SimplePipelines. // We don't update the metrics for SimplePipelines. 
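// Illustrative example (not part of the patch): a caller that already knows
// its nodes, e.g. a SimplePipelineProvider-style test path, can pass them
// explicitly:
//   pipelineManager.createPipeline(ReplicationType.STAND_ALONE,
//       ReplicationFactor.ONE, Collections.singletonList(dn));
// whereas the (type, factor) overload above leaves node selection to the
// configured PipelineProvider.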
lock.writeLock().lock(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java index 40a6f290b54e..8c348ed87cf9 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java @@ -134,6 +134,14 @@ void incNumPipelineCreated() { numPipelineCreated.incr(); } + /** + * Get the number of pipeline created. + * @return number of pipeline + */ + long getNumPipelineCreated() { + return numPipelineCreated.value(); + } + /** * Increments number of failed pipeline creation count. */ diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java index 33936d5580cf..1a03c34b2b06 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java @@ -17,11 +17,14 @@ */ package org.apache.hadoop.hdds.scm.safemode; +import java.util.HashSet; +import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; import com.google.common.base.Preconditions; import org.apache.hadoop.hdds.server.events.EventQueue; @@ -46,6 +49,8 @@ public class HealthyPipelineSafeModeRule private int healthyPipelineThresholdCount; private int currentHealthyPipelineCount = 0; private final double healthyPipelinesPercent; + private final Set processedPipelineIDs = + new HashSet<>(); HealthyPipelineSafeModeRule(String ruleName, EventQueue eventQueue, PipelineManager pipelineManager, @@ -117,8 +122,11 @@ protected void process(Pipeline pipeline) { Preconditions.checkNotNull(pipeline); if (pipeline.getType() == HddsProtos.ReplicationType.RATIS && pipeline.getFactor() == HddsProtos.ReplicationFactor.THREE) { - getSafeModeMetrics().incCurrentHealthyPipelinesCount(); - currentHealthyPipelineCount++; + if (!processedPipelineIDs.contains(pipeline.getId())) { + getSafeModeMetrics().incCurrentHealthyPipelinesCount(); + currentHealthyPipelineCount++; + processedPipelineIDs.add(pipeline.getId()); + } } if (scmInSafeMode()) { @@ -131,6 +139,7 @@ protected void process(Pipeline pipeline) { @Override protected void cleanup() { + processedPipelineIDs.clear(); } @VisibleForTesting diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java index 594ea5cfd523..977038ebac71 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java @@ -66,6 +66,8 @@ import org.junit.Test; import org.mockito.Mockito; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; + /** * Test DeadNodeHandler. 
*/ @@ -87,6 +89,7 @@ public void setup() throws IOException, AuthenticationException { storageDir = GenericTestUtils.getTempPath( TestDeadNodeHandler.class.getSimpleName() + UUID.randomUUID()); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir); + conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 0); eventQueue = new EventQueue(); scm = HddsTestUtils.getScm(conf); nodeManager = (SCMNodeManager) scm.getScmNodeManager(); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java index 2e0d0b179c64..1e340393c476 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java @@ -34,11 +34,14 @@ import java.util.*; import java.util.stream.Collectors; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; + /** * Test for PipelinePlacementPolicy. */ public class TestPipelinePlacementPolicy { private MockNodeManager nodeManager; + private OzoneConfiguration conf; private PipelinePlacementPolicy placementPolicy; private static final int PIPELINE_PLACEMENT_MAX_NODES_COUNT = 10; @@ -46,8 +49,10 @@ public class TestPipelinePlacementPolicy { public void init() throws Exception { nodeManager = new MockNodeManager(true, PIPELINE_PLACEMENT_MAX_NODES_COUNT); - placementPolicy = - new PipelinePlacementPolicy(nodeManager, new OzoneConfiguration()); + conf = new OzoneConfiguration(); + conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 5); + placementPolicy = new PipelinePlacementPolicy( + nodeManager, new PipelineStateManager(conf), conf); } @Test @@ -123,7 +128,7 @@ private List overWriteLocationInNodes( public void testHeavyNodeShouldBeExcluded() throws SCMException{ List healthyNodes = nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); - int nodesRequired = healthyNodes.size()/2; + int nodesRequired = HddsProtos.ReplicationFactor.THREE.getNumber(); // only minority of healthy NODES are heavily engaged in pipelines. 
int minorityHeavy = healthyNodes.size()/2 - 1; List pickedNodes1 = placementPolicy.chooseDatanodes( @@ -179,7 +184,9 @@ private void insertHeavyNodesIntoNodeManager( } int considerHeavyCount = - ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT + 1; + conf.getInt( + ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, + ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT) + 1; Node2PipelineMap mockMap = new Node2PipelineMap(); for (DatanodeDetails node : nodes) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java index 81723e1afcbe..08f51858720a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.pipeline; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; import static org.apache.hadoop.test.MetricsAsserts.getLongCounter; import static org.apache.hadoop.test.MetricsAsserts.getMetrics; @@ -34,12 +35,13 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.MockNodeManager; -import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager; import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher .PipelineReportFromDatanode; -import org.apache.hadoop.hdds.scm.TestUtils; import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.test.GenericTestUtils; @@ -59,6 +61,7 @@ public class TestSCMPipelineManager { @Before public void setUp() throws Exception { conf = new OzoneConfiguration(); + conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 1); testDir = GenericTestUtils .getTestDir(TestSCMPipelineManager.class.getSimpleName()); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); @@ -253,10 +256,10 @@ public void testPipelineCreationFailedMetric() throws Exception { pipelineManager.createPipeline(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE); Assert.fail(); - } catch (InsufficientDatanodesException idEx) { - Assert.assertEquals( - "Cannot create pipeline of factor 3 using 1 nodes.", - idEx.getMessage()); + } catch (SCMException ioe) { + // pipeline creation failed this time. 
+ Assert.assertEquals(SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE, + ioe.getResult()); } metrics = getMetrics( @@ -266,7 +269,7 @@ public void testPipelineCreationFailedMetric() throws Exception { numPipelineCreateFailed = getLongCounter( "NumPipelineCreationFailed", metrics); - Assert.assertTrue(numPipelineCreateFailed == 0); + Assert.assertTrue(numPipelineCreateFailed == 1); // clean up pipelineManager.close(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java index 23d7833333c2..acc40317b0c2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java @@ -97,6 +97,7 @@ public void init() throws Exception { conf.setTimeDuration( OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, LEADER_ELECTION_TIMEOUT, TimeUnit.MILLISECONDS); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 3); OMStorage omStore = new OMStorage(conf); omStore.setClusterId(clusterId); @@ -106,6 +107,8 @@ public void init() throws Exception { // Start the cluster cluster = MiniOzoneCluster.newHABuilder(conf) + .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setClusterId(clusterId) .setScmId(scmId) .setOMServiceId(omServiceId) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java index 21fa7bdca8b2..aba9caed41d3 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java @@ -170,6 +170,7 @@ public void testPipelineCloseWithPipelineAction() throws Exception { pipelineActionHandler .onMessage(pipelineActionsFromDatanode, new EventQueue()); Thread.sleep(5000); + OzoneContainer ozoneContainer = cluster.getHddsDatanodes().get(0).getDatanodeStateMachine() .getContainer(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java index 3590e4380844..fc90ee9a9e01 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.MiniOzoneCluster; @@ -36,6 +37,7 @@ import java.util.concurrent.TimeoutException; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; /** @@ -51,9 +53,12 @@ 
public class TestRatisPipelineCreateAndDestroy { public void init(int numDatanodes) throws Exception { conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, GenericTestUtils.getRandomizedTempPath()); + conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); + cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(numDatanodes) - .setHbInterval(1000) + .setTotalPipelineNumLimit(numDatanodes + numDatanodes/3) + .setHbInterval(2000) .setHbProcessorInterval(1000) .build(); cluster.waitForClusterToBeReady(); @@ -134,7 +139,9 @@ public void testPipelineCreationOnNodeRestart() throws Exception { } catch (IOException ioe) { // As now all datanodes are shutdown, they move to stale state, there // will be no sufficient datanodes to create the pipeline. - Assert.assertTrue(ioe instanceof InsufficientDatanodesException); + Assert.assertTrue(ioe instanceof SCMException); + Assert.assertEquals(SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE, + ((SCMException) ioe).getResult()); } // make sure pipelines is destroyed @@ -147,9 +154,14 @@ public void testPipelineCreationOnNodeRestart() throws Exception { for (Pipeline pipeline : pipelines) { pipelineManager.finalizeAndDestroyPipeline(pipeline, false); } - // make sure pipelines is created after node start - pipelineManager.triggerPipelineCreation(); - waitForPipelines(1); + + if (cluster.getStorageContainerManager() + .getScmNodeManager().getNodeCount(HddsProtos.NodeState.HEALTHY) >= + HddsProtos.ReplicationFactor.THREE.getNumber()) { + // make sure pipelines is created after node start + pipelineManager.triggerPipelineCreation(); + waitForPipelines(1); + } } private void waitForPipelines(int numPipelines) @@ -157,6 +169,6 @@ private void waitForPipelines(int numPipelines) GenericTestUtils.waitFor(() -> pipelineManager .getPipelines(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE, Pipeline.PipelineState.OPEN) - .size() == numPipelines, 100, 40000); + .size() >= numPipelines, 100, 40000); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMRestart.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMRestart.java index 459a67ae882a..baeee6a31fb8 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMRestart.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMRestart.java @@ -57,8 +57,11 @@ public class TestSCMRestart { @BeforeClass public static void init() throws Exception { conf = new OzoneConfiguration(); + int numOfNodes = 4; cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(4) + .setNumDatanodes(numOfNodes) + // allow only one FACTOR THREE pipeline. 
+ .setTotalPipelineNumLimit(numOfNodes + 1) .setHbInterval(1000) .setHbProcessorInterval(1000) .build(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java index 866d0b018bf0..4b35317f5259 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java @@ -39,6 +39,7 @@ import java.util.List; import java.util.concurrent.TimeoutException; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; import static org.junit.Assert.fail; /** @@ -64,6 +65,8 @@ public void setup(int numDatanodes) throws Exception { true); conf.set(HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT, "10s"); conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); + conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 50); + clusterBuilder = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(numDatanodes) .setHbInterval(1000) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java index de27d5a17d3e..0042363d862b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java @@ -251,6 +251,7 @@ abstract class Builder { protected static final int DEFAULT_HB_INTERVAL_MS = 1000; protected static final int DEFAULT_HB_PROCESSOR_INTERVAL_MS = 100; protected static final int ACTIVE_OMS_NOT_SET = -1; + protected static final int DEFAULT_PIPELIME_LIMIT = 3; protected final OzoneConfiguration conf; protected String path; @@ -278,6 +279,7 @@ abstract class Builder { protected int numOfDatanodes = 3; protected boolean startDataNodes = true; protected CertificateClient certClient; + protected int pipelineNumLimit = DEFAULT_PIPELIME_LIMIT; protected Builder(OzoneConfiguration conf) { this.conf = conf; @@ -364,6 +366,16 @@ public Builder setNumDatanodes(int val) { return this; } + /** + * Sets the total number of pipelines to create. + * @param val number of pipelines + * @return MiniOzoneCluster.Builder + */ + public Builder setTotalPipelineNumLimit(int val) { + pipelineNumLimit = val; + return this; + } + /** * Sets the number of HeartBeat Interval of Datanodes, the value should be * in MilliSeconds. diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index c2e196a9be8a..9bfa8bd7b442 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -575,6 +575,10 @@ protected void initializeConfiguration() throws IOException { streamBufferMaxSize.get(), streamBufferSizeUnit.get()); conf.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, blockSize.get(), streamBufferSizeUnit.get()); + // MiniOzoneCluster should have global pipeline upper limit. 
+ conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT, + pipelineNumLimit == DEFAULT_PIPELIME_LIMIT ? + 2 * numOfDatanodes : pipelineNumLimit); configureTrace(); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/Test2WayCommitInRatis.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/Test2WayCommitInRatis.java index c8bf36bd06dd..64ded12aabc2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/Test2WayCommitInRatis.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/Test2WayCommitInRatis.java @@ -83,6 +83,7 @@ private void startCluster(OzoneConfiguration conf) throws Exception { conf.setQuietMode(false); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java index 96226d86e07e..fa7783c93c15 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java @@ -84,6 +84,7 @@ public static void init() throws Exception { StorageUnit.MB); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java index e236b8539abc..07e306ee4f49 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.XceiverClientManager; import org.apache.hadoop.hdds.scm.XceiverClientMetrics; import org.apache.hadoop.hdds.scm.XceiverClientRatis; @@ -91,9 +92,11 @@ public void init() throws Exception { conf.setQuietMode(false); conf.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, 4, StorageUnit.MB); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 3); + cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(7) - .setBlockSize(blockSize).setChunkSize(chunkSize) - .setStreamBufferFlushSize(flushSize) + .setTotalPipelineNumLimit(10).setBlockSize(blockSize) + .setChunkSize(chunkSize).setStreamBufferFlushSize(flushSize) .setStreamBufferMaxSize(maxFlushSize) .setStreamBufferSizeUnit(StorageUnit.BYTES).build(); cluster.waitForClusterToBeReady(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitWatcher.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitWatcher.java index 4a5f5289ad80..16f50c67b596 
100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitWatcher.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitWatcher.java @@ -97,6 +97,7 @@ public static void init() throws Exception { StorageUnit.MB); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java index 36f720b51322..439287e726d8 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java @@ -57,6 +57,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; /** * Tests delete key operation with a slow follower in the datanode @@ -107,10 +108,12 @@ public static void init() throws Exception { 1000, TimeUnit.SECONDS); conf.setLong("hdds.scm.replication.thread.interval", containerReportInterval); + conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); conf.setQuietMode(false); cluster = - MiniOzoneCluster.newBuilder(conf).setNumDatanodes(4).setHbInterval(200) + MiniOzoneCluster.newBuilder(conf).setNumDatanodes(4) + .setTotalPipelineNumLimit(6).setHbInterval(200) .build(); cluster.waitForClusterToBeReady(); cluster.getStorageContainerManager().getReplicationManager().start(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java index 6bef0600e719..ba5ed9f03d59 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java @@ -53,8 +53,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_COMMAND_STATUS_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.*; /** * Tests the containerStateMachine failure handling. 
@@ -83,7 +82,7 @@ public void setup() throws Exception { baseDir.mkdirs(); conf.setBoolean(HDDS_BLOCK_TOKEN_ENABLED, true); - // conf.setBoolean(OZONE_SECURITY_ENABLED_KEY, true); + // conf.setBoolean(OZONE_SECURITY_ENABLED_KEY, true); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 200, TimeUnit.MILLISECONDS); conf.setTimeDuration(HDDS_COMMAND_STATUS_REPORT_INTERVAL, 200, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithSlowFollower.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithSlowFollower.java index cf96a7486d8d..da2d656acaad 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithSlowFollower.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithSlowFollower.java @@ -83,6 +83,7 @@ public class TestDeleteWithSlowFollower { private static String bucketName; private static String path; private static XceiverClientManager xceiverClientManager; + private static final int FACTOR_THREE_PIPELINE_COUNT = 1; /** * Create a MiniDFSCluster for testing. @@ -120,10 +121,13 @@ public static void init() throws Exception { 1000, TimeUnit.SECONDS); conf.setTimeDuration(OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 1, TimeUnit.SECONDS); - conf.setQuietMode(false); - cluster = - MiniOzoneCluster.newBuilder(conf).setNumDatanodes(3).setHbInterval(100) + int numOfDatanodes = 3; + cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(numOfDatanodes) + .setTotalPipelineNumLimit( + numOfDatanodes + FACTOR_THREE_PIPELINE_COUNT) + .setHbInterval(100) .build(); cluster.waitForClusterToBeReady(); //the easiest way to create an open container is creating a key @@ -185,7 +189,7 @@ public void testDeleteKeyWithSlowFollower() throws Exception { cluster.getStorageContainerManager().getPipelineManager() .getPipelines(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE); - Assert.assertTrue(pipelineList.size() == 1); + Assert.assertTrue(pipelineList.size() >= FACTOR_THREE_PIPELINE_COUNT); Pipeline pipeline = pipelineList.get(0); for (HddsDatanodeService dn : cluster.getHddsDatanodes()) { if (ContainerTestHelper.isRatisFollower(dn, pipeline)) { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java index 21b51e755482..d4e5d7d7f052 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; @@ -98,6 +99,7 @@ private void init() throws Exception { 1, TimeUnit.SECONDS); conf.setBoolean( OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_KEY, true); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); conf.setQuietMode(false); conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, @@ -106,7 +108,7 @@ 
private void init() throws Exception { Collections.singleton(HddsUtils.getHostName(conf))).get(0), "/rack1"); cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(10).build(); + .setNumDatanodes(10).setTotalPipelineNumLimit(15).build(); cluster.waitForClusterToBeReady(); //the easiest way to create an open container is creating a key client = OzoneClientFactory.getClient(conf); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestHybridPipelineOnDatanode.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestHybridPipelineOnDatanode.java index 47a716e85ca2..75af061fe984 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestHybridPipelineOnDatanode.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestHybridPipelineOnDatanode.java @@ -67,7 +67,8 @@ public class TestHybridPipelineOnDatanode { @BeforeClass public static void init() throws Exception { conf = new OzoneConfiguration(); - cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(3).build(); + cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(3) + .setTotalPipelineNumLimit(5).build(); cluster.waitForClusterToBeReady(); //the easiest way to create an open container is creating a key client = OzoneClientFactory.getClient(conf); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestKeyInputStream.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestKeyInputStream.java index bb7b6f0374a0..589208346061 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestKeyInputStream.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestKeyInputStream.java @@ -82,6 +82,7 @@ public static void init() throws Exception { StorageUnit.MB); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(3) + .setTotalPipelineNumLimit(5) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java index 281ad4ab297b..9b6292329978 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java @@ -49,8 +49,7 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.*; /** * Tests MultiBlock Writes with Dn failures by Ozone Client. 
@@ -88,10 +87,13 @@ private void startCluster(int datanodes) throws Exception { conf.setTimeDuration( OzoneConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, 1, TimeUnit.SECONDS); + conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); conf.setQuietMode(false); cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(datanodes).build(); + .setNumDatanodes(datanodes) + .setTotalPipelineNumLimit(0) + .build(); cluster.waitForClusterToBeReady(); //the easiest way to create an open container is creating a key client = OzoneClientFactory.getClient(conf); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientRetriesOnException.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientRetriesOnException.java index 5aefcc8f3f52..1bf2ea3ff91e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientRetriesOnException.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientRetriesOnException.java @@ -93,6 +93,7 @@ public void init() throws Exception { conf.setQuietMode(false); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java index c188e80758d7..03f98b631aa1 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java @@ -165,6 +165,7 @@ public abstract class TestOzoneRpcClientAbstract { static void startCluster(OzoneConfiguration conf) throws Exception { cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(3) + .setTotalPipelineNumLimit(10) .setScmId(scmId) .build(); cluster.waitForClusterToBeReady(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java index d2007ceb910e..b84e61cf8b63 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java @@ -58,6 +58,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; /** * This class verifies the watchForCommit Handling by xceiverClient. 
@@ -95,10 +96,12 @@ private void startCluster(OzoneConfiguration conf) throws Exception { conf.setTimeDuration( OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY, 1, TimeUnit.SECONDS); + conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 5); conf.setQuietMode(false); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java index c65ce954a542..8ee47a9f1425 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java @@ -53,6 +53,8 @@ import java.util.List; import java.util.concurrent.TimeoutException; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; + /** * Test container closing. */ @@ -75,8 +77,11 @@ public class TestCloseContainerByPipeline { public static void init() throws Exception { conf = new OzoneConfiguration(); conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, "1"); + conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); + cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(10) + .setTotalPipelineNumLimit(15) .build(); cluster.waitForClusterToBeReady(); //the easiest way to create an open container is creating a key diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDataValidate.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDataValidate.java index 7857e1f9599f..0c875c983d35 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDataValidate.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDataValidate.java @@ -40,7 +40,7 @@ public abstract class TestDataValidate { */ static void startCluster(OzoneConfiguration conf) throws Exception { cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(5).build(); + .setNumDatanodes(5).setTotalPipelineNumLimit(8).build(); cluster.waitForClusterToBeReady(); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestFreonWithPipelineDestroy.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestFreonWithPipelineDestroy.java index 80ef246f12b1..5150fd4d8f2f 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestFreonWithPipelineDestroy.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestFreonWithPipelineDestroy.java @@ -55,6 +55,7 @@ public static void init() throws Exception { .setHbProcessorInterval(1000) .setHbInterval(1000) .setNumDatanodes(3) + .setTotalPipelineNumLimit(8) .build(); cluster.waitForClusterToBeReady(); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMContainerPlacementPolicyMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMContainerPlacementPolicyMetrics.java index 
1f9d9fbf01ba..4025acac439f 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMContainerPlacementPolicyMetrics.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMContainerPlacementPolicyMetrics.java @@ -85,6 +85,7 @@ public void setup() throws Exception { "/rack1"); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(4) + .setTotalPipelineNumLimit(10) .build(); cluster.waitForClusterToBeReady(); metrics = getMetrics(SCMContainerPlacementMetrics.class.getSimpleName()); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java index 841fd85f3666..1ca3110e826b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java @@ -16,6 +16,7 @@ */ package org.apache.hadoop.ozone.scm.node; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -78,9 +79,11 @@ public void setUp() throws Exception { conf.setTimeDuration(HDDS_NODE_REPORT_INTERVAL, 1, SECONDS); conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, SECONDS); conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, SECONDS); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 3); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(numOfDatanodes) + .setTotalPipelineNumLimit(numOfDatanodes + numOfDatanodes/2) .build(); cluster.waitForClusterToBeReady(); scmClient = new ContainerOperationClient(conf); From e1b168f543b5b31d321523d6d5555760088ebf66 Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Wed, 27 Nov 2019 20:18:50 +0800 Subject: [PATCH 05/19] HDDS-1572 Implement a Pipeline scrubber to clean up non-OPEN pipeline. (#237) --- .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 7 +++ .../hadoop/hdds/scm/pipeline/Pipeline.java | 31 ++++++++++++- hadoop-hdds/common/src/main/proto/hdds.proto | 1 + .../src/main/resources/ozone-default.xml | 12 ++++++ .../pipeline/BackgroundPipelineCreator.java | 8 +++- .../hdds/scm/pipeline/PipelineManager.java | 3 ++ .../hdds/scm/pipeline/SCMPipelineManager.java | 26 +++++++++++ .../pipeline/MockRatisPipelineProvider.java | 28 ++++++++++++ .../pipeline/TestRatisPipelineProvider.java | 10 ++++- .../scm/pipeline/TestSCMPipelineManager.java | 43 ++++++++++++++++++- 10 files changed, 165 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 15a47f348784..5f52e922acec 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -310,6 +310,13 @@ public final class ScmConfigKeys { OZONE_SCM_KEY_VALUE_CONTAINER_DELETION_CHOOSING_POLICY = "ozone.scm.keyvalue.container.deletion-choosing.policy"; + // Max timeout for pipeline to stay at ALLOCATED state before scrubbed. 
+ public static final String OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT = + "ozone.scm.pipeline.allocated.timeout"; + + public static final String OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT_DEFAULT = + "5m"; + public static final String OZONE_SCM_CONTAINER_CREATION_LEASE_TIMEOUT = "ozone.scm.container.creation.lease.timeout"; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java index 594fcf738e11..295156d50fc4 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java @@ -56,6 +56,8 @@ public final class Pipeline { private ThreadLocal> nodesInOrder = new ThreadLocal<>(); // Current reported Leader for the pipeline private UUID leaderId; + // Timestamp for pipeline upon creation + private Long creationTimestamp; /** * The immutable properties of pipeline object is used in @@ -70,6 +72,7 @@ private Pipeline(PipelineID id, ReplicationType type, this.factor = factor; this.state = state; this.nodeStatus = nodeStatus; + this.creationTimestamp = System.currentTimeMillis(); } /** @@ -108,6 +111,24 @@ public PipelineState getPipelineState() { return state; } + /** + * Return the creation time of pipeline. + * + * @return Creation Timestamp + */ + public Long getCreationTimestamp() { + return creationTimestamp; + } + + /** + * Set the creation timestamp. Only for protobuf now. + * + * @param creationTimestamp + */ + void setCreationTimestamp(Long creationTimestamp) { + this.creationTimestamp = creationTimestamp; + } + /** * Return the pipeline leader's UUID. * @@ -221,6 +242,7 @@ public HddsProtos.Pipeline getProtobufMessage() .setFactor(factor) .setState(PipelineState.getProtobuf(state)) .setLeaderID(leaderId != null ? 
leaderId.toString() : "") + .setCreationTimeStamp(creationTimestamp) .addAllMembers(nodeStatus.keySet().stream() .map(DatanodeDetails::getProtoBufMessage) .collect(Collectors.toList())); @@ -299,7 +321,8 @@ public String toString() { b.append(", Factor:").append(getFactor()); b.append(", State:").append(getPipelineState()); b.append(", leaderId:").append(getLeaderId()); - b.append(" ]"); + b.append(", CreationTimestamp").append(getCreationTimestamp()); + b.append("]"); return b.toString(); } @@ -323,6 +346,7 @@ public static class Builder { private List nodeOrder = null; private List nodesInOrder = null; private UUID leaderId = null; + private Long creationTimestamp = null; public Builder() {} @@ -334,6 +358,7 @@ public Builder(Pipeline pipeline) { this.nodeStatus = pipeline.nodeStatus; this.nodesInOrder = pipeline.nodesInOrder.get(); this.leaderId = pipeline.getLeaderId(); + this.creationTimestamp = pipeline.getCreationTimestamp(); } public Builder setId(PipelineID id1) { @@ -380,6 +405,10 @@ public Pipeline build() { Preconditions.checkNotNull(nodeStatus); Pipeline pipeline = new Pipeline(id, type, factor, state, nodeStatus); pipeline.setLeaderId(leaderId); + // overwrite with original creationTimestamp + if (creationTimestamp != null) { + pipeline.setCreationTimestamp(creationTimestamp); + } if (nodeOrder != null && !nodeOrder.isEmpty()) { // This branch is for build from ProtoBuf diff --git a/hadoop-hdds/common/src/main/proto/hdds.proto b/hadoop-hdds/common/src/main/proto/hdds.proto index 8da35760180d..c78175cb4ea1 100644 --- a/hadoop-hdds/common/src/main/proto/hdds.proto +++ b/hadoop-hdds/common/src/main/proto/hdds.proto @@ -75,6 +75,7 @@ message Pipeline { required PipelineID id = 5; optional string leaderID = 6; repeated uint32 memberOrders = 7; + optional uint64 creationTimeStamp = 8; } message KeyValue { diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 8bc2ea177734..93ef0a50436a 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -806,6 +806,18 @@ of max amount of pipelines which are OPEN. + + ozone.scm.pipeline.allocated.timeout + 5m + OZONE, SCM, PIPELINE + + Timeout for every pipeline to stay in ALLOCATED stage. When pipeline is created, + it should be at OPEN stage once pipeline report is successfully received by SCM. + If a pipeline stays at ALLOCATED for too long, it should be scrubbed so that new + pipeline can be created. This timeout is for how long pipeline can stay at ALLOCATED + stage until it gets scrubbed. 
+ + ozone.scm.container.size 5GB diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java index b663f2aa1bf2..8e4ec6a00f8f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java @@ -110,12 +110,18 @@ private void createPipelines() { // Skip this iteration for creating pipeline continue; } + + try { + pipelineManager.scrubPipeline(type, factor); + } catch (IOException e) { + LOG.error("Error while scrubbing pipelines {}", e); + } + while (true) { try { if (scheduler.isClosed()) { break; } - pipelineManager.createPipeline(type, factor); } catch (IOException ioe) { break; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java index 08552957789f..635e032c4764 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java @@ -77,6 +77,9 @@ NavigableSet getContainersInPipeline(PipelineID pipelineID) void finalizeAndDestroyPipeline(Pipeline pipeline, boolean onTimeout) throws IOException; + void scrubPipeline(ReplicationType type, ReplicationFactor factor) + throws IOException; + void startPipelineCreator(); void triggerPipelineCreation(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java index 035d60405c1b..01af465be7f3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java @@ -54,6 +54,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.stream.Collectors; import static org.apache.hadoop.ozone.OzoneConsts.SCM_PIPELINE_DB; @@ -362,6 +363,31 @@ public void finalizeAndDestroyPipeline(Pipeline pipeline, boolean onTimeout) } } + @Override + public void scrubPipeline(ReplicationType type, ReplicationFactor factor) + throws IOException{ + if (type != ReplicationType.RATIS || factor != ReplicationFactor.THREE) { + // Only srub pipeline for RATIS THREE pipeline + return; + } + Long currentTime = System.currentTimeMillis(); + Long pipelineScrubTimeoutInMills = conf.getTimeDuration( + ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT, + ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT_DEFAULT, + TimeUnit.MILLISECONDS); + List needToSrubPipelines = stateManager.getPipelines(type, factor, + Pipeline.PipelineState.ALLOCATED).stream() + .filter(p -> (currentTime - p.getCreationTimestamp() + >= pipelineScrubTimeoutInMills)) + .collect(Collectors.toList()); + for (Pipeline p : needToSrubPipelines) { + LOG.info("srubbing pipeline: id: " + p.getId().toString() + + " since it stays at ALLOCATED stage for " + + (currentTime - p.getCreationTimestamp())/60000 + " mins."); + finalizeAndDestroyPipeline(p, false); + } + } + @Override public Map getPipelineInfo() { final Map 
pipelineInfo = new HashMap<>(); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java index 25b0adc32651..7513cada58ff 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java @@ -33,6 +33,15 @@ */ public class MockRatisPipelineProvider extends RatisPipelineProvider { + private boolean autoOpenPipeline; + + public MockRatisPipelineProvider(NodeManager nodeManager, + PipelineStateManager stateManager, + Configuration conf, boolean autoOpen) { + super(nodeManager, stateManager, conf, null); + autoOpenPipeline = autoOpen; + } + public MockRatisPipelineProvider(NodeManager nodeManager, PipelineStateManager stateManager, Configuration conf) { @@ -43,12 +52,31 @@ public MockRatisPipelineProvider(NodeManager nodeManager, PipelineStateManager stateManager, Configuration conf, EventPublisher eventPublisher) { super(nodeManager, stateManager, conf, eventPublisher); + autoOpenPipeline = true; } protected void initializePipeline(Pipeline pipeline) throws IOException { // do nothing as the datanodes do not exists } + @Override + public Pipeline create(HddsProtos.ReplicationFactor factor) + throws IOException { + if (autoOpenPipeline) { + return super.create(factor); + } else { + Pipeline initialPipeline = super.create(factor); + return Pipeline.newBuilder() + .setId(initialPipeline.getId()) + // overwrite pipeline state to maintain ALLOCATED + .setState(Pipeline.PipelineState.ALLOCATED) + .setType(initialPipeline.getType()) + .setFactor(factor) + .setNodes(initialPipeline.getNodes()) + .build(); + } + } + @Override + public void shutdown() { + // Do nothing.
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java index f5e3f842448c..66991e4f2ee2 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.MockNodeManager; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.junit.Assume; @@ -53,9 +54,11 @@ public class TestRatisPipelineProvider { @Before public void init() throws Exception { nodeManager = new MockNodeManager(true, 10); + OzoneConfiguration conf = new OzoneConfiguration(); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 1); stateManager = new PipelineStateManager(); provider = new MockRatisPipelineProvider(nodeManager, - stateManager, new OzoneConfiguration()); + stateManager, conf); } private void createPipelineAndAssertions( @@ -64,6 +67,7 @@ private void createPipelineAndAssertions( assertPipelineProperties(pipeline, factor, REPLICATION_TYPE, Pipeline.PipelineState.ALLOCATED); stateManager.addPipeline(pipeline); + nodeManager.addPipeline(pipeline); Pipeline pipeline1 = provider.create(factor); assertPipelineProperties(pipeline1, factor, REPLICATION_TYPE, @@ -149,6 +153,9 @@ public void testCreatePipelinesDnExclude() throws IOException { Pipeline pipeline = provider.create(factor); assertPipelineProperties(pipeline, factor, REPLICATION_TYPE, Pipeline.PipelineState.ALLOCATED); + nodeManager.addPipeline(pipeline); + stateManager.addPipeline(pipeline); + List nodes = pipeline.getNodes(); @@ -184,5 +191,6 @@ private void addPipeline( .build(); stateManager.addPipeline(openPipeline); + nodeManager.addPipeline(openPipeline); } } \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java index 08f51858720a..2df851d01201 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdds.scm.pipeline; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT; import static org.apache.hadoop.test.MetricsAsserts.getLongCounter; import static org.apache.hadoop.test.MetricsAsserts.getMetrics; @@ -28,6 +29,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; @@ -270,7 +272,7 @@ public void testPipelineCreationFailedMetric() throws Exception { numPipelineCreateFailed = getLongCounter( "NumPipelineCreationFailed", metrics); Assert.assertTrue(numPipelineCreateFailed == 1); - + // clean up pipelineManager.close(); } @@ -374,6 +376,45 @@ public void 
testPipelineOpenOnlyWhenLeaderReported() throws Exception { pipelineManager.close(); } + @Test + public void testScrubPipeline() throws IOException { + // No timeout for pipeline scrubber. + conf.setTimeDuration( + OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT, -1, + TimeUnit.MILLISECONDS); + + final SCMPipelineManager pipelineManager = + new SCMPipelineManager(conf, nodeManager, new EventQueue(), null); + final PipelineProvider ratisProvider = new MockRatisPipelineProvider( + nodeManager, pipelineManager.getStateManager(), conf, false); + + pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS, + ratisProvider); + + Pipeline pipeline = pipelineManager + .createPipeline(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE); + // At this point, pipeline is not at OPEN stage. + Assert.assertEquals(pipeline.getPipelineState(), + Pipeline.PipelineState.ALLOCATED); + + // pipeline should be seen in pipelineManager as ALLOCATED. + Assert.assertTrue(pipelineManager + .getPipelines(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE, + Pipeline.PipelineState.ALLOCATED).contains(pipeline)); + pipelineManager.scrubPipeline(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE); + + // pipeline should be scrubbed. + Assert.assertFalse(pipelineManager + .getPipelines(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE, + Pipeline.PipelineState.ALLOCATED).contains(pipeline)); + + pipelineManager.close(); + } + private void sendPipelineReport(DatanodeDetails dn, Pipeline pipeline, PipelineReportHandler pipelineReportHandler, boolean isLeader, EventQueue eventQueue) { From 41a30caf74752e2b42a68a047bd970a4e6f278b6 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Tue, 3 Dec 2019 16:32:08 +0800 Subject: [PATCH 06/19] Rebase Fix --- .../hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java | 6 +++--- .../hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java index 7513cada58ff..ff5247027259 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java @@ -36,9 +36,9 @@ public class MockRatisPipelineProvider extends RatisPipelineProvider { private boolean autoOpenPipeline; public MockRatisPipelineProvider(NodeManager nodeManager, - PipelineStateManager stateManager, - Configuration conf, boolean autoOpen) { - super(nodeManager, stateManager, conf, null); + PipelineStateManager stateManager, Configuration conf, + EventPublisher eventPublisher, boolean autoOpen) { + super(nodeManager, stateManager, conf, eventPublisher); autoOpenPipeline = autoOpen; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java index 2df851d01201..491e2893cf3d 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java @@ -383,10 +383,12 @@ public void testScrubPipeline() throws IOException { 
OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT, -1, TimeUnit.MILLISECONDS); + EventQueue eventQueue = new EventQueue(); final SCMPipelineManager pipelineManager = - new SCMPipelineManager(conf, nodeManager, new EventQueue(), null); + new SCMPipelineManager(conf, nodeManager, eventQueue); final PipelineProvider ratisProvider = new MockRatisPipelineProvider( - nodeManager, pipelineManager.getStateManager(), conf, false); + nodeManager, pipelineManager.getStateManager(), conf, eventQueue, + false); pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS, ratisProvider); From 90f794f9b683fb3b62c6e73e8f5b91b477a2809f Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Thu, 12 Dec 2019 14:11:00 +0800 Subject: [PATCH 07/19] HDDS-2650 Fix createPipeline CLI. (#340) --- .../hdds/scm/cli/pipeline/CreatePipelineSubcommand.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java index edeb786726a9..58a177865204 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java @@ -20,7 +20,6 @@ import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.scm.cli.SCMCLI; import org.apache.hadoop.hdds.scm.client.ScmClient; import picocli.CommandLine; @@ -30,13 +29,13 @@ * Handler of createPipeline command. */ @CommandLine.Command( - name = "createPipeline", + name = "create", description = "create pipeline", mixinStandardHelpOptions = true, versionProvider = HddsVersionProvider.class) public class CreatePipelineSubcommand implements Callable { @CommandLine.ParentCommand - private SCMCLI parent; + private PipelineCommands parent; @CommandLine.Option( names = {"-t", "--replicationType"}, @@ -60,7 +59,7 @@ public Void call() throws Exception { throw new IllegalArgumentException(type.name() + " is not supported yet."); } - try (ScmClient scmClient = parent.createScmClient()) { + try (ScmClient scmClient = parent.getParent().createScmClient()) { scmClient.createReplicationPipeline( type, factor, From e720e7ad8af5428df9e363d24c2afb5a2b80f3e2 Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Wed, 18 Dec 2019 19:05:52 +0800 Subject: [PATCH 08/19] HDDS-2035 Implement datanode level CLI to reveal pipeline relation. 
(#348) --- .../apache/hadoop/hdds/scm/cli/SCMCLI.java | 2 + .../scm/cli/datanode/DatanodeCommands.java | 52 ++++++++ .../scm/cli/datanode/ListInfoSubcommand.java | 124 ++++++++++++++++++ .../hdds/scm/cli/datanode/package-info.java | 22 ++++ 4 files changed, 200 insertions(+) create mode 100644 hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DatanodeCommands.java create mode 100644 hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java create mode 100644 hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/package-info.java diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java index 8c0fb0370834..20a35a8c4e21 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.cli.container.ContainerCommands; +import org.apache.hadoop.hdds.scm.cli.datanode.DatanodeCommands; import org.apache.hadoop.hdds.scm.cli.pipeline.PipelineCommands; import org.apache.hadoop.hdds.scm.client.ContainerOperationClient; import org.apache.hadoop.hdds.scm.client.ScmClient; @@ -59,6 +60,7 @@ SafeModeCommands.class, ContainerCommands.class, PipelineCommands.class, + DatanodeCommands.class, TopologySubcommand.class, ReplicationManagerCommands.class }, diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DatanodeCommands.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DatanodeCommands.java new file mode 100644 index 000000000000..94763d356cb2 --- /dev/null +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DatanodeCommands.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.cli.datanode; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.cli.MissingSubcommandException; +import org.apache.hadoop.hdds.scm.cli.SCMCLI; +import picocli.CommandLine; + +import java.util.concurrent.Callable; + +/** + * Subcommand for datanode related operations. + */ +@CommandLine.Command( + name = "datanode", + description = "Datanode specific operations", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class, + subcommands = { + ListInfoSubcommand.class + }) +public class DatanodeCommands implements Callable { + + @CommandLine.ParentCommand + private SCMCLI parent; + + public SCMCLI getParent() { + return parent; + } + + @Override + public Void call() throws Exception { + throw new MissingSubcommandException( + this.parent.getCmd().getSubcommands().get("datanode")); + } +} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java new file mode 100644 index 000000000000..dcd8402fef47 --- /dev/null +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.cli.datanode; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import picocli.CommandLine; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Handler of list datanodes info command. + */ +@CommandLine.Command( + name = "list", + description = "List info of datanodes", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class ListInfoSubcommand implements Callable { + + @CommandLine.ParentCommand + private DatanodeCommands parent; + + @CommandLine.Option(names = {"--ip"}, + description = "Show info by ip address.", + defaultValue = "", + required = false) + private String ipaddress; + + @CommandLine.Option(names = {"--id"}, + description = "Show info by datanode UUID.", + defaultValue = "", + required = false) + private String uuid; + + private List pipelines; + + + @Override + public Void call() throws Exception { + try (ScmClient scmClient = parent.getParent().createScmClient()) { + pipelines = scmClient.listPipelines(); + if (isNullOrEmpty(ipaddress) && isNullOrEmpty(uuid)) { + getAllNodes(scmClient).stream().forEach(p -> printDatanodeInfo(p)); + } else { + Stream allNodes = getAllNodes(scmClient).stream(); + if (!isNullOrEmpty(ipaddress)) { + allNodes = allNodes.filter(p -> p.getIpAddress() + .compareToIgnoreCase(ipaddress) == 0); + } + if (!isNullOrEmpty(uuid)) { + allNodes = allNodes.filter(p -> p.getUuid().toString().equals(uuid)); + } + allNodes.forEach(p -> printDatanodeInfo(p)); + } + return null; + } + } + + private List getAllNodes(ScmClient scmClient) + throws IOException { + List nodes = scmClient.queryNode( + HddsProtos.NodeState.HEALTHY, HddsProtos.QueryScope.CLUSTER, ""); + + return nodes.stream() + .map(p -> DatanodeDetails.getFromProtoBuf(p.getNodeID())) + .collect(Collectors.toList()); + } + + private void printDatanodeInfo(DatanodeDetails datanode) { + StringBuilder pipelineListInfo = new StringBuilder(); + int relatedPipelineNum = 0; + if (!pipelines.isEmpty()) { + List relatedPipelines = pipelines.stream().filter( + p -> p.getNodes().contains(datanode)).collect(Collectors.toList()); + if (relatedPipelines.isEmpty()) { + pipelineListInfo.append("No related pipelines" + + " or the node is not in Healthy state."); + } else { + relatedPipelineNum = relatedPipelines.size(); + relatedPipelines.stream().forEach( + p -> pipelineListInfo.append(p.getId().getId().toString()) + .append("/").append(p.getFactor().toString()).append("/") + .append(p.getType().toString()).append("/") + .append(p.getPipelineState().toString()).append("/") + .append(datanode.getUuid().equals(p.getLeaderId()) ? 
+ "Leader" : "Follower") + .append(System.getProperty("line.separator"))); + } + } else { + pipelineListInfo.append("No pipelines in cluster."); + } + System.out.println("Datanode: " + datanode.getUuid().toString() + + " (" + datanode.getIpAddress() + "/" + + datanode.getHostName() + "/" + relatedPipelineNum + + " pipelines) \n" + "Related pipelines: \n" + pipelineListInfo); + } + + protected static boolean isNullOrEmpty(String str) { + return ((str == null) || str.trim().isEmpty()); + } +} \ No newline at end of file diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/package-info.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/package-info.java new file mode 100644 index 000000000000..f4c45cfa0e3e --- /dev/null +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Contains all of the datanode related scm commands. + */ +package org.apache.hadoop.hdds.scm.cli.datanode; \ No newline at end of file From 5c6d41290b5d3b1e472a86206935099255cf2ac6 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Wed, 18 Dec 2019 19:47:56 +0800 Subject: [PATCH 09/19] Revert "HDDS-2650 Fix createPipeline CLI. (#340)" This reverts commit 7c7171082e344d51b5ef9473413db487e5267ac0. --- .../hdds/scm/cli/pipeline/CreatePipelineSubcommand.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java index 58a177865204..edeb786726a9 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.cli.SCMCLI; import org.apache.hadoop.hdds.scm.client.ScmClient; import picocli.CommandLine; @@ -29,13 +30,13 @@ * Handler of createPipeline command. 
*/ @CommandLine.Command( - name = "create", + name = "createPipeline", description = "create pipeline", mixinStandardHelpOptions = true, versionProvider = HddsVersionProvider.class) public class CreatePipelineSubcommand implements Callable { @CommandLine.ParentCommand - private PipelineCommands parent; + private SCMCLI parent; @CommandLine.Option( names = {"-t", "--replicationType"}, @@ -59,7 +60,7 @@ public Void call() throws Exception { throw new IllegalArgumentException(type.name() + " is not supported yet."); } - try (ScmClient scmClient = parent.getParent().createScmClient()) { + try (ScmClient scmClient = parent.createScmClient()) { scmClient.createReplicationPipeline( type, factor, From 1a27b2515d50a07f329b4a5324aaf7909568d8d5 Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Wed, 18 Dec 2019 22:27:04 +0800 Subject: [PATCH 10/19] HDDS-2650 Fix createPipeline CLI and make it message based. (#370) --- ...ocationProtocolServerSideTranslatorPB.java | 26 +++++++++++++++++++ .../pipeline/CreatePipelineSubcommand.java | 7 +++-- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 5ff75e7c24f6..f2e4253be4ad 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -23,6 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.PipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto; @@ -72,6 +73,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.PipelineResponseProto.Error.errorPipelineAlreadyExists; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.PipelineResponseProto.Error.success; + /** * This class is the server-side translator that forwards requests received on * {@link StorageContainerLocationProtocolPB} to the @@ -160,6 +164,12 @@ public ScmContainerLocationResponse processRequest( .setScmCloseContainerResponse(closeContainer( request.getScmCloseContainerRequest())) .build(); + case AllocatePipeline: + return ScmContainerLocationResponse.newBuilder() + .setCmdType(request.getCmdType()) + .setStatus(Status.OK) + .setPipelineResponse(allocatePipeline(request.getPipelineRequest())) + .build(); case ListPipelines: return ScmContainerLocationResponse.newBuilder() .setCmdType(request.getCmdType()) @@ -327,6 +337,22 @@ public SCMCloseContainerResponseProto closeContainer( return SCMCloseContainerResponseProto.newBuilder().build(); } + public PipelineResponseProto allocatePipeline( + 
StorageContainerLocationProtocolProtos.PipelineRequestProto request) + throws IOException { + Pipeline pipeline = impl.createReplicationPipeline( + request.getReplicationType(), request.getReplicationFactor(), + HddsProtos.NodePool.getDefaultInstance()); + if (pipeline == null) { + return PipelineResponseProto.newBuilder() + .setErrorCode(errorPipelineAlreadyExists).build(); + } + PipelineResponseProto response = PipelineResponseProto.newBuilder() + .setErrorCode(success) + .setPipeline(pipeline.getProtobufMessage()).build(); + return response; + } + public ListPipelineResponseProto listPipelines( ListPipelineRequestProto request) throws IOException { diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java index edeb786726a9..58a177865204 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java @@ -20,7 +20,6 @@ import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.scm.cli.SCMCLI; import org.apache.hadoop.hdds.scm.client.ScmClient; import picocli.CommandLine; @@ -30,13 +29,13 @@ * Handler of createPipeline command. */ @CommandLine.Command( - name = "createPipeline", + name = "create", description = "create pipeline", mixinStandardHelpOptions = true, versionProvider = HddsVersionProvider.class) public class CreatePipelineSubcommand implements Callable { @CommandLine.ParentCommand - private SCMCLI parent; + private PipelineCommands parent; @CommandLine.Option( names = {"-t", "--replicationType"}, @@ -60,7 +59,7 @@ public Void call() throws Exception { throw new IllegalArgumentException(type.name() + " is not supported yet."); } - try (ScmClient scmClient = parent.createScmClient()) { + try (ScmClient scmClient = parent.getParent().createScmClient()) { scmClient.createReplicationPipeline( type, factor, From 0bd57145470d6f1137056fb8d5426a440e2a8a35 Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Thu, 19 Dec 2019 06:00:35 +0800 Subject: [PATCH 11/19] HDDS-1574 Average out pipeline allocation on datanodes and add metrcs/test (#291) --- .../hadoop/hdds/scm/pipeline/Pipeline.java | 19 +++ .../scm/pipeline/PipelinePlacementPolicy.java | 3 +- .../scm/pipeline/PipelineStateManager.java | 7 + .../scm/pipeline/RatisPipelineProvider.java | 8 ++ .../hdds/scm/pipeline/RatisPipelineUtils.java | 38 +++++- .../hdds/scm/pipeline/SCMPipelineManager.java | 12 ++ .../hdds/scm/pipeline/SCMPipelineMetrics.java | 9 ++ .../hdds/scm/container/MockNodeManager.java | 14 +- .../TestCloseContainerEventHandler.java | 7 +- .../pipeline/MockRatisPipelineProvider.java | 4 + .../TestPipelineDatanodesIntersection.java | 129 ++++++++++++++++++ .../pipeline/TestRatisPipelineProvider.java | 73 ++++++++-- .../scm/pipeline/TestSCMPipelineManager.java | 15 +- .../testutils/ReplicationNodeManagerMock.java | 4 +- 14 files changed, 324 insertions(+), 18 deletions(-) create mode 100644 hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java index 295156d50fc4..1dc23735a911 100644 --- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java @@ -58,6 +58,8 @@ public final class Pipeline { private UUID leaderId; // Timestamp for pipeline upon creation private Long creationTimestamp; + // Only valid for Ratis THREE pipeline. No need persist. + private int nodeIdsHash; /** * The immutable properties of pipeline object is used in @@ -73,6 +75,7 @@ private Pipeline(PipelineID id, ReplicationType type, this.state = state; this.nodeStatus = nodeStatus; this.creationTimestamp = System.currentTimeMillis(); + this.nodeIdsHash = 0; } /** @@ -129,6 +132,14 @@ void setCreationTimestamp(Long creationTimestamp) { this.creationTimestamp = creationTimestamp; } + public int getNodeIdsHash() { + return nodeIdsHash; + } + + void setNodeIdsHash(int nodeIdsHash) { + this.nodeIdsHash = nodeIdsHash; + } + /** * Return the pipeline leader's UUID. * @@ -347,6 +358,7 @@ public static class Builder { private List nodesInOrder = null; private UUID leaderId = null; private Long creationTimestamp = null; + private int nodeIdsHash = 0; public Builder() {} @@ -359,6 +371,7 @@ public Builder(Pipeline pipeline) { this.nodesInOrder = pipeline.nodesInOrder.get(); this.leaderId = pipeline.getLeaderId(); this.creationTimestamp = pipeline.getCreationTimestamp(); + this.nodeIdsHash = 0; } public Builder setId(PipelineID id1) { @@ -397,6 +410,11 @@ public Builder setNodesInOrder(List orders) { return this; } + public Builder setNodeIdsHash(int nodeIdsHash1) { + this.nodeIdsHash = nodeIdsHash1; + return this; + } + public Pipeline build() { Preconditions.checkNotNull(id); Preconditions.checkNotNull(type); @@ -405,6 +423,7 @@ public Pipeline build() { Preconditions.checkNotNull(nodeStatus); Pipeline pipeline = new Pipeline(id, type, factor, state, nodeStatus); pipeline.setLeaderId(leaderId); + pipeline.setNodeIdsHash(nodeIdsHash); // overwrite with original creationTimestamp if (creationTimestamp != null) { pipeline.setCreationTimestamp(creationTimestamp); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java index 23eb5745421f..bc65d14d4a53 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -162,7 +162,7 @@ List filterViableNodes( // filter nodes that meet the size and pipeline engagement criteria. // Pipeline placement doesn't take node space left into account. List healthyList = healthyNodes.stream() - .filter(d -> meetCriteria(d, nodesRequired)).limit(nodesRequired) + .filter(d -> meetCriteria(d, nodesRequired)) .collect(Collectors.toList()); if (healthyList.size() < nodesRequired) { @@ -308,6 +308,7 @@ public DatanodeDetails chooseNode( } // the pick is decided and it should be removed from candidates. 
healthyNodes.remove(datanodeDetails); + return datanodeDetails; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManager.java index bb56a0380b1b..051202b45600 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManager.java @@ -132,6 +132,13 @@ Pipeline openPipeline(PipelineID pipelineId) throws IOException { pipeline = pipelineStateMap .updatePipelineState(pipelineId, PipelineState.OPEN); } + // Amend nodeIdsHash if needed. + if (pipeline.getType() == ReplicationType.RATIS && + pipeline.getFactor() == ReplicationFactor.THREE && + pipeline.getNodeIdsHash() == 0) { + pipeline.setNodeIdsHash(RatisPipelineUtils + .encodeNodeIdsOfFactorThreePipeline(pipeline.getNodes())); + } return pipeline; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java index 23b02ed2528a..95859070f27d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java @@ -157,6 +157,7 @@ public Pipeline create(ReplicationFactor factor) throws IOException { } List dns; + int nodeIdHash = 0; switch(factor) { case ONE: @@ -165,6 +166,7 @@ public Pipeline create(ReplicationFactor factor) throws IOException { case THREE: dns = placementPolicy.chooseDatanodes(null, null, factor.getNumber(), 0); + nodeIdHash = RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(dns); break; default: throw new IllegalStateException("Unknown factor: " + factor.name()); @@ -176,6 +178,7 @@ public Pipeline create(ReplicationFactor factor) throws IOException { .setType(ReplicationType.RATIS) .setFactor(factor) .setNodes(dns) + .setNodeIdsHash(nodeIdHash) .build(); // Send command to datanodes to create pipeline @@ -196,12 +199,17 @@ public Pipeline create(ReplicationFactor factor) throws IOException { @Override public Pipeline create(ReplicationFactor factor, List nodes) { + int nodeIdHash = 0; + if (factor == ReplicationFactor.THREE) { + nodeIdHash = RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(nodes); + } return Pipeline.newBuilder() .setId(PipelineID.randomId()) .setState(PipelineState.ALLOCATED) .setType(ReplicationType.RATIS) .setFactor(factor) .setNodes(nodes) + .setNodeIdsHash(nodeIdHash) .build(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java index b8cdf061300c..f9f2011b5a81 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java @@ -18,9 +18,12 @@ package org.apache.hadoop.hdds.scm.pipeline; import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import 
org.apache.hadoop.hdds.ratis.RatisHelper; import org.apache.ratis.client.RaftClient; @@ -33,7 +36,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; - /** * Utility class for Ratis pipelines. Contains methods to create and destroy * ratis pipelines. @@ -93,4 +95,38 @@ static void destroyPipeline(DatanodeDetails dn, PipelineID pipelineID, true, p.getId()); } } + + static int encodeNodeIdsOfFactorThreePipeline(List nodes) { + if (nodes.size() != HddsProtos.ReplicationFactor.THREE.getNumber()) { + return 0; + } + return nodes.get(0).getUuid().hashCode() ^ + nodes.get(1).getUuid().hashCode() ^ + nodes.get(2).getUuid().hashCode(); + } + + /** + * Return first existed pipeline which share the same set of datanodes + * with the input pipeline. + * @param stateManager PipelineStateManager + * @param pipeline input pipeline + * @return first matched pipeline + */ + static Pipeline checkPipelineContainSameDatanodes( + PipelineStateManager stateManager, Pipeline pipeline) { + List matchedPipelines = stateManager.getPipelines( + HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE) + .stream().filter(p -> !p.getId().equals(pipeline.getId()) && + (// For all OPEN or ALLOCATED pipelines + p.getPipelineState() == Pipeline.PipelineState.OPEN || + p.getPipelineState() == Pipeline.PipelineState.ALLOCATED) && + p.getNodeIdsHash() == pipeline.getNodeIdsHash()) + .collect(Collectors.toList()); + if (matchedPipelines.size() == 0) { + return null; + } else { + return matchedPipelines.stream().findFirst().get(); + } + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java index 01af465be7f3..11e9916f8f45 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java @@ -172,6 +172,18 @@ public synchronized Pipeline createPipeline(ReplicationType type, metrics.incNumPipelineCreated(); metrics.createPerPipelineMetrics(pipeline); } + Pipeline overlapPipeline = RatisPipelineUtils + .checkPipelineContainSameDatanodes(stateManager, pipeline); + if (overlapPipeline != null) { + metrics.incNumPipelineContainSameDatanodes(); + //TODO remove until pipeline allocation is proved equally distributed. 
+ LOG.info("Pipeline: " + pipeline.getId().toString() + + " contains same datanodes as previous pipeline: " + + overlapPipeline.getId().toString() + " nodeIds: " + + pipeline.getNodes().get(0).getUuid().toString() + + ", " + pipeline.getNodes().get(1).getUuid().toString() + + ", " + pipeline.getNodes().get(2).getUuid().toString()); + } return pipeline; } catch (IOException ex) { metrics.incNumPipelineCreationFailed(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java index 8c348ed87cf9..1cf8d3a1e528 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java @@ -54,6 +54,7 @@ public final class SCMPipelineMetrics implements MetricsSource { private @Metric MutableCounterLong numPipelineDestroyFailed; private @Metric MutableCounterLong numPipelineReportProcessed; private @Metric MutableCounterLong numPipelineReportProcessingFailed; + private @Metric MutableCounterLong numPipelineContainSameDatanodes; private Map numBlocksAllocated; /** Private constructor. */ @@ -92,6 +93,7 @@ public void getMetrics(MetricsCollector collector, boolean all) { numPipelineDestroyFailed.snapshot(recordBuilder, true); numPipelineReportProcessed.snapshot(recordBuilder, true); numPipelineReportProcessingFailed.snapshot(recordBuilder, true); + numPipelineContainSameDatanodes.snapshot(recordBuilder, true); numBlocksAllocated .forEach((pid, metric) -> metric.snapshot(recordBuilder, true)); } @@ -176,4 +178,11 @@ void incNumPipelineReportProcessed() { void incNumPipelineReportProcessingFailed() { numPipelineReportProcessingFailed.incr(); } + + /** + * Increments number of pipeline who contains same set of datanodes. 
+ */ + void incNumPipelineContainSameDatanodes() { + numPipelineContainSameDatanodes.incr(); + } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java index bca4189072d2..cbeef7f67ab7 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java @@ -17,7 +17,7 @@ package org.apache.hadoop.hdds.scm.container; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; -import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.PipelineReportsProto; import org.apache.hadoop.hdds.scm.net.NetConstants; @@ -93,7 +93,8 @@ public class MockNodeManager implements NodeManager { private NetworkTopology clusterMap; private ConcurrentMap> dnsToUuidMap; - public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { + public MockNodeManager(NetworkTopologyImpl clusterMap, + boolean initializeFakeNodes, int nodeCount) { this.healthyNodes = new LinkedList<>(); this.staleNodes = new LinkedList<>(); this.deadNodes = new LinkedList<>(); @@ -101,8 +102,8 @@ public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { this.node2PipelineMap = new Node2PipelineMap(); this.node2ContainerMap = new Node2ContainerMap(); this.dnsToUuidMap = new ConcurrentHashMap<>(); - aggregateStat = new SCMNodeStat(); - clusterMap = new NetworkTopologyImpl(new Configuration()); + this.aggregateStat = new SCMNodeStat(); + this.clusterMap = clusterMap; if (initializeFakeNodes) { for (int x = 0; x < nodeCount; x++) { DatanodeDetails dd = MockDatanodeDetails.randomDatanodeDetails(); @@ -114,6 +115,11 @@ public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { this.commandMap = new HashMap<>(); } + public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { + this(new NetworkTopologyImpl(new OzoneConfiguration()), + initializeFakeNodes, nodeCount); + } + /** * Invoked from ctor to create some node Metrics. 
* diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java index 612bf5dd99df..f35bfe2850eb 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.TestUtils; import org.apache.hadoop.hdds.scm.pipeline.MockRatisPipelineProvider; import org.apache.hadoop.hdds.scm.pipeline.PipelineProvider; @@ -67,13 +68,14 @@ public static void setUp() throws Exception { .getTestDir(TestCloseContainerEventHandler.class.getSimpleName()); configuration .set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); + configuration.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT, 16); nodeManager = new MockNodeManager(true, 10); eventQueue = new EventQueue(); pipelineManager = new SCMPipelineManager(configuration, nodeManager, eventQueue); PipelineProvider mockRatisProvider = new MockRatisPipelineProvider(nodeManager, - pipelineManager.getStateManager(), configuration); + pipelineManager.getStateManager(), configuration, eventQueue); pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS, mockRatisProvider); containerManager = new SCMContainerManager(configuration, pipelineManager); @@ -91,6 +93,9 @@ public static void tearDown() throws Exception { if (containerManager != null) { containerManager.close(); } + if (pipelineManager != null) { + pipelineManager.close(); + } FileUtil.fullyDelete(testDir); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java index ff5247027259..3eb146a2c9cc 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java @@ -73,6 +73,8 @@ public Pipeline create(HddsProtos.ReplicationFactor factor) .setType(initialPipeline.getType()) .setFactor(factor) .setNodes(initialPipeline.getNodes()) + .setNodeIdsHash(RatisPipelineUtils + .encodeNodeIdsOfFactorThreePipeline(initialPipeline.getNodes())) .build(); } } @@ -91,6 +93,8 @@ public Pipeline create(HddsProtos.ReplicationFactor factor, .setType(HddsProtos.ReplicationType.RATIS) .setFactor(factor) .setNodes(nodes) + .setNodeIdsHash(RatisPipelineUtils + .encodeNodeIdsOfFactorThreePipeline(nodes)) .build(); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java new file mode 100644 index 000000000000..45f85eff17d1 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.pipeline; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.container.MockNodeManager; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE; + +/** + * Test for pipeline datanodes intersection. + */ +@RunWith(Parameterized.class) +public class TestPipelineDatanodesIntersection { + private static final Logger LOG = LoggerFactory + .getLogger(TestPipelineDatanodesIntersection.class.getName()); + + private int nodeCount; + private int nodeHeaviness; + private OzoneConfiguration conf; + private boolean end; + + @Before + public void initialize() { + conf = new OzoneConfiguration(); + end = false; + } + + public TestPipelineDatanodesIntersection(int nodeCount, int nodeHeaviness) { + this.nodeCount = nodeCount; + this.nodeHeaviness = nodeHeaviness; + } + + @Parameterized.Parameters + public static Collection inputParams() { + return Arrays.asList(new Object[][] { + {4, 5}, + {10, 5}, + {20, 5}, + {50, 5}, + {100, 5}, + {100, 10} + }); + } + + @Test + public void testPipelineDatanodesIntersection() { + NodeManager nodeManager= new MockNodeManager(true, nodeCount); + conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, nodeHeaviness); + conf.setBoolean(OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE, false); + PipelineStateManager stateManager = new PipelineStateManager(conf); + PipelineProvider provider = new MockRatisPipelineProvider(nodeManager, + stateManager, conf); + + int healthyNodeCount = nodeManager + .getNodeCount(HddsProtos.NodeState.HEALTHY); + int intersectionCount = 0; + int createdPipelineCount = 0; + while (!end && createdPipelineCount <= healthyNodeCount * nodeHeaviness) { + try { + Pipeline pipeline = provider.create(HddsProtos.ReplicationFactor.THREE); + stateManager.addPipeline(pipeline); + nodeManager.addPipeline(pipeline); + Pipeline overlapPipeline = RatisPipelineUtils + .checkPipelineContainSameDatanodes(stateManager, pipeline); + if (overlapPipeline != null){ + intersectionCount++; + LOG.info("This pipeline: " + pipeline.getId().toString() + + " overlaps with previous pipeline: " + overlapPipeline.getId() + + ". 
They share same set of datanodes as: " + + pipeline.getNodesInOrder().get(0).getUuid() + "/" + + pipeline.getNodesInOrder().get(1).getUuid() + "/" + + pipeline.getNodesInOrder().get(2).getUuid() + " and " + + overlapPipeline.getNodesInOrder().get(0).getUuid() + "/" + + overlapPipeline.getNodesInOrder().get(1).getUuid() + "/" + + overlapPipeline.getNodesInOrder().get(2).getUuid() + + " is the same."); + } + createdPipelineCount++; + } catch(SCMException e) { + end = true; + } catch (IOException e) { + end = true; + // Should not throw regular IOException. + Assert.fail(); + } + } + + end = false; + + LOG.info("Among total " + + stateManager.getPipelines(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE).size() + " created pipelines" + + " with " + healthyNodeCount + " healthy datanodes and " + + nodeHeaviness + " as node heaviness, " + + intersectionCount + " pipelines has same set of datanodes."); + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java index 66991e4f2ee2..46fd8c8f48ca 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.MockNodeManager; import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.junit.Assert; import org.junit.Assume; import org.junit.Before; import org.junit.Test; @@ -34,9 +35,14 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; import static org.apache.commons.collections.CollectionUtils.intersection; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; /** @@ -50,12 +56,13 @@ public class TestRatisPipelineProvider { private NodeManager nodeManager; private PipelineProvider provider; private PipelineStateManager stateManager; + private OzoneConfiguration conf; @Before public void init() throws Exception { nodeManager = new MockNodeManager(true, 10); OzoneConfiguration conf = new OzoneConfiguration(); - conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 1); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); stateManager = new PipelineStateManager(); provider = new MockRatisPipelineProvider(nodeManager, stateManager, conf); @@ -75,8 +82,12 @@ private void createPipelineAndAssertions( // New pipeline should not overlap with the previous created pipeline assertTrue( intersection(pipeline.getNodes(), pipeline1.getNodes()) - .isEmpty()); + .size() < factor.getNumber()); + if (pipeline.getFactor() == HddsProtos.ReplicationFactor.THREE) { + assertNotEquals(pipeline.getNodeIdsHash(), pipeline1.getNodeIdsHash()); + } stateManager.addPipeline(pipeline1); + nodeManager.addPipeline(pipeline1); } @Test @@ -92,10 +103,9 @@ public void testCreatePipelineWithFactor() throws IOException { assertPipelineProperties(pipeline1, factor, REPLICATION_TYPE, Pipeline.PipelineState.ALLOCATED); 
stateManager.addPipeline(pipeline1); - // New pipeline should overlap with the previous created pipeline, - // and one datanode should overlap between the two types. - assertEquals(1, - intersection(pipeline.getNodes(), pipeline1.getNodes()).size()); + // With enough pipeline quote on datanodes, they should not share + // the same set of datanodes. + assertNotEquals(pipeline.getNodeIdsHash(), pipeline1.getNodeIdsHash()); } @Test @@ -130,6 +140,49 @@ public void testCreatePipelineWithNodes() { Pipeline.PipelineState.OPEN); } + @Test + public void testComputeNodeIdsHash() { + int total = HddsProtos.ReplicationFactor.THREE.getNumber(); + List nodes1 = new ArrayList<>(); + for (int i = 0; i < total; i++) { + nodes1.add(MockDatanodeDetails.createDatanodeDetails( + UUID.fromString("00000-11000-00000-00000-0000" + (i + 1)))); + } + + Assert.assertEquals(total, nodes1.size()); + Assert.assertNotEquals(0, + RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(nodes1)); + + List nodes2 = new ArrayList<>(); + for (int i = 0; i < total; i++) { + nodes2.add(MockDatanodeDetails.createDatanodeDetails( + UUID.fromString("00000-11000-00000-00000-0000" + (total - i)))); + } + Assert.assertEquals(total, nodes2.size()); + Assert.assertNotEquals(0, + RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(nodes2)); + + Assert.assertEquals( + RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(nodes1), + RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(nodes2)); + } + + @Test + public void testCreateFactorTHREEPipelineWithSameDatanodes() { + List healthyNodes = nodeManager + .getNodes(HddsProtos.NodeState.HEALTHY).stream() + .limit(3).collect(Collectors.toList()); + + Pipeline pipeline1 = provider.create( + HddsProtos.ReplicationFactor.THREE, healthyNodes); + Pipeline pipeline2 = provider.create( + HddsProtos.ReplicationFactor.THREE, healthyNodes); + + Assert.assertTrue(pipeline1.getNodes().parallelStream() + .allMatch(pipeline2.getNodes()::contains)); + Assert.assertEquals(pipeline1.getNodeIdsHash(), pipeline2.getNodeIdsHash()); + } + @Test public void testCreatePipelinesDnExclude() throws IOException { List healthyNodes = @@ -141,7 +194,11 @@ public void testCreatePipelinesDnExclude() throws IOException { // Use up first 3 DNs for an open pipeline. List dns = healthyNodes.subList(0, 3); - addPipeline(dns, factor, Pipeline.PipelineState.OPEN, REPLICATION_TYPE); + for (int i = 0; i < conf.getInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, + OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT); i++) { + // Saturate pipeline counts on all the 1st 3 DNs. + addPipeline(dns, factor, Pipeline.PipelineState.OPEN, REPLICATION_TYPE); + } Set membersOfOpenPipelines = new HashSet<>(dns); // Use up next 3 DNs for a closed pipeline. 
@@ -160,7 +217,7 @@ public void testCreatePipelinesDnExclude() throws IOException { List nodes = pipeline.getNodes(); assertTrue( - "nodes of new pipeline cannot be from open pipelines", + "nodes of new pipeline cannot be all from open pipelines", nodes.stream().noneMatch(membersOfOpenPipelines::contains)); assertTrue( diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java index 491e2893cf3d..e6bf7a09d018 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.pipeline; +import static org.apache.commons.collections.CollectionUtils.intersection; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT; import static org.apache.hadoop.test.MetricsAsserts.getLongCounter; @@ -30,6 +31,7 @@ import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; @@ -89,8 +91,10 @@ public void testPipelineReload() throws IOException { pipelineManager.getStateManager(), conf); pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS, mockRatisProvider); + int pipelineNum = 5; + Set pipelines = new HashSet<>(); - for (int i = 0; i < 5; i++) { + for (int i = 0; i < pipelineNum; i++) { Pipeline pipeline = pipelineManager .createPipeline(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE); @@ -112,6 +116,15 @@ public void testPipelineReload() throws IOException { List pipelineList = pipelineManager.getPipelines(HddsProtos.ReplicationType.RATIS); Assert.assertEquals(pipelines, new HashSet<>(pipelineList)); + // All NodeIdsHash from original pipeline list + List originalPipelineHash = pipelineList.stream() + .map(Pipeline::getNodeIdsHash).collect(Collectors.toList()); + // All NodeIdsHash from reloaded pipeline list + List reloadedPipelineHash = pipelines.stream() + .map(Pipeline::getNodeIdsHash).collect(Collectors.toList()); + // Original NodeIdsHash list should contain same items from reloaded one. + Assert.assertEquals(pipelineNum, + intersection(originalPipelineHash, reloadedPipelineHash).size()); // clean up for (Pipeline pipeline : pipelines) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java index 7e8ec52bdf9a..069844360a81 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java @@ -168,11 +168,11 @@ public Set getPipelines(DatanodeDetails dnId) { /** * Get the count of pipelines a datanodes is associated with. 
- * @param dnId DatanodeDetails + * @param dn DatanodeDetails * @return The number of pipelines */ @Override - public int getPipelinesCount(DatanodeDetails dnId) { + public int getPipelinesCount(DatanodeDetails dn) { throw new UnsupportedOperationException("Not yet implemented"); } From 605f960700970d18c30bb71d70a3725eecc958f1 Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Mon, 23 Dec 2019 15:52:37 +0800 Subject: [PATCH 12/19] Resolve rebase conflict. --- .../pipeline/TestPipelineDatanodesIntersection.java | 2 +- .../scm/pipeline/TestPipelinePlacementPolicy.java | 2 +- .../hdds/scm/pipeline/TestRatisPipelineProvider.java | 11 +++++------ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java index 45f85eff17d1..87e8cf42cc05 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java @@ -79,7 +79,7 @@ public void testPipelineDatanodesIntersection() { NodeManager nodeManager= new MockNodeManager(true, nodeCount); conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, nodeHeaviness); conf.setBoolean(OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE, false); - PipelineStateManager stateManager = new PipelineStateManager(conf); + PipelineStateManager stateManager = new PipelineStateManager(); PipelineProvider provider = new MockRatisPipelineProvider(nodeManager, stateManager, conf); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java index 1e340393c476..2fe67f908597 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java @@ -52,7 +52,7 @@ public void init() throws Exception { conf = new OzoneConfiguration(); conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 5); placementPolicy = new PipelinePlacementPolicy( - nodeManager, new PipelineStateManager(conf), conf); + nodeManager, new PipelineStateManager(), conf); } @Test diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java index 46fd8c8f48ca..56233594e7c0 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java @@ -39,8 +39,6 @@ import java.util.stream.Collectors; import static org.apache.commons.collections.CollectionUtils.intersection; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; @@ -57,12 +55,14 @@ public class TestRatisPipelineProvider { private PipelineProvider provider; private 
PipelineStateManager stateManager; private OzoneConfiguration conf; + private int maxPipelinePerNode = 2; @Before public void init() throws Exception { nodeManager = new MockNodeManager(true, 10); - OzoneConfiguration conf = new OzoneConfiguration(); - conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); + conf = new OzoneConfiguration(); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, + maxPipelinePerNode); stateManager = new PipelineStateManager(); provider = new MockRatisPipelineProvider(nodeManager, stateManager, conf); @@ -194,8 +194,7 @@ public void testCreatePipelinesDnExclude() throws IOException { // Use up first 3 DNs for an open pipeline. List dns = healthyNodes.subList(0, 3); - for (int i = 0; i < conf.getInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, - OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT); i++) { + for (int i = 0; i < maxPipelinePerNode; i++) { // Saturate pipeline counts on all the 1st 3 DNs. addPipeline(dns, factor, Pipeline.PipelineState.OPEN, REPLICATION_TYPE); } From 8f57dbb899a8e5a388f0306c01db68c61dea640d Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Wed, 8 Jan 2020 17:37:17 +0100 Subject: [PATCH 13/19] HDDS-2756. Handle pipeline creation failure in different way when it exceeds pipeline limit Closes #401 --- .../scm/pipeline/PipelinePlacementPolicy.java | 30 +++++++++++-------- .../hdds/scm/pipeline/SCMPipelineManager.java | 1 - 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java index bc65d14d4a53..f4a13e17d1c3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -114,10 +114,10 @@ boolean meetCriteria(DatanodeDetails datanodeDetails, int nodesRequired) { boolean meet = (nodeManager.getPipelinesCount(datanodeDetails) - pipelineNumDeductable) < heavyNodeCriteria; if (!meet) { - LOG.info("Pipeline Placement: can't place more pipeline on heavy " + - "datanode: " + datanodeDetails.getUuid().toString() + " Heaviness: " + - nodeManager.getPipelinesCount(datanodeDetails) + " limit: " + - heavyNodeCriteria); + LOG.debug("Pipeline Placement: can't place more pipeline on heavy " + + "datanode: " + datanodeDetails.getUuid().toString() + + " Heaviness: " + nodeManager.getPipelinesCount(datanodeDetails) + + " limit: " + heavyNodeCriteria); } return meet; } @@ -144,17 +144,19 @@ List filterViableNodes( int initialHealthyNodesCount = healthyNodes.size(); String msg; if (initialHealthyNodesCount == 0) { - msg = "No healthy node found to allocate pipeline."; + msg = "No healthy nodes found to allocate pipeline."; LOG.error(msg); throw new SCMException(msg, SCMException.ResultCodes .FAILED_TO_FIND_HEALTHY_NODES); } if (initialHealthyNodesCount < nodesRequired) { - msg = String.format("Not enough healthy nodes to allocate pipeline. %d " + LOG.warn("Not enough healthy nodes to allocate pipeline. %d " + " datanodes required. Found %d", nodesRequired, initialHealthyNodesCount); - LOG.error(msg); + msg = String.format("Pipeline creation failed due to no sufficient" + + " healthy datanodes. Required %d. 
Found %d.", + nodesRequired, initialHealthyNodesCount); throw new SCMException(msg, SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); } @@ -166,13 +168,15 @@ List filterViableNodes( .collect(Collectors.toList()); if (healthyList.size() < nodesRequired) { - msg = String.format("Unable to find enough nodes that meet " + + LOG.debug("Unable to find enough nodes that meet " + "the criteria that cannot engage in more than %d pipelines." + " Nodes required: %d Found: %d, healthy nodes count in " + "NodeManager: %d.", heavyNodeCriteria, nodesRequired, healthyList.size(), initialHealthyNodesCount); - LOG.error(msg); + msg = String.format("Pipeline creation failed due to not enough" + + " healthy datanodes after filter. Required %d. Found %d", + nodesRequired, initialHealthyNodesCount); throw new SCMException(msg, SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); } @@ -229,8 +233,8 @@ public List getResultSet( // First choose an anchor nodes randomly DatanodeDetails anchor = chooseNode(healthyNodes); if (anchor == null) { - LOG.error("Pipeline Placement: Unable to find the first healthy nodes " + - "that meet the criteria. Required nodes: {}, Found nodes: {}", + LOG.warn("Unable to find healthy nodes." + + " Required nodes: {}, Found nodes: {}", nodesRequired, results.size()); throw new SCMException("Unable to find required number of nodes.", SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); @@ -245,7 +249,7 @@ public List getResultSet( healthyNodes, exclude, nodeManager.getClusterNetworkTopologyMap(), anchor); if (nodeOnDifferentRack == null) { - LOG.error("Pipeline Placement: Unable to find nodes on different racks " + + LOG.warn("Pipeline Placement: Unable to find nodes on different racks " + " that meet the criteria. Required nodes: {}, Found nodes: {}", nodesRequired, results.size()); throw new SCMException("Unable to find required number of nodes.", @@ -269,7 +273,7 @@ public List getResultSet( } if (results.size() < nodesRequired) { - LOG.error("Pipeline Placement: Unable to find the required number of " + + LOG.warn("Unable to find the required number of " + "healthy nodes that meet the criteria. Required nodes: {}, " + "Found nodes: {}", nodesRequired, results.size()); throw new SCMException("Unable to find required number of nodes.", diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java index 11e9916f8f45..f924b4163b6f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java @@ -187,7 +187,6 @@ public synchronized Pipeline createPipeline(ReplicationType type, return pipeline; } catch (IOException ex) { metrics.incNumPipelineCreationFailed(); - LOG.error("Pipeline creation failed.", ex); throw ex; } finally { lock.writeLock().unlock(); From 6613b0562da36a04c5fbdbe2e834f2ff0980df9c Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Thu, 9 Jan 2020 05:19:34 +0800 Subject: [PATCH 14/19] HDDS-2115 Add acceptance test for createPipeline CLI and datanode list CLI (#375) * HDDS-2115 Add acceptance test for createPipeline CLI and datanode list CLI. 
--- .../pipeline/CreatePipelineSubcommand.java | 9 +++++- .../src/main/smoketest/scmcli/datanode.robot | 29 +++++++++++++++++++ .../src/main/smoketest/scmcli/pipeline.robot | 7 ++++- 3 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java index 58a177865204..e0bdddb7797e 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import picocli.CommandLine; import java.util.concurrent.Callable; @@ -60,10 +61,16 @@ public Void call() throws Exception { + " is not supported yet."); } try (ScmClient scmClient = parent.getParent().createScmClient()) { - scmClient.createReplicationPipeline( + Pipeline pipeline = scmClient.createReplicationPipeline( type, factor, HddsProtos.NodePool.getDefaultInstance()); + + if (pipeline != null) { + System.out.println(pipeline.getId().toString() + + " is created. Factor: " + pipeline.getFactor() + + ", Type: " + pipeline.getType()); + } return null; } } diff --git a/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot b/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot new file mode 100644 index 000000000000..ed1173d79994 --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +*** Settings *** +Documentation Smoketest ozone cluster startup +Library OperatingSystem +Library BuiltIn +Resource ../commonlib.robot + +*** Variables *** + + +*** Test Cases *** +Run list datanodes + ${output} = Execute ozone scmcli datanode list + Should contain ${output} Datanode: + Should contain ${output} Related pipelines: \ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot b/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot index 6a6f0b0eb782..f411e0c3af67 100644 --- a/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot +++ b/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot @@ -25,4 +25,9 @@ Resource ../commonlib.robot *** Test Cases *** Run list pipeline ${output} = Execute ozone scmcli pipeline list - Should contain ${output} Type:RATIS, Factor:ONE, State:OPEN \ No newline at end of file + Should contain ${output} Type: + Should contain ${output} Factor:ONE, State: + +Run create pipeline + ${output} = Execute ozone scmcli pipeline create + Should contain ${output} is created. Factor: ONE, Type: STAND_ALONE \ No newline at end of file From 71aa879fc1491092e900622a3c93b3f334ffa915 Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Fri, 10 Jan 2020 10:50:38 +0800 Subject: [PATCH 15/19] HDDS-2772 Better management for pipeline creation limitation. (#410) --- .../main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java | 2 +- hadoop-hdds/common/src/main/resources/ozone-default.xml | 2 +- .../hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java | 2 +- .../org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java | 3 +++ 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 5f52e922acec..980a710171f3 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -297,7 +297,7 @@ public final class ScmConfigKeys { public static final String OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT = "ozone.scm.datanode.max.pipeline.engagement"; // Setting to zero by default means this limit doesn't take effect. - public static final int OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT = 0; + public static final int OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT = 2; // Upper limit for how many pipelines can be created. // Only for test purpose now. diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 93ef0a50436a..31fe046c32fb 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -792,7 +792,7 @@ ozone.scm.datanode.max.pipeline.engagement - 0 + 2 OZONE, SCM, PIPELINE Max number of pipelines per datanode can be engaged in. 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java index f4a13e17d1c3..8c6e5c79b55e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -147,7 +147,7 @@ List filterViableNodes( msg = "No healthy nodes found to allocate pipeline."; LOG.error(msg); throw new SCMException(msg, SCMException.ResultCodes - .FAILED_TO_FIND_HEALTHY_NODES); + .FAILED_TO_FIND_SUITABLE_NODE); } if (initialHealthyNodesCount < nodesRequired) { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java index 077886c2debf..ce27eed4c987 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java @@ -35,6 +35,8 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.commons.lang3.RandomStringUtils; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD; @@ -79,6 +81,7 @@ public void init() throws Exception { conf.setBoolean(OZONE_ACL_ENABLED, true); conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2); conf.set(OZONE_ADMINISTRATORS, OZONE_ADMINISTRATORS_WILDCARD); + conf.setInt(OZONE_SCM_PIPELINE_NUMBER_LIMIT, 10); cluster = MiniOzoneCluster.newBuilder(conf) .setClusterId(clusterId) .setScmId(scmId) From 484ab00d1604dc4a6924a67ba3d7e20badeceaf9 Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Tue, 28 Jan 2020 02:19:33 +0800 Subject: [PATCH 16/19] HDDS-2913 Update config names and CLI for multi-raft feature. 
(#462) --- .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 18 ++++----- .../hadoop/hdds/scm/pipeline/Pipeline.java | 19 +++++++--- .../src/main/resources/ozone-default.xml | 9 +++-- .../scm/node/states/Node2PipelineMap.java | 5 +-- .../scm/pipeline/PipelinePlacementPolicy.java | 38 +++++++++---------- .../scm/pipeline/RatisPipelineProvider.java | 8 ++-- .../hdds/scm/pipeline/RatisPipelineUtils.java | 16 +++----- .../hdds/scm/pipeline/SCMPipelineManager.java | 30 +++++++++------ .../TestCloseContainerEventHandler.java | 2 +- .../hdds/scm/node/TestDeadNodeHandler.java | 4 +- .../TestPipelineDatanodesIntersection.java | 32 +++++++++------- .../pipeline/TestPipelinePlacementPolicy.java | 8 ++-- .../pipeline/TestRatisPipelineProvider.java | 2 +- .../scm/pipeline/TestSCMPipelineManager.java | 4 +- .../scm/cli/datanode/ListInfoSubcommand.java | 11 ++---- .../cli/pipeline/ListPipelinesSubcommand.java | 11 ++---- .../hadoop/fs/ozone/TestOzoneFsHAURLs.java | 2 +- .../TestRatisPipelineCreateAndDestroy.java | 4 +- .../TestSCMSafeModeWithPipelineRules.java | 4 +- .../hadoop/ozone/MiniOzoneClusterImpl.java | 6 +-- .../TestBlockOutputStreamWithFailures.java | 2 +- .../rpc/TestContainerReplicationEndToEnd.java | 4 +- .../rpc/TestFailureHandlingByClient.java | 2 +- .../TestMultiBlockWritesWithDnFailures.java | 2 +- .../ozone/client/rpc/TestWatchForCommit.java | 4 +- .../TestCloseContainerByPipeline.java | 4 +- .../ozone/om/TestOzoneManagerRestart.java | 4 +- .../hadoop/ozone/scm/node/TestQueryNode.java | 2 +- 28 files changed, 129 insertions(+), 128 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 980a710171f3..eb2a9e53c99e 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -293,18 +293,18 @@ public final class ScmConfigKeys { "ozone.scm.pipeline.owner.container.count"; public static final int OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT = 3; // Pipeline placement policy: - // the max number of pipelines can a single datanode be engaged in. - public static final String OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT = - "ozone.scm.datanode.max.pipeline.engagement"; - // Setting to zero by default means this limit doesn't take effect. - public static final int OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT = 2; + // Upper limit for how many pipelines a datanode can engage in. + public static final String OZONE_DATANODE_PIPELINE_LIMIT = + "ozone.datanode.pipeline.limit"; + public static final int OZONE_DATANODE_PIPELINE_LIMIT_DEFAULT = 2; - // Upper limit for how many pipelines can be created. + // Upper limit for how many pipelines can be created + // across the cluster nodes managed by SCM. // Only for test purpose now. - public static final String OZONE_SCM_PIPELINE_NUMBER_LIMIT = - "ozone.scm.pipeline.number.limit"; + public static final String OZONE_SCM_RATIS_PIPELINE_LIMIT = + "ozone.scm.ratis.pipeline.limit"; // Setting to zero by default means this limit doesn't take effect. 
- public static final int OZONE_SCM_PIPELINE_NUMBER_LIMIT_DEFAULT = 0; + public static final int OZONE_SCM_RATIS_PIPELINE_LIMIT_DEFAULT = 0; public static final String OZONE_SCM_KEY_VALUE_CONTAINER_DELETION_CHOOSING_POLICY = diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java index 1dc23735a911..68494943b143 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdds.scm.pipeline; import java.io.IOException; +import java.time.Instant; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedHashMap; @@ -57,7 +58,7 @@ public final class Pipeline { // Current reported Leader for the pipeline private UUID leaderId; // Timestamp for pipeline upon creation - private Long creationTimestamp; + private Instant creationTimestamp; // Only valid for Ratis THREE pipeline. No need persist. private int nodeIdsHash; @@ -74,7 +75,7 @@ private Pipeline(PipelineID id, ReplicationType type, this.factor = factor; this.state = state; this.nodeStatus = nodeStatus; - this.creationTimestamp = System.currentTimeMillis(); + this.creationTimestamp = Instant.now(); this.nodeIdsHash = 0; } @@ -119,7 +120,7 @@ public PipelineState getPipelineState() { * * @return Creation Timestamp */ - public Long getCreationTimestamp() { + public Instant getCreationTimestamp() { return creationTimestamp; } @@ -128,7 +129,7 @@ public Long getCreationTimestamp() { * * @param creationTimestamp */ - void setCreationTimestamp(Long creationTimestamp) { + void setCreationTimestamp(Instant creationTimestamp) { this.creationTimestamp = creationTimestamp; } @@ -253,7 +254,7 @@ public HddsProtos.Pipeline getProtobufMessage() .setFactor(factor) .setState(PipelineState.getProtobuf(state)) .setLeaderID(leaderId != null ? 
leaderId.toString() : "") - .setCreationTimeStamp(creationTimestamp) + .setCreationTimeStamp(creationTimestamp.toEpochMilli()) .addAllMembers(nodeStatus.keySet().stream() .map(DatanodeDetails::getProtoBufMessage) .collect(Collectors.toList())); @@ -289,6 +290,7 @@ public static Pipeline getFromProtobuf(HddsProtos.Pipeline pipeline) .setNodes(pipeline.getMembersList().stream() .map(DatanodeDetails::getFromProtoBuf).collect(Collectors.toList())) .setNodesInOrder(pipeline.getMemberOrdersList()) + .setCreateTimestamp(pipeline.getCreationTimeStamp()) .build(); } @@ -357,7 +359,7 @@ public static class Builder { private List nodeOrder = null; private List nodesInOrder = null; private UUID leaderId = null; - private Long creationTimestamp = null; + private Instant creationTimestamp = null; private int nodeIdsHash = 0; public Builder() {} @@ -410,6 +412,11 @@ public Builder setNodesInOrder(List orders) { return this; } + public Builder setCreateTimestamp(long createTimestamp) { + this.creationTimestamp = Instant.ofEpochMilli(createTimestamp); + return this; + } + public Builder setNodeIdsHash(int nodeIdsHash1) { this.nodeIdsHash = nodeIdsHash1; return this; diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 31fe046c32fb..875f79f9f186 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -791,14 +791,14 @@ - ozone.scm.datanode.max.pipeline.engagement + ozone.datanode.pipeline.limit 2 OZONE, SCM, PIPELINE Max number of pipelines per datanode can be engaged in. - ozone.scm.pipeline.number.limit + ozone.scm.ratis.pipeline.limit 0 OZONE, SCM, PIPELINE Upper limit for how many pipelines can be OPEN in SCM. @@ -813,8 +813,9 @@ Timeout for every pipeline to stay in ALLOCATED stage. When pipeline is created, it should be at OPEN stage once pipeline report is successfully received by SCM. - If a pipeline stays at ALLOCATED for too long, it should be scrubbed so that new - pipeline can be created. This timeout is for how long pipeline can stay at ALLOCATED + If a pipeline stays at ALLOCATED longer than the specified period of time, + it should be scrubbed so that new pipeline can be created. + This timeout is for how long pipeline can stay at ALLOCATED stage until it gets scrubbed. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java index 18809ed4450a..6533cb807642 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java @@ -57,8 +57,7 @@ public Set getPipelines(UUID datanode) { * @return Number of pipelines or 0. */ public int getPipelinesCount(UUID datanode) { - Set pipelines = getObjects(datanode); - return pipelines == null ? 0 : pipelines.size(); + return getObjects(datanode).size(); } /** @@ -80,7 +79,7 @@ public synchronized void removePipeline(Pipeline pipeline) { dn2ObjectMap.computeIfPresent(dnId, (k, v) -> { v.remove(pipeline.getId()); - return v.isEmpty() ? 
null : v; + return v; }); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java index 8c6e5c79b55e..4261a87c4c0d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -72,8 +72,8 @@ public PipelinePlacementPolicy(final NodeManager nodeManager, this.conf = conf; this.stateManager = stateManager; this.heavyNodeCriteria = conf.getInt( - ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, - ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT); + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT_DEFAULT); } /** @@ -113,7 +113,7 @@ boolean meetCriteria(DatanodeDetails datanodeDetails, int nodesRequired) { } boolean meet = (nodeManager.getPipelinesCount(datanodeDetails) - pipelineNumDeductable) < heavyNodeCriteria; - if (!meet) { + if (!meet && LOG.isDebugEnabled()) { LOG.debug("Pipeline Placement: can't place more pipeline on heavy " + "datanode: " + datanodeDetails.getUuid().toString() + " Heaviness: " + nodeManager.getPipelinesCount(datanodeDetails) + @@ -143,17 +143,11 @@ List filterViableNodes( } int initialHealthyNodesCount = healthyNodes.size(); String msg; - if (initialHealthyNodesCount == 0) { - msg = "No healthy nodes found to allocate pipeline."; - LOG.error(msg); - throw new SCMException(msg, SCMException.ResultCodes - .FAILED_TO_FIND_SUITABLE_NODE); - } if (initialHealthyNodesCount < nodesRequired) { - LOG.warn("Not enough healthy nodes to allocate pipeline. %d " - + " datanodes required. Found %d", - nodesRequired, initialHealthyNodesCount); + LOG.warn("Not enough healthy nodes to allocate pipeline." + + nodesRequired + " datanodes required. Found: " + + initialHealthyNodesCount); msg = String.format("Pipeline creation failed due to no sufficient" + " healthy datanodes. Required %d. Found %d.", nodesRequired, initialHealthyNodesCount); @@ -168,15 +162,17 @@ List filterViableNodes( .collect(Collectors.toList()); if (healthyList.size() < nodesRequired) { - LOG.debug("Unable to find enough nodes that meet " + - "the criteria that cannot engage in more than %d pipelines." + - " Nodes required: %d Found: %d, healthy nodes count in " + - "NodeManager: %d.", - heavyNodeCriteria, nodesRequired, healthyList.size(), - initialHealthyNodesCount); - msg = String.format("Pipeline creation failed due to not enough" + - " healthy datanodes after filter. Required %d. Found %d", - nodesRequired, initialHealthyNodesCount); + if (LOG.isDebugEnabled()) { + LOG.debug("Unable to find enough nodes that meet the criteria that" + + " cannot engage in more than" + heavyNodeCriteria + + " pipelines. Nodes required: " + nodesRequired + " Found:" + + healthyList.size() + " healthy nodes count in NodeManager: " + + initialHealthyNodesCount); + } + msg = String.format("Pipeline creation failed because nodes are engaged" + + " in other pipelines and every node can only be engaged in" + + " max %d pipelines. Required %d. 
Found %d", + heavyNodeCriteria, nodesRequired, healthyList.size()); throw new SCMException(msg, SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java index 95859070f27d..4865074d7c70 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java @@ -84,11 +84,11 @@ public class RatisPipelineProvider implements PipelineProvider { this.placementPolicy = new PipelinePlacementPolicy(nodeManager, stateManager, conf); this.pipelineNumberLimit = conf.getInt( - ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT, - ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT_DEFAULT); + ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT, + ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT_DEFAULT); this.maxPipelinePerDatanode = conf.getInt( - ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, - ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT); + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT_DEFAULT); } private List pickNodesNeverUsed(ReplicationFactor factor) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java index f9f2011b5a81..7fe1cc126f23 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java @@ -106,27 +106,21 @@ static int encodeNodeIdsOfFactorThreePipeline(List nodes) { } /** - * Return first existed pipeline which share the same set of datanodes + * Return the list of pipelines who share the same set of datanodes * with the input pipeline. 
* @param stateManager PipelineStateManager * @param pipeline input pipeline * @return first matched pipeline */ - static Pipeline checkPipelineContainSameDatanodes( + static List checkPipelineContainSameDatanodes( PipelineStateManager stateManager, Pipeline pipeline) { - List matchedPipelines = stateManager.getPipelines( + return stateManager.getPipelines( HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE) .stream().filter(p -> !p.getId().equals(pipeline.getId()) && (// For all OPEN or ALLOCATED pipelines - p.getPipelineState() == Pipeline.PipelineState.OPEN || - p.getPipelineState() == Pipeline.PipelineState.ALLOCATED) && - p.getNodeIdsHash() == pipeline.getNodeIdsHash()) + p.getPipelineState() != Pipeline.PipelineState.CLOSED && + p.getNodeIdsHash() == pipeline.getNodeIdsHash())) .collect(Collectors.toList()); - if (matchedPipelines.size() == 0) { - return null; - } else { - return matchedPipelines.stream().findFirst().get(); - } } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java index f924b4163b6f..88c4329275e0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java @@ -45,6 +45,8 @@ import javax.management.ObjectName; import java.io.File; import java.io.IOException; +import java.time.Duration; +import java.time.Instant; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -172,17 +174,20 @@ public synchronized Pipeline createPipeline(ReplicationType type, metrics.incNumPipelineCreated(); metrics.createPerPipelineMetrics(pipeline); } - Pipeline overlapPipeline = RatisPipelineUtils + List overlapPipelines = RatisPipelineUtils .checkPipelineContainSameDatanodes(stateManager, pipeline); - if (overlapPipeline != null) { + if (!overlapPipelines.isEmpty()) { + // Count 1 overlap at a time. metrics.incNumPipelineContainSameDatanodes(); //TODO remove until pipeline allocation is proved equally distributed. 
- LOG.info("Pipeline: " + pipeline.getId().toString() + - " contains same datanodes as previous pipeline: " + - overlapPipeline.getId().toString() + " nodeIds: " + - pipeline.getNodes().get(0).getUuid().toString() + - ", " + pipeline.getNodes().get(1).getUuid().toString() + - ", " + pipeline.getNodes().get(2).getUuid().toString()); + for (Pipeline overlapPipeline : overlapPipelines) { + LOG.info("Pipeline: " + pipeline.getId().toString() + + " contains same datanodes as previous pipelines: " + + overlapPipeline.getId().toString() + " nodeIds: " + + pipeline.getNodes().get(0).getUuid().toString() + + ", " + pipeline.getNodes().get(1).getUuid().toString() + + ", " + pipeline.getNodes().get(2).getUuid().toString()); + } } return pipeline; } catch (IOException ex) { @@ -381,20 +386,21 @@ public void scrubPipeline(ReplicationType type, ReplicationFactor factor) // Only srub pipeline for RATIS THREE pipeline return; } - Long currentTime = System.currentTimeMillis(); + Instant currentTime = Instant.now(); Long pipelineScrubTimeoutInMills = conf.getTimeDuration( ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT, ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS); List needToSrubPipelines = stateManager.getPipelines(type, factor, Pipeline.PipelineState.ALLOCATED).stream() - .filter(p -> (currentTime - p.getCreationTimestamp() - >= pipelineScrubTimeoutInMills)) + .filter(p -> currentTime.toEpochMilli() - p.getCreationTimestamp() + .toEpochMilli() >= pipelineScrubTimeoutInMills) .collect(Collectors.toList()); for (Pipeline p : needToSrubPipelines) { LOG.info("srubbing pipeline: id: " + p.getId().toString() + " since it stays at ALLOCATED stage for " + - (currentTime - p.getCreationTimestamp())/60000 + " mins."); + Duration.between(currentTime, p.getCreationTimestamp()).toMinutes() + + " mins."); finalizeAndDestroyPipeline(p, false); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java index f35bfe2850eb..10c38a8fadcf 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java @@ -68,7 +68,7 @@ public static void setUp() throws Exception { .getTestDir(TestCloseContainerEventHandler.class.getSimpleName()); configuration .set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); - configuration.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT, 16); + configuration.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT, 16); nodeManager = new MockNodeManager(true, 10); eventQueue = new EventQueue(); pipelineManager = diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java index 977038ebac71..4cdc46fa2222 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java @@ -66,7 +66,7 @@ import org.junit.Test; import org.mockito.Mockito; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; /** 
* Test DeadNodeHandler. @@ -89,7 +89,7 @@ public void setup() throws IOException, AuthenticationException { storageDir = GenericTestUtils.getTempPath( TestDeadNodeHandler.class.getSimpleName() + UUID.randomUUID()); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir); - conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 0); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 0); eventQueue = new EventQueue(); scm = HddsTestUtils.getScm(conf); nodeManager = (SCMNodeManager) scm.getScmNodeManager(); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java index 87e8cf42cc05..41eea3d9dc67 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java @@ -34,8 +34,9 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collection; +import java.util.List; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE; /** @@ -77,7 +78,7 @@ public static Collection inputParams() { @Test public void testPipelineDatanodesIntersection() { NodeManager nodeManager= new MockNodeManager(true, nodeCount); - conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, nodeHeaviness); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, nodeHeaviness); conf.setBoolean(OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE, false); PipelineStateManager stateManager = new PipelineStateManager(); PipelineProvider provider = new MockRatisPipelineProvider(nodeManager, @@ -92,20 +93,23 @@ public void testPipelineDatanodesIntersection() { Pipeline pipeline = provider.create(HddsProtos.ReplicationFactor.THREE); stateManager.addPipeline(pipeline); nodeManager.addPipeline(pipeline); - Pipeline overlapPipeline = RatisPipelineUtils + List overlapPipelines = RatisPipelineUtils .checkPipelineContainSameDatanodes(stateManager, pipeline); - if (overlapPipeline != null){ + + if (overlapPipelines.isEmpty()){ intersectionCount++; - LOG.info("This pipeline: " + pipeline.getId().toString() + - " overlaps with previous pipeline: " + overlapPipeline.getId() + - ". They share same set of datanodes as: " + - pipeline.getNodesInOrder().get(0).getUuid() + "/" + - pipeline.getNodesInOrder().get(1).getUuid() + "/" + - pipeline.getNodesInOrder().get(2).getUuid() + " and " + - overlapPipeline.getNodesInOrder().get(0).getUuid() + "/" + - overlapPipeline.getNodesInOrder().get(1).getUuid() + "/" + - overlapPipeline.getNodesInOrder().get(2).getUuid() + - " is the same."); + for (Pipeline overlapPipeline : overlapPipelines) { + LOG.info("This pipeline: " + pipeline.getId().toString() + + " overlaps with previous pipeline: " + overlapPipeline.getId() + + ". 
They share same set of datanodes as: " + + pipeline.getNodesInOrder().get(0).getUuid() + "/" + + pipeline.getNodesInOrder().get(1).getUuid() + "/" + + pipeline.getNodesInOrder().get(2).getUuid() + " and " + + overlapPipeline.getNodesInOrder().get(0).getUuid() + "/" + + overlapPipeline.getNodesInOrder().get(1).getUuid() + "/" + + overlapPipeline.getNodesInOrder().get(2).getUuid() + + " is the same."); + } } createdPipelineCount++; } catch(SCMException e) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java index 2fe67f908597..2fff7d901cc7 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java @@ -34,7 +34,7 @@ import java.util.*; import java.util.stream.Collectors; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; /** * Test for PipelinePlacementPolicy. @@ -50,7 +50,7 @@ public void init() throws Exception { nodeManager = new MockNodeManager(true, PIPELINE_PLACEMENT_MAX_NODES_COUNT); conf = new OzoneConfiguration(); - conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 5); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 5); placementPolicy = new PipelinePlacementPolicy( nodeManager, new PipelineStateManager(), conf); } @@ -185,8 +185,8 @@ private void insertHeavyNodesIntoNodeManager( int considerHeavyCount = conf.getInt( - ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, - ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT) + 1; + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT_DEFAULT) + 1; Node2PipelineMap mockMap = new Node2PipelineMap(); for (DatanodeDetails node : nodes) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java index 56233594e7c0..a17fc08466df 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java @@ -61,7 +61,7 @@ public class TestRatisPipelineProvider { public void init() throws Exception { nodeManager = new MockNodeManager(true, 10); conf = new OzoneConfiguration(); - conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, + conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, maxPipelinePerNode); stateManager = new PipelineStateManager(); provider = new MockRatisPipelineProvider(nodeManager, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java index e6bf7a09d018..deba91b746a5 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hdds.scm.pipeline; import static org.apache.commons.collections.CollectionUtils.intersection; 
-import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT; import static org.apache.hadoop.test.MetricsAsserts.getLongCounter; import static org.apache.hadoop.test.MetricsAsserts.getMetrics; @@ -65,7 +65,7 @@ public class TestSCMPipelineManager { @Before public void setUp() throws Exception { conf = new OzoneConfiguration(); - conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 1); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 1); testDir = GenericTestUtils .getTestDir(TestSCMPipelineManager.class.getSimpleName()); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java index dcd8402fef47..badfadc22eb9 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdds.scm.cli.datanode; +import com.google.common.base.Strings; import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -62,15 +63,15 @@ public class ListInfoSubcommand implements Callable { public Void call() throws Exception { try (ScmClient scmClient = parent.getParent().createScmClient()) { pipelines = scmClient.listPipelines(); - if (isNullOrEmpty(ipaddress) && isNullOrEmpty(uuid)) { + if (Strings.isNullOrEmpty(ipaddress) && Strings.isNullOrEmpty(uuid)) { getAllNodes(scmClient).stream().forEach(p -> printDatanodeInfo(p)); } else { Stream allNodes = getAllNodes(scmClient).stream(); - if (!isNullOrEmpty(ipaddress)) { + if (!Strings.isNullOrEmpty(ipaddress)) { allNodes = allNodes.filter(p -> p.getIpAddress() .compareToIgnoreCase(ipaddress) == 0); } - if (!isNullOrEmpty(uuid)) { + if (!Strings.isNullOrEmpty(uuid)) { allNodes = allNodes.filter(p -> p.getUuid().toString().equals(uuid)); } allNodes.forEach(p -> printDatanodeInfo(p)); @@ -117,8 +118,4 @@ private void printDatanodeInfo(DatanodeDetails datanode) { + datanode.getHostName() + "/" + relatedPipelineNum + " pipelines) \n" + "Related pipelines: \n" + pipelineListInfo); } - - protected static boolean isNullOrEmpty(String str) { - return ((str == null) || str.trim().isEmpty()); - } } \ No newline at end of file diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/ListPipelinesSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/ListPipelinesSubcommand.java index 8b3b1b3b8cbd..f8ac1d498759 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/ListPipelinesSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/ListPipelinesSubcommand.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.cli.pipeline; +import com.google.common.base.Strings; import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.scm.client.ScmClient; import picocli.CommandLine; @@ -53,13 +54,13 @@ public class ListPipelinesSubcommand implements Callable { @Override public Void call() throws Exception { try 
(ScmClient scmClient = parent.getParent().createScmClient()) { - if (isNullOrEmpty(factor) && isNullOrEmpty(state)) { + if (Strings.isNullOrEmpty(factor) && Strings.isNullOrEmpty(state)) { scmClient.listPipelines().forEach(System.out::println); } else { scmClient.listPipelines().stream() - .filter(p -> ((isNullOrEmpty(factor) || + .filter(p -> ((Strings.isNullOrEmpty(factor) || (p.getFactor().toString().compareToIgnoreCase(factor) == 0)) - && (isNullOrEmpty(state) || + && (Strings.isNullOrEmpty(state) || (p.getPipelineState().toString().compareToIgnoreCase(state) == 0)))) .forEach(System.out::println); @@ -67,8 +68,4 @@ public Void call() throws Exception { return null; } } - - protected static boolean isNullOrEmpty(String str) { - return ((str == null) || str.trim().isEmpty()); - } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java index acc40317b0c2..7a6143cd48d9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java @@ -97,7 +97,7 @@ public void init() throws Exception { conf.setTimeDuration( OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, LEADER_ELECTION_TIMEOUT, TimeUnit.MILLISECONDS); - conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 3); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 3); OMStorage omStore = new OMStorage(conf); omStore.setClusterId(clusterId); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java index fc90ee9a9e01..bd677db65f65 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java @@ -37,7 +37,7 @@ import java.util.concurrent.TimeoutException; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; /** @@ -53,7 +53,7 @@ public class TestRatisPipelineCreateAndDestroy { public void init(int numDatanodes) throws Exception { conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, GenericTestUtils.getRandomizedTempPath()); - conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 2); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(numDatanodes) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java index 4b35317f5259..39b67ac2aebc 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java @@ 
-39,7 +39,7 @@ import java.util.List; import java.util.concurrent.TimeoutException; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; import static org.junit.Assert.fail; /** @@ -65,7 +65,7 @@ public void setup(int numDatanodes) throws Exception { true); conf.set(HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT, "10s"); conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); - conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 50); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 50); clusterBuilder = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(numDatanodes) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 9bfa8bd7b442..7758c3cd738d 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -576,9 +576,9 @@ protected void initializeConfiguration() throws IOException { conf.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, blockSize.get(), streamBufferSizeUnit.get()); // MiniOzoneCluster should have global pipeline upper limit. - conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT, - pipelineNumLimit == DEFAULT_PIPELIME_LIMIT ? - 2 * numOfDatanodes : pipelineNumLimit); + conf.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT, + pipelineNumLimit >= DEFAULT_PIPELIME_LIMIT ? + pipelineNumLimit : DEFAULT_PIPELIME_LIMIT); configureTrace(); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java index 07e306ee4f49..1b6b7dc335f9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java @@ -92,7 +92,7 @@ public void init() throws Exception { conf.setQuietMode(false); conf.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, 4, StorageUnit.MB); - conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 3); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 3); cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(7) .setTotalPipelineNumLimit(10).setBlockSize(blockSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java index 439287e726d8..6917ab27a7bb 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java @@ -57,7 +57,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; -import static 
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; /** * Tests delete key operation with a slow follower in the datanode @@ -108,7 +108,7 @@ public static void init() throws Exception { 1000, TimeUnit.SECONDS); conf.setLong("hdds.scm.replication.thread.interval", containerReportInterval); - conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 2); conf.setQuietMode(false); cluster = diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java index d4e5d7d7f052..a84e16eea8f3 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java @@ -99,7 +99,7 @@ private void init() throws Exception { 1, TimeUnit.SECONDS); conf.setBoolean( OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_KEY, true); - conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 2); conf.setQuietMode(false); conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java index 9b6292329978..7d31499d1db9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java @@ -87,7 +87,7 @@ private void startCluster(int datanodes) throws Exception { conf.setTimeDuration( OzoneConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, 1, TimeUnit.SECONDS); - conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 2); conf.setQuietMode(false); cluster = MiniOzoneCluster.newBuilder(conf) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java index b84e61cf8b63..95dcedccc313 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java @@ -58,7 +58,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; /** * This class verifies the watchForCommit Handling by xceiverClient. 
@@ -96,7 +96,7 @@ private void startCluster(OzoneConfiguration conf) throws Exception { conf.setTimeDuration( OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY, 1, TimeUnit.SECONDS); - conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 5); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 5); conf.setQuietMode(false); cluster = MiniOzoneCluster.newBuilder(conf) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java index 8ee47a9f1425..869f0910265c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java @@ -53,7 +53,7 @@ import java.util.List; import java.util.concurrent.TimeoutException; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; /** * Test container closing. @@ -77,7 +77,7 @@ public class TestCloseContainerByPipeline { public static void init() throws Exception { conf = new OzoneConfiguration(); conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, "1"); - conf.setInt(OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 2); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 2); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(10) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java index ce27eed4c987..6058fad61d2b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java @@ -36,7 +36,7 @@ import org.apache.commons.lang3.RandomStringUtils; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD; @@ -81,7 +81,7 @@ public void init() throws Exception { conf.setBoolean(OZONE_ACL_ENABLED, true); conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2); conf.set(OZONE_ADMINISTRATORS, OZONE_ADMINISTRATORS_WILDCARD); - conf.setInt(OZONE_SCM_PIPELINE_NUMBER_LIMIT, 10); + conf.setInt(OZONE_SCM_RATIS_PIPELINE_LIMIT, 10); cluster = MiniOzoneCluster.newBuilder(conf) .setClusterId(clusterId) .setScmId(scmId) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java index 1ca3110e826b..14660d67c7b7 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java @@ -79,7 +79,7 @@ public void setUp() 
throws Exception { conf.setTimeDuration(HDDS_NODE_REPORT_INTERVAL, 1, SECONDS); conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, SECONDS); conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, SECONDS); - conf.setInt(ScmConfigKeys.OZONE_DATANODE_MAX_PIPELINE_ENGAGEMENT, 3); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 3); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(numOfDatanodes) From 8211fcd04eb1332044fc32c59bdcb93c163b6dba Mon Sep 17 00:00:00 2001 From: Xiaoyu Yao Date: Mon, 27 Jan 2020 12:45:58 -0800 Subject: [PATCH 17/19] HDDS-2924. Fix Pipeline#nodeIdsHash collision issue. (#478) --- .../hadoop/hdds/scm/pipeline/Pipeline.java | 38 +++++++++--------- .../scm/pipeline/PipelinePlacementPolicy.java | 40 +++++++++---------- .../scm/pipeline/PipelineStateManager.java | 7 ---- .../scm/pipeline/RatisPipelineProvider.java | 8 ---- .../hdds/scm/pipeline/RatisPipelineUtils.java | 16 ++------ .../pipeline/MockRatisPipelineProvider.java | 4 -- .../pipeline/TestPipelinePlacementPolicy.java | 7 ++-- .../pipeline/TestRatisPipelineProvider.java | 36 ++--------------- .../scm/pipeline/TestSCMPipelineManager.java | 17 ++++---- .../scm/cli/datanode/ListInfoSubcommand.java | 4 +- .../ozone-topology/docker-compose.yaml | 28 +++++++++++++ 11 files changed, 84 insertions(+), 121 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java index 68494943b143..5a28e4275054 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java @@ -21,11 +21,13 @@ import java.io.IOException; import java.time.Instant; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; @@ -59,8 +61,6 @@ public final class Pipeline { private UUID leaderId; // Timestamp for pipeline upon creation private Instant creationTimestamp; - // Only valid for Ratis THREE pipeline. No need persist. - private int nodeIdsHash; /** * The immutable properties of pipeline object is used in @@ -76,7 +76,6 @@ private Pipeline(PipelineID id, ReplicationType type, this.state = state; this.nodeStatus = nodeStatus; this.creationTimestamp = Instant.now(); - this.nodeIdsHash = 0; } /** @@ -133,14 +132,6 @@ void setCreationTimestamp(Instant creationTimestamp) { this.creationTimestamp = creationTimestamp; } - public int getNodeIdsHash() { - return nodeIdsHash; - } - - void setNodeIdsHash(int nodeIdsHash) { - this.nodeIdsHash = nodeIdsHash; - } - /** * Return the pipeline leader's UUID. * @@ -166,6 +157,23 @@ public List getNodes() { return new ArrayList<>(nodeStatus.keySet()); } + /** + * Return an immutable set of nodes which form this pipeline. + * @return Set of DatanodeDetails + */ + public Set getNodeSet() { + return Collections.unmodifiableSet(nodeStatus.keySet()); + } + + /** + * Check if the input pipeline share the same set of datanodes. + * @param pipeline + * @return true if the input pipeline shares the same set of datanodes. + */ + public boolean sameDatanodes(Pipeline pipeline) { + return getNodeSet().equals(pipeline.getNodeSet()); + } + /** * Returns the leader if found else defaults to closest node. 
* @@ -360,7 +368,6 @@ public static class Builder { private List nodesInOrder = null; private UUID leaderId = null; private Instant creationTimestamp = null; - private int nodeIdsHash = 0; public Builder() {} @@ -373,7 +380,6 @@ public Builder(Pipeline pipeline) { this.nodesInOrder = pipeline.nodesInOrder.get(); this.leaderId = pipeline.getLeaderId(); this.creationTimestamp = pipeline.getCreationTimestamp(); - this.nodeIdsHash = 0; } public Builder setId(PipelineID id1) { @@ -417,11 +423,6 @@ public Builder setCreateTimestamp(long createTimestamp) { return this; } - public Builder setNodeIdsHash(int nodeIdsHash1) { - this.nodeIdsHash = nodeIdsHash1; - return this; - } - public Pipeline build() { Preconditions.checkNotNull(id); Preconditions.checkNotNull(type); @@ -430,7 +431,6 @@ public Pipeline build() { Preconditions.checkNotNull(nodeStatus); Pipeline pipeline = new Pipeline(id, type, factor, state, nodeStatus); pipeline.setLeaderId(leaderId); - pipeline.setNodeIdsHash(nodeIdsHash); // overwrite with original creationTimestamp if (creationTimestamp != null) { pipeline.setCreationTimestamp(creationTimestamp); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java index 4261a87c4c0d..9d78063a4dcd 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -145,12 +145,10 @@ List filterViableNodes( String msg; if (initialHealthyNodesCount < nodesRequired) { - LOG.warn("Not enough healthy nodes to allocate pipeline." + - nodesRequired + " datanodes required. Found: " + - initialHealthyNodesCount); msg = String.format("Pipeline creation failed due to no sufficient" + " healthy datanodes. Required %d. Found %d.", nodesRequired, initialHealthyNodesCount); + LOG.warn(msg); throw new SCMException(msg, SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); } @@ -229,42 +227,49 @@ public List getResultSet( // First choose an anchor nodes randomly DatanodeDetails anchor = chooseNode(healthyNodes); if (anchor == null) { - LOG.warn("Unable to find healthy nodes." + + LOG.warn("Unable to find healthy node for anchor(first) node." + " Required nodes: {}, Found nodes: {}", nodesRequired, results.size()); throw new SCMException("Unable to find required number of nodes.", SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); } + if (LOG.isDebugEnabled()) { + LOG.debug("First node chosen: {}", anchor); + } results.add(anchor); exclude.add(anchor); - nodesRequired--; // Choose the second node on different racks from anchor. DatanodeDetails nodeOnDifferentRack = chooseNodeBasedOnRackAwareness( healthyNodes, exclude, nodeManager.getClusterNetworkTopologyMap(), anchor); if (nodeOnDifferentRack == null) { - LOG.warn("Pipeline Placement: Unable to find nodes on different racks " + - " that meet the criteria. Required nodes: {}, Found nodes: {}", - nodesRequired, results.size()); + LOG.warn("Pipeline Placement: Unable to find 2nd node on different " + + "racks that meets the criteria. 
Required nodes: {}, Found nodes:" + + " {}", nodesRequired, results.size()); throw new SCMException("Unable to find required number of nodes.", SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); } + if (LOG.isDebugEnabled()) { + LOG.debug("Second node chosen: {}", nodeOnDifferentRack); + } results.add(nodeOnDifferentRack); exclude.add(nodeOnDifferentRack); - nodesRequired--; // Then choose nodes close to anchor based on network topology - for (int x = 0; x < nodesRequired; x++) { + int nodesToFind = nodesRequired - results.size(); + for (int x = 0; x < nodesToFind; x++) { // invoke the choose function defined in the derived classes. DatanodeDetails pick = chooseNodeFromNetworkTopology( nodeManager.getClusterNetworkTopologyMap(), anchor, exclude); if (pick != null) { results.add(pick); - // exclude the picked node for next time exclude.add(pick); + if (LOG.isDebugEnabled()) { + LOG.debug("Remaining node chosen: {}", pick); + } } } @@ -306,9 +311,7 @@ public DatanodeDetails chooseNode( datanodeDetails = firstNodeMetric.isGreater(secondNodeMetric.get()) ? firstNodeDetails : secondNodeDetails; } - // the pick is decided and it should be removed from candidates. healthyNodes.remove(datanodeDetails); - return datanodeDetails; } @@ -331,12 +334,10 @@ protected DatanodeDetails chooseNodeBasedOnRackAwareness( } for (DatanodeDetails node : healthyNodes) { - if (excludedNodes.contains(node) - || networkTopology.isSameParent(anchor, node)) { + if (excludedNodes.contains(node) || + anchor.getNetworkLocation().equals(node.getNetworkLocation())) { continue; } else { - // the pick is decided and it should be removed from candidates. - healthyNodes.remove(node); return node; } } @@ -374,15 +375,10 @@ protected DatanodeDetails chooseNodeFromNetworkTopology( if (excludedNodes != null && excludedNodes.size() != 0) { excluded.addAll(excludedNodes); } - excluded.add(anchor); Node pick = networkTopology.chooseRandom( anchor.getNetworkLocation(), excluded); DatanodeDetails pickedNode = (DatanodeDetails) pick; - // exclude the picked node for next time - if (excludedNodes != null) { - excludedNodes.add(pickedNode); - } return pickedNode; } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManager.java index 051202b45600..bb56a0380b1b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManager.java @@ -132,13 +132,6 @@ Pipeline openPipeline(PipelineID pipelineId) throws IOException { pipeline = pipelineStateMap .updatePipelineState(pipelineId, PipelineState.OPEN); } - // Amend nodeIdsHash if needed. 
- if (pipeline.getType() == ReplicationType.RATIS && - pipeline.getFactor() == ReplicationFactor.THREE && - pipeline.getNodeIdsHash() == 0) { - pipeline.setNodeIdsHash(RatisPipelineUtils - .encodeNodeIdsOfFactorThreePipeline(pipeline.getNodes())); - } return pipeline; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java index 4865074d7c70..13c3b6a5cb13 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java @@ -157,7 +157,6 @@ public Pipeline create(ReplicationFactor factor) throws IOException { } List dns; - int nodeIdHash = 0; switch(factor) { case ONE: @@ -166,7 +165,6 @@ public Pipeline create(ReplicationFactor factor) throws IOException { case THREE: dns = placementPolicy.chooseDatanodes(null, null, factor.getNumber(), 0); - nodeIdHash = RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(dns); break; default: throw new IllegalStateException("Unknown factor: " + factor.name()); @@ -178,7 +176,6 @@ public Pipeline create(ReplicationFactor factor) throws IOException { .setType(ReplicationType.RATIS) .setFactor(factor) .setNodes(dns) - .setNodeIdsHash(nodeIdHash) .build(); // Send command to datanodes to create pipeline @@ -199,17 +196,12 @@ public Pipeline create(ReplicationFactor factor) throws IOException { @Override public Pipeline create(ReplicationFactor factor, List nodes) { - int nodeIdHash = 0; - if (factor == ReplicationFactor.THREE) { - nodeIdHash = RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(nodes); - } return Pipeline.newBuilder() .setId(PipelineID.randomId()) .setState(PipelineState.ALLOCATED) .setType(ReplicationType.RATIS) .setFactor(factor) .setNodes(nodes) - .setNodeIdsHash(nodeIdHash) .build(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java index 7fe1cc126f23..552ae7d3c644 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java @@ -96,21 +96,12 @@ static void destroyPipeline(DatanodeDetails dn, PipelineID pipelineID, } } - static int encodeNodeIdsOfFactorThreePipeline(List nodes) { - if (nodes.size() != HddsProtos.ReplicationFactor.THREE.getNumber()) { - return 0; - } - return nodes.get(0).getUuid().hashCode() ^ - nodes.get(1).getUuid().hashCode() ^ - nodes.get(2).getUuid().hashCode(); - } - /** * Return the list of pipelines who share the same set of datanodes * with the input pipeline. 
* @param stateManager PipelineStateManager * @param pipeline input pipeline - * @return first matched pipeline + * @return list of matched pipeline */ static List checkPipelineContainSameDatanodes( PipelineStateManager stateManager, Pipeline pipeline) { @@ -118,9 +109,8 @@ static List checkPipelineContainSameDatanodes( HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE) .stream().filter(p -> !p.getId().equals(pipeline.getId()) && - (// For all OPEN or ALLOCATED pipelines - p.getPipelineState() != Pipeline.PipelineState.CLOSED && - p.getNodeIdsHash() == pipeline.getNodeIdsHash())) + (p.getPipelineState() != Pipeline.PipelineState.CLOSED && + p.sameDatanodes(pipeline))) .collect(Collectors.toList()); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java index 3eb146a2c9cc..ff5247027259 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java @@ -73,8 +73,6 @@ public Pipeline create(HddsProtos.ReplicationFactor factor) .setType(initialPipeline.getType()) .setFactor(factor) .setNodes(initialPipeline.getNodes()) - .setNodeIdsHash(RatisPipelineUtils - .encodeNodeIdsOfFactorThreePipeline(initialPipeline.getNodes())) .build(); } } @@ -93,8 +91,6 @@ public Pipeline create(HddsProtos.ReplicationFactor factor, .setType(HddsProtos.ReplicationType.RATIS) .setFactor(factor) .setNodes(nodes) - .setNodeIdsHash(RatisPipelineUtils - .encodeNodeIdsOfFactorThreePipeline(nodes)) .build(); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java index 2fff7d901cc7..b9aa9afb0518 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java @@ -65,10 +65,10 @@ public void testChooseNodeBasedOnNetworkTopology() { List excludedNodes = new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT); + excludedNodes.add(anchor); DatanodeDetails nextNode = placementPolicy.chooseNodeFromNetworkTopology( nodeManager.getClusterNetworkTopologyMap(), anchor, excludedNodes); - // excludedNodes should contain nextNode after being chosen. - Assert.assertTrue(excludedNodes.contains(nextNode)); + Assert.assertFalse(excludedNodes.contains(nextNode)); // nextNode should not be the same as anchor. 
Assert.assertTrue(anchor.getUuid() != nextNode.getUuid()); } @@ -83,7 +83,8 @@ public void testChooseNodeBasedOnRackAwareness() { DatanodeDetails nextNode = placementPolicy.chooseNodeBasedOnRackAwareness( healthyNodes, new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), topologyWithDifRacks, anchor); - Assert.assertFalse(topologyWithDifRacks.isSameParent(anchor, nextNode)); + Assert.assertFalse(anchor.getNetworkLocation().equals( + nextNode.getNetworkLocation())); } private final static Node[] NODES = new NodeImpl[] { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java index a17fc08466df..86d54b399186 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java @@ -35,7 +35,6 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.UUID; import java.util.stream.Collectors; import static org.apache.commons.collections.CollectionUtils.intersection; @@ -84,7 +83,7 @@ private void createPipelineAndAssertions( intersection(pipeline.getNodes(), pipeline1.getNodes()) .size() < factor.getNumber()); if (pipeline.getFactor() == HddsProtos.ReplicationFactor.THREE) { - assertNotEquals(pipeline.getNodeIdsHash(), pipeline1.getNodeIdsHash()); + assertNotEquals(pipeline.getNodeSet(), pipeline1.getNodeSet()); } stateManager.addPipeline(pipeline1); nodeManager.addPipeline(pipeline1); @@ -105,7 +104,7 @@ public void testCreatePipelineWithFactor() throws IOException { stateManager.addPipeline(pipeline1); // With enough pipeline quote on datanodes, they should not share // the same set of datanodes. 
- assertNotEquals(pipeline.getNodeIdsHash(), pipeline1.getNodeIdsHash()); + assertNotEquals(pipeline.getNodeSet(), pipeline1.getNodeSet()); } @Test @@ -140,33 +139,6 @@ public void testCreatePipelineWithNodes() { Pipeline.PipelineState.OPEN); } - @Test - public void testComputeNodeIdsHash() { - int total = HddsProtos.ReplicationFactor.THREE.getNumber(); - List nodes1 = new ArrayList<>(); - for (int i = 0; i < total; i++) { - nodes1.add(MockDatanodeDetails.createDatanodeDetails( - UUID.fromString("00000-11000-00000-00000-0000" + (i + 1)))); - } - - Assert.assertEquals(total, nodes1.size()); - Assert.assertNotEquals(0, - RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(nodes1)); - - List nodes2 = new ArrayList<>(); - for (int i = 0; i < total; i++) { - nodes2.add(MockDatanodeDetails.createDatanodeDetails( - UUID.fromString("00000-11000-00000-00000-0000" + (total - i)))); - } - Assert.assertEquals(total, nodes2.size()); - Assert.assertNotEquals(0, - RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(nodes2)); - - Assert.assertEquals( - RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(nodes1), - RatisPipelineUtils.encodeNodeIdsOfFactorThreePipeline(nodes2)); - } - @Test public void testCreateFactorTHREEPipelineWithSameDatanodes() { List healthyNodes = nodeManager @@ -178,9 +150,7 @@ public void testCreateFactorTHREEPipelineWithSameDatanodes() { Pipeline pipeline2 = provider.create( HddsProtos.ReplicationFactor.THREE, healthyNodes); - Assert.assertTrue(pipeline1.getNodes().parallelStream() - .allMatch(pipeline2.getNodes()::contains)); - Assert.assertEquals(pipeline1.getNodeIdsHash(), pipeline2.getNodeIdsHash()); + Assert.assertEquals(pipeline1.getNodeSet(), pipeline2.getNodeSet()); } @Test diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java index deba91b746a5..ab2315326bc3 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdds.scm.pipeline; -import static org.apache.commons.collections.CollectionUtils.intersection; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT; import static org.apache.hadoop.test.MetricsAsserts.getLongCounter; @@ -116,15 +115,13 @@ public void testPipelineReload() throws IOException { List pipelineList = pipelineManager.getPipelines(HddsProtos.ReplicationType.RATIS); Assert.assertEquals(pipelines, new HashSet<>(pipelineList)); - // All NodeIdsHash from original pipeline list - List originalPipelineHash = pipelineList.stream() - .map(Pipeline::getNodeIdsHash).collect(Collectors.toList()); - // All NodeIdsHash from reloaded pipeline list - List reloadedPipelineHash = pipelines.stream() - .map(Pipeline::getNodeIdsHash).collect(Collectors.toList()); - // Original NodeIdsHash list should contain same items from reloaded one. 
- Assert.assertEquals(pipelineNum, - intersection(originalPipelineHash, reloadedPipelineHash).size()); + + Set> originalPipelines = pipelineList.stream() + .map(Pipeline::getNodeSet).collect(Collectors.toSet()); + Set> reloadedPipelineHash = pipelines.stream() + .map(Pipeline::getNodeSet).collect(Collectors.toSet()); + Assert.assertEquals(reloadedPipelineHash, originalPipelines); + Assert.assertEquals(pipelineNum, originalPipelines.size()); // clean up for (Pipeline pipeline : pipelines) { diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java index badfadc22eb9..e4060b3dadaf 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java @@ -114,8 +114,8 @@ private void printDatanodeInfo(DatanodeDetails datanode) { pipelineListInfo.append("No pipelines in cluster."); } System.out.println("Datanode: " + datanode.getUuid().toString() + - " (" + datanode.getIpAddress() + "/" - + datanode.getHostName() + "/" + relatedPipelineNum + + " (" + datanode.getNetworkLocation() + "/" + datanode.getIpAddress() + + "/" + datanode.getHostName() + "/" + relatedPipelineNum + " pipelines) \n" + "Related pipelines: \n" + pipelineListInfo); } } \ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-compose.yaml index 69611fa674ce..ccd131c7be15 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-compose.yaml +++ b/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-compose.yaml @@ -72,6 +72,34 @@ services: networks: net: ipv4_address: 10.5.0.7 + datanode_5: + image: apache/ozone-runner:${OZONE_RUNNER_VERSION} + privileged: true #required by the profiler + volumes: + - ../..:/opt/hadoop + ports: + - 9864 + - 9882 + command: ["/opt/hadoop/bin/ozone","datanode"] + env_file: + - ./docker-config + networks: + net: + ipv4_address: 10.5.0.8 + datanode_6: + image: apache/ozone-runner:${OZONE_RUNNER_VERSION} + privileged: true #required by the profiler + volumes: + - ../..:/opt/hadoop + ports: + - 9864 + - 9882 + command: ["/opt/hadoop/bin/ozone","datanode"] + env_file: + - ./docker-config + networks: + net: + ipv4_address: 10.5.0.9 om: image: apache/ozone-runner:${OZONE_RUNNER_VERSION} privileged: true #required by the profiler From a92058eb9460d29676c34645c5b9e060de1ee812 Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Mon, 10 Feb 2020 09:13:57 +0800 Subject: [PATCH 18/19] HDDS-2923 Add fall-back protection for rack awareness in pipeline creation. 
(#516) --- .../scm/pipeline/PipelinePlacementPolicy.java | 81 +++++++++++++------ .../hdds/scm/pipeline/RatisPipelineUtils.java | 1 + .../pipeline/TestPipelinePlacementPolicy.java | 56 +++++++++++++ 3 files changed, 115 insertions(+), 23 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java index 9d78063a4dcd..0f30449c9750 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -208,6 +208,29 @@ public List chooseDatanodes( } } + // Fall back logic for node pick up. + DatanodeDetails fallBackPickNodes( + List nodeSet, List excludedNodes) + throws SCMException{ + DatanodeDetails node; + if (excludedNodes == null || excludedNodes.isEmpty()) { + node = chooseNode(nodeSet); + } else { + List inputNodes = nodeSet.stream() + .filter(p -> !excludedNodes.contains(p)).collect(Collectors.toList()); + node = chooseNode(inputNodes); + } + + if (node == null) { + String msg = String.format("Unable to find fall back node in" + + " pipeline allocation. nodeSet size: {}", nodeSet.size()); + LOG.warn(msg); + throw new SCMException(msg, + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + return node; + } + /** * Get result set based on the pipeline placement algorithm which considers * network topology and rack awareness. @@ -220,50 +243,59 @@ public List chooseDatanodes( public List getResultSet( int nodesRequired, List healthyNodes) throws SCMException { + if (nodesRequired != HddsProtos.ReplicationFactor.THREE.getNumber()) { + throw new SCMException("Nodes required number is not supported: " + + nodesRequired, SCMException.ResultCodes.INVALID_CAPACITY); + } + + // Assume rack awareness is not enabled. + boolean rackAwareness = false; List results = new ArrayList<>(nodesRequired); // Since nodes are widely distributed, the results should be selected // base on distance in topology, rack awareness and load balancing. List exclude = new ArrayList<>(); // First choose an anchor nodes randomly DatanodeDetails anchor = chooseNode(healthyNodes); - if (anchor == null) { - LOG.warn("Unable to find healthy node for anchor(first) node." + - " Required nodes: {}, Found nodes: {}", - nodesRequired, results.size()); - throw new SCMException("Unable to find required number of nodes.", + if (anchor != null) { + results.add(anchor); + exclude.add(anchor); + } else { + LOG.warn("Unable to find healthy node for anchor(first) node."); + throw new SCMException("Unable to find anchor node.", SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); } if (LOG.isDebugEnabled()) { LOG.debug("First node chosen: {}", anchor); } - results.add(anchor); - exclude.add(anchor); // Choose the second node on different racks from anchor. - DatanodeDetails nodeOnDifferentRack = chooseNodeBasedOnRackAwareness( + DatanodeDetails nextNode = chooseNodeBasedOnRackAwareness( healthyNodes, exclude, nodeManager.getClusterNetworkTopologyMap(), anchor); - if (nodeOnDifferentRack == null) { - LOG.warn("Pipeline Placement: Unable to find 2nd node on different " + - "racks that meets the criteria. 
Required nodes: {}, Found nodes:" + - " {}", nodesRequired, results.size()); - throw new SCMException("Unable to find required number of nodes.", - SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); - } - if (LOG.isDebugEnabled()) { - LOG.debug("Second node chosen: {}", nodeOnDifferentRack); + if (nextNode != null) { + // Rack awareness is detected. + rackAwareness = true; + results.add(nextNode); + exclude.add(nextNode); + if (LOG.isDebugEnabled()) { + LOG.debug("Second node chosen: {}", nextNode); + } + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Pipeline Placement: Unable to find 2nd node on different " + + "rack based on rack awareness."); + } } - results.add(nodeOnDifferentRack); - exclude.add(nodeOnDifferentRack); - // Then choose nodes close to anchor based on network topology int nodesToFind = nodesRequired - results.size(); for (int x = 0; x < nodesToFind; x++) { - // invoke the choose function defined in the derived classes. - DatanodeDetails pick = chooseNodeFromNetworkTopology( - nodeManager.getClusterNetworkTopologyMap(), anchor, exclude); + // Pick remaining nodes based on the existence of rack awareness. + DatanodeDetails pick = rackAwareness + ? chooseNodeFromNetworkTopology( + nodeManager.getClusterNetworkTopologyMap(), anchor, exclude) + : fallBackPickNodes(healthyNodes, exclude); if (pick != null) { results.add(pick); exclude.add(pick); @@ -293,6 +325,9 @@ public List getResultSet( @Override public DatanodeDetails chooseNode( List healthyNodes) { + if (healthyNodes == null || healthyNodes.isEmpty()) { + return null; + } int firstNodeNdx = getRand().nextInt(healthyNodes.size()); int secondNodeNdx = getRand().nextInt(healthyNodes.size()); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java index 552ae7d3c644..db9260e60393 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java @@ -99,6 +99,7 @@ static void destroyPipeline(DatanodeDetails dn, PipelineID pipelineID, /** * Return the list of pipelines who share the same set of datanodes * with the input pipeline. 
+ * * @param stateManager PipelineStateManager * @param pipeline input pipeline * @return list of matched pipeline diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java index b9aa9afb0518..daad80834c5a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java @@ -83,10 +83,66 @@ public void testChooseNodeBasedOnRackAwareness() { DatanodeDetails nextNode = placementPolicy.chooseNodeBasedOnRackAwareness( healthyNodes, new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), topologyWithDifRacks, anchor); + Assert.assertNotNull(nextNode); Assert.assertFalse(anchor.getNetworkLocation().equals( nextNode.getNetworkLocation())); } + @Test + public void testFallBackPickNodes() { + List healthyNodes = overWriteLocationInNodes( + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY)); + DatanodeDetails node; + try { + node = placementPolicy.fallBackPickNodes(healthyNodes, null); + Assert.assertNotNull(node); + } catch (SCMException e) { + Assert.fail("Should not reach here."); + } + + // when input nodeSet are all excluded. + List exclude = healthyNodes; + try { + node = placementPolicy.fallBackPickNodes(healthyNodes, exclude); + Assert.assertNull(node); + } catch (SCMException e) { + Assert.assertEquals(SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE, + e.getResult()); + } catch (Exception ex) { + Assert.fail("Should not reach here."); + } + } + + @Test + public void testRackAwarenessNotEnabledWithFallBack() throws SCMException{ + List healthyNodes = + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); + DatanodeDetails anchor = placementPolicy.chooseNode(healthyNodes); + DatanodeDetails randomNode = placementPolicy.chooseNode(healthyNodes); + // rack awareness is not enabled. + Assert.assertTrue(anchor.getNetworkLocation().equals( + randomNode.getNetworkLocation())); + + NetworkTopology topology = new NetworkTopologyImpl(new Configuration()); + DatanodeDetails nextNode = placementPolicy.chooseNodeBasedOnRackAwareness( + healthyNodes, new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + topology, anchor); + // RackAwareness should not be able to choose any node. + Assert.assertNull(nextNode); + + // PlacementPolicy should still be able to pick a set of 3 nodes. + int numOfNodes = HddsProtos.ReplicationFactor.THREE.getNumber(); + List results = placementPolicy + .getResultSet(numOfNodes, healthyNodes); + + Assert.assertEquals(numOfNodes, results.size()); + // All nodes are on same rack. + Assert.assertEquals(results.get(0).getNetworkLocation(), + results.get(1).getNetworkLocation()); + Assert.assertEquals(results.get(0).getNetworkLocation(), + results.get(2).getNetworkLocation()); + } + private final static Node[] NODES = new NodeImpl[] { new NodeImpl("h1", "/r1", NetConstants.NODE_COST_DEFAULT), new NodeImpl("h2", "/r1", NetConstants.NODE_COST_DEFAULT), From 378ee1e463f42c7dc228e6344e5d961f43194685 Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Fri, 14 Feb 2020 13:24:13 +0800 Subject: [PATCH 19/19] HDDS-3007 Fix CI test failure for TestSCMNodeManager. 
(#550) --- .../org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java index fca88c7a88f0..91f437d98008 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java @@ -62,6 +62,7 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; import static org.apache.hadoop.hdds.scm.events.SCMEvents.DATANODE_COMMAND; import org.junit.After; @@ -117,6 +118,7 @@ OzoneConfiguration getConf() { conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100, TimeUnit.MILLISECONDS); conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false); + conf.setInt(OZONE_SCM_RATIS_PIPELINE_LIMIT, 10); return conf; }
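
Illustrative sketch appended after the patch series, not part of any applied patch: HDDS-2924 (patch 17 above) drops the XOR-based Pipeline#nodeIdsHash and instead compares the pipelines' datanode sets directly through Pipeline#getNodeSet() and Pipeline#sameDatanodes(), so two pipelines are only treated as sharing datanodes when their node sets are actually equal. The minimal Java sketch below shows the idea in isolation; the class, method, and variable names here are hypothetical and only mirror the set-equality check introduced by the patch.

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;
    import java.util.UUID;

    public class SameDatanodesSketch {
      // Set equality cannot report a false match for two different node sets,
      // unlike XOR-ing the three UUID hash codes as the removed nodeIdsHash did.
      static boolean sameDatanodes(Set<UUID> a, Set<UUID> b) {
        return a.equals(b);
      }

      public static void main(String[] args) {
        UUID d1 = UUID.randomUUID();
        UUID d2 = UUID.randomUUID();
        UUID d3 = UUID.randomUUID();
        // Same three datanodes listed in a different order: still the same pipeline membership.
        Set<UUID> pipeline1 = new HashSet<>(Arrays.asList(d1, d2, d3));
        Set<UUID> pipeline2 = new HashSet<>(Arrays.asList(d3, d1, d2));
        // One member differs: must not be treated as the same set of datanodes.
        Set<UUID> pipeline3 = new HashSet<>(Arrays.asList(d1, d2, UUID.randomUUID()));
        System.out.println(sameDatanodes(pipeline1, pipeline2)); // true
        System.out.println(sameDatanodes(pipeline1, pipeline3)); // false
      }
    }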