diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicy.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PlacementPolicy.java
similarity index 80%
rename from hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicy.java
rename to hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PlacementPolicy.java
index 52ce7964b676..f6a0e8bf7eb0 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicy.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PlacementPolicy.java
@@ -15,7 +15,7 @@
  * the License.
  */
-package org.apache.hadoop.hdds.scm.container.placement.algorithms;
+package org.apache.hadoop.hdds.scm;
 
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 
@@ -23,14 +23,14 @@
 import java.util.List;
 
 /**
- * A ContainerPlacementPolicy support choosing datanodes to build replication
- * pipeline with specified constraints.
+ * A PlacementPolicy supports choosing datanodes to build
+ * pipelines or containers with specified constraints.
  */
-public interface ContainerPlacementPolicy {
+public interface PlacementPolicy {
 
   /**
-   * Given the replication factor and size required, return set of datanodes
-   * that satisfy the nodes and size requirement.
+   * Given an initial set of datanodes and the size required,
+   * return a set of datanodes that satisfy the nodes and size requirement.
    *
    * @param excludedNodes - list of nodes to be excluded.
    * @param favoredNodes - list of nodes preferred.
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index d0ac0667d5e8..eb2a9e53c99e 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -292,11 +292,31 @@ public final class ScmConfigKeys {
   public static final String OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT =
       "ozone.scm.pipeline.owner.container.count";
   public static final int OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT = 3;
+  // Pipeline placement policy:
+  // Upper limit for how many pipelines a datanode can engage in.
+  public static final String OZONE_DATANODE_PIPELINE_LIMIT =
+      "ozone.datanode.pipeline.limit";
+  public static final int OZONE_DATANODE_PIPELINE_LIMIT_DEFAULT = 2;
+
+  // Upper limit for how many pipelines can be created
+  // across the cluster nodes managed by SCM.
+  // Only for test purposes now.
+  public static final String OZONE_SCM_RATIS_PIPELINE_LIMIT =
+      "ozone.scm.ratis.pipeline.limit";
+  // Setting it to zero (the default) means this limit doesn't take effect.
+  public static final int OZONE_SCM_RATIS_PIPELINE_LIMIT_DEFAULT = 0;
 
   public static final String
       OZONE_SCM_KEY_VALUE_CONTAINER_DELETION_CHOOSING_POLICY =
       "ozone.scm.keyvalue.container.deletion-choosing.policy";
+  // Max timeout for a pipeline to stay in ALLOCATED state before it is scrubbed.
+ public static final String OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT = + "ozone.scm.pipeline.allocated.timeout"; + + public static final String OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT_DEFAULT = + "5m"; + public static final String OZONE_SCM_CONTAINER_CREATION_LEASE_TIMEOUT = "ozone.scm.container.creation.lease.timeout"; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java index 594fcf738e11..5a28e4275054 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java @@ -19,12 +19,15 @@ package org.apache.hadoop.hdds.scm.pipeline; import java.io.IOException; +import java.time.Instant; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; @@ -56,6 +59,8 @@ public final class Pipeline { private ThreadLocal> nodesInOrder = new ThreadLocal<>(); // Current reported Leader for the pipeline private UUID leaderId; + // Timestamp for pipeline upon creation + private Instant creationTimestamp; /** * The immutable properties of pipeline object is used in @@ -70,6 +75,7 @@ private Pipeline(PipelineID id, ReplicationType type, this.factor = factor; this.state = state; this.nodeStatus = nodeStatus; + this.creationTimestamp = Instant.now(); } /** @@ -108,6 +114,24 @@ public PipelineState getPipelineState() { return state; } + /** + * Return the creation time of pipeline. + * + * @return Creation Timestamp + */ + public Instant getCreationTimestamp() { + return creationTimestamp; + } + + /** + * Set the creation timestamp. Only for protobuf now. + * + * @param creationTimestamp + */ + void setCreationTimestamp(Instant creationTimestamp) { + this.creationTimestamp = creationTimestamp; + } + /** * Return the pipeline leader's UUID. * @@ -133,6 +157,23 @@ public List getNodes() { return new ArrayList<>(nodeStatus.keySet()); } + /** + * Return an immutable set of nodes which form this pipeline. + * @return Set of DatanodeDetails + */ + public Set getNodeSet() { + return Collections.unmodifiableSet(nodeStatus.keySet()); + } + + /** + * Check if the input pipeline share the same set of datanodes. + * @param pipeline + * @return true if the input pipeline shares the same set of datanodes. + */ + public boolean sameDatanodes(Pipeline pipeline) { + return getNodeSet().equals(pipeline.getNodeSet()); + } + /** * Returns the leader if found else defaults to closest node. * @@ -221,6 +262,7 @@ public HddsProtos.Pipeline getProtobufMessage() .setFactor(factor) .setState(PipelineState.getProtobuf(state)) .setLeaderID(leaderId != null ? 
leaderId.toString() : "") + .setCreationTimeStamp(creationTimestamp.toEpochMilli()) .addAllMembers(nodeStatus.keySet().stream() .map(DatanodeDetails::getProtoBufMessage) .collect(Collectors.toList())); @@ -256,6 +298,7 @@ public static Pipeline getFromProtobuf(HddsProtos.Pipeline pipeline) .setNodes(pipeline.getMembersList().stream() .map(DatanodeDetails::getFromProtoBuf).collect(Collectors.toList())) .setNodesInOrder(pipeline.getMemberOrdersList()) + .setCreateTimestamp(pipeline.getCreationTimeStamp()) .build(); } @@ -299,7 +342,8 @@ public String toString() { b.append(", Factor:").append(getFactor()); b.append(", State:").append(getPipelineState()); b.append(", leaderId:").append(getLeaderId()); - b.append(" ]"); + b.append(", CreationTimestamp").append(getCreationTimestamp()); + b.append("]"); return b.toString(); } @@ -323,6 +367,7 @@ public static class Builder { private List nodeOrder = null; private List nodesInOrder = null; private UUID leaderId = null; + private Instant creationTimestamp = null; public Builder() {} @@ -334,6 +379,7 @@ public Builder(Pipeline pipeline) { this.nodeStatus = pipeline.nodeStatus; this.nodesInOrder = pipeline.nodesInOrder.get(); this.leaderId = pipeline.getLeaderId(); + this.creationTimestamp = pipeline.getCreationTimestamp(); } public Builder setId(PipelineID id1) { @@ -372,6 +418,11 @@ public Builder setNodesInOrder(List orders) { return this; } + public Builder setCreateTimestamp(long createTimestamp) { + this.creationTimestamp = Instant.ofEpochMilli(createTimestamp); + return this; + } + public Pipeline build() { Preconditions.checkNotNull(id); Preconditions.checkNotNull(type); @@ -380,6 +431,10 @@ public Pipeline build() { Preconditions.checkNotNull(nodeStatus); Pipeline pipeline = new Pipeline(id, type, factor, state, nodeStatus); pipeline.setLeaderId(leaderId); + // overwrite with original creationTimestamp + if (creationTimestamp != null) { + pipeline.setCreationTimestamp(creationTimestamp); + } if (nodeOrder != null && !nodeOrder.isEmpty()) { // This branch is for build from ProtoBuf diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java index c3e9440425fd..fada2d8de8db 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java @@ -31,6 +31,7 @@ public enum SCMAction implements AuditAction { GET_CONTAINER, GET_CONTAINER_WITH_PIPELINE, LIST_CONTAINER, + CREATE_PIPELINE, LIST_PIPELINE, CLOSE_PIPELINE, ACTIVATE_PIPELINE, diff --git a/hadoop-hdds/common/src/main/proto/hdds.proto b/hadoop-hdds/common/src/main/proto/hdds.proto index 8da35760180d..c78175cb4ea1 100644 --- a/hadoop-hdds/common/src/main/proto/hdds.proto +++ b/hadoop-hdds/common/src/main/proto/hdds.proto @@ -75,6 +75,7 @@ message Pipeline { required PipelineID id = 5; optional string leaderID = 6; repeated uint32 memberOrders = 7; + optional uint64 creationTimeStamp = 8; } message KeyValue { diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index ecc25e93cb60..875f79f9f186 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -776,9 +776,11 @@ OZONE, MANAGEMENT - The full name of class which implements org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy. 
+      The full name of the class which implements
+      org.apache.hadoop.hdds.scm.PlacementPolicy.
       The class decides which datanode will be used to host the container replica. If not set,
-      org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementRandom will be used as default value.
+      org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementRandom will be used as default
+      value.
     </description>
   </property>
@@ -788,6 +790,35 @@
       Number of containers per owner in a pipeline.
     </description>
   </property>
+  <property>
+    <name>ozone.datanode.pipeline.limit</name>
+    <value>2</value>
+    <tag>OZONE, SCM, PIPELINE</tag>
+    <description>Max number of pipelines a datanode can be engaged in.
+    </description>
+  </property>
+  <property>
+    <name>ozone.scm.ratis.pipeline.limit</name>
+    <value>0</value>
+    <tag>OZONE, SCM, PIPELINE</tag>
+    <description>Upper limit for how many pipelines can be OPEN in SCM.
+      The default of 0 means there is no limit. Otherwise, this value is the
+      limit on how many pipelines can be OPEN.
+    </description>
+  </property>
+  <property>
+    <name>ozone.scm.pipeline.allocated.timeout</name>
+    <value>5m</value>
+    <tag>OZONE, SCM, PIPELINE</tag>
+    <description>
+      Timeout for a pipeline to stay in the ALLOCATED stage. Once a pipeline
+      is created, it should move to the OPEN stage as soon as its pipeline
+      report is successfully received by SCM. If a pipeline stays in ALLOCATED
+      longer than this period, it is scrubbed so that a new pipeline can be
+      created.
+    </description>
+  </property>
   <property>
     <name>ozone.scm.container.size</name>
     <value>5GB</value>
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
similarity index 90%
rename from hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java
rename to hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
index 77cdd83f7938..25457f72bc8c 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
@@ -15,7 +15,7 @@
  * the License.
  */
-package org.apache.hadoop.hdds.scm.container.placement.algorithms;
+package org.apache.hadoop.hdds.scm;
 
 import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.conf.Configuration;
@@ -33,25 +33,25 @@ import java.util.stream.Collectors;
 
 /**
- * SCM CommonPolicy implements a set of invariants which are common
- * for all container placement policies, acts as the repository of helper
+ * This policy implements a set of invariants which are common
+ * for all basic placement policies and acts as the repository of helper
  * functions which are common to placement policies.
  */
-public abstract class SCMCommonPolicy implements ContainerPlacementPolicy {
+public abstract class SCMCommonPlacementPolicy implements PlacementPolicy {
   @VisibleForTesting
   static final Logger LOG =
-      LoggerFactory.getLogger(SCMCommonPolicy.class);
+      LoggerFactory.getLogger(SCMCommonPlacementPolicy.class);
   private final NodeManager nodeManager;
   private final Random rand;
   private final Configuration conf;
 
   /**
-   * Constructs SCM Common Policy Class.
+   * Constructor.
    *
    * @param nodeManager NodeManager
    * @param conf Configuration class.
    */
-  public SCMCommonPolicy(NodeManager nodeManager, Configuration conf) {
+  public SCMCommonPlacementPolicy(NodeManager nodeManager, Configuration conf) {
     this.nodeManager = nodeManager;
     this.rand = new Random();
     this.conf = conf;
@@ -85,7 +85,7 @@ public Configuration getConf() {
   }
 
   /**
-   * Given the replication factor and size required, return set of datanodes
+   * Given the size required, return a set of datanodes
    * that satisfy the nodes and size requirement.
    * <p>
* Here are some invariants of container placement. @@ -149,7 +149,7 @@ public List chooseDatanodes( * @param datanodeDetails DatanodeDetails * @return true if we have enough space. */ - boolean hasEnoughSpace(DatanodeDetails datanodeDetails, + public boolean hasEnoughSpace(DatanodeDetails datanodeDetails, long sizeRequired) { SCMNodeMetric nodeMetric = nodeManager.getNodeStat(datanodeDetails); return (nodeMetric != null) && (nodeMetric.get() != null) @@ -164,7 +164,7 @@ boolean hasEnoughSpace(DatanodeDetails datanodeDetails, * @param nodesRequired - Nodes Required * @param healthyNodes - List of Nodes in the result set. * @return List of Datanodes that can be used for placement. - * @throws SCMException + * @throws SCMException SCMException */ public List getResultSet( int nodesRequired, List healthyNodes) @@ -190,8 +190,7 @@ public List getResultSet( /** * Choose a datanode according to the policy, this function is implemented - * by the actual policy class. For example, PlacementCapacity or - * PlacementRandom. + * by the actual policy class. * * @param healthyNodes - Set of healthy nodes we can choose from. * @return DatanodeDetails diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java index b7a7525cbaca..cdc3878b3146 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java @@ -197,8 +197,13 @@ public AllocatedBlock allocateBlock(final long size, ReplicationType type, // TODO: #CLUTIL Remove creation logic when all replication types and // factors are handled by pipeline creator pipeline = pipelineManager.createPipeline(type, factor); + // wait until pipeline is ready pipelineManager.waitPipelineReady(pipeline.getId(), 0); + } catch (SCMException se) { + LOG.warn("Pipeline creation failed for type:{} factor:{}. " + + "Datanodes may be used up.", type, factor, se); + break; } catch (IOException e) { LOG.warn("Pipeline creation failed for type:{} factor:{}. 
Retrying " + "get pipelines call once.", type, factor, e); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java index 9c1d9ad1b503..251d94315864 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java @@ -38,8 +38,9 @@ import org.apache.hadoop.hdds.conf.ConfigType; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.metrics2.MetricsCollector; @@ -85,7 +86,7 @@ public class ReplicationManager implements MetricsSource { * PlacementPolicy which is used to identify where a container * should be replicated. */ - private final ContainerPlacementPolicy containerPlacement; + private final PlacementPolicy containerPlacement; /** * EventPublisher to fire Replicate and Delete container events. @@ -131,12 +132,12 @@ public class ReplicationManager implements MetricsSource { * * @param conf OzoneConfiguration * @param containerManager ContainerManager - * @param containerPlacement ContainerPlacementPolicy + * @param containerPlacement PlacementPolicy * @param eventPublisher EventPublisher */ public ReplicationManager(final ReplicationManagerConfiguration conf, final ContainerManager containerManager, - final ContainerPlacementPolicy containerPlacement, + final PlacementPolicy containerPlacement, final EventPublisher eventPublisher, final LockManager lockManager) { this.containerManager = containerManager; @@ -476,7 +477,7 @@ private void forceCloseContainer(final ContainerInfo container, /** * If the given container is under replicated, identify a new set of - * datanode(s) to replicate the container using ContainerPlacementPolicy + * datanode(s) to replicate the container using PlacementPolicy * and send replicate container command to the identified datanode(s). 
* * @param container ContainerInfo diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java index 18ec2c385b0c..74431f9b05e8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdds.scm.container.placement.algorithms; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetworkTopology; @@ -34,22 +35,23 @@ public final class ContainerPlacementPolicyFactory { private static final Logger LOG = LoggerFactory.getLogger(ContainerPlacementPolicyFactory.class); - private static final Class + private static final Class OZONE_SCM_CONTAINER_PLACEMENT_IMPL_DEFAULT = SCMContainerPlacementRandom.class; private ContainerPlacementPolicyFactory() { } - public static ContainerPlacementPolicy getPolicy(Configuration conf, - final NodeManager nodeManager, NetworkTopology clusterMap, - final boolean fallback, SCMContainerPlacementMetrics metrics) - throws SCMException{ - final Class placementClass = conf + + public static PlacementPolicy getPolicy( + Configuration conf, final NodeManager nodeManager, + NetworkTopology clusterMap, final boolean fallback, + SCMContainerPlacementMetrics metrics) throws SCMException{ + final Class placementClass = conf .getClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, OZONE_SCM_CONTAINER_PLACEMENT_IMPL_DEFAULT, - ContainerPlacementPolicy.class); - Constructor constructor; + PlacementPolicy.class); + Constructor constructor; try { constructor = placementClass.getDeclaredConstructor(NodeManager.class, Configuration.class, NetworkTopology.class, boolean.class, diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java index 85d281cf6dc2..19093448b927 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java @@ -21,6 +21,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetworkTopology; @@ -65,7 +66,8 @@ * little or no work and the cluster will achieve a balanced distribution * over time. 
*/ -public final class SCMContainerPlacementCapacity extends SCMCommonPolicy { +public final class SCMContainerPlacementCapacity + extends SCMCommonPlacementPolicy { @VisibleForTesting static final Logger LOG = LoggerFactory.getLogger(SCMContainerPlacementCapacity.class); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java index 6d49459b739f..8933fe953a7f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java @@ -21,6 +21,7 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetConstants; import org.apache.hadoop.hdds.scm.net.NetworkTopology; @@ -45,7 +46,8 @@ * recommend to use this if the network topology has more layers. *
*/ -public final class SCMContainerPlacementRackAware extends SCMCommonPolicy { +public final class SCMContainerPlacementRackAware + extends SCMCommonPlacementPolicy { @VisibleForTesting static final Logger LOG = LoggerFactory.getLogger(SCMContainerPlacementRackAware.class); @@ -271,11 +273,9 @@ private Node chooseNode(List excludedNodes, Node affinityNode, throw new SCMException("No satisfied datanode to meet the" + " excludedNodes and affinityNode constrains.", null); } - if (hasEnoughSpace((DatanodeDetails)node, sizeRequired)) { - if (LOG.isDebugEnabled()) { - LOG.debug("Datanode {} is chosen for container. Required size is {}", - node.toString(), sizeRequired); - } + if (super.hasEnoughSpace((DatanodeDetails)node, sizeRequired)) { + LOG.debug("Datanode {} is chosen. Required size is {}", + node.toString(), sizeRequired); metrics.incrDatanodeChooseSuccessCount(); if (isFallbacked) { metrics.incrDatanodeChooseFallbackCount(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java index 6b1a5c8c6cb1..ce5d10d4e517 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java @@ -19,6 +19,8 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.node.NodeManager; @@ -37,8 +39,8 @@ * Balancer will need to support containers as a feature before this class * can be practically used. */ -public final class SCMContainerPlacementRandom extends SCMCommonPolicy - implements ContainerPlacementPolicy { +public final class SCMContainerPlacementRandom extends SCMCommonPlacementPolicy + implements PlacementPolicy { @VisibleForTesting static final Logger LOG = LoggerFactory.getLogger(SCMContainerPlacementRandom.class); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java index fd8bb87ceb12..37562fe9f293 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java @@ -19,6 +19,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; @@ -117,6 +118,13 @@ public interface NodeManager extends StorageContainerNodeProtocol, */ Set getPipelines(DatanodeDetails datanodeDetails); + /** + * Get the count of pipelines a datanodes is associated with. 
+ * @param datanodeDetails DatanodeDetails + * @return The number of pipelines + */ + int getPipelinesCount(DatanodeDetails datanodeDetails); + /** * Add pipeline information in the NodeManager. * @param pipeline - Pipeline to be added @@ -199,4 +207,10 @@ void processNodeReport(DatanodeDetails datanodeDetails, * @return the given datanode, or empty list if none found */ List getNodesByAddress(String address); + + /** + * Get cluster map as in network topology for this node manager. + * @return cluster map + */ + NetworkTopology getClusterNetworkTopologyMap(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java index 954cb0e8ea46..9d2a9f224cd2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java @@ -283,6 +283,15 @@ public void addPipeline(Pipeline pipeline) { node2PipelineMap.addPipeline(pipeline); } + /** + * Get the count of pipelines associated to single datanode. + * @param datanodeDetails single datanode + * @return number of pipelines associated with it + */ + public int getPipelinesCount(DatanodeDetails datanodeDetails) { + return node2PipelineMap.getPipelinesCount(datanodeDetails.getUuid()); + } + /** * Get information about the node. * diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index d84b75b7e65b..46534fbf5fe5 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -511,6 +511,16 @@ public Set getPipelines(DatanodeDetails datanodeDetails) { return nodeStateManager.getPipelineByDnID(datanodeDetails.getUuid()); } + /** + * Get the count of pipelines a datanodes is associated with. + * @param datanodeDetails DatanodeDetails + * @return The number of pipelines + */ + @Override + public int getPipelinesCount(DatanodeDetails datanodeDetails) { + return nodeStateManager.getPipelinesCount(datanodeDetails); + } + /** * Add pipeline information in the NodeManager. * @@ -645,6 +655,15 @@ public List getNodesByAddress(String address) { return results; } + /** + * Get cluster map as in network topology for this node manager. + * @return cluster map + */ + @Override + public NetworkTopology getClusterNetworkTopologyMap() { + return clusterMap; + } + private String nodeResolve(String hostname) { List hosts = new ArrayList<>(1); hosts.add(hostname); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ObjectsMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ObjectsMap.java index 37525b0076e8..57a377d998f4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ObjectsMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ObjectsMap.java @@ -67,6 +67,7 @@ public boolean isKnownDatanode(UUID datanodeID) { * @param datanodeID -- Datanode UUID * @param containerIDs - List of ContainerIDs. 
*/ + @VisibleForTesting public void insertNewDatanode(UUID datanodeID, Set containerIDs) throws SCMException { Preconditions.checkNotNull(containerIDs); @@ -83,7 +84,8 @@ public void insertNewDatanode(UUID datanodeID, Set containerIDs) * * @param datanodeID - Datanode ID. */ - void removeDatanode(UUID datanodeID) { + @VisibleForTesting + public void removeDatanode(UUID datanodeID) { Preconditions.checkNotNull(datanodeID); dn2ObjectMap.computeIfPresent(datanodeID, (k, v) -> null); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java index f8633f9fcbcd..6533cb807642 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2PipelineMap.java @@ -42,7 +42,7 @@ public Node2PipelineMap() { } /** - * Returns null if there no pipelines associated with this datanode ID. + * Returns null if there are no pipelines associated with this datanode ID. * * @param datanode - UUID * @return Set of pipelines or Null. @@ -51,6 +51,15 @@ public Set getPipelines(UUID datanode) { return getObjects(datanode); } + /** + * Return 0 if there are no pipelines associated with this datanode ID. + * @param datanode - UUID + * @return Number of pipelines or 0. + */ + public int getPipelinesCount(UUID datanode) { + return getObjects(datanode).size(); + } + /** * Adds a pipeline entry to a given dataNode in the map. * diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java index 30069876ab39..8e4ec6a00f8f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java @@ -110,6 +110,13 @@ private void createPipelines() { // Skip this iteration for creating pipeline continue; } + + try { + pipelineManager.scrubPipeline(type, factor); + } catch (IOException e) { + LOG.error("Error while scrubbing pipelines {}", e); + } + while (true) { try { if (scheduler.isClosed()) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java index 08552957789f..635e032c4764 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java @@ -77,6 +77,9 @@ NavigableSet getContainersInPipeline(PipelineID pipelineID) void finalizeAndDestroyPipeline(Pipeline pipeline, boolean onTimeout) throws IOException; + void scrubPipeline(ReplicationType type, ReplicationFactor factor) + throws IOException; + void startPipelineCreator(); void triggerPipelineCreation(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java new file mode 100644 index 000000000000..0f30449c9750 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java 
@@ -0,0 +1,419 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.pipeline; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.net.NetworkTopology; +import org.apache.hadoop.hdds.scm.net.Node; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Pipeline placement policy that choose datanodes based on load balancing + * and network topology to supply pipeline creation. + *
+ * 1. get a list of healthy nodes + * 2. filter out nodes that are not too heavily engaged in other pipelines + * 3. Choose an anchor node among the viable nodes. + * 4. Choose other nodes around the anchor node based on network topology + */ +public final class PipelinePlacementPolicy extends SCMCommonPlacementPolicy { + @VisibleForTesting + static final Logger LOG = + LoggerFactory.getLogger(PipelinePlacementPolicy.class); + private final NodeManager nodeManager; + private final PipelineStateManager stateManager; + private final Configuration conf; + private final int heavyNodeCriteria; + + /** + * Constructs a pipeline placement with considering network topology, + * load balancing and rack awareness. + * + * @param nodeManager NodeManager + * @param stateManager PipelineStateManager + * @param conf Configuration + */ + public PipelinePlacementPolicy(final NodeManager nodeManager, + final PipelineStateManager stateManager, final Configuration conf) { + super(nodeManager, conf); + this.nodeManager = nodeManager; + this.conf = conf; + this.stateManager = stateManager; + this.heavyNodeCriteria = conf.getInt( + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT_DEFAULT); + } + + /** + * Returns true if this node meets the criteria. + * + * @param datanodeDetails DatanodeDetails + * @param nodesRequired nodes required count + * @return true if we have enough space. + */ + @VisibleForTesting + boolean meetCriteria(DatanodeDetails datanodeDetails, int nodesRequired) { + if (heavyNodeCriteria == 0) { + // no limit applied. + return true; + } + // Datanodes from pipeline in some states can also be considered available + // for pipeline allocation. Thus the number of these pipeline shall be + // deducted from total heaviness calculation. + int pipelineNumDeductable = 0; + Set pipelines = nodeManager.getPipelines(datanodeDetails); + for (PipelineID pid : pipelines) { + Pipeline pipeline; + try { + pipeline = stateManager.getPipeline(pid); + } catch (PipelineNotFoundException e) { + LOG.error("Pipeline not found in pipeline state manager during" + + " pipeline creation. PipelineID: " + pid + + " exception: " + e.getMessage()); + continue; + } + if (pipeline != null && + pipeline.getFactor().getNumber() == nodesRequired && + pipeline.getType() == HddsProtos.ReplicationType.RATIS && + pipeline.getPipelineState() == Pipeline.PipelineState.CLOSED) { + pipelineNumDeductable++; + } + } + boolean meet = (nodeManager.getPipelinesCount(datanodeDetails) + - pipelineNumDeductable) < heavyNodeCriteria; + if (!meet && LOG.isDebugEnabled()) { + LOG.debug("Pipeline Placement: can't place more pipeline on heavy " + + "datanode: " + datanodeDetails.getUuid().toString() + + " Heaviness: " + nodeManager.getPipelinesCount(datanodeDetails) + + " limit: " + heavyNodeCriteria); + } + return meet; + } + + /** + * Filter out viable nodes based on + * 1. nodes that are healthy + * 2. nodes that are not too heavily engaged in other pipelines + * + * @param excludedNodes - excluded nodes + * @param nodesRequired - number of datanodes required. 
+ * @return a list of viable nodes + * @throws SCMException when viable nodes are not enough in numbers + */ + List filterViableNodes( + List excludedNodes, int nodesRequired) + throws SCMException { + // get nodes in HEALTHY state + List healthyNodes = + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); + if (excludedNodes != null) { + healthyNodes.removeAll(excludedNodes); + } + int initialHealthyNodesCount = healthyNodes.size(); + String msg; + + if (initialHealthyNodesCount < nodesRequired) { + msg = String.format("Pipeline creation failed due to no sufficient" + + " healthy datanodes. Required %d. Found %d.", + nodesRequired, initialHealthyNodesCount); + LOG.warn(msg); + throw new SCMException(msg, + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + + // filter nodes that meet the size and pipeline engagement criteria. + // Pipeline placement doesn't take node space left into account. + List healthyList = healthyNodes.stream() + .filter(d -> meetCriteria(d, nodesRequired)) + .collect(Collectors.toList()); + + if (healthyList.size() < nodesRequired) { + if (LOG.isDebugEnabled()) { + LOG.debug("Unable to find enough nodes that meet the criteria that" + + " cannot engage in more than" + heavyNodeCriteria + + " pipelines. Nodes required: " + nodesRequired + " Found:" + + healthyList.size() + " healthy nodes count in NodeManager: " + + initialHealthyNodesCount); + } + msg = String.format("Pipeline creation failed because nodes are engaged" + + " in other pipelines and every node can only be engaged in" + + " max %d pipelines. Required %d. Found %d", + heavyNodeCriteria, nodesRequired, healthyList.size()); + throw new SCMException(msg, + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + return healthyList; + } + + /** + * Pipeline placement choose datanodes to join the pipeline. + * + * @param excludedNodes - excluded nodes + * @param favoredNodes - list of nodes preferred. + * @param nodesRequired - number of datanodes required. + * @param sizeRequired - size required for the container or block. + * @return a list of chosen datanodeDetails + * @throws SCMException when chosen nodes are not enough in numbers + */ + @Override + public List chooseDatanodes( + List excludedNodes, List favoredNodes, + int nodesRequired, final long sizeRequired) throws SCMException { + // Get a list of viable nodes based on criteria + // and make sure excludedNodes are excluded from list. + List healthyNodes = + filterViableNodes(excludedNodes, nodesRequired); + + // Randomly picks nodes when all nodes are equal or factor is ONE. + // This happens when network topology is absent or + // all nodes are on the same rack. + if (checkAllNodesAreEqual(nodeManager.getClusterNetworkTopologyMap())) { + return super.getResultSet(nodesRequired, healthyNodes); + } else { + // Since topology and rack awareness are available, picks nodes + // based on them. + return this.getResultSet(nodesRequired, healthyNodes); + } + } + + // Fall back logic for node pick up. + DatanodeDetails fallBackPickNodes( + List nodeSet, List excludedNodes) + throws SCMException{ + DatanodeDetails node; + if (excludedNodes == null || excludedNodes.isEmpty()) { + node = chooseNode(nodeSet); + } else { + List inputNodes = nodeSet.stream() + .filter(p -> !excludedNodes.contains(p)).collect(Collectors.toList()); + node = chooseNode(inputNodes); + } + + if (node == null) { + String msg = String.format("Unable to find fall back node in" + + " pipeline allocation. 
nodeSet size: {}", nodeSet.size()); + LOG.warn(msg); + throw new SCMException(msg, + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + return node; + } + + /** + * Get result set based on the pipeline placement algorithm which considers + * network topology and rack awareness. + * @param nodesRequired - Nodes Required + * @param healthyNodes - List of Nodes in the result set. + * @return a list of datanodes + * @throws SCMException SCMException + */ + @Override + public List getResultSet( + int nodesRequired, List healthyNodes) + throws SCMException { + if (nodesRequired != HddsProtos.ReplicationFactor.THREE.getNumber()) { + throw new SCMException("Nodes required number is not supported: " + + nodesRequired, SCMException.ResultCodes.INVALID_CAPACITY); + } + + // Assume rack awareness is not enabled. + boolean rackAwareness = false; + List results = new ArrayList<>(nodesRequired); + // Since nodes are widely distributed, the results should be selected + // base on distance in topology, rack awareness and load balancing. + List exclude = new ArrayList<>(); + // First choose an anchor nodes randomly + DatanodeDetails anchor = chooseNode(healthyNodes); + if (anchor != null) { + results.add(anchor); + exclude.add(anchor); + } else { + LOG.warn("Unable to find healthy node for anchor(first) node."); + throw new SCMException("Unable to find anchor node.", + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + if (LOG.isDebugEnabled()) { + LOG.debug("First node chosen: {}", anchor); + } + + + // Choose the second node on different racks from anchor. + DatanodeDetails nextNode = chooseNodeBasedOnRackAwareness( + healthyNodes, exclude, + nodeManager.getClusterNetworkTopologyMap(), anchor); + if (nextNode != null) { + // Rack awareness is detected. + rackAwareness = true; + results.add(nextNode); + exclude.add(nextNode); + if (LOG.isDebugEnabled()) { + LOG.debug("Second node chosen: {}", nextNode); + } + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Pipeline Placement: Unable to find 2nd node on different " + + "rack based on rack awareness."); + } + } + + // Then choose nodes close to anchor based on network topology + int nodesToFind = nodesRequired - results.size(); + for (int x = 0; x < nodesToFind; x++) { + // Pick remaining nodes based on the existence of rack awareness. + DatanodeDetails pick = rackAwareness + ? chooseNodeFromNetworkTopology( + nodeManager.getClusterNetworkTopologyMap(), anchor, exclude) + : fallBackPickNodes(healthyNodes, exclude); + if (pick != null) { + results.add(pick); + exclude.add(pick); + if (LOG.isDebugEnabled()) { + LOG.debug("Remaining node chosen: {}", pick); + } + } + } + + if (results.size() < nodesRequired) { + LOG.warn("Unable to find the required number of " + + "healthy nodes that meet the criteria. Required nodes: {}, " + + "Found nodes: {}", nodesRequired, results.size()); + throw new SCMException("Unable to find required number of nodes.", + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + return results; + } + + /** + * Find a node from the healthy list and return it after removing it from the + * list that we are operating on. + * + * @param healthyNodes - Set of healthy nodes we can choose from. 
+ * @return chosen datanodDetails + */ + @Override + public DatanodeDetails chooseNode( + List healthyNodes) { + if (healthyNodes == null || healthyNodes.isEmpty()) { + return null; + } + int firstNodeNdx = getRand().nextInt(healthyNodes.size()); + int secondNodeNdx = getRand().nextInt(healthyNodes.size()); + + DatanodeDetails datanodeDetails; + // There is a possibility that both numbers will be same. + // if that is so, we just return the node. + if (firstNodeNdx == secondNodeNdx) { + datanodeDetails = healthyNodes.get(firstNodeNdx); + } else { + DatanodeDetails firstNodeDetails = healthyNodes.get(firstNodeNdx); + DatanodeDetails secondNodeDetails = healthyNodes.get(secondNodeNdx); + SCMNodeMetric firstNodeMetric = + nodeManager.getNodeStat(firstNodeDetails); + SCMNodeMetric secondNodeMetric = + nodeManager.getNodeStat(secondNodeDetails); + datanodeDetails = firstNodeMetric.isGreater(secondNodeMetric.get()) + ? firstNodeDetails : secondNodeDetails; + } + healthyNodes.remove(datanodeDetails); + return datanodeDetails; + } + + /** + * Choose node on different racks as anchor is on based on rack awareness. + * If a node on different racks cannot be found, then return a random node. + * @param healthyNodes healthy nodes + * @param excludedNodes excluded nodes + * @param networkTopology network topology + * @param anchor anchor node + * @return a node on different rack + */ + @VisibleForTesting + protected DatanodeDetails chooseNodeBasedOnRackAwareness( + List healthyNodes, List excludedNodes, + NetworkTopology networkTopology, DatanodeDetails anchor) { + Preconditions.checkArgument(networkTopology != null); + if (checkAllNodesAreEqual(networkTopology)) { + return null; + } + + for (DatanodeDetails node : healthyNodes) { + if (excludedNodes.contains(node) || + anchor.getNetworkLocation().equals(node.getNetworkLocation())) { + continue; + } else { + return node; + } + } + return null; + } + + /** + * Check if all nodes are equal in topology. + * They are equal when network topology is absent or there are on + * the same rack. + * @param topology network topology + * @return true when all nodes are equal + */ + private boolean checkAllNodesAreEqual(NetworkTopology topology) { + if (topology == null) { + return true; + } + return (topology.getNumOfNodes(topology.getMaxLevel() - 1) == 1); + } + + /** + * Choose node based on network topology. 
+ * @param networkTopology network topology + * @param anchor anchor datanode to start with + * @param excludedNodes excluded datanodes + * @return chosen datanode + */ + @VisibleForTesting + protected DatanodeDetails chooseNodeFromNetworkTopology( + NetworkTopology networkTopology, DatanodeDetails anchor, + List excludedNodes) { + Preconditions.checkArgument(networkTopology != null); + + Collection excluded = new ArrayList<>(); + if (excludedNodes != null && excludedNodes.size() != 0) { + excluded.addAll(excludedNodes); + } + + Node pick = networkTopology.chooseRandom( + anchor.getNetworkLocation(), excluded); + DatanodeDetails pickedNode = (DatanodeDetails) pick; + return pickedNode; + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java index 443378cd1835..8e0f32de1599 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -52,8 +53,8 @@ class PipelineStateMap { PipelineStateMap() { // TODO: Use TreeMap for range operations? - pipelineMap = new HashMap<>(); - pipeline2container = new HashMap<>(); + pipelineMap = new ConcurrentHashMap<>(); + pipeline2container = new ConcurrentHashMap<>(); query2OpenPipelines = new HashMap<>(); initializeQueryMap(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java index 6b93192219b4..13c3b6a5cb13 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java @@ -20,15 +20,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.container.placement.algorithms - .ContainerPlacementPolicy; -import org.apache.hadoop.hdds.scm.container.placement.algorithms - .SCMContainerPlacementRandom; import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.pipeline.Pipeline.PipelineState; import org.apache.hadoop.hdds.server.events.EventPublisher; @@ -39,8 +36,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -61,6 +56,9 @@ public class RatisPipelineProvider implements PipelineProvider { private final PipelineStateManager stateManager; private final Configuration conf; private final EventPublisher 
eventPublisher; + private final PipelinePlacementPolicy placementPolicy; + private int pipelineNumberLimit; + private int maxPipelinePerDatanode; // Set parallelism at 3, as now in Ratis we create 1 and 3 node pipelines. private final int parallelismForPool = 3; @@ -83,65 +81,93 @@ public class RatisPipelineProvider implements PipelineProvider { this.stateManager = stateManager; this.conf = conf; this.eventPublisher = eventPublisher; + this.placementPolicy = + new PipelinePlacementPolicy(nodeManager, stateManager, conf); + this.pipelineNumberLimit = conf.getInt( + ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT, + ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT_DEFAULT); + this.maxPipelinePerDatanode = conf.getInt( + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT_DEFAULT); } + private List pickNodesNeverUsed(ReplicationFactor factor) + throws SCMException { + Set dnsUsed = new HashSet<>(); + stateManager.getPipelines(ReplicationType.RATIS, factor) + .stream().filter( + p -> p.getPipelineState().equals(PipelineState.OPEN) || + p.getPipelineState().equals(PipelineState.DORMANT) || + p.getPipelineState().equals(PipelineState.ALLOCATED)) + .forEach(p -> dnsUsed.addAll(p.getNodes())); - /** - * Create pluggable container placement policy implementation instance. - * - * @param nodeManager - SCM node manager. - * @param conf - configuration. - * @return SCM container placement policy implementation instance. - */ - @SuppressWarnings("unchecked") - // TODO: should we rename ContainerPlacementPolicy to PipelinePlacementPolicy? - private static ContainerPlacementPolicy createContainerPlacementPolicy( - final NodeManager nodeManager, final Configuration conf) { - Class implClass = - (Class) conf.getClass( - ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementRandom.class); + // Get list of healthy nodes + List dns = nodeManager + .getNodes(HddsProtos.NodeState.HEALTHY) + .parallelStream() + .filter(dn -> !dnsUsed.contains(dn)) + .limit(factor.getNumber()) + .collect(Collectors.toList()); + if (dns.size() < factor.getNumber()) { + String e = String + .format("Cannot create pipeline of factor %d using %d nodes." + + " Used %d nodes. Healthy nodes %d", factor.getNumber(), + dns.size(), dnsUsed.size(), + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY).size()); + throw new SCMException(e, + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + return dns; + } - try { - Constructor ctor = - implClass.getDeclaredConstructor(NodeManager.class, - Configuration.class); - return ctor.newInstance(nodeManager, conf); - } catch (RuntimeException e) { - throw e; - } catch (InvocationTargetException e) { - throw new RuntimeException(implClass.getName() - + " could not be constructed.", e.getCause()); - } catch (Exception e) { -// LOG.error("Unhandled exception occurred, Placement policy will not " + -// "be functional."); - throw new IllegalArgumentException("Unable to load " + - "ContainerPlacementPolicy", e); + private boolean exceedPipelineNumberLimit(ReplicationFactor factor) { + if (factor != ReplicationFactor.THREE) { + // Only put limits for Factor THREE pipelines. 
+ return false; + } + // Per datanode limit + if (maxPipelinePerDatanode > 0) { + return (stateManager.getPipelines(ReplicationType.RATIS, factor).size() - + stateManager.getPipelines(ReplicationType.RATIS, factor, + Pipeline.PipelineState.CLOSED).size()) > maxPipelinePerDatanode * + nodeManager.getNodeCount(HddsProtos.NodeState.HEALTHY) / + factor.getNumber(); + } + + // Global limit + if (pipelineNumberLimit > 0) { + return (stateManager.getPipelines(ReplicationType.RATIS, + ReplicationFactor.THREE).size() - stateManager.getPipelines( + ReplicationType.RATIS, ReplicationFactor.THREE, + Pipeline.PipelineState.CLOSED).size()) > + (pipelineNumberLimit - stateManager.getPipelines( + ReplicationType.RATIS, ReplicationFactor.ONE).size()); } + + return false; } @Override public Pipeline create(ReplicationFactor factor) throws IOException { - // Get set of datanodes already used for ratis pipeline - Set dnsUsed = new HashSet<>(); - stateManager.getPipelines(ReplicationType.RATIS, factor).stream().filter( - p -> p.getPipelineState().equals(PipelineState.OPEN) || - p.getPipelineState().equals(PipelineState.DORMANT) || - p.getPipelineState().equals(PipelineState.ALLOCATED)) - .forEach(p -> dnsUsed.addAll(p.getNodes())); + if (exceedPipelineNumberLimit(factor)) { + throw new SCMException("Ratis pipeline number meets the limit: " + + pipelineNumberLimit + " factor : " + + factor.getNumber(), + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } - // Get list of healthy nodes - List dns = - nodeManager.getNodes(NodeState.HEALTHY) - .parallelStream() - .filter(dn -> !dnsUsed.contains(dn)) - .limit(factor.getNumber()) - .collect(Collectors.toList()); - if (dns.size() < factor.getNumber()) { - String e = String - .format("Cannot create pipeline of factor %d using %d nodes.", - factor.getNumber(), dns.size()); - throw new InsufficientDatanodesException(e); + List dns; + + switch(factor) { + case ONE: + dns = pickNodesNeverUsed(ReplicationFactor.ONE); + break; + case THREE: + dns = placementPolicy.chooseDatanodes(null, + null, factor.getNumber(), 0); + break; + default: + throw new IllegalStateException("Unknown factor: " + factor.name()); } Pipeline pipeline = Pipeline.newBuilder() diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java new file mode 100644 index 000000000000..db9260e60393 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineUtils.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdds.scm.pipeline; + +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.ratis.RatisHelper; +import org.apache.ratis.client.RaftClient; +import org.apache.ratis.grpc.GrpcTlsConfig; +import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.retry.RetryPolicy; +import org.apache.ratis.rpc.SupportedRpcType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class for Ratis pipelines. Contains methods to create and destroy + * ratis pipelines. + */ +public final class RatisPipelineUtils { + + private static final Logger LOG = + LoggerFactory.getLogger(RatisPipelineUtils.class); + + private RatisPipelineUtils() { + } + /** + * Removes pipeline from SCM. Sends ratis command to destroy pipeline on all + * the datanodes. + * + * @param pipeline - Pipeline to be destroyed + * @param ozoneConf - Ozone configuration + * @param grpcTlsConfig + * @throws IOException + */ + public static void destroyPipeline(Pipeline pipeline, Configuration ozoneConf, + GrpcTlsConfig grpcTlsConfig) { + final RaftGroup group = RatisHelper.newRaftGroup(pipeline); + if (LOG.isDebugEnabled()) { + LOG.debug("destroying pipeline:{} with {}", pipeline.getId(), group); + } + for (DatanodeDetails dn : pipeline.getNodes()) { + try { + destroyPipeline(dn, pipeline.getId(), ozoneConf, grpcTlsConfig); + } catch (IOException e) { + LOG.warn("Pipeline destroy failed for pipeline={} dn={} exception={}", + pipeline.getId(), dn, e.getMessage()); + } + } + } + + /** + * Sends ratis command to destroy pipeline on the given datanode. + * + * @param dn - Datanode on which pipeline needs to be destroyed + * @param pipelineID - ID of pipeline to be destroyed + * @param ozoneConf - Ozone configuration + * @param grpcTlsConfig - grpc tls configuration + * @throws IOException + */ + static void destroyPipeline(DatanodeDetails dn, PipelineID pipelineID, + Configuration ozoneConf, GrpcTlsConfig grpcTlsConfig) throws IOException { + final String rpcType = ozoneConf + .get(ScmConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_KEY, + ScmConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_DEFAULT); + final RetryPolicy retryPolicy = RatisHelper.createRetryPolicy(ozoneConf); + final RaftPeer p = RatisHelper.toRaftPeer(dn); + try(RaftClient client = RatisHelper + .newRaftClient(SupportedRpcType.valueOfIgnoreCase(rpcType), p, + retryPolicy, grpcTlsConfig, ozoneConf)) { + client.groupRemove(RaftGroupId.valueOf(pipelineID.getId()), + true, p.getId()); + } + } + + /** + * Return the list of pipelines who share the same set of datanodes + * with the input pipeline. 
+ * + * @param stateManager PipelineStateManager + * @param pipeline input pipeline + * @return list of matched pipeline + */ + static List checkPipelineContainSameDatanodes( + PipelineStateManager stateManager, Pipeline pipeline) { + return stateManager.getPipelines( + HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE) + .stream().filter(p -> !p.getId().equals(pipeline.getId()) && + (p.getPipelineState() != Pipeline.PipelineState.CLOSED && + p.sameDatanodes(pipeline))) + .collect(Collectors.toList()); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java index 32aa7b693ba6..88c4329275e0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java @@ -45,6 +45,8 @@ import javax.management.ObjectName; import java.io.File; import java.io.IOException; +import java.time.Duration; +import java.time.Instant; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -54,11 +56,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.stream.Collectors; -import static org.apache.hadoop.hdds.scm - .ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_DEFAULT; -import static org.apache.hadoop.hdds.scm - .ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_MB; import static org.apache.hadoop.ozone.OzoneConsts.SCM_PIPELINE_DB; /** @@ -109,8 +108,8 @@ protected SCMPipelineManager(Configuration conf, NodeManager nodeManager, scheduler = new Scheduler("RatisPipelineUtilsThread", false, 1); this.backgroundPipelineCreator = new BackgroundPipelineCreator(this, scheduler, conf); - int cacheSize = conf.getInt(OZONE_SCM_DB_CACHE_SIZE_MB, - OZONE_SCM_DB_CACHE_SIZE_DEFAULT); + int cacheSize = conf.getInt(ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_MB, + ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_DEFAULT); final File pipelineDBPath = getPipelineDBPath(conf); this.pipelineStore = MetadataStoreBuilder.newBuilder() @@ -175,9 +174,22 @@ public synchronized Pipeline createPipeline(ReplicationType type, metrics.incNumPipelineCreated(); metrics.createPerPipelineMetrics(pipeline); } + List overlapPipelines = RatisPipelineUtils + .checkPipelineContainSameDatanodes(stateManager, pipeline); + if (!overlapPipelines.isEmpty()) { + // Count 1 overlap at a time. + metrics.incNumPipelineContainSameDatanodes(); + //TODO remove until pipeline allocation is proved equally distributed. + for (Pipeline overlapPipeline : overlapPipelines) { + LOG.info("Pipeline: " + pipeline.getId().toString() + + " contains same datanodes as previous pipelines: " + + overlapPipeline.getId().toString() + " nodeIds: " + + pipeline.getNodes().get(0).getUuid().toString() + + ", " + pipeline.getNodes().get(1).getUuid().toString() + + ", " + pipeline.getNodes().get(2).getUuid().toString()); + } + } return pipeline; - } catch (InsufficientDatanodesException idEx) { - throw idEx; } catch (IOException ex) { metrics.incNumPipelineCreationFailed(); throw ex; @@ -188,7 +200,7 @@ public synchronized Pipeline createPipeline(ReplicationType type, @Override public Pipeline createPipeline(ReplicationType type, ReplicationFactor factor, - List nodes) { + List nodes) { // This will mostly be used to create dummy pipeline for SimplePipelines. 
// We don't update the metrics for SimplePipelines. lock.writeLock().lock(); @@ -367,6 +379,32 @@ public void finalizeAndDestroyPipeline(Pipeline pipeline, boolean onTimeout) } } + @Override + public void scrubPipeline(ReplicationType type, ReplicationFactor factor) + throws IOException { + if (type != ReplicationType.RATIS || factor != ReplicationFactor.THREE) { + // Only scrub RATIS THREE pipelines. + return; + } + Instant currentTime = Instant.now(); + Long pipelineScrubTimeoutInMillis = conf.getTimeDuration( + ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT, + ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT_DEFAULT, + TimeUnit.MILLISECONDS); + List needToScrubPipelines = stateManager.getPipelines(type, factor, + Pipeline.PipelineState.ALLOCATED).stream() + .filter(p -> currentTime.toEpochMilli() - p.getCreationTimestamp() + .toEpochMilli() >= pipelineScrubTimeoutInMillis) + .collect(Collectors.toList()); + for (Pipeline p : needToScrubPipelines) { + LOG.info("Scrubbing pipeline: id: " + p.getId().toString() + + " since it has stayed at ALLOCATED stage for " + + Duration.between(p.getCreationTimestamp(), currentTime).toMinutes() + + " mins."); + finalizeAndDestroyPipeline(p, false); + } + } + @Override public Map getPipelineInfo() { final Map pipelineInfo = new HashMap<>(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java index 40a6f290b54e..1cf8d3a1e528 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineMetrics.java @@ -54,6 +54,7 @@ public final class SCMPipelineMetrics implements MetricsSource { private @Metric MutableCounterLong numPipelineDestroyFailed; private @Metric MutableCounterLong numPipelineReportProcessed; private @Metric MutableCounterLong numPipelineReportProcessingFailed; + private @Metric MutableCounterLong numPipelineContainSameDatanodes; private Map numBlocksAllocated; /** Private constructor. */ @@ -92,6 +93,7 @@ public void getMetrics(MetricsCollector collector, boolean all) { numPipelineDestroyFailed.snapshot(recordBuilder, true); numPipelineReportProcessed.snapshot(recordBuilder, true); numPipelineReportProcessingFailed.snapshot(recordBuilder, true); + numPipelineContainSameDatanodes.snapshot(recordBuilder, true); numBlocksAllocated .forEach((pid, metric) -> metric.snapshot(recordBuilder, true)); } @@ -134,6 +136,14 @@ void incNumPipelineCreated() { numPipelineCreated.incr(); } + /** + * Get the number of pipelines created. + * @return number of pipelines created + */ + long getNumPipelineCreated() { + return numPipelineCreated.value(); + } + /** * Increments number of failed pipeline creation count. */ @@ -168,4 +178,11 @@ void incNumPipelineReportProcessed() { void incNumPipelineReportProcessingFailed() { numPipelineReportProcessingFailed.incr(); } + + /** + * Increments the number of pipelines that contain the same set of datanodes.
+ */ + void incNumPipelineContainSameDatanodes() { + numPipelineContainSameDatanodes.incr(); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SimplePipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SimplePipelineProvider.java index 00cb7ae164b3..a772a972529f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SimplePipelineProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SimplePipelineProvider.java @@ -48,7 +48,7 @@ public Pipeline create(ReplicationFactor factor) throws IOException { String e = String .format("Cannot create pipeline of factor %d using %d nodes.", factor.getNumber(), dns.size()); - throw new IOException(e); + throw new InsufficientDatanodesException(e); } Collections.shuffle(dns); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 5ff75e7c24f6..f2e4253be4ad 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -23,6 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.PipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto; @@ -72,6 +73,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.PipelineResponseProto.Error.errorPipelineAlreadyExists; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.PipelineResponseProto.Error.success; + /** * This class is the server-side translator that forwards requests received on * {@link StorageContainerLocationProtocolPB} to the @@ -160,6 +164,12 @@ public ScmContainerLocationResponse processRequest( .setScmCloseContainerResponse(closeContainer( request.getScmCloseContainerRequest())) .build(); + case AllocatePipeline: + return ScmContainerLocationResponse.newBuilder() + .setCmdType(request.getCmdType()) + .setStatus(Status.OK) + .setPipelineResponse(allocatePipeline(request.getPipelineRequest())) + .build(); case ListPipelines: return ScmContainerLocationResponse.newBuilder() .setCmdType(request.getCmdType()) @@ -327,6 +337,22 @@ public SCMCloseContainerResponseProto closeContainer( return SCMCloseContainerResponseProto.newBuilder().build(); } + public PipelineResponseProto allocatePipeline( + StorageContainerLocationProtocolProtos.PipelineRequestProto request) + throws IOException { + Pipeline pipeline = impl.createReplicationPipeline( + request.getReplicationType(), request.getReplicationFactor(), + 
HddsProtos.NodePool.getDefaultInstance()); + if (pipeline == null) { + return PipelineResponseProto.newBuilder() + .setErrorCode(errorPipelineAlreadyExists).build(); + } + PipelineResponseProto response = PipelineResponseProto.newBuilder() + .setErrorCode(success) + .setPipeline(pipeline.getProtobufMessage()).build(); + return response; + } + public ListPipelineResponseProto listPipelines( ListPipelineRequestProto request) throws IOException { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java index 33936d5580cf..1a03c34b2b06 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java @@ -17,11 +17,14 @@ */ package org.apache.hadoop.hdds.scm.safemode; +import java.util.HashSet; +import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; import com.google.common.base.Preconditions; import org.apache.hadoop.hdds.server.events.EventQueue; @@ -46,6 +49,8 @@ public class HealthyPipelineSafeModeRule private int healthyPipelineThresholdCount; private int currentHealthyPipelineCount = 0; private final double healthyPipelinesPercent; + private final Set processedPipelineIDs = + new HashSet<>(); HealthyPipelineSafeModeRule(String ruleName, EventQueue eventQueue, PipelineManager pipelineManager, @@ -117,8 +122,11 @@ protected void process(Pipeline pipeline) { Preconditions.checkNotNull(pipeline); if (pipeline.getType() == HddsProtos.ReplicationType.RATIS && pipeline.getFactor() == HddsProtos.ReplicationFactor.THREE) { - getSafeModeMetrics().incCurrentHealthyPipelinesCount(); - currentHealthyPipelineCount++; + if (!processedPipelineIDs.contains(pipeline.getId())) { + getSafeModeMetrics().incCurrentHealthyPipelinesCount(); + currentHealthyPipelineCount++; + processedPipelineIDs.add(pipeline.getId()); + } } if (scmInSafeMode()) { @@ -131,6 +139,7 @@ protected void process(Pipeline pipeline) { @Override protected void cleanup() { + processedPipelineIDs.clear(); } @VisibleForTesting diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index b16f7a5f4adb..dad1622449dd 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -395,10 +395,10 @@ public void closeContainer(long containerID) throws IOException { public Pipeline createReplicationPipeline(HddsProtos.ReplicationType type, HddsProtos.ReplicationFactor factor, HddsProtos.NodePool nodePool) throws IOException { - // TODO: will be addressed in future patch. - // This is needed only for debugging purposes to make sure cluster is - // working correctly. 
- return null; + Pipeline result = scm.getPipelineManager().createPipeline(type, factor); + AUDIT.logWriteSuccess( + buildAuditMessageForSuccess(SCMAction.CREATE_PIPELINE, null)); + return result; } @Override diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 2dfde78dc562..53adcf27e018 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -58,7 +58,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerReportHandler; import org.apache.hadoop.hdds.scm.container.IncrementalContainerReportHandler; import org.apache.hadoop.hdds.scm.container.SCMContainerManager; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.metrics.ContainerStat; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMMetrics; import org.apache.hadoop.hdds.scm.container.ReplicationManager; @@ -391,7 +391,7 @@ private void initializeSystemManagers(OzoneConfiguration conf, } placementMetrics = SCMContainerPlacementMetrics.create(); - ContainerPlacementPolicy containerPlacementPolicy = + PlacementPolicy containerPlacementPolicy = ContainerPlacementPolicyFactory.getPolicy(conf, scmNodeManager, clusterMap, true, placementMetrics); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java index 06dc67535720..cbeef7f67ab7 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java @@ -17,10 +17,12 @@ package org.apache.hadoop.hdds.scm.container; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.PipelineReportsProto; import org.apache.hadoop.hdds.scm.net.NetConstants; import org.apache.hadoop.hdds.scm.net.NetworkTopology; +import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl; import org.apache.hadoop.hdds.scm.net.Node; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; @@ -86,12 +88,13 @@ public class MockNodeManager implements NodeManager { private final SCMNodeStat aggregateStat; private boolean safemode; private final Map> commandMap; - private final Node2PipelineMap node2PipelineMap; + private Node2PipelineMap node2PipelineMap; private final Node2ContainerMap node2ContainerMap; private NetworkTopology clusterMap; private ConcurrentMap> dnsToUuidMap; - public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { + public MockNodeManager(NetworkTopologyImpl clusterMap, + boolean initializeFakeNodes, int nodeCount) { this.healthyNodes = new LinkedList<>(); this.staleNodes = new LinkedList<>(); this.deadNodes = new LinkedList<>(); @@ -99,7 +102,8 @@ public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { this.node2PipelineMap = new Node2PipelineMap(); this.node2ContainerMap = new Node2ContainerMap(); this.dnsToUuidMap = 
new ConcurrentHashMap<>(); - aggregateStat = new SCMNodeStat(); + this.aggregateStat = new SCMNodeStat(); + this.clusterMap = clusterMap; if (initializeFakeNodes) { for (int x = 0; x < nodeCount; x++) { DatanodeDetails dd = MockDatanodeDetails.randomDatanodeDetails(); @@ -111,6 +115,11 @@ public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { this.commandMap = new HashMap<>(); } + public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { + this(new NetworkTopologyImpl(new OzoneConfiguration()), + initializeFakeNodes, nodeCount); + } + /** * Invoked from ctor to create some node Metrics. * @@ -250,6 +259,16 @@ public Set getPipelines(DatanodeDetails dnId) { return node2PipelineMap.getPipelines(dnId.getUuid()); } + /** + * Get the count of pipelines a datanodes is associated with. + * @param datanodeDetails DatanodeDetails + * @return The number of pipelines + */ + @Override + public int getPipelinesCount(DatanodeDetails datanodeDetails) { + return node2PipelineMap.getPipelinesCount(datanodeDetails.getUuid()); + } + /** * Add pipeline information in the NodeManager. * @param pipeline - Pipeline to be added @@ -259,6 +278,22 @@ public void addPipeline(Pipeline pipeline) { node2PipelineMap.addPipeline(pipeline); } + /** + * Get the entire Node2PipelineMap. + * @return Node2PipelineMap + */ + public Node2PipelineMap getNode2PipelineMap() { + return node2PipelineMap; + } + + /** + * Set the Node2PipelineMap. + * @param node2PipelineMap Node2PipelineMap + */ + public void setNode2PipelineMap(Node2PipelineMap node2PipelineMap) { + this.node2PipelineMap = node2PipelineMap; + } + /** * Remove a pipeline information from the NodeManager. * @param pipeline - Pipeline to be removed @@ -517,6 +552,11 @@ public List getNodesByAddress(String address) { return results; } + @Override + public NetworkTopology getClusterNetworkTopologyMap() { + return clusterMap; + } + public void setNetworkTopology(NetworkTopology topology) { this.clusterMap = topology; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java index 612bf5dd99df..10c38a8fadcf 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.TestUtils; import org.apache.hadoop.hdds.scm.pipeline.MockRatisPipelineProvider; import org.apache.hadoop.hdds.scm.pipeline.PipelineProvider; @@ -67,13 +68,14 @@ public static void setUp() throws Exception { .getTestDir(TestCloseContainerEventHandler.class.getSimpleName()); configuration .set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); + configuration.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT, 16); nodeManager = new MockNodeManager(true, 10); eventQueue = new EventQueue(); pipelineManager = new SCMPipelineManager(configuration, nodeManager, eventQueue); PipelineProvider mockRatisProvider = new MockRatisPipelineProvider(nodeManager, - pipelineManager.getStateManager(), configuration); + pipelineManager.getStateManager(), configuration, eventQueue); 
pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS, mockRatisProvider); containerManager = new SCMContainerManager(configuration, pipelineManager); @@ -91,6 +93,9 @@ public static void tearDown() throws Exception { if (containerManager != null) { containerManager.close(); } + if (pipelineManager != null) { + pipelineManager.close(); + } FileUtil.fullyDelete(testDir); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java index dadb3093261b..87d76558d27c 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java @@ -27,8 +27,7 @@ import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMCommandProto; import org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration; -import org.apache.hadoop.hdds.scm.container.placement.algorithms - .ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.server.events.EventHandler; @@ -66,7 +65,7 @@ public class TestReplicationManager { private ReplicationManager replicationManager; private ContainerStateManager containerStateManager; - private ContainerPlacementPolicy containerPlacementPolicy; + private PlacementPolicy containerPlacementPolicy; private EventQueue eventQueue; private DatanodeCommandHandler datanodeCommandHandler; @@ -93,7 +92,7 @@ public void setup() throws IOException, InterruptedException { .thenAnswer(invocation -> containerStateManager .getContainerReplicas((ContainerID)invocation.getArguments()[0])); - containerPlacementPolicy = Mockito.mock(ContainerPlacementPolicy.class); + containerPlacementPolicy = Mockito.mock(PlacementPolicy.class); Mockito.when(containerPlacementPolicy.chooseDatanodes( Mockito.anyListOf(DatanodeDetails.class), diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java index b685ba903d9b..a454de2672a7 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; import org.apache.hadoop.hdds.scm.exceptions.SCMException; @@ -99,7 +100,7 @@ public void testRackAwarePolicy() throws IOException { when(nodeManager.getNodeStat(datanodes.get(4))) .thenReturn(new SCMNodeMetric(storageCapacity, 70L, 30L)); - ContainerPlacementPolicy policy = ContainerPlacementPolicyFactory + PlacementPolicy policy = ContainerPlacementPolicyFactory .getPolicy(conf, nodeManager, 
cluster, true, SCMContainerPlacementMetrics.create()); @@ -117,7 +118,7 @@ public void testRackAwarePolicy() throws IOException { @Test public void testDefaultPolicy() throws IOException { - ContainerPlacementPolicy policy = ContainerPlacementPolicyFactory + PlacementPolicy policy = ContainerPlacementPolicyFactory .getPolicy(conf, null, null, true, null); Assert.assertSame(SCMContainerPlacementRandom.class, policy.getClass()); } @@ -125,7 +126,7 @@ public void testDefaultPolicy() throws IOException { /** * A dummy container placement implementation for test. */ - public static class DummyImpl implements ContainerPlacementPolicy { + public static class DummyImpl implements PlacementPolicy { @Override public List chooseDatanodes( List excludedNodes, List favoredNodes, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java index 39058234a89f..88085467c5dd 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java @@ -27,8 +27,7 @@ import org.apache.hadoop.hdds.scm.XceiverClientManager; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.SCMContainerManager; -import org.apache.hadoop.hdds.scm.container.placement.algorithms - .ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms .SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -135,7 +134,7 @@ public void testContainerPlacementCapacity() throws IOException, conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); conf.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class); + SCMContainerPlacementCapacity.class, PlacementPolicy.class); SCMNodeManager nodeManager = createNodeManager(conf); SCMContainerManager containerManager = diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java index 594ea5cfd523..4cdc46fa2222 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java @@ -66,6 +66,8 @@ import org.junit.Test; import org.mockito.Mockito; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; + /** * Test DeadNodeHandler. 
*/ @@ -87,6 +89,7 @@ public void setup() throws IOException, AuthenticationException { storageDir = GenericTestUtils.getTempPath( TestDeadNodeHandler.class.getSimpleName() + UUID.randomUUID()); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 0); eventQueue = new EventQueue(); scm = HddsTestUtils.getScm(conf); nodeManager = (SCMNodeManager) scm.getScmNodeManager(); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java index fca88c7a88f0..91f437d98008 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java @@ -62,6 +62,7 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; import static org.apache.hadoop.hdds.scm.events.SCMEvents.DATANODE_COMMAND; import org.junit.After; @@ -117,6 +118,7 @@ OzoneConfiguration getConf() { conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100, TimeUnit.MILLISECONDS); conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false); + conf.setInt(OZONE_SCM_RATIS_PIPELINE_LIMIT, 10); return conf; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java index 25b0adc32651..ff5247027259 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockRatisPipelineProvider.java @@ -33,6 +33,15 @@ */ public class MockRatisPipelineProvider extends RatisPipelineProvider { + private boolean autoOpenPipeline; + + public MockRatisPipelineProvider(NodeManager nodeManager, + PipelineStateManager stateManager, Configuration conf, + EventPublisher eventPublisher, boolean autoOpen) { + super(nodeManager, stateManager, conf, eventPublisher); + autoOpenPipeline = autoOpen; + } + public MockRatisPipelineProvider(NodeManager nodeManager, PipelineStateManager stateManager, Configuration conf) { @@ -43,12 +52,31 @@ public MockRatisPipelineProvider(NodeManager nodeManager, PipelineStateManager stateManager, Configuration conf, EventPublisher eventPublisher) { super(nodeManager, stateManager, conf, eventPublisher); + autoOpenPipeline = true; } protected void initializePipeline(Pipeline pipeline) throws IOException { // do nothing as the datanodes do not exist } + @Override + public Pipeline create(HddsProtos.ReplicationFactor factor) + throws IOException { + if (autoOpenPipeline) { + return super.create(factor); + } else { + Pipeline initialPipeline = super.create(factor); + return Pipeline.newBuilder() + .setId(initialPipeline.getId()) + // overwrite pipeline state to maintain ALLOCATED + .setState(Pipeline.PipelineState.ALLOCATED) + .setType(initialPipeline.getType()) + .setFactor(factor) + .setNodes(initialPipeline.getNodes()) + .build(); + } + } +
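For illustration only, and not part of the patch itself: a minimal sketch of how the autoOpen = false constructor above can be combined with the scrubber added to SCMPipelineManager in this change, reusing the setup that TestSCMPipelineManager uses later in this diff (names and values are example choices, not prescribed by the patch).

    // Make every ALLOCATED pipeline immediately eligible for scrubbing.
    conf.setTimeDuration(ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT,
        -1, TimeUnit.MILLISECONDS);
    SCMPipelineManager pipelineManager =
        new SCMPipelineManager(conf, nodeManager, eventQueue);
    // autoOpen = false keeps created pipelines in ALLOCATED state.
    PipelineProvider provider = new MockRatisPipelineProvider(nodeManager,
        pipelineManager.getStateManager(), conf, eventQueue, false);
    pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS, provider);
    Pipeline pipeline = pipelineManager.createPipeline(
        HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE);
    // The pipeline never reaches OPEN, so the scrubber finalizes and destroys it.
    pipelineManager.scrubPipeline(HddsProtos.ReplicationType.RATIS,
        HddsProtos.ReplicationFactor.THREE);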
@Override public void shutdown() { // Do nothing. diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java new file mode 100644 index 000000000000..41eea3d9dc67 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineDatanodesIntersection.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.pipeline; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.container.MockNodeManager; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE; + +/** + * Test for pipeline datanodes intersection. + */ +@RunWith(Parameterized.class) +public class TestPipelineDatanodesIntersection { + private static final Logger LOG = LoggerFactory + .getLogger(TestPipelineDatanodesIntersection.class.getName()); + + private int nodeCount; + private int nodeHeaviness; + private OzoneConfiguration conf; + private boolean end; + + @Before + public void initialize() { + conf = new OzoneConfiguration(); + end = false; + } + + public TestPipelineDatanodesIntersection(int nodeCount, int nodeHeaviness) { + this.nodeCount = nodeCount; + this.nodeHeaviness = nodeHeaviness; + } + + @Parameterized.Parameters + public static Collection inputParams() { + return Arrays.asList(new Object[][] { + {4, 5}, + {10, 5}, + {20, 5}, + {50, 5}, + {100, 5}, + {100, 10} + }); + } + + @Test + public void testPipelineDatanodesIntersection() { + NodeManager nodeManager = new MockNodeManager(true, nodeCount); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, nodeHeaviness); + conf.setBoolean(OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE, false); + PipelineStateManager stateManager = new PipelineStateManager(); + PipelineProvider provider = new MockRatisPipelineProvider(nodeManager, + stateManager, conf); + + int healthyNodeCount = nodeManager + .getNodeCount(HddsProtos.NodeState.HEALTHY); + int intersectionCount = 0; + int createdPipelineCount = 0; + while (!end && createdPipelineCount <= healthyNodeCount * nodeHeaviness) { + try { + Pipeline pipeline = provider.create(HddsProtos.ReplicationFactor.THREE); + stateManager.addPipeline(pipeline); + nodeManager.addPipeline(pipeline); + List overlapPipelines = RatisPipelineUtils + .checkPipelineContainSameDatanodes(stateManager, pipeline); + + if (!overlapPipelines.isEmpty()) { + intersectionCount++; + for (Pipeline overlapPipeline : overlapPipelines) { + LOG.info("This pipeline: " + pipeline.getId().toString() + + " overlaps with previous pipeline: " + overlapPipeline.getId() + + ". 
They share the same set of datanodes: " + + pipeline.getNodesInOrder().get(0).getUuid() + "/" + + pipeline.getNodesInOrder().get(1).getUuid() + "/" + + pipeline.getNodesInOrder().get(2).getUuid() + " and " + + overlapPipeline.getNodesInOrder().get(0).getUuid() + "/" + + overlapPipeline.getNodesInOrder().get(1).getUuid() + "/" + + overlapPipeline.getNodesInOrder().get(2).getUuid() + + "."); + } + } + createdPipelineCount++; + } catch (SCMException e) { + end = true; + } catch (IOException e) { + end = true; + // Should not throw regular IOException. + Assert.fail(); + } + } + + end = false; + + LOG.info("Among total " + + stateManager.getPipelines(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE).size() + " created pipelines" + + " with " + healthyNodeCount + " healthy datanodes and " + + nodeHeaviness + " as node heaviness, " + + intersectionCount + " pipelines have the same set of datanodes."); + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java new file mode 100644 index 000000000000..daad80834c5a --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.pipeline; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.container.MockNodeManager; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.net.*; +import org.apache.hadoop.hdds.scm.node.states.Node2PipelineMap; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.*; +import java.util.stream.Collectors; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; + +/** + * Test for PipelinePlacementPolicy. + */ +public class TestPipelinePlacementPolicy { + private MockNodeManager nodeManager; + private OzoneConfiguration conf; + private PipelinePlacementPolicy placementPolicy; + private static final int PIPELINE_PLACEMENT_MAX_NODES_COUNT = 10; + + @Before + public void init() throws Exception { + nodeManager = new MockNodeManager(true, + PIPELINE_PLACEMENT_MAX_NODES_COUNT); + conf = new OzoneConfiguration(); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 5); + placementPolicy = new PipelinePlacementPolicy( + nodeManager, new PipelineStateManager(), conf); + } + + @Test + public void testChooseNodeBasedOnNetworkTopology() { + List healthyNodes = + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); + DatanodeDetails anchor = placementPolicy.chooseNode(healthyNodes); + // anchor should be removed from healthyNodes after being chosen. + Assert.assertFalse(healthyNodes.contains(anchor)); + + List excludedNodes = + new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT); + excludedNodes.add(anchor); + DatanodeDetails nextNode = placementPolicy.chooseNodeFromNetworkTopology( + nodeManager.getClusterNetworkTopologyMap(), anchor, excludedNodes); + Assert.assertFalse(excludedNodes.contains(nextNode)); + // nextNode should not be the same as anchor. + Assert.assertTrue(anchor.getUuid() != nextNode.getUuid()); + } + + @Test + public void testChooseNodeBasedOnRackAwareness() { + List healthyNodes = overWriteLocationInNodes( + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY)); + DatanodeDetails anchor = placementPolicy.chooseNode(healthyNodes); + NetworkTopology topologyWithDifRacks = + createNetworkTopologyOnDifRacks(); + DatanodeDetails nextNode = placementPolicy.chooseNodeBasedOnRackAwareness( + healthyNodes, new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + topologyWithDifRacks, anchor); + Assert.assertNotNull(nextNode); + Assert.assertFalse(anchor.getNetworkLocation().equals( + nextNode.getNetworkLocation())); + } + + @Test + public void testFallBackPickNodes() { + List healthyNodes = overWriteLocationInNodes( + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY)); + DatanodeDetails node; + try { + node = placementPolicy.fallBackPickNodes(healthyNodes, null); + Assert.assertNotNull(node); + } catch (SCMException e) { + Assert.fail("Should not reach here."); + } + + // when input nodeSet are all excluded. 
+ List exclude = healthyNodes; + try { + node = placementPolicy.fallBackPickNodes(healthyNodes, exclude); + Assert.assertNull(node); + } catch (SCMException e) { + Assert.assertEquals(SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE, + e.getResult()); + } catch (Exception ex) { + Assert.fail("Should not reach here."); + } + } + + @Test + public void testRackAwarenessNotEnabledWithFallBack() throws SCMException{ + List healthyNodes = + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); + DatanodeDetails anchor = placementPolicy.chooseNode(healthyNodes); + DatanodeDetails randomNode = placementPolicy.chooseNode(healthyNodes); + // rack awareness is not enabled. + Assert.assertTrue(anchor.getNetworkLocation().equals( + randomNode.getNetworkLocation())); + + NetworkTopology topology = new NetworkTopologyImpl(new Configuration()); + DatanodeDetails nextNode = placementPolicy.chooseNodeBasedOnRackAwareness( + healthyNodes, new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + topology, anchor); + // RackAwareness should not be able to choose any node. + Assert.assertNull(nextNode); + + // PlacementPolicy should still be able to pick a set of 3 nodes. + int numOfNodes = HddsProtos.ReplicationFactor.THREE.getNumber(); + List results = placementPolicy + .getResultSet(numOfNodes, healthyNodes); + + Assert.assertEquals(numOfNodes, results.size()); + // All nodes are on same rack. + Assert.assertEquals(results.get(0).getNetworkLocation(), + results.get(1).getNetworkLocation()); + Assert.assertEquals(results.get(0).getNetworkLocation(), + results.get(2).getNetworkLocation()); + } + + private final static Node[] NODES = new NodeImpl[] { + new NodeImpl("h1", "/r1", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h2", "/r1", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h3", "/r1", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h4", "/r1", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h5", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h6", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h7", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h8", "/r2", NetConstants.NODE_COST_DEFAULT), + }; + + + private NetworkTopology createNetworkTopologyOnDifRacks() { + NetworkTopology topology = new NetworkTopologyImpl(new Configuration()); + for (Node n : NODES) { + topology.add(n); + } + return topology; + } + + private List overWriteLocationInNodes( + List datanodes) { + List results = new ArrayList<>(datanodes.size()); + for (int i = 0; i < datanodes.size(); i++) { + DatanodeDetails datanode = datanodes.get(i); + DatanodeDetails result = DatanodeDetails.newBuilder() + .setUuid(datanode.getUuidString()) + .setHostName(datanode.getHostName()) + .setIpAddress(datanode.getIpAddress()) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.STANDALONE)) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.RATIS)) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.REST)) + .setNetworkLocation(NODES[i].getNetworkLocation()).build(); + results.add(result); + } + return results; + } + + @Test + public void testHeavyNodeShouldBeExcluded() throws SCMException{ + List healthyNodes = + nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); + int nodesRequired = HddsProtos.ReplicationFactor.THREE.getNumber(); + // only minority of healthy NODES are heavily engaged in pipelines. 
+ int minorityHeavy = healthyNodes.size()/2 - 1; + List pickedNodes1 = placementPolicy.chooseDatanodes( + new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + nodesRequired, 0); + // modify node to pipeline mapping. + insertHeavyNodesIntoNodeManager(healthyNodes, minorityHeavy); + // NODES should be sufficient. + Assert.assertEquals(nodesRequired, pickedNodes1.size()); + // make sure pipeline placement policy won't select duplicated NODES. + Assert.assertTrue(checkDuplicateNodesUUID(pickedNodes1)); + + // majority of healthy NODES are heavily engaged in pipelines. + int majorityHeavy = healthyNodes.size()/2 + 2; + insertHeavyNodesIntoNodeManager(healthyNodes, majorityHeavy); + boolean thrown = false; + List pickedNodes2 = null; + try { + pickedNodes2 = placementPolicy.chooseDatanodes( + new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), + nodesRequired, 0); + } catch (SCMException e) { + Assert.assertFalse(thrown); + thrown = true; + } + // NODES should NOT be sufficient and exception should be thrown. + Assert.assertNull(pickedNodes2); + Assert.assertTrue(thrown); + } + + private boolean checkDuplicateNodesUUID(List nodes) { + HashSet uuids = nodes.stream(). + map(DatanodeDetails::getUuid). + collect(Collectors.toCollection(HashSet::new)); + return uuids.size() == nodes.size(); + } + + private Set mockPipelineIDs(int count) { + Set pipelineIDs = new HashSet<>(count); + for (int i = 0; i < count; i++) { + pipelineIDs.add(PipelineID.randomId()); + } + return pipelineIDs; + } + + private void insertHeavyNodesIntoNodeManager( + List nodes, int heavyNodeCount) throws SCMException{ + if (nodes == null) { + throw new SCMException("", + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + + int considerHeavyCount = + conf.getInt( + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, + ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT_DEFAULT) + 1; + + Node2PipelineMap mockMap = new Node2PipelineMap(); + for (DatanodeDetails node : nodes) { + // mock heavy node + if (heavyNodeCount > 0) { + mockMap.insertNewDatanode( + node.getUuid(), mockPipelineIDs(considerHeavyCount)); + heavyNodeCount--; + } else { + mockMap.insertNewDatanode(node.getUuid(), mockPipelineIDs(1)); + } + } + nodeManager.setNode2PipelineMap(mockMap); + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java index f5e3f842448c..86d54b399186 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java @@ -22,8 +22,10 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.MockNodeManager; import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.junit.Assert; import org.junit.Assume; import org.junit.Before; import org.junit.Test; @@ -33,9 +35,11 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; import static org.apache.commons.collections.CollectionUtils.intersection; import static org.junit.Assert.assertEquals; 
+import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; /** @@ -49,13 +53,18 @@ public class TestRatisPipelineProvider { private NodeManager nodeManager; private PipelineProvider provider; private PipelineStateManager stateManager; + private OzoneConfiguration conf; + private int maxPipelinePerNode = 2; @Before public void init() throws Exception { nodeManager = new MockNodeManager(true, 10); + conf = new OzoneConfiguration(); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, + maxPipelinePerNode); stateManager = new PipelineStateManager(); provider = new MockRatisPipelineProvider(nodeManager, - stateManager, new OzoneConfiguration()); + stateManager, conf); } private void createPipelineAndAssertions( @@ -64,6 +73,7 @@ private void createPipelineAndAssertions( assertPipelineProperties(pipeline, factor, REPLICATION_TYPE, Pipeline.PipelineState.ALLOCATED); stateManager.addPipeline(pipeline); + nodeManager.addPipeline(pipeline); Pipeline pipeline1 = provider.create(factor); assertPipelineProperties(pipeline1, factor, REPLICATION_TYPE, @@ -71,8 +81,12 @@ private void createPipelineAndAssertions( // New pipeline should not overlap with the previous created pipeline assertTrue( intersection(pipeline.getNodes(), pipeline1.getNodes()) - .isEmpty()); + .size() < factor.getNumber()); + if (pipeline.getFactor() == HddsProtos.ReplicationFactor.THREE) { + assertNotEquals(pipeline.getNodeSet(), pipeline1.getNodeSet()); + } stateManager.addPipeline(pipeline1); + nodeManager.addPipeline(pipeline1); } @Test @@ -88,10 +102,9 @@ public void testCreatePipelineWithFactor() throws IOException { assertPipelineProperties(pipeline1, factor, REPLICATION_TYPE, Pipeline.PipelineState.ALLOCATED); stateManager.addPipeline(pipeline1); - // New pipeline should overlap with the previous created pipeline, - // and one datanode should overlap between the two types. - assertEquals(1, - intersection(pipeline.getNodes(), pipeline1.getNodes()).size()); + // With enough pipeline quota on datanodes, they should not share + // the same set of datanodes. + assertNotEquals(pipeline.getNodeSet(), pipeline1.getNodeSet()); } @Test @@ -126,6 +139,20 @@ public void testCreatePipelineWithNodes() { Pipeline.PipelineState.OPEN); } + @Test + public void testCreateFactorTHREEPipelineWithSameDatanodes() { + List healthyNodes = nodeManager + .getNodes(HddsProtos.NodeState.HEALTHY).stream() + .limit(3).collect(Collectors.toList()); + + Pipeline pipeline1 = provider.create( + HddsProtos.ReplicationFactor.THREE, healthyNodes); + Pipeline pipeline2 = provider.create( + HddsProtos.ReplicationFactor.THREE, healthyNodes); + + Assert.assertEquals(pipeline1.getNodeSet(), pipeline2.getNodeSet()); + } + @Test public void testCreatePipelinesDnExclude() throws IOException { List healthyNodes = @@ -137,7 +164,10 @@ public void testCreatePipelinesDnExclude() throws IOException { // Use up first 3 DNs for an open pipeline. List dns = healthyNodes.subList(0, 3); - addPipeline(dns, factor, Pipeline.PipelineState.OPEN, REPLICATION_TYPE); + for (int i = 0; i < maxPipelinePerNode; i++) { + // Saturate pipeline counts on the first 3 DNs. + addPipeline(dns, factor, Pipeline.PipelineState.OPEN, REPLICATION_TYPE); + } Set membersOfOpenPipelines = new HashSet<>(dns); // Use up next 3 DNs for a closed pipeline.
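A side note on the cap this test now runs under: an illustrative worked example (not part of the patch) of the per-datanode branch of exceedPipelineNumberLimit shown earlier in this diff, using the values this test class configures.

    // Values used by TestRatisPipelineProvider; integer division as in the patch.
    int maxPipelinePerDatanode = 2;   // ozone.datanode.pipeline.limit, set in init()
    int healthyNodeCount = 10;        // MockNodeManager(true, 10)
    int factor = 3;                   // ReplicationFactor.THREE
    int threshold = maxPipelinePerDatanode * healthyNodeCount / factor; // 20 / 3 = 6
    // A new factor-THREE pipeline is rejected once the number of
    // non-CLOSED factor-THREE pipelines exceeds this threshold.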
@@ -149,11 +179,14 @@ public void testCreatePipelinesDnExclude() throws IOException { Pipeline pipeline = provider.create(factor); assertPipelineProperties(pipeline, factor, REPLICATION_TYPE, Pipeline.PipelineState.ALLOCATED); + nodeManager.addPipeline(pipeline); + stateManager.addPipeline(pipeline); + List nodes = pipeline.getNodes(); assertTrue( - "nodes of new pipeline cannot be from open pipelines", + "nodes of new pipeline cannot be all from open pipelines", nodes.stream().noneMatch(membersOfOpenPipelines::contains)); assertTrue( @@ -184,5 +217,6 @@ private void addPipeline( .build(); stateManager.addPipeline(openPipeline); + nodeManager.addPipeline(openPipeline); } } \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java index 81723e1afcbe..ab2315326bc3 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hdds.scm.pipeline; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT; import static org.apache.hadoop.test.MetricsAsserts.getLongCounter; import static org.apache.hadoop.test.MetricsAsserts.getMetrics; @@ -27,6 +29,8 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; @@ -34,12 +38,13 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.MockNodeManager; -import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager; import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher .PipelineReportFromDatanode; -import org.apache.hadoop.hdds.scm.TestUtils; import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.test.GenericTestUtils; @@ -59,6 +64,7 @@ public class TestSCMPipelineManager { @Before public void setUp() throws Exception { conf = new OzoneConfiguration(); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 1); testDir = GenericTestUtils .getTestDir(TestSCMPipelineManager.class.getSimpleName()); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); @@ -84,8 +90,10 @@ public void testPipelineReload() throws IOException { pipelineManager.getStateManager(), conf); pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS, mockRatisProvider); + int pipelineNum = 5; + Set pipelines = new HashSet<>(); - for (int i = 0; i < 5; i++) { + for (int i = 0; i < pipelineNum; i++) { Pipeline pipeline = pipelineManager .createPipeline(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE); @@ -108,6 +116,13 @@ public void testPipelineReload() throws IOException { 
pipelineManager.getPipelines(HddsProtos.ReplicationType.RATIS); Assert.assertEquals(pipelines, new HashSet<>(pipelineList)); + Set> originalPipelines = pipelineList.stream() + .map(Pipeline::getNodeSet).collect(Collectors.toSet()); + Set> reloadedPipelineHash = pipelines.stream() + .map(Pipeline::getNodeSet).collect(Collectors.toSet()); + Assert.assertEquals(reloadedPipelineHash, originalPipelines); + Assert.assertEquals(pipelineNum, originalPipelines.size()); + // clean up for (Pipeline pipeline : pipelines) { pipelineManager.finalizeAndDestroyPipeline(pipeline, false); @@ -253,10 +268,10 @@ public void testPipelineCreationFailedMetric() throws Exception { pipelineManager.createPipeline(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE); Assert.fail(); - } catch (InsufficientDatanodesException idEx) { - Assert.assertEquals( - "Cannot create pipeline of factor 3 using 1 nodes.", - idEx.getMessage()); + } catch (SCMException ioe) { + // pipeline creation failed this time. + Assert.assertEquals(SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE, + ioe.getResult()); } metrics = getMetrics( @@ -266,8 +281,8 @@ public void testPipelineCreationFailedMetric() throws Exception { numPipelineCreateFailed = getLongCounter( "NumPipelineCreationFailed", metrics); - Assert.assertTrue(numPipelineCreateFailed == 0); - + Assert.assertTrue(numPipelineCreateFailed == 1); + // clean up pipelineManager.close(); } @@ -371,6 +386,47 @@ public void testPipelineOpenOnlyWhenLeaderReported() throws Exception { pipelineManager.close(); } + @Test + public void testScrubPipeline() throws IOException { + // No timeout for pipeline scrubber. + conf.setTimeDuration( + OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT, -1, + TimeUnit.MILLISECONDS); + + EventQueue eventQueue = new EventQueue(); + final SCMPipelineManager pipelineManager = + new SCMPipelineManager(conf, nodeManager, eventQueue); + final PipelineProvider ratisProvider = new MockRatisPipelineProvider( + nodeManager, pipelineManager.getStateManager(), conf, eventQueue, + false); + + pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS, + ratisProvider); + + Pipeline pipeline = pipelineManager + .createPipeline(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE); + // At this point, pipeline is not at OPEN stage. + Assert.assertEquals(pipeline.getPipelineState(), + Pipeline.PipelineState.ALLOCATED); + + // pipeline should be seen in pipelineManager as ALLOCATED. + Assert.assertTrue(pipelineManager + .getPipelines(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE, + Pipeline.PipelineState.ALLOCATED).contains(pipeline)); + pipelineManager.scrubPipeline(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE); + + // pipeline should be scrubbed. 
+ Assert.assertFalse(pipelineManager + .getPipelines(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE, + Pipeline.PipelineState.ALLOCATED).contains(pipeline)); + + pipelineManager.close(); + } + private void sendPipelineReport(DatanodeDetails dn, Pipeline pipeline, PipelineReportHandler pipelineReportHandler, boolean isLeader, EventQueue eventQueue) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSafeModeHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSafeModeHandler.java index 5572e9aa1ef4..4ad3456e7ba8 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSafeModeHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSafeModeHandler.java @@ -25,8 +25,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ReplicationManager; import org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration; -import org.apache.hadoop.hdds.scm.container.placement.algorithms - .ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; import org.apache.hadoop.hdds.scm.pipeline.SCMPipelineManager; @@ -70,7 +69,7 @@ public void setup(boolean enabled) { .thenReturn(new HashSet<>()); replicationManager = new ReplicationManager( new ReplicationManagerConfiguration(), - containerManager, Mockito.mock(ContainerPlacementPolicy.class), + containerManager, Mockito.mock(PlacementPolicy.class), eventQueue, new LockManager(configuration)); scmPipelineManager = Mockito.mock(SCMPipelineManager.class); blockManager = Mockito.mock(BlockManagerImpl.class); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java index 0ecff3f541a7..069844360a81 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.PipelineReportsProto; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; @@ -165,6 +166,16 @@ public Set getPipelines(DatanodeDetails dnId) { throw new UnsupportedOperationException("Not yet implemented"); } + /** + * Get the count of pipelines a datanode is associated with. + * @param dn DatanodeDetails + * @return The number of pipelines + */ + @Override + public int getPipelinesCount(DatanodeDetails dn) { + throw new UnsupportedOperationException("Not yet implemented"); + } + /** * Add pipeline information in the NodeManager.
* @param pipeline - Pipeline to be added @@ -327,4 +338,9 @@ public DatanodeDetails getNodeByUuid(String address) { public List getNodesByAddress(String address) { return new LinkedList<>(); } + + @Override + public NetworkTopology getClusterNetworkTopologyMap() { + return null; + } } diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java index 8c0fb0370834..20a35a8c4e21 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.cli.container.ContainerCommands; +import org.apache.hadoop.hdds.scm.cli.datanode.DatanodeCommands; import org.apache.hadoop.hdds.scm.cli.pipeline.PipelineCommands; import org.apache.hadoop.hdds.scm.client.ContainerOperationClient; import org.apache.hadoop.hdds.scm.client.ScmClient; @@ -59,6 +60,7 @@ SafeModeCommands.class, ContainerCommands.class, PipelineCommands.class, + DatanodeCommands.class, TopologySubcommand.class, ReplicationManagerCommands.class }, diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DatanodeCommands.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DatanodeCommands.java new file mode 100644 index 000000000000..94763d356cb2 --- /dev/null +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DatanodeCommands.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.cli.datanode; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.cli.MissingSubcommandException; +import org.apache.hadoop.hdds.scm.cli.SCMCLI; +import picocli.CommandLine; + +import java.util.concurrent.Callable; + +/** + * Subcommand for datanode related operations. + */ +@CommandLine.Command( + name = "datanode", + description = "Datanode specific operations", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class, + subcommands = { + ListInfoSubcommand.class + }) +public class DatanodeCommands implements Callable { + + @CommandLine.ParentCommand + private SCMCLI parent; + + public SCMCLI getParent() { + return parent; + } + + @Override + public Void call() throws Exception { + throw new MissingSubcommandException( + this.parent.getCmd().getSubcommands().get("datanode")); + } +} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java new file mode 100644 index 000000000000..e4060b3dadaf --- /dev/null +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/ListInfoSubcommand.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.cli.datanode; + +import com.google.common.base.Strings; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import picocli.CommandLine; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Handler of list datanodes info command. + */ +@CommandLine.Command( + name = "list", + description = "List info of datanodes", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class ListInfoSubcommand implements Callable { + + @CommandLine.ParentCommand + private DatanodeCommands parent; + + @CommandLine.Option(names = {"--ip"}, + description = "Show info by ip address.", + defaultValue = "", + required = false) + private String ipaddress; + + @CommandLine.Option(names = {"--id"}, + description = "Show info by datanode UUID.", + defaultValue = "", + required = false) + private String uuid; + + private List pipelines; + + + @Override + public Void call() throws Exception { + try (ScmClient scmClient = parent.getParent().createScmClient()) { + pipelines = scmClient.listPipelines(); + if (Strings.isNullOrEmpty(ipaddress) && Strings.isNullOrEmpty(uuid)) { + getAllNodes(scmClient).stream().forEach(p -> printDatanodeInfo(p)); + } else { + Stream allNodes = getAllNodes(scmClient).stream(); + if (!Strings.isNullOrEmpty(ipaddress)) { + allNodes = allNodes.filter(p -> p.getIpAddress() + .compareToIgnoreCase(ipaddress) == 0); + } + if (!Strings.isNullOrEmpty(uuid)) { + allNodes = allNodes.filter(p -> p.getUuid().toString().equals(uuid)); + } + allNodes.forEach(p -> printDatanodeInfo(p)); + } + return null; + } + } + + private List getAllNodes(ScmClient scmClient) + throws IOException { + List nodes = scmClient.queryNode( + HddsProtos.NodeState.HEALTHY, HddsProtos.QueryScope.CLUSTER, ""); + + return nodes.stream() + .map(p -> DatanodeDetails.getFromProtoBuf(p.getNodeID())) + .collect(Collectors.toList()); + } + + private void printDatanodeInfo(DatanodeDetails datanode) { + StringBuilder pipelineListInfo = new StringBuilder(); + int relatedPipelineNum = 0; + if (!pipelines.isEmpty()) { + List relatedPipelines = pipelines.stream().filter( + p -> p.getNodes().contains(datanode)).collect(Collectors.toList()); + if (relatedPipelines.isEmpty()) { + pipelineListInfo.append("No related pipelines" + + " or the node is not in Healthy state."); + } else { + relatedPipelineNum = relatedPipelines.size(); + relatedPipelines.stream().forEach( + p -> pipelineListInfo.append(p.getId().getId().toString()) + .append("/").append(p.getFactor().toString()).append("/") + .append(p.getType().toString()).append("/") + .append(p.getPipelineState().toString()).append("/") + .append(datanode.getUuid().equals(p.getLeaderId()) ? 
+ "Leader" : "Follower") + .append(System.getProperty("line.separator"))); + } + } else { + pipelineListInfo.append("No pipelines in cluster."); + } + System.out.println("Datanode: " + datanode.getUuid().toString() + + " (" + datanode.getNetworkLocation() + "/" + datanode.getIpAddress() + + "/" + datanode.getHostName() + "/" + relatedPipelineNum + + " pipelines) \n" + "Related pipelines: \n" + pipelineListInfo); + } +} \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/package-info.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/package-info.java similarity index 86% rename from hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/package-info.java rename to hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/package-info.java index dac4752fe66f..f4c45cfa0e3e 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/package-info.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/package-info.java @@ -15,7 +15,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hdds.scm.container.placement.algorithms; + /** - Contains container placement policy interface definition. - **/ \ No newline at end of file + * Contains all of the datanode related scm commands. + */ +package org.apache.hadoop.hdds.scm.cli.datanode; \ No newline at end of file diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java new file mode 100644 index 000000000000..e0bdddb7797e --- /dev/null +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/CreatePipelineSubcommand.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.cli.pipeline; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import picocli.CommandLine; + +import java.util.concurrent.Callable; + +/** + * Handler of createPipeline command. + */ +@CommandLine.Command( + name = "create", + description = "create pipeline", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class CreatePipelineSubcommand implements Callable { + @CommandLine.ParentCommand + private PipelineCommands parent; + + @CommandLine.Option( + names = {"-t", "--replicationType"}, + description = "Replication type (STAND_ALONE, RATIS)", + defaultValue = "STAND_ALONE" + ) + private HddsProtos.ReplicationType type + = HddsProtos.ReplicationType.STAND_ALONE; + + @CommandLine.Option( + names = {"-f", "--replicationFactor"}, + description = "Replication factor (ONE, THREE)", + defaultValue = "ONE" + ) + private HddsProtos.ReplicationFactor factor + = HddsProtos.ReplicationFactor.ONE; + + @Override + public Void call() throws Exception { + if (type == HddsProtos.ReplicationType.CHAINED) { + throw new IllegalArgumentException(type.name() + + " is not supported yet."); + } + try (ScmClient scmClient = parent.getParent().createScmClient()) { + Pipeline pipeline = scmClient.createReplicationPipeline( + type, + factor, + HddsProtos.NodePool.getDefaultInstance()); + + if (pipeline != null) { + System.out.println(pipeline.getId().toString() + + " is created. 
Factor: " + pipeline.getFactor() + + ", Type: " + pipeline.getType()); + } + return null; + } + } +} \ No newline at end of file diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/ListPipelinesSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/ListPipelinesSubcommand.java index 8b3b1b3b8cbd..f8ac1d498759 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/ListPipelinesSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/ListPipelinesSubcommand.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.cli.pipeline; +import com.google.common.base.Strings; import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.scm.client.ScmClient; import picocli.CommandLine; @@ -53,13 +54,13 @@ public class ListPipelinesSubcommand implements Callable { @Override public Void call() throws Exception { try (ScmClient scmClient = parent.getParent().createScmClient()) { - if (isNullOrEmpty(factor) && isNullOrEmpty(state)) { + if (Strings.isNullOrEmpty(factor) && Strings.isNullOrEmpty(state)) { scmClient.listPipelines().forEach(System.out::println); } else { scmClient.listPipelines().stream() - .filter(p -> ((isNullOrEmpty(factor) || + .filter(p -> ((Strings.isNullOrEmpty(factor) || (p.getFactor().toString().compareToIgnoreCase(factor) == 0)) - && (isNullOrEmpty(state) || + && (Strings.isNullOrEmpty(state) || (p.getPipelineState().toString().compareToIgnoreCase(state) == 0)))) .forEach(System.out::println); @@ -67,8 +68,4 @@ public Void call() throws Exception { return null; } } - - protected static boolean isNullOrEmpty(String str) { - return ((str == null) || str.trim().isEmpty()); - } } diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/PipelineCommands.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/PipelineCommands.java index 948a51a8eb58..0bdbc19fe44e 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/PipelineCommands.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/pipeline/PipelineCommands.java @@ -37,6 +37,7 @@ ListPipelinesSubcommand.class, ActivatePipelineSubcommand.class, DeactivatePipelineSubcommand.class, + CreatePipelineSubcommand.class, ClosePipelineSubcommand.class }) public class PipelineCommands implements Callable { diff --git a/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-compose.yaml index 69611fa674ce..ccd131c7be15 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-compose.yaml +++ b/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-compose.yaml @@ -72,6 +72,34 @@ services: networks: net: ipv4_address: 10.5.0.7 + datanode_5: + image: apache/ozone-runner:${OZONE_RUNNER_VERSION} + privileged: true #required by the profiler + volumes: + - ../..:/opt/hadoop + ports: + - 9864 + - 9882 + command: ["/opt/hadoop/bin/ozone","datanode"] + env_file: + - ./docker-config + networks: + net: + ipv4_address: 10.5.0.8 + datanode_6: + image: apache/ozone-runner:${OZONE_RUNNER_VERSION} + privileged: true #required by the profiler + volumes: + - ../..:/opt/hadoop + ports: + - 9864 + - 9882 + command: ["/opt/hadoop/bin/ozone","datanode"] + env_file: + - ./docker-config + networks: + net: + ipv4_address: 10.5.0.9 om: image: apache/ozone-runner:${OZONE_RUNNER_VERSION} privileged: true #required by the profiler 
diff --git a/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot b/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot new file mode 100644 index 000000000000..ed1173d79994 --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +*** Settings *** +Documentation Smoketest ozone cluster startup +Library OperatingSystem +Library BuiltIn +Resource ../commonlib.robot + +*** Variables *** + + +*** Test Cases *** +Run list datanodes + ${output} = Execute ozone scmcli datanode list + Should contain ${output} Datanode: + Should contain ${output} Related pipelines: \ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot b/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot index 6a6f0b0eb782..f411e0c3af67 100644 --- a/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot +++ b/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot @@ -25,4 +25,9 @@ Resource ../commonlib.robot *** Test Cases *** Run list pipeline ${output} = Execute ozone scmcli pipeline list - Should contain ${output} Type:RATIS, Factor:ONE, State:OPEN \ No newline at end of file + Should contain ${output} Type: + Should contain ${output} Factor:ONE, State: + +Run create pipeline + ${output} = Execute ozone scmcli pipeline create + Should contain ${output} is created. 
Factor: ONE, Type: STAND_ALONE \ No newline at end of file diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java index 23d7833333c2..7a6143cd48d9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsHAURLs.java @@ -97,6 +97,7 @@ public void init() throws Exception { conf.setTimeDuration( OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, LEADER_ELECTION_TIMEOUT, TimeUnit.MILLISECONDS); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 3); OMStorage omStore = new OMStorage(conf); omStore.setClusterId(clusterId); @@ -106,6 +107,8 @@ public void init() throws Exception { // Start the cluster cluster = MiniOzoneCluster.newHABuilder(conf) + .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setClusterId(clusterId) .setScmId(scmId) .setOMServiceId(omServiceId) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java index 21fa7bdca8b2..aba9caed41d3 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java @@ -170,6 +170,7 @@ public void testPipelineCloseWithPipelineAction() throws Exception { pipelineActionHandler .onMessage(pipelineActionsFromDatanode, new EventQueue()); Thread.sleep(5000); + OzoneContainer ozoneContainer = cluster.getHddsDatanodes().get(0).getDatanodeStateMachine() .getContainer(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java index 3590e4380844..bd677db65f65 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineCreateAndDestroy.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.MiniOzoneCluster; @@ -36,6 +37,7 @@ import java.util.concurrent.TimeoutException; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; /** @@ -51,9 +53,12 @@ public class TestRatisPipelineCreateAndDestroy { public void init(int numDatanodes) throws Exception { conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, GenericTestUtils.getRandomizedTempPath()); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 2); + cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(numDatanodes) - .setHbInterval(1000) + .setTotalPipelineNumLimit(numDatanodes + 
numDatanodes/3) + .setHbInterval(2000) .setHbProcessorInterval(1000) .build(); cluster.waitForClusterToBeReady(); @@ -134,7 +139,9 @@ public void testPipelineCreationOnNodeRestart() throws Exception { } catch (IOException ioe) { // As now all datanodes are shutdown, they move to stale state, there // will be no sufficient datanodes to create the pipeline. - Assert.assertTrue(ioe instanceof InsufficientDatanodesException); + Assert.assertTrue(ioe instanceof SCMException); + Assert.assertEquals(SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE, + ((SCMException) ioe).getResult()); } // make sure pipelines is destroyed @@ -147,9 +154,14 @@ public void testPipelineCreationOnNodeRestart() throws Exception { for (Pipeline pipeline : pipelines) { pipelineManager.finalizeAndDestroyPipeline(pipeline, false); } - // make sure pipelines is created after node start - pipelineManager.triggerPipelineCreation(); - waitForPipelines(1); + + if (cluster.getStorageContainerManager() + .getScmNodeManager().getNodeCount(HddsProtos.NodeState.HEALTHY) >= + HddsProtos.ReplicationFactor.THREE.getNumber()) { + // make sure pipelines is created after node start + pipelineManager.triggerPipelineCreation(); + waitForPipelines(1); + } } private void waitForPipelines(int numPipelines) @@ -157,6 +169,6 @@ private void waitForPipelines(int numPipelines) GenericTestUtils.waitFor(() -> pipelineManager .getPipelines(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE, Pipeline.PipelineState.OPEN) - .size() == numPipelines, 100, 40000); + .size() >= numPipelines, 100, 40000); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMRestart.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMRestart.java index 459a67ae882a..baeee6a31fb8 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMRestart.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMRestart.java @@ -57,8 +57,11 @@ public class TestSCMRestart { @BeforeClass public static void init() throws Exception { conf = new OzoneConfiguration(); + int numOfNodes = 4; cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(4) + .setNumDatanodes(numOfNodes) + // allow only one FACTOR THREE pipeline. 
+ .setTotalPipelineNumLimit(numOfNodes + 1) .setHbInterval(1000) .setHbProcessorInterval(1000) .build(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java index 866d0b018bf0..39b67ac2aebc 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java @@ -39,6 +39,7 @@ import java.util.List; import java.util.concurrent.TimeoutException; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; import static org.junit.Assert.fail; /** @@ -64,6 +65,8 @@ public void setup(int numDatanodes) throws Exception { true); conf.set(HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT, "10s"); conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 50); + clusterBuilder = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(numDatanodes) .setHbInterval(1000) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java index de27d5a17d3e..0042363d862b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java @@ -251,6 +251,7 @@ abstract class Builder { protected static final int DEFAULT_HB_INTERVAL_MS = 1000; protected static final int DEFAULT_HB_PROCESSOR_INTERVAL_MS = 100; protected static final int ACTIVE_OMS_NOT_SET = -1; + protected static final int DEFAULT_PIPELIME_LIMIT = 3; protected final OzoneConfiguration conf; protected String path; @@ -278,6 +279,7 @@ abstract class Builder { protected int numOfDatanodes = 3; protected boolean startDataNodes = true; protected CertificateClient certClient; + protected int pipelineNumLimit = DEFAULT_PIPELIME_LIMIT; protected Builder(OzoneConfiguration conf) { this.conf = conf; @@ -364,6 +366,16 @@ public Builder setNumDatanodes(int val) { return this; } + /** + * Sets the total number of pipelines to create. + * @param val number of pipelines + * @return MiniOzoneCluster.Builder + */ + public Builder setTotalPipelineNumLimit(int val) { + pipelineNumLimit = val; + return this; + } + /** * Sets the number of HeartBeat Interval of Datanodes, the value should be * in MilliSeconds. diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index c2e196a9be8a..7758c3cd738d 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -575,6 +575,10 @@ protected void initializeConfiguration() throws IOException { streamBufferMaxSize.get(), streamBufferSizeUnit.get()); conf.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, blockSize.get(), streamBufferSizeUnit.get()); + // MiniOzoneCluster should have global pipeline upper limit. + conf.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT, + pipelineNumLimit >= DEFAULT_PIPELIME_LIMIT ? 
+ pipelineNumLimit : DEFAULT_PIPELIME_LIMIT); configureTrace(); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java index eadb520b7915..17c090d9a5a8 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java @@ -17,11 +17,10 @@ */ package org.apache.hadoop.ozone; -import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.client.ContainerOperationClient; @@ -30,7 +29,6 @@ import org.junit.BeforeClass; import org.junit.Test; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE; import static org.junit.Assert.assertEquals; /** @@ -47,8 +45,7 @@ public class TestContainerOperations { public static void setup() throws Exception { ozoneConf = new OzoneConfiguration(); ozoneConf.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class); - ozoneConf.setStorageSize(OZONE_SCM_CONTAINER_SIZE, 5, StorageUnit.GB); + SCMContainerPlacementCapacity.class, PlacementPolicy.class); cluster = MiniOzoneCluster.newBuilder(ozoneConf).setNumDatanodes(3).build(); storageClient = new ContainerOperationClient(ozoneConf); cluster.waitForClusterToBeReady(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerStateMachineIdempotency.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerStateMachineIdempotency.java index 548f9b600d86..b0b3fbf73bbb 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerStateMachineIdempotency.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerStateMachineIdempotency.java @@ -29,8 +29,7 @@ import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; -import org.apache.hadoop.hdds.scm.container.placement.algorithms. - ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms. SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.scm.protocolPB. 
@@ -59,7 +58,7 @@ public class TestContainerStateMachineIdempotency { public static void init() throws Exception { ozoneConfig = new OzoneConfiguration(); ozoneConfig.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class); + SCMContainerPlacementCapacity.class, PlacementPolicy.class); cluster = MiniOzoneCluster.newBuilder(ozoneConfig).setNumDatanodes(3).build(); cluster.waitForClusterToBeReady(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/Test2WayCommitInRatis.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/Test2WayCommitInRatis.java index c8bf36bd06dd..64ded12aabc2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/Test2WayCommitInRatis.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/Test2WayCommitInRatis.java @@ -83,6 +83,7 @@ private void startCluster(OzoneConfiguration conf) throws Exception { conf.setQuietMode(false); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java index 96226d86e07e..fa7783c93c15 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java @@ -84,6 +84,7 @@ public static void init() throws Exception { StorageUnit.MB); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java index e236b8539abc..1b6b7dc335f9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.XceiverClientManager; import org.apache.hadoop.hdds.scm.XceiverClientMetrics; import org.apache.hadoop.hdds.scm.XceiverClientRatis; @@ -91,9 +92,11 @@ public void init() throws Exception { conf.setQuietMode(false); conf.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, 4, StorageUnit.MB); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 3); + cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(7) - .setBlockSize(blockSize).setChunkSize(chunkSize) - .setStreamBufferFlushSize(flushSize) + .setTotalPipelineNumLimit(10).setBlockSize(blockSize) + .setChunkSize(chunkSize).setStreamBufferFlushSize(flushSize) .setStreamBufferMaxSize(maxFlushSize) 
.setStreamBufferSizeUnit(StorageUnit.BYTES).build(); cluster.waitForClusterToBeReady(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitWatcher.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitWatcher.java index 4a5f5289ad80..16f50c67b596 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitWatcher.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitWatcher.java @@ -97,6 +97,7 @@ public static void init() throws Exception { StorageUnit.MB); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java index 36f720b51322..6917ab27a7bb 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerReplicationEndToEnd.java @@ -57,6 +57,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; /** * Tests delete key operation with a slow follower in the datanode @@ -107,10 +108,12 @@ public static void init() throws Exception { 1000, TimeUnit.SECONDS); conf.setLong("hdds.scm.replication.thread.interval", containerReportInterval); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 2); conf.setQuietMode(false); cluster = - MiniOzoneCluster.newBuilder(conf).setNumDatanodes(4).setHbInterval(200) + MiniOzoneCluster.newBuilder(conf).setNumDatanodes(4) + .setTotalPipelineNumLimit(6).setHbInterval(200) .build(); cluster.waitForClusterToBeReady(); cluster.getStorageContainerManager().getReplicationManager().start(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java index 6bef0600e719..ba5ed9f03d59 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java @@ -53,8 +53,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_COMMAND_STATUS_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.*; /** * Tests the containerStateMachine failure handling. 
@@ -83,7 +82,7 @@ public void setup() throws Exception { baseDir.mkdirs(); conf.setBoolean(HDDS_BLOCK_TOKEN_ENABLED, true); - // conf.setBoolean(OZONE_SECURITY_ENABLED_KEY, true); + // conf.setBoolean(OZONE_SECURITY_ENABLED_KEY, true); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 200, TimeUnit.MILLISECONDS); conf.setTimeDuration(HDDS_COMMAND_STATUS_REPORT_INTERVAL, 200, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithSlowFollower.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithSlowFollower.java index cf96a7486d8d..da2d656acaad 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithSlowFollower.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithSlowFollower.java @@ -83,6 +83,7 @@ public class TestDeleteWithSlowFollower { private static String bucketName; private static String path; private static XceiverClientManager xceiverClientManager; + private static final int FACTOR_THREE_PIPELINE_COUNT = 1; /** * Create a MiniDFSCluster for testing. @@ -120,10 +121,13 @@ public static void init() throws Exception { 1000, TimeUnit.SECONDS); conf.setTimeDuration(OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 1, TimeUnit.SECONDS); - conf.setQuietMode(false); - cluster = - MiniOzoneCluster.newBuilder(conf).setNumDatanodes(3).setHbInterval(100) + int numOfDatanodes = 3; + cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(numOfDatanodes) + .setTotalPipelineNumLimit( + numOfDatanodes + FACTOR_THREE_PIPELINE_COUNT) + .setHbInterval(100) .build(); cluster.waitForClusterToBeReady(); //the easiest way to create an open container is creating a key @@ -185,7 +189,7 @@ public void testDeleteKeyWithSlowFollower() throws Exception { cluster.getStorageContainerManager().getPipelineManager() .getPipelines(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.THREE); - Assert.assertTrue(pipelineList.size() == 1); + Assert.assertTrue(pipelineList.size() >= FACTOR_THREE_PIPELINE_COUNT); Pipeline pipeline = pipelineList.get(0); for (HddsDatanodeService dn : cluster.getHddsDatanodes()) { if (ContainerTestHelper.isRatisFollower(dn, pipeline)) { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java index 21b51e755482..a84e16eea8f3 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; @@ -98,6 +99,7 @@ private void init() throws Exception { 1, TimeUnit.SECONDS); conf.setBoolean( OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_KEY, true); + conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 2); conf.setQuietMode(false); conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, @@ -106,7 +108,7 @@ private void 
init() throws Exception { Collections.singleton(HddsUtils.getHostName(conf))).get(0), "/rack1"); cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(10).build(); + .setNumDatanodes(10).setTotalPipelineNumLimit(15).build(); cluster.waitForClusterToBeReady(); //the easiest way to create an open container is creating a key client = OzoneClientFactory.getClient(conf); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestHybridPipelineOnDatanode.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestHybridPipelineOnDatanode.java index 47a716e85ca2..75af061fe984 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestHybridPipelineOnDatanode.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestHybridPipelineOnDatanode.java @@ -67,7 +67,8 @@ public class TestHybridPipelineOnDatanode { @BeforeClass public static void init() throws Exception { conf = new OzoneConfiguration(); - cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(3).build(); + cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(3) + .setTotalPipelineNumLimit(5).build(); cluster.waitForClusterToBeReady(); //the easiest way to create an open container is creating a key client = OzoneClientFactory.getClient(conf); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestKeyInputStream.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestKeyInputStream.java index bb7b6f0374a0..589208346061 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestKeyInputStream.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestKeyInputStream.java @@ -82,6 +82,7 @@ public static void init() throws Exception { StorageUnit.MB); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(3) + .setTotalPipelineNumLimit(5) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java index 281ad4ab297b..7d31499d1db9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java @@ -49,8 +49,7 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.*; /** * Tests MultiBlock Writes with Dn failures by Ozone Client. 
@@ -88,10 +87,13 @@ private void startCluster(int datanodes) throws Exception { conf.setTimeDuration( OzoneConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, 1, TimeUnit.SECONDS); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 2); conf.setQuietMode(false); cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(datanodes).build(); + .setNumDatanodes(datanodes) + .setTotalPipelineNumLimit(0) + .build(); cluster.waitForClusterToBeReady(); //the easiest way to create an open container is creating a key client = OzoneClientFactory.getClient(conf); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientRetriesOnException.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientRetriesOnException.java index 5aefcc8f3f52..1bf2ea3ff91e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientRetriesOnException.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientRetriesOnException.java @@ -93,6 +93,7 @@ public void init() throws Exception { conf.setQuietMode(false); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java index c188e80758d7..03f98b631aa1 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java @@ -165,6 +165,7 @@ public abstract class TestOzoneRpcClientAbstract { static void startCluster(OzoneConfiguration conf) throws Exception { cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(3) + .setTotalPipelineNumLimit(10) .setScmId(scmId) .build(); cluster.waitForClusterToBeReady(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java index d2007ceb910e..95dcedccc313 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java @@ -58,6 +58,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; /** * This class verifies the watchForCommit Handling by xceiverClient. 
@@ -95,10 +96,12 @@ private void startCluster(OzoneConfiguration conf) throws Exception { conf.setTimeDuration( OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY, 1, TimeUnit.SECONDS); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 5); conf.setQuietMode(false); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java index c65ce954a542..869f0910265c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerByPipeline.java @@ -53,6 +53,8 @@ import java.util.List; import java.util.concurrent.TimeoutException; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; + /** * Test container closing. */ @@ -75,8 +77,11 @@ public class TestCloseContainerByPipeline { public static void init() throws Exception { conf = new OzoneConfiguration(); conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, "1"); + conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 2); + cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(10) + .setTotalPipelineNumLimit(15) .build(); cluster.waitForClusterToBeReady(); //the easiest way to create an open container is creating a key diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scrubber/TestDataScrubber.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scrubber/TestDataScrubber.java index 108d20431d7a..dd29189d0e40 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scrubber/TestDataScrubber.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scrubber/TestDataScrubber.java @@ -30,7 +30,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerReplica; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB; import org.apache.hadoop.ozone.HddsDatanodeService; @@ -84,7 +84,7 @@ public static void init() throws Exception { ozoneConfig = new OzoneConfiguration(); ozoneConfig.set(HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL, "1s"); ozoneConfig.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY, - SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class); + SCMContainerPlacementCapacity.class, PlacementPolicy.class); ozoneConfig.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false); cluster = MiniOzoneCluster.newBuilder(ozoneConfig).setNumDatanodes(1) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDataValidate.java 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDataValidate.java index 7857e1f9599f..0c875c983d35 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDataValidate.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDataValidate.java @@ -40,7 +40,7 @@ public abstract class TestDataValidate { */ static void startCluster(OzoneConfiguration conf) throws Exception { cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(5).build(); + .setNumDatanodes(5).setTotalPipelineNumLimit(8).build(); cluster.waitForClusterToBeReady(); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestFreonWithPipelineDestroy.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestFreonWithPipelineDestroy.java index 80ef246f12b1..5150fd4d8f2f 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestFreonWithPipelineDestroy.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestFreonWithPipelineDestroy.java @@ -55,6 +55,7 @@ public static void init() throws Exception { .setHbProcessorInterval(1000) .setHbInterval(1000) .setNumDatanodes(3) + .setTotalPipelineNumLimit(8) .build(); cluster.waitForClusterToBeReady(); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java index 077886c2debf..6058fad61d2b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java @@ -35,6 +35,8 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.commons.lang3.RandomStringUtils; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD; @@ -79,6 +81,7 @@ public void init() throws Exception { conf.setBoolean(OZONE_ACL_ENABLED, true); conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2); conf.set(OZONE_ADMINISTRATORS, OZONE_ADMINISTRATORS_WILDCARD); + conf.setInt(OZONE_SCM_RATIS_PIPELINE_LIMIT, 10); cluster = MiniOzoneCluster.newBuilder(conf) .setClusterId(clusterId) .setScmId(scmId) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestContainerSmallFile.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestContainerSmallFile.java index 9d187ff7d561..3b6da67a338c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestContainerSmallFile.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestContainerSmallFile.java @@ -24,7 +24,7 @@ import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.scm.ScmConfigKeys; 
 import org.apache.hadoop.hdds.scm.protocolPB
@@ -60,7 +60,7 @@ public class TestContainerSmallFile {
   public static void init() throws Exception {
     ozoneConfig = new OzoneConfiguration();
     ozoneConfig.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY,
-        SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class);
+        SCMContainerPlacementCapacity.class, PlacementPolicy.class);
     cluster = MiniOzoneCluster.newBuilder(ozoneConfig).setNumDatanodes(3)
         .build();
     cluster.waitForClusterToBeReady();
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestGetCommittedBlockLengthAndPutKey.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestGetCommittedBlockLengthAndPutKey.java
index cdd3f8a87578..0cb9329fda92 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestGetCommittedBlockLengthAndPutKey.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestGetCommittedBlockLengthAndPutKey.java
@@ -31,8 +31,7 @@
 import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
 import org.apache.hadoop.hdds.scm.container.common.helpers.
     StorageContainerException;
-import org.apache.hadoop.hdds.scm.container.placement.algorithms.
-    ContainerPlacementPolicy;
+import org.apache.hadoop.hdds.scm.PlacementPolicy;
 import org.apache.hadoop.hdds.scm.container.placement.algorithms.
     SCMContainerPlacementCapacity;
 import org.apache.hadoop.hdds.scm.protocolPB.
@@ -65,7 +64,7 @@ public class TestGetCommittedBlockLengthAndPutKey {
   public static void init() throws Exception {
     ozoneConfig = new OzoneConfiguration();
     ozoneConfig.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY,
-        SCMContainerPlacementCapacity.class, ContainerPlacementPolicy.class);
+        SCMContainerPlacementCapacity.class, PlacementPolicy.class);
     cluster =
         MiniOzoneCluster.newBuilder(ozoneConfig).setNumDatanodes(3).build();
     cluster.waitForClusterToBeReady();
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMContainerPlacementPolicyMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMContainerPlacementPolicyMetrics.java
index 1f9d9fbf01ba..4025acac439f 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMContainerPlacementPolicyMetrics.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMContainerPlacementPolicyMetrics.java
@@ -85,6 +85,7 @@ public void setup() throws Exception {
         "/rack1");
     cluster = MiniOzoneCluster.newBuilder(conf)
         .setNumDatanodes(4)
+        .setTotalPipelineNumLimit(10)
         .build();
     cluster.waitForClusterToBeReady();
     metrics = getMetrics(SCMContainerPlacementMetrics.class.getSimpleName());
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java
index 841fd85f3666..14660d67c7b7 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java
@@ -16,6 +16,7 @@
  */
 package org.apache.hadoop.ozone.scm.node;
 
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -78,9 +79,11 @@ public void setUp() throws Exception {
     conf.setTimeDuration(HDDS_NODE_REPORT_INTERVAL, 1, SECONDS);
     conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, SECONDS);
     conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, SECONDS);
+    conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 3);
 
     cluster = MiniOzoneCluster.newBuilder(conf)
         .setNumDatanodes(numOfDatanodes)
+        .setTotalPipelineNumLimit(numOfDatanodes + numOfDatanodes/2)
         .build();
     cluster.waitForClusterToBeReady();
     scmClient = new ContainerOperationClient(conf);
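
Note on the test changes above: they all follow the same setup pattern, sketched below for reference. The sketch is illustrative only (the test class and method names are hypothetical, not part of this patch); it combines the new ozone.datanode.pipeline.limit key with the MiniOzoneCluster builder's setTotalPipelineNumLimit option introduced here, so that pipeline creation in the mini cluster stays bounded.

// Illustrative sketch of the recurring test-setup pattern; class/method
// names are hypothetical and not part of this patch.
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.ozone.MiniOzoneCluster;

public class TestPipelineLimitsExample {
  private static MiniOzoneCluster cluster;

  static void startCluster() throws Exception {
    OzoneConfiguration conf = new OzoneConfiguration();
    // Each datanode may participate in at most 3 Ratis pipelines.
    conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 3);
    cluster = MiniOzoneCluster.newBuilder(conf)
        .setNumDatanodes(3)
        // Cap the total number of pipelines SCM may create in this mini cluster.
        .setTotalPipelineNumLimit(5)
        .build();
    cluster.waitForClusterToBeReady();
  }
}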