From 72c85a88eeeab2e494b35040afd1c67e934481d2 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Wed, 25 Nov 2020 15:28:54 -0800 Subject: [PATCH 1/6] HDDS-4484. Use RatisServerImpl isLeader instead of periodic leader update logic in OM and isLeaderReady for read requests. --- .../hadoop/ozone/om/KeyDeletingService.java | 2 +- .../apache/hadoop/ozone/om/OzoneManager.java | 17 +- .../om/ratis/OzoneManagerRatisServer.java | 161 +++--------------- .../om/ratis/OzoneManagerStateMachine.java | 1 - ...ManagerProtocolServerSideTranslatorPB.java | 51 ++++-- 5 files changed, 72 insertions(+), 160 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyDeletingService.java index 466a55f18300..418d7a88274b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyDeletingService.java @@ -129,7 +129,7 @@ private boolean shouldRun() { // OzoneManager can be null for testing return true; } - return ozoneManager.isLeader(); + return ozoneManager.isLeaderReady(); } private boolean isRatisEnabled() { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 6229beffa265..23244c579cb8 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -3505,12 +3505,6 @@ public long getMaxUserVolumeCount() { /** * Checks the Leader status of OM Ratis Server. - * Note that this status has a small window of error. It should not be used - * to determine the absolute leader status. - * If it is the leader, the role status is cached till Ratis server - * notifies of leader change. If it is not leader, the role information is - * retrieved through by submitting a GroupInfoRequest to Ratis server. - *

* If ratis is not enabled, then it always returns true. * * @return Return true if this node is the leader, false otherwsie. @@ -3519,6 +3513,17 @@ public boolean isLeader() { return isRatisEnabled ? omRatisServer.isLeader() : true; } + /** + * Return true, if the current OM node is leader and in ready state to + * process the requests. + * + * If ratis is not enabled, then it always returns true. + * @return + */ + public boolean isLeaderReady() { + return isRatisEnabled ? omRatisServer.isLeaderReady() : true; + } + /** * Return if Ratis is enabled or not. * diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java index 467764b2f6a0..9ab4b92aeeac 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java @@ -28,7 +28,6 @@ import java.util.List; import java.util.Optional; import java.util.UUID; -import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -60,10 +59,7 @@ import org.apache.ratis.grpc.GrpcConfigKeys; import org.apache.ratis.netty.NettyConfigKeys; import org.apache.ratis.proto.RaftProtos.RaftPeerRole; -import org.apache.ratis.proto.RaftProtos.RoleInfoProto; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.GroupInfoReply; -import org.apache.ratis.protocol.GroupInfoRequest; import org.apache.ratis.protocol.exceptions.LeaderNotReadyException; import org.apache.ratis.protocol.exceptions.NotLeaderException; import org.apache.ratis.protocol.exceptions.StateMachineException; @@ -78,8 +74,9 @@ import org.apache.ratis.rpc.SupportedRpcType; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; +import org.apache.ratis.server.impl.RaftServerImpl; +import org.apache.ratis.server.impl.RaftServerProxy; import org.apache.ratis.server.protocol.TermIndex; -import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.util.LifeCycle; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.StringUtils; @@ -106,20 +103,6 @@ public final class OzoneManagerRatisServer { private final OzoneManager ozoneManager; private final OzoneManagerStateMachine omStateMachine; - private final ClientId clientId = ClientId.randomId(); - - private final ScheduledExecutorService scheduledRoleChecker; - private long roleCheckInitialDelayMs = 1000; // 1 second default - private long roleCheckIntervalMs; - private ReentrantReadWriteLock roleCheckLock = new ReentrantReadWriteLock(); - private Optional cachedPeerRole = Optional.empty(); - private Optional cachedLeaderPeerId = Optional.empty(); - - private static final AtomicLong CALL_ID_COUNTER = new AtomicLong(); - - private static long nextCallId() { - return CALL_ID_COUNTER.getAndIncrement() & Long.MAX_VALUE; - } /** * Submit request to Ratis server. @@ -301,20 +284,6 @@ private OzoneManagerRatisServer(ConfigurationSource conf, .setProperties(serverProperties) .setStateMachine(omStateMachine) .build(); - - // Run a scheduler to check and update the server role on the leader - // periodically - this.scheduledRoleChecker = Executors.newSingleThreadScheduledExecutor(); - this.scheduledRoleChecker.scheduleWithFixedDelay(new Runnable() { - @Override - public void run() { - // Run this check only on the leader OM - if (cachedPeerRole.isPresent() && - cachedPeerRole.get() == RaftPeerRole.LEADER) { - updateServerRole(); - } - } - }, roleCheckInitialDelayMs, roleCheckIntervalMs, TimeUnit.MILLISECONDS); } /** @@ -556,19 +525,6 @@ private RaftProperties newRaftProperties(ConfigurationSource conf) { RaftServerConfigKeys.Rpc.setSlownessTimeout(properties, nodeFailureTimeout); - TimeUnit roleCheckIntervalUnit = - OMConfigKeys.OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_DEFAULT - .getUnit(); - long roleCheckIntervalDuration = conf.getTimeDuration( - OMConfigKeys.OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_KEY, - OMConfigKeys.OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_DEFAULT - .getDuration(), nodeFailureTimeoutUnit); - this.roleCheckIntervalMs = TimeDuration.valueOf( - roleCheckIntervalDuration, roleCheckIntervalUnit) - .toLong(TimeUnit.MILLISECONDS); - this.roleCheckInitialDelayMs = leaderElectionMinTimeout - .toLong(TimeUnit.MILLISECONDS); - // Set auto trigger snapshot. We don't need to configure auto trigger // threshold in OM, as last applied index is flushed during double buffer // flush automatically. (But added this property internally, so that this @@ -590,108 +546,45 @@ private RaftProperties newRaftProperties(ConfigurationSource conf) { return properties; } - /** - * Check the cached leader status. - * @return true if cached role is Leader, false otherwise. - */ - private boolean checkCachedPeerRoleIsLeader() { - this.roleCheckLock.readLock().lock(); - try { - if (cachedPeerRole.isPresent() && - cachedPeerRole.get() == RaftPeerRole.LEADER) { - return true; - } - return false; - } finally { - this.roleCheckLock.readLock().unlock(); - } - } - /** * Check if the current OM node is the leader node. * @return true if Leader, false otherwise. */ public boolean isLeader() { - if (checkCachedPeerRoleIsLeader()) { - return true; - } - - // Get the server role from ratis server and update the cached values. - updateServerRole(); - - // After updating the server role, check and return if leader or not. - return checkCachedPeerRoleIsLeader(); - } - - /** - * Get the suggested leader peer id. - * @return RaftPeerId of the suggested leader node. - */ - public Optional getCachedLeaderPeerId() { - this.roleCheckLock.readLock().lock(); - try { - return cachedLeaderPeerId; - } finally { - this.roleCheckLock.readLock().unlock(); - } - } - - /** - * Get the gorup info (peer role and leader peer id) from Ratis server and - * update the OM server role. - */ - public void updateServerRole() { + Preconditions.checkState(server instanceof RaftServerProxy); + RaftServerImpl serverImpl = null; try { - GroupInfoReply groupInfo = getGroupInfo(); - RoleInfoProto roleInfoProto = groupInfo.getRoleInfoProto(); - RaftPeerRole thisNodeRole = roleInfoProto.getRole(); - - if (thisNodeRole.equals(RaftPeerRole.LEADER)) { - setServerRole(thisNodeRole, raftPeerId); - - } else if (thisNodeRole.equals(RaftPeerRole.FOLLOWER)) { - ByteString leaderNodeId = roleInfoProto.getFollowerInfo() - .getLeaderInfo().getId().getId(); - // There may be a chance, here we get leaderNodeId as null. For - // example, in 3 node OM Ratis, if 2 OM nodes are down, there will - // be no leader. - RaftPeerId leaderPeerId = null; - if (leaderNodeId != null && !leaderNodeId.isEmpty()) { - leaderPeerId = RaftPeerId.valueOf(leaderNodeId); - } - - setServerRole(thisNodeRole, leaderPeerId); - - } else { - setServerRole(thisNodeRole, null); - + serverImpl = ((RaftServerProxy) server).getImpl(raftGroupId); + if (serverImpl != null) { + return serverImpl.isLeader(); } - } catch (IOException e) { - LOG.error("Failed to retrieve RaftPeerRole. Setting cached role to " + - "{} and resetting leader info.", RaftPeerRole.UNRECOGNIZED, e); - setServerRole(null, null); + } catch (IOException ioe) { + // In this case we return not a leader. + LOG.error("Fail to get RaftServer impl and therefore it's not clear " + + "whether it's leader. ", ioe); } + return false; } /** - * Set the current server role and the leader peer id. + * Return true, if the current OM node is leader and in ready state to + * process the requests. + * @return */ - private void setServerRole(RaftPeerRole currentRole, - RaftPeerId leaderPeerId) { - this.roleCheckLock.writeLock().lock(); + public boolean isLeaderReady() { + Preconditions.checkState(server instanceof RaftServerProxy); + RaftServerImpl serverImpl = null; try { - this.cachedPeerRole = Optional.ofNullable(currentRole); - this.cachedLeaderPeerId = Optional.ofNullable(leaderPeerId); - } finally { - this.roleCheckLock.writeLock().unlock(); + serverImpl = ((RaftServerProxy) server).getImpl(raftGroupId); + if (serverImpl != null) { + return serverImpl.isLeaderReady(); + } + } catch (IOException ioe) { + // In this case we return not a leader. + LOG.error("Fail to get RaftServer impl and therefore it's not clear " + + "whether it's leader. ", ioe); } - } - - private GroupInfoReply getGroupInfo() throws IOException { - GroupInfoRequest groupInfoRequest = new GroupInfoRequest(clientId, - raftPeerId, raftGroupId, nextCallId()); - GroupInfoReply groupInfo = server.getGroupInfo(groupInfoRequest); - return groupInfo; + return false; } public int getServerPort() { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java index aaf94e9b8c56..183fe461cbcd 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java @@ -380,7 +380,6 @@ public CompletableFuture notifyInstallSnapshotFromLeader( @Override public void notifyNotLeader(Collection pendingEntries) throws IOException { - omRatisServer.updateServerRole(); } @Override diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java index 57b8fac6d0d1..98eebf65b1eb 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hdds.utils.ProtocolMessageMetrics; import org.apache.hadoop.ozone.OmUtils; import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException; import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException; import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB; import org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer; @@ -128,15 +129,19 @@ private OMResponse processRequest(OMRequest request) throws return submitReadRequestToOM(request); } else { if (omRatisServer.isLeader()) { - try { - OMClientRequest omClientRequest = - OzoneManagerRatisUtils.createClientRequest(request); - request = omClientRequest.preExecute(ozoneManager); - } catch (IOException ex) { - // As some of the preExecute returns error. So handle here. - return createErrorResponse(request, ex); + if (omRatisServer.isLeaderReady()) { + try { + OMClientRequest omClientRequest = + OzoneManagerRatisUtils.createClientRequest(request); + request = omClientRequest.preExecute(ozoneManager); + } catch (IOException ex) { + // As some of the preExecute returns error. So handle here. + return createErrorResponse(request, ex); + } + return submitRequestToRatis(request); + } else { + throw createLeaderNotReadyException(); } - return submitRequestToRatis(request); } else { // throw not leader exception. This is being done, so to avoid // unnecessary execution of preExecute on follower OM's. This @@ -186,7 +191,11 @@ private OMResponse submitReadRequestToOM(OMRequest request) throws ServiceException { // Check if this OM is the leader. if (omRatisServer.isLeader()) { - return handler.handleReadRequest(request); + if (omRatisServer.isLeaderReady()) { + return handler.handleReadRequest(request); + } else { + throw createLeaderNotReadyException(); + } } else { throw createNotLeaderException(); } @@ -194,16 +203,12 @@ private OMResponse submitReadRequestToOM(OMRequest request) private ServiceException createNotLeaderException() { RaftPeerId raftPeerId = omRatisServer.getRaftPeerId(); - Optional leaderRaftPeerId = omRatisServer - .getCachedLeaderPeerId(); - OMNotLeaderException notLeaderException; - if (leaderRaftPeerId.isPresent()) { - notLeaderException = new OMNotLeaderException( - raftPeerId, leaderRaftPeerId.get()); - } else { - notLeaderException = new OMNotLeaderException(raftPeerId); - } + // TODO: Set suggest leaderID. Right now, client is not using suggest + // leaderID. Need to fix this. + + OMNotLeaderException notLeaderException = + new OMNotLeaderException(raftPeerId); if (LOG.isDebugEnabled()) { LOG.debug(notLeaderException.getMessage()); @@ -212,6 +217,16 @@ private ServiceException createNotLeaderException() { return new ServiceException(notLeaderException); } + private ServiceException createLeaderNotReadyException() { + RaftPeerId raftPeerId = omRatisServer.getRaftPeerId(); + + OMLeaderNotReadyException leaderNotReadyException = + new OMLeaderNotReadyException(raftPeerId.toString() + " is Leader " + + "but not ready to process request"); + + return new ServiceException(leaderNotReadyException); + } + /** * Submits request directly to OM. */ From 83fccd3e00724a69b5660747fbbcc589fd8e8f7a Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Wed, 25 Nov 2020 15:29:25 -0800 Subject: [PATCH 2/6] remove config --- .../common/src/main/resources/ozone-default.xml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index d8402f7b9df6..d2421b1a0e40 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -1643,17 +1643,6 @@ - - ozone.om.ratis.server.role.check.interval - 15s - OZONE, OM, RATIS, MANAGEMENT - The interval between OM leader performing a role - check on its ratis server. Ratis server informs OM if it - loses the leader role. The scheduled check is an secondary - check to ensure that the leader role is updated periodically - . - - ozone.om.ratis.snapshot.dir From 1e6146af8502f456dc30d8483f83536218655084 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 30 Nov 2020 11:18:59 -0800 Subject: [PATCH 3/6] merge leader and isLeaderReady to single API, instead of depending on ratis change until RATIS-1181. --- .../hadoop/ozone/MiniOzoneHAClusterImpl.java | 2 +- .../hadoop/ozone/TestMiniOzoneHACluster.java | 2 +- .../recon/TestReconWithOzoneManagerHA.java | 2 +- .../apache/hadoop/ozone/om/OzoneManager.java | 5 +- .../om/ratis/OzoneManagerRatisServer.java | 44 ++++++--------- ...ManagerProtocolServerSideTranslatorPB.java | 56 ++++++++++--------- 6 files changed, 51 insertions(+), 60 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java index 6953594747bc..d16618a373d9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java @@ -171,7 +171,7 @@ public List getOzoneManagersList() { public OzoneManager getOMLeader() { OzoneManager res = null; for (OzoneManager ozoneManager : this.ozoneManagers) { - if (ozoneManager.isLeader()) { + if (ozoneManager.isLeaderReady()) { if (res != null) { // Found more than one leader // Return null, expect the caller to retry in a while diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneHACluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneHACluster.java index 96121afed966..051eb94d582e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneHACluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneHACluster.java @@ -107,6 +107,6 @@ public void testGetOMLeader() throws InterruptedException, TimeoutException { Assert.assertNotNull("Timed out waiting OM leader election to finish: " + "no leader or more than one leader.", ozoneManager); Assert.assertTrue("Should have gotten the leader!", - ozoneManager.get().isLeader()); + ozoneManager.get().isLeaderReady()); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java index be146fc9d49f..2ff79de07500 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java @@ -103,7 +103,7 @@ public void testReconGetsSnapshotFromLeader() throws Exception { Assert.assertNotNull("Timed out waiting OM leader election to finish: " + "no leader or more than one leader.", ozoneManager); Assert.assertTrue("Should have gotten the leader!", - ozoneManager.get().isLeader()); + ozoneManager.get().isLeaderReady()); OzoneManagerServiceProviderImpl impl = (OzoneManagerServiceProviderImpl) cluster.getReconServer().getOzoneManagerServiceProvider(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 23244c579cb8..35e00fd6696a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -234,6 +234,7 @@ import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.KEY_NOT_FOUND; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.TOKEN_ERROR_OTHER; import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.VOLUME_LOCK; +import static org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer.RaftServerStatus.LEADER_AND_READY; import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OzoneManagerService.newReflectiveBlockingService; import org.apache.hadoop.util.Time; @@ -3510,7 +3511,7 @@ public long getMaxUserVolumeCount() { * @return Return true if this node is the leader, false otherwsie. */ public boolean isLeader() { - return isRatisEnabled ? omRatisServer.isLeader() : true; + return isRatisEnabled ? omRatisServer.checkLeaderStatus() == LEADER_AND_READY : true; } /** @@ -3521,7 +3522,7 @@ public boolean isLeader() { * @return */ public boolean isLeaderReady() { - return isRatisEnabled ? omRatisServer.isLeaderReady() : true; + return isRatisEnabled ? omRatisServer.checkLeaderStatus() == LEADER_AND_READY : true; } /** diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java index 9ab4b92aeeac..8741b22467e1 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java @@ -26,12 +26,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Optional; import java.util.UUID; -import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.ReentrantReadWriteLock; import com.google.common.base.Preconditions; import org.apache.hadoop.hdds.conf.ConfigurationSource; @@ -546,45 +542,37 @@ private RaftProperties newRaftProperties(ConfigurationSource conf) { return properties; } - /** - * Check if the current OM node is the leader node. - * @return true if Leader, false otherwise. - */ - public boolean isLeader() { - Preconditions.checkState(server instanceof RaftServerProxy); - RaftServerImpl serverImpl = null; - try { - serverImpl = ((RaftServerProxy) server).getImpl(raftGroupId); - if (serverImpl != null) { - return serverImpl.isLeader(); - } - } catch (IOException ioe) { - // In this case we return not a leader. - LOG.error("Fail to get RaftServer impl and therefore it's not clear " + - "whether it's leader. ", ioe); - } - return false; + public enum RaftServerStatus { + NOT_LEADER, + LEADER_AND_NOTREADY, + LEADER_AND_READY; } /** - * Return true, if the current OM node is leader and in ready state to - * process the requests. - * @return + * Check Leader status and return the state of the RaftServer. + * + * @return RaftServerStatus. */ - public boolean isLeaderReady() { + public RaftServerStatus checkLeaderStatus() { Preconditions.checkState(server instanceof RaftServerProxy); RaftServerImpl serverImpl = null; try { serverImpl = ((RaftServerProxy) server).getImpl(raftGroupId); if (serverImpl != null) { - return serverImpl.isLeaderReady(); + if (!serverImpl.isLeader()) { + return RaftServerStatus.NOT_LEADER; + } else if (serverImpl.isLeaderReady()) { + return RaftServerStatus.LEADER_AND_READY; + } else { + return RaftServerStatus.LEADER_AND_NOTREADY; + } } } catch (IOException ioe) { // In this case we return not a leader. LOG.error("Fail to get RaftServer impl and therefore it's not clear " + "whether it's leader. ", ioe); } - return false; + return RaftServerStatus.NOT_LEADER; } public int getServerPort() { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java index 98eebf65b1eb..2e1cfe64ce00 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java @@ -31,6 +31,7 @@ import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB; import org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer; import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer; +import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer.RaftServerStatus; import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.om.request.OMClientRequest; import org.apache.hadoop.ozone.om.response.OMClientResponse; @@ -45,6 +46,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer.RaftServerStatus.LEADER_AND_READY; +import static org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer.RaftServerStatus.NOT_LEADER; + /** * This class is the server-side translator that forwards requests received on * {@link OzoneManagerProtocolPB} @@ -122,33 +126,25 @@ public OMResponse submitRequest(RpcController controller, private OMResponse processRequest(OMRequest request) throws ServiceException { - + RaftServerStatus raftServerStatus; if (isRatisEnabled) { // Check if the request is a read only request if (OmUtils.isReadOnly(request)) { return submitReadRequestToOM(request); } else { - if (omRatisServer.isLeader()) { - if (omRatisServer.isLeaderReady()) { - try { - OMClientRequest omClientRequest = - OzoneManagerRatisUtils.createClientRequest(request); - request = omClientRequest.preExecute(ozoneManager); - } catch (IOException ex) { - // As some of the preExecute returns error. So handle here. - return createErrorResponse(request, ex); - } - return submitRequestToRatis(request); - } else { - throw createLeaderNotReadyException(); + raftServerStatus = omRatisServer.checkLeaderStatus(); + if (raftServerStatus == LEADER_AND_READY) { + try { + OMClientRequest omClientRequest = + OzoneManagerRatisUtils.createClientRequest(request); + request = omClientRequest.preExecute(ozoneManager); + } catch (IOException ex) { + // As some of the preExecute returns error. So handle here. + return createErrorResponse(request, ex); } + return submitRequestToRatis(request); } else { - // throw not leader exception. This is being done, so to avoid - // unnecessary execution of preExecute on follower OM's. This - // will be helpful in the case like where we we reduce the - // chance of allocate blocks on follower OM's. Right now our - // leader status is updated every 1 second. - throw createNotLeaderException(); + throw createLeaderErrorException(raftServerStatus); } } } else { @@ -190,14 +186,11 @@ private OMResponse submitRequestToRatis(OMRequest request) private OMResponse submitReadRequestToOM(OMRequest request) throws ServiceException { // Check if this OM is the leader. - if (omRatisServer.isLeader()) { - if (omRatisServer.isLeaderReady()) { - return handler.handleReadRequest(request); - } else { - throw createLeaderNotReadyException(); - } + RaftServerStatus raftServerStatus = omRatisServer.checkLeaderStatus(); + if (raftServerStatus == LEADER_AND_READY) { + return handler.handleReadRequest(request); } else { - throw createNotLeaderException(); + throw createLeaderErrorException(raftServerStatus); } } @@ -217,6 +210,15 @@ private ServiceException createNotLeaderException() { return new ServiceException(notLeaderException); } + private ServiceException createLeaderErrorException(RaftServerStatus raftServerStatus) { + if (raftServerStatus == NOT_LEADER) { + return createNotLeaderException(); + } else { + return createLeaderNotReadyException(); + } + } + + private ServiceException createLeaderNotReadyException() { RaftPeerId raftPeerId = omRatisServer.getRaftPeerId(); From d7d43c35730ece7c55099e0c7df10237938578cf Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 30 Nov 2020 11:31:21 -0800 Subject: [PATCH 4/6] add javadoc --- .../hadoop/ozone/om/ratis/OzoneManagerRatisServer.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java index 8741b22467e1..970cc05cc0ff 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java @@ -542,9 +542,12 @@ private RaftProperties newRaftProperties(ConfigurationSource conf) { return properties; } + /** + * Defines RaftServer Status. + */ public enum RaftServerStatus { NOT_LEADER, - LEADER_AND_NOTREADY, + LEADER_AND_NOT_READY, LEADER_AND_READY; } @@ -555,7 +558,7 @@ public enum RaftServerStatus { */ public RaftServerStatus checkLeaderStatus() { Preconditions.checkState(server instanceof RaftServerProxy); - RaftServerImpl serverImpl = null; + RaftServerImpl serverImpl; try { serverImpl = ((RaftServerProxy) server).getImpl(raftGroupId); if (serverImpl != null) { @@ -564,7 +567,7 @@ public RaftServerStatus checkLeaderStatus() { } else if (serverImpl.isLeaderReady()) { return RaftServerStatus.LEADER_AND_READY; } else { - return RaftServerStatus.LEADER_AND_NOTREADY; + return RaftServerStatus.LEADER_AND_NOT_READY; } } } catch (IOException ioe) { From b9b04928649effb089c02813d6003561ce7394d9 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 30 Nov 2020 11:43:38 -0800 Subject: [PATCH 5/6] fix cs --- .../org/apache/hadoop/ozone/om/OzoneManager.java | 14 ++------------ .../ozone/om/ratis/OzoneManagerRatisServer.java | 1 - ...OzoneManagerProtocolServerSideTranslatorPB.java | 4 ++-- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 35e00fd6696a..cc5af0f11edd 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -3503,17 +3503,6 @@ public String getComponent() { public long getMaxUserVolumeCount() { return maxUserVolumeCount; } - - /** - * Checks the Leader status of OM Ratis Server. - * If ratis is not enabled, then it always returns true. - * - * @return Return true if this node is the leader, false otherwsie. - */ - public boolean isLeader() { - return isRatisEnabled ? omRatisServer.checkLeaderStatus() == LEADER_AND_READY : true; - } - /** * Return true, if the current OM node is leader and in ready state to * process the requests. @@ -3522,7 +3511,8 @@ public boolean isLeader() { * @return */ public boolean isLeaderReady() { - return isRatisEnabled ? omRatisServer.checkLeaderStatus() == LEADER_AND_READY : true; + return isRatisEnabled ? + omRatisServer.checkLeaderStatus() == LEADER_AND_READY : true; } /** diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java index 970cc05cc0ff..0f9e20b8d9fa 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java @@ -54,7 +54,6 @@ import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.grpc.GrpcConfigKeys; import org.apache.ratis.netty.NettyConfigKeys; -import org.apache.ratis.proto.RaftProtos.RaftPeerRole; import org.apache.ratis.protocol.ClientId; import org.apache.ratis.protocol.exceptions.LeaderNotReadyException; import org.apache.ratis.protocol.exceptions.NotLeaderException; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java index 2e1cfe64ce00..c118307fd28d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java @@ -17,7 +17,6 @@ package org.apache.hadoop.ozone.protocolPB; import java.io.IOException; -import java.util.Optional; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicLong; @@ -210,7 +209,8 @@ private ServiceException createNotLeaderException() { return new ServiceException(notLeaderException); } - private ServiceException createLeaderErrorException(RaftServerStatus raftServerStatus) { + private ServiceException createLeaderErrorException( + RaftServerStatus raftServerStatus) { if (raftServerStatus == NOT_LEADER) { return createNotLeaderException(); } else { From 0510f0d295459bac2a26044ca29455f52312b262 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 30 Nov 2020 13:05:35 -0800 Subject: [PATCH 6/6] fix test --- .../main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index f16679a681eb..a5f2d573f22f 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -168,13 +168,6 @@ private OMConfigKeys() { OZONE_OM_RATIS_SERVER_FAILURE_TIMEOUT_DURATION_DEFAULT = TimeDuration.valueOf(120, TimeUnit.SECONDS); - // OM Leader server role check interval - public static final String OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_KEY - = "ozone.om.ratis.server.role.check.interval"; - public static final TimeDuration - OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_DEFAULT - = TimeDuration.valueOf(15, TimeUnit.SECONDS); - // OM SnapshotProvider configurations public static final String OZONE_OM_RATIS_SNAPSHOT_DIR = "ozone.om.ratis.snapshot.dir";