From 14e85b45c3cb6764de0c39c56ef80931d63e2676 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Wed, 14 Apr 2021 18:47:28 +0530 Subject: [PATCH 1/7] RATIS-1356. NotifyInstallSnapshot during SetConfiguration has leader info missing. --- .../ratis/server/impl/RaftServerImpl.java | 38 ++++++++++++++++++- .../InstallSnapshotNotificationTests.java | 6 ++- .../apache/ratis/RaftExceptionBaseTest.java | 3 +- .../ratis/server/impl/MiniRaftCluster.java | 33 +++++++++++----- .../statemachine/RaftSnapshotBaseTest.java | 2 +- 5 files changed, 67 insertions(+), 15 deletions(-) diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java index 01046f9609..e8836ed4d7 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java @@ -512,6 +512,22 @@ GroupInfoReply getGroupInfo(GroupInfoRequest request) { getGroup(), getRoleInfoProto(), state.getStorage().getStorageDir().isHealthy()); } + private RoleInfoProto getRoleInfoProto(RaftPeer leaderPeerInfo) { + RaftPeerRole currentRole = role.getCurrentRole(); + RoleInfoProto.Builder roleInfo = RoleInfoProto.newBuilder() + .setSelf(getPeer().getRaftPeerProto()) + .setRole(currentRole) + .setRoleElapsedTimeMs(role.getRoleElapsedTimeMs()); + final Optional fs = role.getFollowerState(); + final ServerRpcProto leaderInfo = + ServerProtoUtils.toServerRpcProto(leaderPeerInfo, + fs.map(FollowerState::getLastRpcTime).map(Timestamp::elapsedTimeMs).orElse(0L)); + roleInfo.setFollowerInfo(FollowerInfoProto.newBuilder() + .setLeaderInfo(leaderInfo) + .setOutstandingOp(fs.map(FollowerState::getOutstandingOp).orElse(0))); + return roleInfo.build(); + } + RoleInfoProto getRoleInfoProto() { RaftPeerRole currentRole = role.getCurrentRole(); RoleInfoProto.Builder roleInfo = RoleInfoProto.newBuilder() @@ -1418,6 +1434,8 @@ private InstallSnapshotReplyProto installSnapshotImpl(InstallSnapshotRequestProt CodeInjectionForTesting.execute(INSTALL_SNAPSHOT, getId(), leaderId, request); + + assertLifeCycleState(LifeCycle.States.STARTING_OR_RUNNING); assertGroup(leaderId, leaderGroupId); @@ -1545,13 +1563,27 @@ private InstallSnapshotReplyProto notifyStateMachineToInstallSnapshot( return reply; } + Optional leaderPeerInfo = null; + if (request.hasLastRaftConfigurationLogEntryProto()) { + List peerList = request.getLastRaftConfigurationLogEntryProto().getConfigurationEntry() + .getPeersList(); + leaderPeerInfo = peerList.stream().filter(p -> RaftPeerId.valueOf(p.getId()).equals(leaderId)).findFirst(); + Preconditions.assertTrue(leaderPeerInfo.isPresent()); + } + + // For the cases where RaftConf is empty on newly started peer with + // empty peer list, we retrieve leader info from + // installSnapShotRequestProto. + RoleInfoProto roleInfoProto = + getRaftConf().getPeer(state.getLeaderId()) == null ? + getRoleInfoProto(ProtoUtils.toRaftPeer(leaderPeerInfo.get())) : + getRoleInfoProto(); // This is the first installSnapshot notify request for this term and // index. Notify the state machine to install the snapshot. LOG.info("{}: notifyInstallSnapshot: nextIndex is {} but the leader's first available index is {}.", getMemberId(), state.getLog().getNextIndex(), firstAvailableLogIndex); - try { - stateMachine.followerEvent().notifyInstallSnapshotFromLeader(getRoleInfoProto(), firstAvailableLogTermIndex) + stateMachine.followerEvent().notifyInstallSnapshotFromLeader(roleInfoProto, firstAvailableLogTermIndex) .whenComplete((reply, exception) -> { if (exception != null) { LOG.warn("{}: Failed to notify StateMachine to InstallSnapshot. Exception: {}", @@ -1586,6 +1618,8 @@ private InstallSnapshotReplyProto notifyStateMachineToInstallSnapshot( } } + + void submitUpdateCommitEvent() { role.getLeaderState().ifPresent(LeaderStateImpl::submitUpdateCommitEvent); } diff --git a/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotNotificationTests.java b/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotNotificationTests.java index c690ea1933..675ee55272 100644 --- a/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotNotificationTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotNotificationTests.java @@ -175,7 +175,8 @@ private void testAddNewFollowers(CLUSTER cluster) throws Exception { Assert.assertTrue(set); // add two more peers - final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(2, true); + final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(2, true, + true); // trigger setConfiguration cluster.setConfiguration(change.allPeersInNewConf); @@ -317,7 +318,8 @@ private void testInstallSnapshotNotificationCount(CLUSTER cluster) throws Except } // Add two more peers who will need snapshots from the leader. - final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(2, true); + final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(2, true, + true); // trigger setConfiguration cluster.setConfiguration(change.allPeersInNewConf); RaftServerTestUtil diff --git a/ratis-server/src/test/java/org/apache/ratis/RaftExceptionBaseTest.java b/ratis-server/src/test/java/org/apache/ratis/RaftExceptionBaseTest.java index 5a519976fa..09b057b9a5 100644 --- a/ratis-server/src/test/java/org/apache/ratis/RaftExceptionBaseTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/RaftExceptionBaseTest.java @@ -109,7 +109,8 @@ void runTestNotLeaderExceptionWithReconf(CLUSTER cluster) throws Exception { final RaftPeerId newLeader = RaftTestUtil.changeLeader(cluster, oldLeader); // add two more peers - MiniRaftCluster.PeerChanges change = cluster.addNewPeers(new String[]{"ss1", "ss2"}, true); + MiniRaftCluster.PeerChanges change = cluster.addNewPeers(new String[]{ + "ss1", "ss2"}, true, false); // trigger setConfiguration LOG.info("Start changing the configuration: {}", Arrays.asList(change.allPeersInNewConf)); try (final RaftClient c2 = cluster.createClient(newLeader)) { diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/MiniRaftCluster.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/MiniRaftCluster.java index 2d2c219e88..955bb42f58 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/MiniRaftCluster.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/MiniRaftCluster.java @@ -283,7 +283,8 @@ public RaftProperties getProperties() { public MiniRaftCluster initServers() { LOG.info("servers = " + servers); if (servers.isEmpty()) { - putNewServers(CollectionUtils.as(group.getPeers(), RaftPeer::getId), true); + putNewServers(CollectionUtils.as(group.getPeers(), RaftPeer::getId), + true, false); } return this; } @@ -296,10 +297,18 @@ public RaftServerProxy putNewServer(RaftPeerId id, RaftGroup group, boolean form } private Collection putNewServers( - Iterable peers, boolean format) { - return StreamSupport.stream(peers.spliterator(), false) - .map(id -> putNewServer(id, group, format)) - .collect(Collectors.toList()); + Iterable peers, boolean format, boolean emptyPeer) { + if (emptyPeer) { + RaftGroup raftGroup = RaftGroup.valueOf(group.getGroupId(), + Collections.EMPTY_LIST); + return StreamSupport.stream(peers.spliterator(), false) + .map(id -> putNewServer(id, raftGroup, format)) + .collect(Collectors.toList()); + } else { + return StreamSupport.stream(peers.spliterator(), false) + .map(id -> putNewServer(id, group, format)) + .collect(Collectors.toList()); + } } public void start() throws IOException { @@ -337,7 +346,7 @@ public void restart(boolean format) throws IOException { List idList = new ArrayList<>(servers.keySet()); servers.clear(); - putNewServers(idList, format); + putNewServers(idList, format, false); start(); } @@ -406,15 +415,21 @@ private static List toRaftPeers(Iterable servers) { public PeerChanges addNewPeers(int number, boolean startNewPeer) throws IOException { - return addNewPeers(generateIds(number, servers.size()), startNewPeer); + return addNewPeers(generateIds(number, servers.size()), startNewPeer, false); + } + + public PeerChanges addNewPeers(int number, boolean startNewPeer, + boolean emptyPeer) throws IOException { + return addNewPeers(generateIds(number, servers.size()), startNewPeer, emptyPeer); } - public PeerChanges addNewPeers(String[] ids, boolean startNewPeer) throws IOException { + public PeerChanges addNewPeers(String[] ids, boolean startNewPeer, + boolean emptyPeer) throws IOException { LOG.info("Add new peers {}", Arrays.asList(ids)); // create and add new RaftServers final Collection newServers = putNewServers( - CollectionUtils.as(Arrays.asList(ids), RaftPeerId::valueOf), true); + CollectionUtils.as(Arrays.asList(ids), RaftPeerId::valueOf), true, emptyPeer); startServers(newServers); if (!startNewPeer) { diff --git a/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java b/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java index 714ff68979..9837fe3783 100644 --- a/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java @@ -230,7 +230,7 @@ public void testBasicInstallSnapshot() throws Exception { // add two more peers String[] newPeers = new String[]{"s3", "s4"}; MiniRaftCluster.PeerChanges change = cluster.addNewPeers( - newPeers, true); + newPeers, true, false); // trigger setConfiguration cluster.setConfiguration(change.allPeersInNewConf); From a71d428451f6d40bbfe7e0dbeb4e2876bbced4a5 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Wed, 14 Apr 2021 21:16:30 +0530 Subject: [PATCH 2/7] fix unnecessary --- .../java/org/apache/ratis/server/impl/RaftServerImpl.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java index e8836ed4d7..9c1389affd 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java @@ -1434,8 +1434,6 @@ private InstallSnapshotReplyProto installSnapshotImpl(InstallSnapshotRequestProt CodeInjectionForTesting.execute(INSTALL_SNAPSHOT, getId(), leaderId, request); - - assertLifeCycleState(LifeCycle.States.STARTING_OR_RUNNING); assertGroup(leaderId, leaderGroupId); @@ -1618,8 +1616,6 @@ private InstallSnapshotReplyProto notifyStateMachineToInstallSnapshot( } } - - void submitUpdateCommitEvent() { role.getLeaderState().ifPresent(LeaderStateImpl::submitUpdateCommitEvent); } From d7ae85f8242695ddcee9b471659beb1320b2a345 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 19 Apr 2021 10:10:07 +0530 Subject: [PATCH 3/7] fix cs --- .../ratis/server/impl/RaftServerImpl.java | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java index 9c1389affd..ccead21dcd 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java @@ -513,19 +513,18 @@ GroupInfoReply getGroupInfo(GroupInfoRequest request) { } private RoleInfoProto getRoleInfoProto(RaftPeer leaderPeerInfo) { - RaftPeerRole currentRole = role.getCurrentRole(); - RoleInfoProto.Builder roleInfo = RoleInfoProto.newBuilder() - .setSelf(getPeer().getRaftPeerProto()) - .setRole(currentRole) - .setRoleElapsedTimeMs(role.getRoleElapsedTimeMs()); - final Optional fs = role.getFollowerState(); - final ServerRpcProto leaderInfo = - ServerProtoUtils.toServerRpcProto(leaderPeerInfo, - fs.map(FollowerState::getLastRpcTime).map(Timestamp::elapsedTimeMs).orElse(0L)); - roleInfo.setFollowerInfo(FollowerInfoProto.newBuilder() - .setLeaderInfo(leaderInfo) - .setOutstandingOp(fs.map(FollowerState::getOutstandingOp).orElse(0))); - return roleInfo.build(); + RaftPeerRole currentRole = role.getCurrentRole(); + RoleInfoProto.Builder roleInfo = RoleInfoProto.newBuilder() + .setSelf(getPeer().getRaftPeerProto()) + .setRole(currentRole) + .setRoleElapsedTimeMs(role.getRoleElapsedTimeMs()); + final Optional fs = role.getFollowerState(); + final ServerRpcProto leaderInfo = + ServerProtoUtils.toServerRpcProto(leaderPeerInfo, + fs.map(FollowerState::getLastRpcTime).map(Timestamp::elapsedTimeMs).orElse(0L)); + roleInfo.setFollowerInfo(FollowerInfoProto.newBuilder().setLeaderInfo(leaderInfo) + .setOutstandingOp(fs.map(FollowerState::getOutstandingOp).orElse(0))); + return roleInfo.build(); } RoleInfoProto getRoleInfoProto() { From 6be88e14f92f3d70e038e1e01b49a83b78a0a601 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 19 Apr 2021 12:13:53 +0530 Subject: [PATCH 4/7] Trigger CI From d5ee3c2022e3fc98741af9341cc807c9c2a0b1d7 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 19 Apr 2021 14:03:33 +0530 Subject: [PATCH 5/7] run test with prevote disable --- .../main/java/org/apache/ratis/server/RaftServerConfigKeys.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java index 9d5da7df62..a4d5d1c93a 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java @@ -557,7 +557,7 @@ static void setLeaderStepDownWaitTime(RaftProperties properties, TimeDuration no } String PRE_VOTE_KEY = PREFIX + ".pre-vote"; - boolean PRE_VOTE_DEFAULT = true; + boolean PRE_VOTE_DEFAULT = false; static boolean preVote(RaftProperties properties) { return getBoolean(properties::getBoolean, PRE_VOTE_KEY, PRE_VOTE_DEFAULT, getDefaultLog()); } From ab3eb728b713729f03a7672bc97f0eba78521f1c Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 19 Apr 2021 15:16:31 +0530 Subject: [PATCH 6/7] Revert "run test with prevote disable" This reverts commit d5ee3c2022e3fc98741af9341cc807c9c2a0b1d7. --- .../main/java/org/apache/ratis/server/RaftServerConfigKeys.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java index a4d5d1c93a..9d5da7df62 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java @@ -557,7 +557,7 @@ static void setLeaderStepDownWaitTime(RaftProperties properties, TimeDuration no } String PRE_VOTE_KEY = PREFIX + ".pre-vote"; - boolean PRE_VOTE_DEFAULT = false; + boolean PRE_VOTE_DEFAULT = true; static boolean preVote(RaftProperties properties) { return getBoolean(properties::getBoolean, PRE_VOTE_KEY, PRE_VOTE_DEFAULT, getDefaultLog()); } From 8f16dacb6aee3637970418aeac958dcfd9295c94 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 19 Apr 2021 15:22:06 +0530 Subject: [PATCH 7/7] add wait for for success check --- .../ratis/server/impl/RaftReconfigurationBaseTest.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftReconfigurationBaseTest.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftReconfigurationBaseTest.java index 6e7522435d..22b60daabf 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftReconfigurationBaseTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftReconfigurationBaseTest.java @@ -361,7 +361,8 @@ void runTestBootstrapReconf(int numNewPeer, boolean startNewPeer, CLUSTER cluste } FIVE_SECONDS.sleep(); LOG.info(cluster.printServers()); - assertSuccess(success); + + RaftTestUtil.waitFor(() -> success.get(), 300, 15000); final RaftLog leaderLog = cluster.getLeader().getRaftLog(); for (RaftPeer newPeer : c1.newPeers) { @@ -452,12 +453,6 @@ void runTestKillLeaderDuringReconf(CLUSTER cluster) throws Exception { } } - static void assertSuccess(final AtomicReference success) { - final String s = "success=" + success; - Assert.assertNotNull(s, success.get()); - Assert.assertTrue(s, success.get()); - } - /** * When a request's new configuration is the same with the current one, make * sure we return success immediately and no log entry is recorded.