-
Notifications
You must be signed in to change notification settings - Fork 7.3k
ZOOKEEPER-4508: Fix endless-loop in ClientCnxn.SendThread.run when all zk servers down #1847
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -164,6 +164,8 @@ static class AuthData { | |||||||||
|
|
||||||||||
| private int readTimeout; | ||||||||||
|
|
||||||||||
| private int expirationTimeout; | ||||||||||
|
|
||||||||||
| private final int sessionTimeout; | ||||||||||
|
|
||||||||||
| private final ZKWatchManager watchManager; | ||||||||||
|
|
@@ -418,6 +420,7 @@ public ClientCnxn( | |||||||||
|
|
||||||||||
| this.connectTimeout = sessionTimeout / hostProvider.size(); | ||||||||||
| this.readTimeout = sessionTimeout * 2 / 3; | ||||||||||
| this.expirationTimeout = sessionTimeout * 4 / 3; | ||||||||||
|
|
||||||||||
| this.sendThread = new SendThread(clientCnxnSocket); | ||||||||||
| this.eventThread = new EventThread(); | ||||||||||
|
|
@@ -814,6 +817,12 @@ public String toString() { | |||||||||
|
|
||||||||||
| } | ||||||||||
|
|
||||||||||
| private static class ConnectionTimeoutException extends IOException { | ||||||||||
| public ConnectionTimeoutException(String message) { | ||||||||||
| super(message); | ||||||||||
| } | ||||||||||
| } | ||||||||||
|
|
||||||||||
| private static class SessionTimeoutException extends IOException { | ||||||||||
|
|
||||||||||
| private static final long serialVersionUID = 824482094072071178L; | ||||||||||
|
|
@@ -1192,7 +1201,6 @@ public void run() { | |||||||||
| startConnect(serverAddress); | ||||||||||
| // Update now to start the connection timer right after we make a connection attempt | ||||||||||
| clientCnxnSocket.updateNow(); | ||||||||||
| clientCnxnSocket.updateLastSendAndHeard(); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| if (state.isConnected()) { | ||||||||||
|
|
@@ -1233,13 +1241,20 @@ public void run() { | |||||||||
| to = connectTimeout - clientCnxnSocket.getIdleRecv(); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| if (to <= 0) { | ||||||||||
| if (expirationTimeout - clientCnxnSocket.getIdleRecv() <= 0) { | ||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. THREAD_SAFETY_VIOLATION: Read/Write race. Non-private method ℹ️ Expand to see all @sonatype-lift commands. You can reply with the following commands. For example, reply with `@sonatype-lift ignoreall` to leave out all findings.
Note: When talking to LiftBot, you need to refresh the page to see its response. |
||||||||||
| String warnInfo = String.format( | ||||||||||
| "Client session timed out, have not heard from server in %dms for session id 0x%s", | ||||||||||
| clientCnxnSocket.getIdleRecv(), | ||||||||||
| Long.toHexString(sessionId)); | ||||||||||
| LOG.warn(warnInfo); | ||||||||||
| changeZkState(States.CLOSED); | ||||||||||
kezhuw marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
| throw new SessionTimeoutException(warnInfo); | ||||||||||
| } else if (to <= 0) { | ||||||||||
| String warnInfo = String.format( | ||||||||||
| "Client connection timed out, have not heard from server in %dms for session id 0x%s", | ||||||||||
| clientCnxnSocket.getIdleRecv(), | ||||||||||
| Long.toHexString(sessionId)); | ||||||||||
| throw new ConnectionTimeoutException(warnInfo); | ||||||||||
| } | ||||||||||
| if (state.isConnected()) { | ||||||||||
| //1000(1 second) is to prevent race condition missing to send the second ping | ||||||||||
|
|
@@ -1284,7 +1299,7 @@ public void run() { | |||||||||
| } else { | ||||||||||
| LOG.warn( | ||||||||||
| "Session 0x{} for server {}, Closing socket connection. " | ||||||||||
| + "Attempting reconnect except it is a SessionExpiredException.", | ||||||||||
| + "Attempting reconnect except it is a SessionExpiredException or SessionTimeoutException.", | ||||||||||
| Long.toHexString(getSessionId()), | ||||||||||
| serverAddress, | ||||||||||
| e); | ||||||||||
|
|
@@ -1305,7 +1320,12 @@ public void run() { | |||||||||
| if (state.isAlive()) { | ||||||||||
| eventThread.queueEvent(new WatchedEvent(Event.EventType.None, Event.KeeperState.Disconnected, null)); | ||||||||||
| } | ||||||||||
| eventThread.queueEvent(new WatchedEvent(Event.EventType.None, Event.KeeperState.Closed, null)); | ||||||||||
| if (closing) { | ||||||||||
| eventThread.queueEvent(new WatchedEvent(Event.EventType.None, KeeperState.Closed, null)); | ||||||||||
| } else if (state == States.CLOSED) { | ||||||||||
| eventThread.queueEvent(new WatchedEvent(Event.EventType.None, KeeperState.Expired, null)); | ||||||||||
| } | ||||||||||
| eventThread.queueEventOfDeath(); | ||||||||||
|
|
||||||||||
| if (zooKeeperSaslClient != null) { | ||||||||||
| zooKeeperSaslClient.shutdown(); | ||||||||||
|
|
@@ -1322,7 +1342,6 @@ private void cleanAndNotifyState() { | |||||||||
| eventThread.queueEvent(new WatchedEvent(Event.EventType.None, Event.KeeperState.Disconnected, null)); | ||||||||||
| } | ||||||||||
| clientCnxnSocket.updateNow(); | ||||||||||
| clientCnxnSocket.updateLastSendAndHeard(); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| private void pingRwServer() throws RWServerFoundException { | ||||||||||
|
|
@@ -1422,6 +1441,7 @@ void onConnected( | |||||||||
| } | ||||||||||
|
|
||||||||||
| readTimeout = negotiatedSessionTimeout * 2 / 3; | ||||||||||
| expirationTimeout = negotiatedSessionTimeout * 4 / 3; | ||||||||||
| connectTimeout = negotiatedSessionTimeout / hostProvider.size(); | ||||||||||
| hostProvider.onConnected(); | ||||||||||
| sessionId = _sessionId; | ||||||||||
|
|
||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are you removing `updateLastSendAndHeard` (both here and in the other place)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Semantically, it is because we have not heard anything from the server (`lastHeard`) at either of these two places. If we update `lastHeard` in these two places, then `getIdleRecv` will be reset to 0 on every reconnect, so a `SessionTimeoutException` will never be thrown.

For `lastSend`, I think it does not matter, as it is only used for `ping` in the `CONNECTED` state, after a successful `ConnectRequest`, which will itself call `updateLastSend`. I don't see a reason to call `updateLastSend` in these two places.