From c05b654847543db42d04df24e849e00df7dbb9b5 Mon Sep 17 00:00:00 2001 From: Little-Wallace Date: Thu, 4 Nov 2021 14:14:41 +0800 Subject: [PATCH 1/6] fix backoff error Signed-off-by: Little-Wallace --- .../common/operation/RegionErrorHandler.java | 14 ++++--- .../region/AbstractRegionStoreClient.java | 13 ++++--- .../java/org/tikv/common/util/BackOffer.java | 2 + .../tikv/common/util/ConcreteBackOffer.java | 39 ++++++++++++------- 4 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java index 2e4ab56b041..c967179c977 100644 --- a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java +++ b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java @@ -224,15 +224,19 @@ private boolean onRegionEpochNotMatch(BackOffer backOffer, List c @Override public boolean handleRequestError(BackOffer backOffer, Exception e) { if (recv.onStoreUnreachable()) { - backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoTiKVRPC, e); + if (!backOffer.canRetryAfterSleep(BackOffFunction.BackOffFuncType.BoTiKVRPC)) { + regionManager.onRequestFail(recv.getRegion()); + throw new GrpcException("retry is exhausted.", e); + } return true; } logger.warn("request failed because of: " + e.getMessage()); - backOffer.doBackOff( - BackOffFunction.BackOffFuncType.BoTiKVRPC, - new GrpcException( - "send tikv request error: " + e.getMessage() + ", try next peer later", e)); + if (!backOffer.canRetryAfterSleep(BackOffFunction.BackOffFuncType.BoTiKVRPC)) { + regionManager.onRequestFail(recv.getRegion()); + throw new GrpcException( + "send tikv request error: " + e.getMessage() + ", try next peer later", e); + } // TiKV maybe down, so do not retry in `callWithRetry` // should re-fetch the new leader from PD and send request to it return false; diff --git a/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java b/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java index c47bb854746..b28b08da375 100644 --- a/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java +++ b/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java @@ -143,8 +143,8 @@ public boolean onStoreUnreachable() { // so that we can // reduce the latency cost by fail requests. if (targetStore.canForwardFirst()) { - if (conf.getEnableGrpcForward() && retryForwardTimes <= region.getFollowerList().size()) { - return retryOtherStoreByProxyForward(); + if (retryOtherStoreByProxyForward()) { + return true; } if (retryOtherStoreLeader()) { return true; @@ -153,12 +153,10 @@ public boolean onStoreUnreachable() { if (retryOtherStoreLeader()) { return true; } - if (conf.getEnableGrpcForward() && retryForwardTimes <= region.getFollowerList().size()) { - return retryOtherStoreByProxyForward(); + if (retryOtherStoreByProxyForward()) { + return true; } - return true; } - logger.warn( String.format( "retry time exceed for region[%d], invalid this region[%d]", @@ -262,6 +260,9 @@ private void updateClientStub() { } private boolean retryOtherStoreByProxyForward() { + if (!conf.getEnableGrpcForward() && retryForwardTimes > region.getFollowerList().size()) { + return false; + } TiStore proxyStore = switchProxyStore(); if (proxyStore == null) { logger.warn( diff --git a/src/main/java/org/tikv/common/util/BackOffer.java b/src/main/java/org/tikv/common/util/BackOffer.java index 951700008f2..5e150094b0e 100644 --- a/src/main/java/org/tikv/common/util/BackOffer.java +++ b/src/main/java/org/tikv/common/util/BackOffer.java @@ -34,6 +34,8 @@ public interface BackOffer { */ void doBackOff(BackOffFunction.BackOffFuncType funcType, Exception err); + boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType); + /** * BackoffWithMaxSleep sleeps a while base on the backoffType and records the error message and * never sleep more than maxSleepMs for each sleep. diff --git a/src/main/java/org/tikv/common/util/ConcreteBackOffer.java b/src/main/java/org/tikv/common/util/ConcreteBackOffer.java index 9950e5a0288..9a480e04d0d 100644 --- a/src/main/java/org/tikv/common/util/ConcreteBackOffer.java +++ b/src/main/java/org/tikv/common/util/ConcreteBackOffer.java @@ -138,26 +138,23 @@ public void doBackOff(BackOffFunction.BackOffFuncType funcType, Exception err) { } @Override - public void doBackOffWithMaxSleep( - BackOffFunction.BackOffFuncType funcType, long maxSleepMs, Exception err) { + public boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType) { + return canRetryAfterSleep(funcType, -1); + } + + public boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType, long maxSleepMs) { BackOffFunction backOffFunction = backOffFunctionMap.computeIfAbsent(funcType, this::createBackOffFunc); // Back off will not be done here long sleep = backOffFunction.getSleepMs(maxSleepMs); totalSleep += sleep; - - logger.debug( - String.format( - "%s, retry later(totalSleep %dms, maxSleep %dms)", - err.getMessage(), totalSleep, maxSleep)); - errors.add(err); - // Check deadline if (deadline > 0) { long currentMs = System.currentTimeMillis(); if (currentMs + sleep >= deadline) { - logThrowError(String.format("Deadline %d is exceeded, errors:", deadline), err); + logger.warn(String.format("Deadline %d is exceeded, errors:", deadline)); + return false; } } @@ -166,14 +163,28 @@ public void doBackOffWithMaxSleep( } catch (InterruptedException e) { throw new GrpcException(e); } - if (maxSleep > 0 && totalSleep >= maxSleep) { - logThrowError(String.format("BackOffer.maxSleep %dms is exceeded, errors:", maxSleep), err); + logger.warn(String.format("BackOffer.maxSleep %dms is exceeded, errors:", maxSleep)); + return false; + } + return true; + } + + @Override + public void doBackOffWithMaxSleep( + BackOffFunction.BackOffFuncType funcType, long maxSleepMs, Exception err) { + logger.debug( + String.format( + "%s, retry later(totalSleep %dms, maxSleep %dms)", + err.getMessage(), totalSleep, maxSleep)); + errors.add(err); + if (!canRetryAfterSleep(funcType, maxSleepMs)) { + logThrowError(err); } } - private void logThrowError(String msg, Exception err) { - StringBuilder errMsg = new StringBuilder(msg); + private void logThrowError(Exception err) { + StringBuilder errMsg = new StringBuilder(); for (int i = 0; i < errors.size(); i++) { Exception curErr = errors.get(i); // Print only last 3 errors for non-DEBUG log levels. From 6e66b9332770fd20d268e3237e29900100412415 Mon Sep 17 00:00:00 2001 From: Little-Wallace Date: Thu, 4 Nov 2021 14:28:50 +0800 Subject: [PATCH 2/6] add backoff timer Signed-off-by: Little-Wallace --- src/main/java/org/tikv/common/util/ConcreteBackOffer.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/org/tikv/common/util/ConcreteBackOffer.java b/src/main/java/org/tikv/common/util/ConcreteBackOffer.java index 82804ccb53a..173109a1cbc 100644 --- a/src/main/java/org/tikv/common/util/ConcreteBackOffer.java +++ b/src/main/java/org/tikv/common/util/ConcreteBackOffer.java @@ -151,6 +151,7 @@ public boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType) { } public boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType, long maxSleepMs) { + Histogram.Timer backOffTimer = BACKOFF_DURATION.labels(funcType.name()).startTimer(); BackOffFunction backOffFunction = backOffFunctionMap.computeIfAbsent(funcType, this::createBackOffFunc); @@ -171,6 +172,7 @@ public boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType, long } catch (InterruptedException e) { throw new GrpcException(e); } + backOffTimer.observeDuration(); if (maxSleep > 0 && totalSleep >= maxSleep) { logger.warn(String.format("BackOffer.maxSleep %dms is exceeded, errors:", maxSleep)); return false; From 6fb22718038dbddb1b578fe9df6dd69baf5f8544 Mon Sep 17 00:00:00 2001 From: Little-Wallace Date: Thu, 4 Nov 2021 15:12:00 +0800 Subject: [PATCH 3/6] address comment Signed-off-by: Little-Wallace --- .../java/org/tikv/common/region/AbstractRegionStoreClient.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java b/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java index b28b08da375..fcfd88cc50c 100644 --- a/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java +++ b/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java @@ -260,7 +260,7 @@ private void updateClientStub() { } private boolean retryOtherStoreByProxyForward() { - if (!conf.getEnableGrpcForward() && retryForwardTimes > region.getFollowerList().size()) { + if (!conf.getEnableGrpcForward() || retryForwardTimes > region.getFollowerList().size()) { return false; } TiStore proxyStore = switchProxyStore(); From 627b2cf5091e3ea9a047d73b89ae6039e2577877 Mon Sep 17 00:00:00 2001 From: Little-Wallace Date: Thu, 4 Nov 2021 15:14:53 +0800 Subject: [PATCH 4/6] add comment Signed-off-by: Little-Wallace --- src/main/java/org/tikv/common/util/BackOffer.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/tikv/common/util/BackOffer.java b/src/main/java/org/tikv/common/util/BackOffer.java index 5e150094b0e..190f25cd681 100644 --- a/src/main/java/org/tikv/common/util/BackOffer.java +++ b/src/main/java/org/tikv/common/util/BackOffer.java @@ -33,7 +33,11 @@ public interface BackOffer { * max back off time exceeded and throw an exception to the caller. */ void doBackOff(BackOffFunction.BackOffFuncType funcType, Exception err); - + /** + * doBackOff sleeps a while base on the BackOffType and records the error message. Will stop until + * max back off time exceeded and throw an exception to the caller. It will return false if the + * total sleep time has exceed some limit condition. + */ boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType); /** From 2c29d5bf19ebc65468cec5394b2546f941259344 Mon Sep 17 00:00:00 2001 From: Little-Wallace Date: Thu, 4 Nov 2021 15:15:20 +0800 Subject: [PATCH 5/6] add comment Signed-off-by: Little-Wallace --- src/main/java/org/tikv/common/util/BackOffer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/tikv/common/util/BackOffer.java b/src/main/java/org/tikv/common/util/BackOffer.java index 190f25cd681..6deb88028ab 100644 --- a/src/main/java/org/tikv/common/util/BackOffer.java +++ b/src/main/java/org/tikv/common/util/BackOffer.java @@ -34,8 +34,8 @@ public interface BackOffer { */ void doBackOff(BackOffFunction.BackOffFuncType funcType, Exception err); /** - * doBackOff sleeps a while base on the BackOffType and records the error message. Will stop until - * max back off time exceeded and throw an exception to the caller. It will return false if the + * canRetryAfterSleep sleeps a while base on the BackOffType and records the error message. Will stop + * until max back off time exceeded and throw an exception to the caller. It will return false if the * total sleep time has exceed some limit condition. */ boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType); From 2029400d725c36a90c197b48441fdbcfd96302ce Mon Sep 17 00:00:00 2001 From: Little-Wallace Date: Thu, 4 Nov 2021 15:27:25 +0800 Subject: [PATCH 6/6] fix format Signed-off-by: Little-Wallace --- .../java/org/tikv/common/operation/RegionErrorHandler.java | 2 +- src/main/java/org/tikv/common/util/BackOffer.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java index c967179c977..cec220440c3 100644 --- a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java +++ b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java @@ -235,7 +235,7 @@ public boolean handleRequestError(BackOffer backOffer, Exception e) { if (!backOffer.canRetryAfterSleep(BackOffFunction.BackOffFuncType.BoTiKVRPC)) { regionManager.onRequestFail(recv.getRegion()); throw new GrpcException( - "send tikv request error: " + e.getMessage() + ", try next peer later", e); + "send tikv request error: " + e.getMessage() + ", try next peer later", e); } // TiKV maybe down, so do not retry in `callWithRetry` // should re-fetch the new leader from PD and send request to it diff --git a/src/main/java/org/tikv/common/util/BackOffer.java b/src/main/java/org/tikv/common/util/BackOffer.java index 6deb88028ab..b5081a39b5c 100644 --- a/src/main/java/org/tikv/common/util/BackOffer.java +++ b/src/main/java/org/tikv/common/util/BackOffer.java @@ -34,9 +34,9 @@ public interface BackOffer { */ void doBackOff(BackOffFunction.BackOffFuncType funcType, Exception err); /** - * canRetryAfterSleep sleeps a while base on the BackOffType and records the error message. Will stop - * until max back off time exceeded and throw an exception to the caller. It will return false if the - * total sleep time has exceed some limit condition. + * canRetryAfterSleep sleeps a while base on the BackOffType and records the error message. Will + * stop until max back off time exceeded and throw an exception to the caller. It will return + * false if the total sleep time has exceed some limit condition. */ boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType);