From 0c6fd95d47ec0c8d399034dab648114e43363939 Mon Sep 17 00:00:00 2001 From: birdstorm Date: Wed, 3 Nov 2021 17:12:16 +0800 Subject: [PATCH 1/2] add metrics for callWithRetry & doBackOff Signed-off-by: birdstorm --- pom.xml | 2 +- .../org/tikv/common/policy/RetryPolicy.java | 64 +++++++++++------- .../tikv/common/util/ConcreteBackOffer.java | 67 +++++++++++-------- 3 files changed, 79 insertions(+), 54 deletions(-) diff --git a/pom.xml b/pom.xml index 412c80c3e00..210b59e2554 100644 --- a/pom.xml +++ b/pom.xml @@ -402,7 +402,7 @@ jar - -Xdoclint:none + -Xdoclint:none diff --git a/src/main/java/org/tikv/common/policy/RetryPolicy.java b/src/main/java/org/tikv/common/policy/RetryPolicy.java index 27d8558c7b9..73cb9dc0e93 100644 --- a/src/main/java/org/tikv/common/policy/RetryPolicy.java +++ b/src/main/java/org/tikv/common/policy/RetryPolicy.java @@ -33,6 +33,12 @@ public abstract class RetryPolicy { .help("grpc request latency.") .labelNames("type") .register(); + public static final Histogram CALL_WITH_RETRY_DURATION = + Histogram.build() + .name("client_java_call_with_retry_duration") + .help("callWithRetry duration.") + .labelNames("type") + .register(); public static final Counter GRPC_REQUEST_RETRY_NUM = Counter.build() .name("client_java_grpc_requests_retry_num") @@ -62,37 +68,43 @@ private void rethrowNotRecoverableException(Exception e) { } public RespT callWithRetry(Callable proc, String methodName) { - while (true) { - RespT result = null; - try { - // add single request duration histogram - Histogram.Timer requestTimer = GRPC_SINGLE_REQUEST_LATENCY.labels(methodName).startTimer(); + Histogram.Timer callWithRetryTimer = CALL_WITH_RETRY_DURATION.labels(methodName).startTimer(); + try { + while (true) { + RespT result = null; try { - result = proc.call(); - } finally { - requestTimer.observeDuration(); + // add single request duration histogram + Histogram.Timer requestTimer = + GRPC_SINGLE_REQUEST_LATENCY.labels(methodName).startTimer(); + try { + result = proc.call(); + } finally { + requestTimer.observeDuration(); + } + } catch (Exception e) { + rethrowNotRecoverableException(e); + // Handle request call error + boolean retry = handler.handleRequestError(backOffer, e); + if (retry) { + GRPC_REQUEST_RETRY_NUM.labels(methodName).inc(); + continue; + } else { + return result; + } } - } catch (Exception e) { - rethrowNotRecoverableException(e); - // Handle request call error - boolean retry = handler.handleRequestError(backOffer, e); - if (retry) { - GRPC_REQUEST_RETRY_NUM.labels(methodName).inc(); - continue; - } else { - return result; - } - } - // Handle response error - if (handler != null) { - boolean retry = handler.handleResponseError(backOffer, result); - if (retry) { - GRPC_REQUEST_RETRY_NUM.labels(methodName).inc(); - continue; + // Handle response error + if (handler != null) { + boolean retry = handler.handleResponseError(backOffer, result); + if (retry) { + GRPC_REQUEST_RETRY_NUM.labels(methodName).inc(); + continue; + } } + return result; } - return result; + } finally { + callWithRetryTimer.observeDuration(); } } diff --git a/src/main/java/org/tikv/common/util/ConcreteBackOffer.java b/src/main/java/org/tikv/common/util/ConcreteBackOffer.java index 9950e5a0288..1b082319f7a 100644 --- a/src/main/java/org/tikv/common/util/ConcreteBackOffer.java +++ b/src/main/java/org/tikv/common/util/ConcreteBackOffer.java @@ -20,6 +20,7 @@ import static org.tikv.common.ConfigUtils.TIKV_BO_REGION_MISS_BASE_IN_MS; import com.google.common.base.Preconditions; +import io.prometheus.client.Histogram; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -35,7 +36,14 @@ public class ConcreteBackOffer implements BackOffer { private final Map backOffFunctionMap; private final List errors; private int totalSleep; - private long deadline; + private final long deadline; + + public static final Histogram BACKOFF_DURATION = + Histogram.build() + .name("client_java_backoff_duration") + .help("backoff duration.") + .labelNames("type") + .register(); private ConcreteBackOffer(int maxSleep, long deadline) { Preconditions.checkArgument( @@ -140,35 +148,40 @@ public void doBackOff(BackOffFunction.BackOffFuncType funcType, Exception err) { @Override public void doBackOffWithMaxSleep( BackOffFunction.BackOffFuncType funcType, long maxSleepMs, Exception err) { - BackOffFunction backOffFunction = - backOffFunctionMap.computeIfAbsent(funcType, this::createBackOffFunc); - - // Back off will not be done here - long sleep = backOffFunction.getSleepMs(maxSleepMs); - totalSleep += sleep; - - logger.debug( - String.format( - "%s, retry later(totalSleep %dms, maxSleep %dms)", - err.getMessage(), totalSleep, maxSleep)); - errors.add(err); - - // Check deadline - if (deadline > 0) { - long currentMs = System.currentTimeMillis(); - if (currentMs + sleep >= deadline) { - logThrowError(String.format("Deadline %d is exceeded, errors:", deadline), err); + Histogram.Timer backOffTimer = BACKOFF_DURATION.labels(funcType.name()).startTimer(); + try { + BackOffFunction backOffFunction = + backOffFunctionMap.computeIfAbsent(funcType, this::createBackOffFunc); + + // Back off will not be done here + long sleep = backOffFunction.getSleepMs(maxSleepMs); + totalSleep += sleep; + + logger.debug( + String.format( + "%s, retry later(totalSleep %dms, maxSleep %dms)", + err.getMessage(), totalSleep, maxSleep)); + errors.add(err); + + // Check deadline + if (deadline > 0) { + long currentMs = System.currentTimeMillis(); + if (currentMs + sleep >= deadline) { + logThrowError(String.format("Deadline %d is exceeded, errors:", deadline), err); + } } - } - try { - Thread.sleep(sleep); - } catch (InterruptedException e) { - throw new GrpcException(e); - } + try { + Thread.sleep(sleep); + } catch (InterruptedException e) { + throw new GrpcException(e); + } - if (maxSleep > 0 && totalSleep >= maxSleep) { - logThrowError(String.format("BackOffer.maxSleep %dms is exceeded, errors:", maxSleep), err); + if (maxSleep > 0 && totalSleep >= maxSleep) { + logThrowError(String.format("BackOffer.maxSleep %dms is exceeded, errors:", maxSleep), err); + } + } finally { + backOffTimer.observeDuration(); } } From 123b4237f0e4c11c634aacf235c62255c7fceb2c Mon Sep 17 00:00:00 2001 From: birdstorm Date: Wed, 3 Nov 2021 17:27:36 +0800 Subject: [PATCH 2/2] revert Signed-off-by: birdstorm --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 210b59e2554..412c80c3e00 100644 --- a/pom.xml +++ b/pom.xml @@ -402,7 +402,7 @@ jar - -Xdoclint:none + -Xdoclint:none