From 7e7492ef3e17188862ef6458d9f3488803eca344 Mon Sep 17 00:00:00 2001 From: marsishandsome Date: Tue, 2 Nov 2021 12:23:06 +0800 Subject: [PATCH 1/3] add log for qps0 & fix EpochNotMatch Signed-off-by: marsishandsome --- .../common/operation/RegionErrorHandler.java | 61 +++++++++++++++++-- .../org/tikv/common/region/RegionCache.java | 11 ++++ .../org/tikv/common/region/RegionManager.java | 10 +++ .../java/org/tikv/common/region/TiRegion.java | 16 +++++ src/main/java/org/tikv/raw/RawKVClient.java | 3 + 5 files changed, 96 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java index a463988ca96..285a2478516 100644 --- a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java +++ b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java @@ -3,17 +3,21 @@ import com.google.protobuf.ByteString; import io.grpc.Status; import io.grpc.StatusRuntimeException; +import java.util.ArrayList; +import java.util.List; import java.util.function.Function; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.tikv.common.codec.KeyUtils; import org.tikv.common.exception.GrpcException; +import org.tikv.common.exception.TiKVException; import org.tikv.common.region.RegionErrorReceiver; import org.tikv.common.region.RegionManager; import org.tikv.common.region.TiRegion; import org.tikv.common.util.BackOffFunction; import org.tikv.common.util.BackOffer; import org.tikv.kvproto.Errorpb; +import org.tikv.kvproto.Metapb; public class RegionErrorHandler implements ErrorHandler { private static final Logger logger = LoggerFactory.getLogger(RegionErrorHandler.class); @@ -115,11 +119,11 @@ public boolean handleRegionError(BackOffer backOffer, Errorpb.Error error) { // throwing it out. return false; } else if (error.hasEpochNotMatch()) { - // this error is reported from raftstore: - // region has outdated version,please try later. - logger.warn(String.format("Stale Epoch encountered for region [%s]", recv.getRegion())); - this.regionManager.onRegionStale(recv.getRegion()); - return false; + logger.warn( + String.format( + "tikv reports `EpochNotMatch` retry later, EpochNotMatch: %s", + error.getEpochNotMatch())); + return onRegionEpochNotMatch(backOffer, error.getEpochNotMatch().getCurrentRegionsList()); } else if (error.hasServerIsBusy()) { // this error is reported from kv: // will occur when write pressure is high. Please try later. @@ -171,6 +175,53 @@ public boolean handleRegionError(BackOffer backOffer, Errorpb.Error error) { return false; } + // OnRegionEpochNotMatch removes the old region and inserts new regions into the cache. + // It returns whether retries the request because it's possible the region epoch is ahead of + // TiKV's due to slow appling. + private boolean onRegionEpochNotMatch(BackOffer backOffer, List currentRegions) { + if (currentRegions.size() == 0) { + this.regionManager.onRegionStale(recv.getRegion()); + return false; + } + + // Find whether the region epoch in `ctx` is ahead of TiKV's. If so, backoff. + for (Metapb.Region meta : currentRegions) { + if (meta.getId() == recv.getRegion().getId() + && (meta.getRegionEpoch().getConfVer() < recv.getRegion().getVerID().getConfVer() + || meta.getRegionEpoch().getVersion() < recv.getRegion().getVerID().getVer())) { + String errorMsg = + String.format( + "region epoch is ahead of tikv, region: %s, currentRegions: %s", + recv.getRegion(), currentRegions); + logger.info(errorMsg); + backOffer.doBackOff( + BackOffFunction.BackOffFuncType.BoRegionMiss, new TiKVException(errorMsg)); + return true; + } + } + + boolean needInvalidateOld = true; + List newRegions = new ArrayList<>(currentRegions.size()); + // If the region epoch is not ahead of TiKV's, replace region meta in region cache. + for (Metapb.Region meta : currentRegions) { + TiRegion region = regionManager.createRegion(meta, backOffer); + newRegions.add(region); + if (recv.getRegion().getVerID() == region.getVerID()) { + needInvalidateOld = false; + } + } + + for (TiRegion region : newRegions) { + regionManager.insertRegionToCache(region); + } + + if (needInvalidateOld) { + this.regionManager.onRegionStale(recv.getRegion()); + } + + return false; + } + @Override public boolean handleRequestError(BackOffer backOffer, Exception e) { if (recv.onStoreUnreachable()) { diff --git a/src/main/java/org/tikv/common/region/RegionCache.java b/src/main/java/org/tikv/common/region/RegionCache.java index 28daf1d77d9..0aca1b454cf 100644 --- a/src/main/java/org/tikv/common/region/RegionCache.java +++ b/src/main/java/org/tikv/common/region/RegionCache.java @@ -92,6 +92,17 @@ public synchronized void invalidateRegion(TiRegion region) { } } + public synchronized void insertRegionToCache(TiRegion region) { + try { + TiRegion oldRegion = regionCache.get(region.getId()); + if (oldRegion != null) { + keyToRegionIdCache.remove(makeRange(oldRegion.getStartKey(), oldRegion.getEndKey())); + } + regionCache.put(region.getId(), region); + } catch (Exception ignore) { + } + } + public synchronized boolean updateRegion(TiRegion expected, TiRegion region) { try { if (logger.isDebugEnabled()) { diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java index 7ab25ea8b74..46b0cab2cc4 100644 --- a/src/main/java/org/tikv/common/region/RegionManager.java +++ b/src/main/java/org/tikv/common/region/RegionManager.java @@ -181,6 +181,12 @@ public Pair getRegionStorePairByKey( return Pair.create(region, store); } + public TiRegion createRegion(Metapb.Region region, BackOffer backOffer) { + List peers = region.getPeersList(); + List stores = getRegionStore(peers, backOffer); + return new TiRegion(conf, region, null, peers, stores); + } + private TiRegion createRegion(Metapb.Region region, Metapb.Peer leader, BackOffer backOffer) { List peers = region.getPeersList(); List stores = getRegionStore(peers, backOffer); @@ -262,6 +268,10 @@ public void invalidateRegion(TiRegion region) { cache.invalidateRegion(region); } + public void insertRegionToCache(TiRegion region) { + cache.insertRegionToCache(region); + } + private BackOffer defaultBackOff() { return ConcreteBackOffer.newCustomBackOff(conf.getRawKVDefaultBackoffInMS()); } diff --git a/src/main/java/org/tikv/common/region/TiRegion.java b/src/main/java/org/tikv/common/region/TiRegion.java index b53557f9306..0bb934e4251 100644 --- a/src/main/java/org/tikv/common/region/TiRegion.java +++ b/src/main/java/org/tikv/common/region/TiRegion.java @@ -82,6 +82,10 @@ public TiRegion( replicaIdx = 0; } + public TiConfiguration getConf() { + return conf; + } + public Peer getLeader() { return leader; } @@ -271,6 +275,18 @@ public class RegionVerID { this.ver = ver; } + public long getId() { + return id; + } + + public long getConfVer() { + return confVer; + } + + public long getVer() { + return ver; + } + @Override public boolean equals(Object other) { if (this == other) { diff --git a/src/main/java/org/tikv/raw/RawKVClient.java b/src/main/java/org/tikv/raw/RawKVClient.java index fdd96600b96..74ac44b1f41 100644 --- a/src/main/java/org/tikv/raw/RawKVClient.java +++ b/src/main/java/org/tikv/raw/RawKVClient.java @@ -133,6 +133,7 @@ private void put(ByteString key, ByteString value, long ttl, boolean atomic) { BackOffer backOffer = ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS()); while (true) { RegionStoreClient client = clientBuilder.build(key, backOffer); + logger.warn("put region: " + client.getRegion().toString()); try { client.rawPut(backOffer, key, value, ttl, atomic); RAW_REQUEST_SUCCESS.labels(label).inc(); @@ -178,6 +179,7 @@ public ByteString putIfAbsent(ByteString key, ByteString value, long ttl) { BackOffer backOffer = ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS()); while (true) { RegionStoreClient client = clientBuilder.build(key, backOffer); + logger.warn("putIfAbsent region: " + client.getRegion().toString()); try { ByteString result = client.rawPutIfAbsent(backOffer, key, value, ttl); RAW_REQUEST_SUCCESS.labels(label).inc(); @@ -263,6 +265,7 @@ public ByteString get(ByteString key) { BackOffer backOffer = ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS()); while (true) { RegionStoreClient client = clientBuilder.build(key, backOffer); + logger.warn("get region: " + client.getRegion().toString()); try { ByteString result = client.rawGet(backOffer, key); RAW_REQUEST_SUCCESS.labels(label).inc(); From e4d6603ac9b0fab11f9ae77fd84844fbe0b09e3f Mon Sep 17 00:00:00 2001 From: marsishandsome Date: Tue, 2 Nov 2021 18:02:35 +0800 Subject: [PATCH 2/3] move invalide before insertRegionToCache Signed-off-by: marsishandsome --- .../tikv/common/operation/RegionErrorHandler.java | 15 +++++++++------ .../org/tikv/common/region/RegionManager.java | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java index 285a2478516..355348169e2 100644 --- a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java +++ b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java @@ -121,8 +121,8 @@ public boolean handleRegionError(BackOffer backOffer, Errorpb.Error error) { } else if (error.hasEpochNotMatch()) { logger.warn( String.format( - "tikv reports `EpochNotMatch` retry later, EpochNotMatch: %s", - error.getEpochNotMatch())); + "tikv reports `EpochNotMatch` retry later, region: %s, EpochNotMatch: %s", + recv.getRegion(), error.getEpochNotMatch())); return onRegionEpochNotMatch(backOffer, error.getEpochNotMatch().getCurrentRegionsList()); } else if (error.hasServerIsBusy()) { // this error is reported from kv: @@ -180,6 +180,7 @@ public boolean handleRegionError(BackOffer backOffer, Errorpb.Error error) { // TiKV's due to slow appling. private boolean onRegionEpochNotMatch(BackOffer backOffer, List currentRegions) { if (currentRegions.size() == 0) { + logger.warn("currentRegions.size() == 0"); this.regionManager.onRegionStale(recv.getRegion()); return false; } @@ -211,14 +212,16 @@ private boolean onRegionEpochNotMatch(BackOffer backOffer, List c } } - for (TiRegion region : newRegions) { - regionManager.insertRegionToCache(region); - } - if (needInvalidateOld) { + logger.warn("needInvalidateOld, region=" + recv.getRegion()); this.regionManager.onRegionStale(recv.getRegion()); } + for (TiRegion region : newRegions) { + logger.warn("insertRegionToCache, region=" + region); + regionManager.insertRegionToCache(region); + } + return false; } diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java index 46b0cab2cc4..dcac93b0218 100644 --- a/src/main/java/org/tikv/common/region/RegionManager.java +++ b/src/main/java/org/tikv/common/region/RegionManager.java @@ -100,6 +100,7 @@ public TiRegion getRegionByKey(ByteString key, BackOffer backOffer) { if (region == null) { logger.debug("Key not found in keyToRegionIdCache:" + formatBytesUTF8(key)); Pair regionAndLeader = pdClient.getRegionByKey(backOffer, key); + logger.warn("getRegionByKey: regionAndLeader=" + regionAndLeader); region = cache.putRegion(createRegion(regionAndLeader.first, regionAndLeader.second, backOffer)); } From 09958fdd91f168f625709486afde9b34085fda0a Mon Sep 17 00:00:00 2001 From: marsishandsome Date: Tue, 2 Nov 2021 18:20:58 +0800 Subject: [PATCH 3/3] add missing keyToRegionIdCache Signed-off-by: marsishandsome --- .../java/org/tikv/common/operation/RegionErrorHandler.java | 4 +--- src/main/java/org/tikv/common/region/RegionCache.java | 1 + src/main/java/org/tikv/common/region/RegionManager.java | 1 - src/main/java/org/tikv/raw/RawKVClient.java | 3 --- 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java index 355348169e2..7c17c80e4b3 100644 --- a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java +++ b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java @@ -175,12 +175,12 @@ public boolean handleRegionError(BackOffer backOffer, Errorpb.Error error) { return false; } + // ref: https://github.com/tikv/client-go/blob/tidb-5.2/internal/locate/region_request.go#L985 // OnRegionEpochNotMatch removes the old region and inserts new regions into the cache. // It returns whether retries the request because it's possible the region epoch is ahead of // TiKV's due to slow appling. private boolean onRegionEpochNotMatch(BackOffer backOffer, List currentRegions) { if (currentRegions.size() == 0) { - logger.warn("currentRegions.size() == 0"); this.regionManager.onRegionStale(recv.getRegion()); return false; } @@ -213,12 +213,10 @@ private boolean onRegionEpochNotMatch(BackOffer backOffer, List c } if (needInvalidateOld) { - logger.warn("needInvalidateOld, region=" + recv.getRegion()); this.regionManager.onRegionStale(recv.getRegion()); } for (TiRegion region : newRegions) { - logger.warn("insertRegionToCache, region=" + region); regionManager.insertRegionToCache(region); } diff --git a/src/main/java/org/tikv/common/region/RegionCache.java b/src/main/java/org/tikv/common/region/RegionCache.java index 0aca1b454cf..540e8cbad88 100644 --- a/src/main/java/org/tikv/common/region/RegionCache.java +++ b/src/main/java/org/tikv/common/region/RegionCache.java @@ -99,6 +99,7 @@ public synchronized void insertRegionToCache(TiRegion region) { keyToRegionIdCache.remove(makeRange(oldRegion.getStartKey(), oldRegion.getEndKey())); } regionCache.put(region.getId(), region); + keyToRegionIdCache.put(makeRange(region.getStartKey(), region.getEndKey()), region.getId()); } catch (Exception ignore) { } } diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java index dcac93b0218..46b0cab2cc4 100644 --- a/src/main/java/org/tikv/common/region/RegionManager.java +++ b/src/main/java/org/tikv/common/region/RegionManager.java @@ -100,7 +100,6 @@ public TiRegion getRegionByKey(ByteString key, BackOffer backOffer) { if (region == null) { logger.debug("Key not found in keyToRegionIdCache:" + formatBytesUTF8(key)); Pair regionAndLeader = pdClient.getRegionByKey(backOffer, key); - logger.warn("getRegionByKey: regionAndLeader=" + regionAndLeader); region = cache.putRegion(createRegion(regionAndLeader.first, regionAndLeader.second, backOffer)); } diff --git a/src/main/java/org/tikv/raw/RawKVClient.java b/src/main/java/org/tikv/raw/RawKVClient.java index 74ac44b1f41..fdd96600b96 100644 --- a/src/main/java/org/tikv/raw/RawKVClient.java +++ b/src/main/java/org/tikv/raw/RawKVClient.java @@ -133,7 +133,6 @@ private void put(ByteString key, ByteString value, long ttl, boolean atomic) { BackOffer backOffer = ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS()); while (true) { RegionStoreClient client = clientBuilder.build(key, backOffer); - logger.warn("put region: " + client.getRegion().toString()); try { client.rawPut(backOffer, key, value, ttl, atomic); RAW_REQUEST_SUCCESS.labels(label).inc(); @@ -179,7 +178,6 @@ public ByteString putIfAbsent(ByteString key, ByteString value, long ttl) { BackOffer backOffer = ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS()); while (true) { RegionStoreClient client = clientBuilder.build(key, backOffer); - logger.warn("putIfAbsent region: " + client.getRegion().toString()); try { ByteString result = client.rawPutIfAbsent(backOffer, key, value, ttl); RAW_REQUEST_SUCCESS.labels(label).inc(); @@ -265,7 +263,6 @@ public ByteString get(ByteString key) { BackOffer backOffer = ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS()); while (true) { RegionStoreClient client = clientBuilder.build(key, backOffer); - logger.warn("get region: " + client.getRegion().toString()); try { ByteString result = client.rawGet(backOffer, key); RAW_REQUEST_SUCCESS.labels(label).inc();