From 457fb06e510c0b623e5e7bfc20502af4dec4d023 Mon Sep 17 00:00:00 2001 From: shiyuhang <1136742008@qq.com> Date: Wed, 24 May 2023 10:18:15 +0800 Subject: [PATCH 1/7] optimize getregionstore logical Signed-off-by: shiyuhang <1136742008@qq.com> --- .../org/tikv/common/region/RegionManager.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java index 129afaf9d5..ae992bb3af 100644 --- a/src/main/java/org/tikv/common/region/RegionManager.java +++ b/src/main/java/org/tikv/common/region/RegionManager.java @@ -177,6 +177,10 @@ public TiRegion getRegionByKey(ByteString key, BackOffer backOffer) { Pair regionAndLeader = pdClient.getRegionByKey(backOffer, key); region = cache.putRegion(createRegion(regionAndLeader.first, regionAndLeader.second, backOffer)); + logger.info( + String.format( + "get region id: %d with leader: %d", + region.getId(), region.getLeader().getStoreId())); } } catch (Exception e) { return null; @@ -235,6 +239,7 @@ public Pair getRegionStorePairByKey( store = getStoreById(peer.getStoreId(), backOffer); if (store.isReachable()) { // update replica's index + logger.info("Store {} is reachable, use it as TiStore", peer.getStoreId()); region.setReplicaIdx(i); break; } @@ -260,11 +265,13 @@ public Pair getRegionStorePairByKey( tiflashStores.get( Math.floorMod(tiflashStoreIndex.getAndIncrement(), tiflashStores.size())); } - - if (store == null) { - // clear the region cache, so we may get the learner peer next time - cache.invalidateRegion(region); - } + } + if (store == null || !store.isReachable()) { + // For TiFlash: clear the region cache, so we may get the learner peer next time + // For TiKV: clear the region cache and set store to null + logger.info("store is null or unreachable, clear region cache"); + store = null; + cache.invalidateRegion(region); } return Pair.create(region, store); } From 967cfa58c4d94834a219fd61cdd8d10369f2527a Mon Sep 17 00:00:00 2001 From: shiyuhang <1136742008@qq.com> Date: Thu, 25 May 2023 12:22:02 +0800 Subject: [PATCH 2/7] decrease impact Signed-off-by: shiyuhang <1136742008@qq.com> --- .../org/tikv/common/region/RegionManager.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java index ae992bb3af..741d723687 100644 --- a/src/main/java/org/tikv/common/region/RegionManager.java +++ b/src/main/java/org/tikv/common/region/RegionManager.java @@ -245,6 +245,10 @@ public Pair getRegionStorePairByKey( } logger.info("Store {} is unreachable, try to get the next replica", peer.getStoreId()); } + // Does not set unreachable store to null in case it is incompatible with GrpcForward + if (store == null || !store.isReachable()) { + logger.warn("No TiKV store available for region: " + region); + } } else { List tiflashStores = new ArrayList<>(); for (Peer peer : region.getLearnerList()) { @@ -265,13 +269,11 @@ public Pair getRegionStorePairByKey( tiflashStores.get( Math.floorMod(tiflashStoreIndex.getAndIncrement(), tiflashStores.size())); } - } - if (store == null || !store.isReachable()) { - // For TiFlash: clear the region cache, so we may get the learner peer next time - // For TiKV: clear the region cache and set store to null - logger.info("store is null or unreachable, clear region cache"); - store = null; - cache.invalidateRegion(region); + + if (store == null) { + // clear the region cache, so we may get the learner peer next time + cache.invalidateRegion(region); + } } return Pair.create(region, store); } From c9b9472a97dd7c627d58abbacb7377c958a10600 Mon Sep 17 00:00:00 2001 From: shi yuhang <52435083+shiyuhang0@users.noreply.github.com> Date: Thu, 25 May 2023 12:34:34 +0800 Subject: [PATCH 3/7] Update RegionManager.java Signed-off-by: shiyuhang <1136742008@qq.com> --- src/main/java/org/tikv/common/region/RegionManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java index 741d723687..22d89821d8 100644 --- a/src/main/java/org/tikv/common/region/RegionManager.java +++ b/src/main/java/org/tikv/common/region/RegionManager.java @@ -239,7 +239,7 @@ public Pair getRegionStorePairByKey( store = getStoreById(peer.getStoreId(), backOffer); if (store.isReachable()) { // update replica's index - logger.info("Store {} is reachable, use it as TiStore", peer.getStoreId()); + logger.info("Store {} is reachable, select it", peer.getStoreId()); region.setReplicaIdx(i); break; } From 35cd4e1c7414ac837540eebfd6fee5c170195a7a Mon Sep 17 00:00:00 2001 From: shi yuhang <52435083+shiyuhang0@users.noreply.github.com> Date: Thu, 25 May 2023 21:20:34 +0800 Subject: [PATCH 4/7] [close #749] Fix health checking issue (#748) Signed-off-by: shiyuhang <1136742008@qq.com> --- .../org/tikv/common/region/RegionManager.java | 7 ++--- .../common/region/StoreHealthyChecker.java | 30 +++++++++++++++++++ .../java/org/tikv/common/region/TiStore.java | 10 +++++++ 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java index 22d89821d8..31b629f1a0 100644 --- a/src/main/java/org/tikv/common/region/RegionManager.java +++ b/src/main/java/org/tikv/common/region/RegionManager.java @@ -256,11 +256,8 @@ public Pair getRegionStorePairByKey( if (!s.isReachable()) { continue; } - for (Metapb.StoreLabel label : s.getStore().getLabelsList()) { - if (label.getKey().equals(storeType.getLabelKey()) - && label.getValue().equals(storeType.getLabelValue())) { - tiflashStores.add(s); - } + if (s.isTiFlash()) { + tiflashStores.add(s); } } // select a tiflash with Round-Robin strategy diff --git a/src/main/java/org/tikv/common/region/StoreHealthyChecker.java b/src/main/java/org/tikv/common/region/StoreHealthyChecker.java index 8d305649c4..3ae3f40d1f 100644 --- a/src/main/java/org/tikv/common/region/StoreHealthyChecker.java +++ b/src/main/java/org/tikv/common/region/StoreHealthyChecker.java @@ -20,17 +20,22 @@ import io.grpc.health.v1.HealthCheckRequest; import io.grpc.health.v1.HealthCheckResponse; import io.grpc.health.v1.HealthGrpc; +import io.grpc.stub.ClientCalls; import java.util.LinkedList; import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.tikv.common.ReadOnlyPDClient; import org.tikv.common.util.ChannelFactory; import org.tikv.common.util.ConcreteBackOffer; import org.tikv.kvproto.Metapb; +import org.tikv.kvproto.Mpp; +import org.tikv.kvproto.Mpp.IsAliveRequest; +import org.tikv.kvproto.TikvGrpc; public class StoreHealthyChecker implements Runnable { private static final Logger logger = LoggerFactory.getLogger(StoreHealthyChecker.class); @@ -75,6 +80,30 @@ private List getValidStores() { private boolean checkStoreHealth(TiStore store) { String addressStr = store.getStore().getAddress(); + if (store.isTiFlash()) { + return checkTiFlashHealth(addressStr); + } + return checkTiKVHealth(addressStr); + } + + private boolean checkTiFlashHealth(String addressStr) { + try { + ManagedChannel channel = channelFactory.getChannel(addressStr, pdClient.getHostMapping()); + TikvGrpc.TikvBlockingStub stub = + TikvGrpc.newBlockingStub(channel).withDeadlineAfter(timeout, TimeUnit.MILLISECONDS); + Supplier factory = () -> Mpp.IsAliveRequest.newBuilder().build(); + Mpp.IsAliveResponse resp = + ClientCalls.blockingUnaryCall( + stub.getChannel(), TikvGrpc.getIsAliveMethod(), stub.getCallOptions(), factory.get()); + return resp != null && resp.getAvailable(); + } catch (Exception e) { + logger.info( + "fail to check TiFlash health, regard as unhealthy. TiFlash address: " + addressStr, e); + return false; + } + } + + private boolean checkTiKVHealth(String addressStr) { try { ManagedChannel channel = channelFactory.getChannel(addressStr, pdClient.getHostMapping()); HealthGrpc.HealthBlockingStub stub = @@ -83,6 +112,7 @@ private boolean checkStoreHealth(TiStore store) { HealthCheckResponse resp = stub.check(req); return resp.getStatus() == HealthCheckResponse.ServingStatus.SERVING; } catch (Exception e) { + logger.info("fail to check TiKV health, regard as unhealthy. TiKV address: " + addressStr, e); return false; } } diff --git a/src/main/java/org/tikv/common/region/TiStore.java b/src/main/java/org/tikv/common/region/TiStore.java index 8513e2b56e..5feaa246fe 100644 --- a/src/main/java/org/tikv/common/region/TiStore.java +++ b/src/main/java/org/tikv/common/region/TiStore.java @@ -105,4 +105,14 @@ public Metapb.Store getProxyStore() { public long getId() { return this.store.getId(); } + + public boolean isTiFlash() { + for (Metapb.StoreLabel label : store.getLabelsList()) { + if (label.getKey().equals(TiStoreType.TiFlash.getLabelKey()) + && label.getValue().equals(TiStoreType.TiFlash.getLabelValue())) { + return true; + } + } + return false; + } } From 5d499903b9ecc3a0c100509ea4b34f8198e006d1 Mon Sep 17 00:00:00 2001 From: shi yuhang <52435083+shiyuhang0@users.noreply.github.com> Date: Thu, 25 May 2023 21:22:00 +0800 Subject: [PATCH 5/7] Update RegionManager.java Signed-off-by: shiyuhang <1136742008@qq.com> --- src/main/java/org/tikv/common/region/RegionManager.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java index 31b629f1a0..7b971e820f 100644 --- a/src/main/java/org/tikv/common/region/RegionManager.java +++ b/src/main/java/org/tikv/common/region/RegionManager.java @@ -239,7 +239,6 @@ public Pair getRegionStorePairByKey( store = getStoreById(peer.getStoreId(), backOffer); if (store.isReachable()) { // update replica's index - logger.info("Store {} is reachable, select it", peer.getStoreId()); region.setReplicaIdx(i); break; } From a01479bfd4f5e742ac4d7de4f491fe6b198b0fe0 Mon Sep 17 00:00:00 2001 From: shiyuhang <1136742008@qq.com> Date: Mon, 29 May 2023 13:56:18 +0800 Subject: [PATCH 6/7] add log Signed-off-by: shiyuhang <1136742008@qq.com> --- src/main/java/org/tikv/common/region/RegionManager.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java index 7b971e820f..c61bad3f9a 100644 --- a/src/main/java/org/tikv/common/region/RegionManager.java +++ b/src/main/java/org/tikv/common/region/RegionManager.java @@ -183,6 +183,7 @@ public TiRegion getRegionByKey(ByteString key, BackOffer backOffer) { region.getId(), region.getLeader().getStoreId())); } } catch (Exception e) { + logger.warn("Get region failed: ", e); return null; } finally { requestTimer.observeDuration(); From d915e504056cc081622db706fc782dcdfef0ac41 Mon Sep 17 00:00:00 2001 From: shiyuhang <1136742008@qq.com> Date: Wed, 31 May 2023 14:40:12 +0800 Subject: [PATCH 7/7] change log level Signed-off-by: shiyuhang <1136742008@qq.com> --- src/main/java/org/tikv/common/region/RegionManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java index c61bad3f9a..9678d9e813 100644 --- a/src/main/java/org/tikv/common/region/RegionManager.java +++ b/src/main/java/org/tikv/common/region/RegionManager.java @@ -177,7 +177,7 @@ public TiRegion getRegionByKey(ByteString key, BackOffer backOffer) { Pair regionAndLeader = pdClient.getRegionByKey(backOffer, key); region = cache.putRegion(createRegion(regionAndLeader.first, regionAndLeader.second, backOffer)); - logger.info( + logger.debug( String.format( "get region id: %d with leader: %d", region.getId(), region.getLeader().getStoreId()));