From d86f2cc1a30f0ed3405cc1dd98dc6dfde2de8ebb Mon Sep 17 00:00:00 2001 From: Byte Yue Date: Wed, 24 Jul 2024 20:45:41 +0800 Subject: [PATCH] pick --- .../java/org/apache/doris/common/Config.java | 3 ++ .../apache/doris/planner/OlapScanNode.java | 42 ++++++++++--------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 5a811fddd6c985..441a6a7c46af72 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -2735,6 +2735,9 @@ public static boolean isNotCloudMode() { "Stream_Load When importing, the maximum length of label is limited"}) public static int label_regex_length = 128; + @ConfField(mutable = true) + public static boolean enable_cooldown_replica_affinity = true; + //========================================================================== // end of cloud config //========================================================================== diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index d312992bc80066..52104f6e668563 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -836,27 +836,29 @@ private void addScanRangeLocations(Partition partition, } } - final long coolDownReplicaId = tablet.getCooldownReplicaId(); - // we prefer to query using cooldown replica to make sure the cache is fully utilized - // for example: consider there are 3BEs(A,B,C) and each has one replica for tablet X. 
and X - // is now under cooldown - // first time we choose BE A, and A will download data into cache while the other two's cache is empty - // second time we choose BE B, this time B will be cached, C is still empty - // third time we choose BE C, after this time all replica is cached - // but it means we will do 3 S3 IO to get the data which will bring 3 slow query - if (-1L != coolDownReplicaId) { - final Optional<Replica> replicaOptional = replicas.stream() - .filter(r -> r.getId() == coolDownReplicaId).findAny(); - replicaOptional.ifPresent( - r -> { - Backend backend = Env.getCurrentSystemInfo() - .getBackend(r.getBackendId()); - if (backend != null && backend.isAlive()) { - replicas.clear(); - replicas.add(r); + if (Config.enable_cooldown_replica_affinity) { + final long coolDownReplicaId = tablet.getCooldownReplicaId(); + // we prefer to query using cooldown replica to make sure the cache is fully utilized + // for example: consider there are 3BEs(A,B,C) and each has one replica for tablet X. and X + // is now under cooldown + // first time we choose BE A, and A will download data into cache while the other two's cache is empty + // second time we choose BE B, this time B will be cached, C is still empty + // third time we choose BE C, after this time all replica is cached + // but it means we will do 3 S3 IO to get the data which will bring 3 slow query + if (-1L != coolDownReplicaId) { + final Optional<Replica> replicaOptional = replicas.stream() + .filter(r -> r.getId() == coolDownReplicaId).findAny(); + replicaOptional.ifPresent( + r -> { + Backend backend = Env.getCurrentSystemInfo() + .getBackend(r.getBackendId()); + if (backend != null && backend.isAlive()) { + replicas.clear(); + replicas.add(r); + } } - } - ); + ); + } } boolean tabletIsNull = true; boolean collectedStat = false;