Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ public enum TabletStatus {
COLOCATE_MISMATCH, // replicas do not all locate in right colocate backends set.
COLOCATE_REDUNDANT, // replicas match the colocate backends set, but redundant.
NEED_FURTHER_REPAIR, // one of replicas need a definite repair.
UNRECOVERABLE // none of the replicas are healthy
}

@SerializedName(value = "id")
Expand Down Expand Up @@ -455,7 +456,9 @@ public Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriority(

// 1. alive replicas are not enough
int aliveBackendsNum = aliveBeIdsInCluster.size();
if (alive < replicationNum && replicas.size() >= aliveBackendsNum
if (alive == 0) {
return Pair.create(TabletStatus.UNRECOVERABLE, Priority.VERY_HIGH);
} else if (alive < replicationNum && replicas.size() >= aliveBackendsNum
&& aliveBackendsNum >= replicationNum && replicationNum > 1) {
// there are not enough backends for us to create a new replica, so we have to delete an existing replica
// to free up a backend on which a new replica can be created.
Expand All @@ -473,7 +476,9 @@ public Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriority(
}

// 2. version complete replicas are not enough
if (aliveAndVersionComplete < (replicationNum / 2) + 1) {
if (aliveAndVersionComplete == 0) {
return Pair.create(TabletStatus.UNRECOVERABLE, Priority.VERY_HIGH);
} else if (aliveAndVersionComplete < (replicationNum / 2) + 1) {
return Pair.create(TabletStatus.VERSION_INCOMPLETE, TabletSchedCtx.Priority.HIGH);
} else if (aliveAndVersionComplete < replicationNum) {
return Pair.create(TabletStatus.VERSION_INCOMPLETE, TabletSchedCtx.Priority.NORMAL);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,11 @@ private LoopControlStatus handlePartitionTablet(Database db, OlapTable tbl, Part
// Only set last status check time when status is healthy.
tablet.setLastStatusCheckTime(startTime);
continue;
} else if (statusWithPrio.first == TabletStatus.UNRECOVERABLE) {
// This tablet is not recoverable, so do not add it to the tablet scheduler.
// All UNRECOVERABLE tablets can be seen via "show proc '/statistic'".
counter.unhealthyTabletNum++;
continue;
} else if (isInPrios) {
statusWithPrio.second = TabletSchedCtx.Priority.VERY_HIGH;
prioPartIsHealthy = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -575,17 +575,19 @@ private void handleTabletByTypeAndStatus(TabletStatus status, TabletSchedCtx tab
case FORCE_REDUNDANT:
handleRedundantReplica(tabletCtx, true);
break;
case REPLICA_MISSING_IN_CLUSTER:
handleReplicaClusterMigration(tabletCtx, batchTask);
break;
case COLOCATE_MISMATCH:
handleColocateMismatch(tabletCtx, batchTask);
break;
case COLOCATE_REDUNDANT:
handleColocateRedundant(tabletCtx);
break;
default:
break;
case REPLICA_MISSING_IN_CLUSTER:
handleReplicaClusterMigration(tabletCtx, batchTask);
break;
case COLOCATE_MISMATCH:
handleColocateMismatch(tabletCtx, batchTask);
break;
case COLOCATE_REDUNDANT:
handleColocateRedundant(tabletCtx);
break;
case UNRECOVERABLE:
throw new SchedException(Status.UNRECOVERABLE, "tablet is unrecoverable");
default:
break;
}
} else {
// balance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,23 @@

public class IncompleteTabletsProcNode implements ProcNodeInterface {
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>()
.add("UnhealthyTablets").add("InconsistentTablets").add("CloningTablets")
.add("UnhealthyTablets").add("InconsistentTablets").add("CloningTablets").add("BadTablets")
.build();
private static final Joiner JOINER = Joiner.on(",");

Collection<Long> unhealthyTabletIds;
Collection<Long> inconsistentTabletIds;
Collection<Long> cloningTabletIds;
Collection<Long> unrecoverableTabletIds;

public IncompleteTabletsProcNode(Collection<Long> unhealthyTabletIds,
Collection<Long> inconsistentTabletIds,
Collection<Long> cloningTabletIds) {
Collection<Long> cloningTabletIds,
Collection<Long> unrecoverableTabletIds) {
this.unhealthyTabletIds = unhealthyTabletIds;
this.inconsistentTabletIds = inconsistentTabletIds;
this.cloningTabletIds = cloningTabletIds;
this.unrecoverableTabletIds = unrecoverableTabletIds;
}

@Override
Expand All @@ -56,9 +59,11 @@ public ProcResult fetchResult() throws AnalysisException {
String incompleteTablets = JOINER.join(Arrays.asList(unhealthyTabletIds));
String inconsistentTablets = JOINER.join(Arrays.asList(inconsistentTabletIds));
String cloningTablets = JOINER.join(Arrays.asList(cloningTabletIds));
String unrecoverableTablets = JOINER.join(Arrays.asList(unrecoverableTabletIds));
row.add(incompleteTablets);
row.add(inconsistentTablets);
row.add(cloningTablets);
row.add(unrecoverableTablets);

result.addRow(row);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@

package org.apache.doris.common.proc;

import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Multimap;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.MaterializedIndex;
Expand All @@ -38,6 +34,12 @@
import org.apache.doris.system.SystemInfoService;
import org.apache.doris.task.AgentTaskQueue;
import org.apache.doris.thrift.TTaskType;

import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Multimap;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

Expand All @@ -49,7 +51,7 @@ public class StatisticProcDir implements ProcDirInterface {
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>()
.add("DbId").add("DbName").add("TableNum").add("PartitionNum")
.add("IndexNum").add("TabletNum").add("ReplicaNum").add("UnhealthyTabletNum")
.add("InconsistentTabletNum").add("CloningTabletNum")
.add("InconsistentTabletNum").add("CloningTabletNum").add("BadTabletNum")
.build();
private static final Logger LOG = LogManager.getLogger(StatisticProcDir.class);

Expand All @@ -61,12 +63,15 @@ public class StatisticProcDir implements ProcDirInterface {
Multimap<Long, Long> inconsistentTabletIds;
// db id -> set(tablet id)
Multimap<Long, Long> cloningTabletIds;
// db id -> set(tablet id)
Multimap<Long, Long> unrecoverableTabletIds;

/**
 * Creates the statistic proc dir for the given catalog.
 *
 * All tablet id multimaps (unhealthy, inconsistent, cloning, unrecoverable),
 * each keyed by db id, start out empty.
 *
 * @param catalog the catalog this proc dir keeps a reference to
 */
public StatisticProcDir(Catalog catalog) {
this.catalog = catalog;
unhealthyTabletIds = HashMultimap.create();
inconsistentTabletIds = HashMultimap.create();
cloningTabletIds = HashMultimap.create();
unrecoverableTabletIds = HashMultimap.create();
}

@Override
Expand Down Expand Up @@ -140,8 +145,11 @@ public ProcResult fetchResult() throws AnalysisException {

// here we treat REDUNDANT as HEALTHY, to be user-friendly.
if (res.first != TabletStatus.HEALTHY && res.first != TabletStatus.REDUNDANT
&& res.first != TabletStatus.COLOCATE_REDUNDANT && res.first != TabletStatus.NEED_FURTHER_REPAIR) {
&& res.first != TabletStatus.COLOCATE_REDUNDANT && res.first != TabletStatus.NEED_FURTHER_REPAIR
&& res.first != TabletStatus.UNRECOVERABLE) {
unhealthyTabletIds.put(dbId, tablet.getId());
} else if (res.first == TabletStatus.UNRECOVERABLE) {
unrecoverableTabletIds.put(dbId, tablet.getId());
}

if (!tablet.isConsistent()) {
Expand All @@ -166,6 +174,7 @@ public ProcResult fetchResult() throws AnalysisException {
oneLine.add(unhealthyTabletIds.get(dbId).size());
oneLine.add(inconsistentTabletIds.get(dbId).size());
oneLine.add(cloningTabletIds.get(dbId).size());
oneLine.add(unrecoverableTabletIds.get(dbId).size());

lines.add(oneLine);

Expand Down Expand Up @@ -195,6 +204,7 @@ public ProcResult fetchResult() throws AnalysisException {
finalLine.add(unhealthyTabletIds.size());
finalLine.add(inconsistentTabletIds.size());
finalLine.add(cloningTabletIds.size());
finalLine.add(unrecoverableTabletIds.size());
lines.add(finalLine);

// add result
Expand Down Expand Up @@ -224,7 +234,8 @@ public ProcNodeInterface lookup(String dbIdStr) throws AnalysisException {
}

return new IncompleteTabletsProcNode(unhealthyTabletIds.get(dbId),
inconsistentTabletIds.get(dbId),
cloningTabletIds.get(dbId));
inconsistentTabletIds.get(dbId),
cloningTabletIds.get(dbId),
unrecoverableTabletIds.get(dbId));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ public void start(String dorisHome) {
properties.put("spring.http.encoding.force", true);
properties.put("spring.servlet.multipart.max-file-size", this.maxFileSize);
properties.put("spring.servlet.multipart.max-request-size", this.maxRequestSize);
// Disable the spring-boot-devtools restart feature
// to avoid unexpected behavior.
System.setProperty("spring.devtools.restart.enabled", "false");
properties.put("logging.config", dorisHome + "/conf/" + SpringLog4j2Config.SPRING_LOG_XML_FILE);
new SpringApplicationBuilder()
.sources(HttpServer.class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,8 @@ private static void addReplica(long tabletId, TTabletInfo backendTabletInfo, lon
db.getClusterName(), visibleVersion, visibleVersionHash,
replicationNum, aliveBeIdsInCluster);

if (status.first == TabletStatus.VERSION_INCOMPLETE || status.first == TabletStatus.REPLICA_MISSING) {
if (status.first == TabletStatus.VERSION_INCOMPLETE || status.first == TabletStatus.REPLICA_MISSING
|| status.first == TabletStatus.UNRECOVERABLE) {
long lastFailedVersion = -1L;
long lastFailedVersionHash = 0L;

Expand Down