diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 69d382c7a5c2e7..355683230d51ed 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1532,6 +1532,15 @@ public class Config extends ConfigBase { @ConfField(mutable = true, masterOnly = true) public static int max_backup_restore_job_num_per_db = 10; + /** + * Max number of tablets a single backup job may involve; larger jobs get an error status, to avoid FE OOM. + */ + @ConfField(mutable = true, masterOnly = true, description = { + "用于控制每次 backup job 允许备份的 tablet 上限,以避免 OOM", + "Control the max num of tablets per backup job involved, to avoid OOM" + }) + public static int max_backup_tablets_per_job = 300000; + /** * whether to ignore table that not support type when backup, and not report exception. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java index 6f73334f0c2474..d5010293b4d6ce 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java @@ -523,6 +523,17 @@ private void prepareAndSendSnapshotTask() { } } + // Limit the max num of tablets involved in a backup job, to avoid OOM. + if (unfinishedTaskIds.size() > Config.max_backup_tablets_per_job) { + String msg = String.format("the num involved tablets %d exceeds the limit %d, " + + "which might cause the FE OOM, change config `max_backup_tablets_per_job` " + + "to change this limitation", + unfinishedTaskIds.size(), Config.max_backup_tablets_per_job); + LOG.warn(msg); + status = new Status(ErrCode.COMMON_ERROR, msg); + return; + } + backupMeta = new BackupMeta(copiedTables, copiedResources); // send tasks