From fb6739280abdac6eeb4c88e0a1eb27d9ed33b2ce Mon Sep 17 00:00:00 2001 From: morningman Date: Tue, 4 Aug 2020 23:59:42 +0800 Subject: [PATCH 1/4] fix force recovery --- be/src/agent/task_worker_pool.cpp | 67 ---- be/src/agent/task_worker_pool.h | 1 - be/src/common/config.h | 3 - be/src/olap/storage_engine.cpp | 10 - be/src/olap/storage_engine.h | 2 - be/src/olap/tablet.cpp | 5 - be/src/olap/tablet.h | 5 - .../java/org/apache/doris/common/Config.java | 10 + .../org/apache/doris/master/MasterImpl.java | 22 +- .../apache/doris/master/ReportHandler.java | 361 ++++++++---------- .../apache/doris/task/CreateReplicaTask.java | 15 +- .../apache/doris/task/RecoverTabletTask.java | 44 --- gensrc/thrift/Types.thrift | 2 +- 13 files changed, 205 insertions(+), 342 deletions(-) delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/task/RecoverTabletTask.java diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index ac51aed9e80b7d..2f0ac4d54c9d64 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -168,10 +168,6 @@ void TaskWorkerPool::start() { _worker_count = 1; _callback_function = _move_dir_thread_callback; break; - case TaskWorkerType::RECOVER_TABLET: - _worker_count = 1; - _callback_function = _recover_tablet_thread_callback; - break; case TaskWorkerType::UPDATE_TABLET_META_INFO: _worker_count = 1; _callback_function = _update_tablet_meta_worker_thread_callback; @@ -1041,7 +1037,6 @@ void* TaskWorkerPool::_report_task_worker_thread_callback(void* arg_this) { TaskWorkerPool* worker_pool_this = (TaskWorkerPool*)arg_this; TReportRequest request; - request.__set_force_recovery(config::force_recovery); request.__set_backend(worker_pool_this->_backend); #ifndef BE_TEST @@ -1075,7 +1070,6 @@ void* TaskWorkerPool::_report_disk_state_worker_thread_callback(void* arg_this) TaskWorkerPool* worker_pool_this = (TaskWorkerPool*)arg_this; TReportRequest request; - 
request.__set_force_recovery(config::force_recovery); request.__set_backend(worker_pool_this->_backend); #ifndef BE_TEST @@ -1131,7 +1125,6 @@ void* TaskWorkerPool::_report_tablet_worker_thread_callback(void* arg_this) { TaskWorkerPool* worker_pool_this = (TaskWorkerPool*)arg_this; TReportRequest request; - request.__set_force_recovery(config::force_recovery); request.__set_backend(worker_pool_this->_backend); request.__isset.tablets = true; AgentStatus status = DORIS_SUCCESS; @@ -1552,64 +1545,4 @@ AgentStatus TaskWorkerPool::_move_dir(const TTabletId tablet_id, const TSchemaHa return DORIS_SUCCESS; } -void* TaskWorkerPool::_recover_tablet_thread_callback(void* arg_this) { - TaskWorkerPool* worker_pool_this = (TaskWorkerPool*)arg_this; - - while (true) { - TAgentTaskRequest agent_task_req; - TRecoverTabletReq recover_tablet_req; - { - MutexLock worker_thread_lock(&(worker_pool_this->_worker_thread_lock)); - while (worker_pool_this->_tasks.empty()) { - worker_pool_this->_worker_thread_condition_variable.wait(); - } - - agent_task_req = worker_pool_this->_tasks.front(); - recover_tablet_req = agent_task_req.recover_tablet_req; - worker_pool_this->_tasks.pop_front(); - } - - TStatusCode::type status_code = TStatusCode::OK; - vector error_msgs; - TStatus task_status; - - LOG(INFO) << "begin to recover tablet." - << ", tablet_id:" << recover_tablet_req.tablet_id << "." - << recover_tablet_req.schema_hash << ", version:" << recover_tablet_req.version - << "-" << recover_tablet_req.version_hash; - OLAPStatus status = - worker_pool_this->_env->storage_engine()->recover_tablet_until_specfic_version( - recover_tablet_req); - if (status != OLAP_SUCCESS) { - status_code = TStatusCode::RUNTIME_ERROR; - LOG(WARNING) << "failed to recover tablet." - << "signature:" << agent_task_req.signature - << ", table:" << recover_tablet_req.tablet_id << "." 
- << recover_tablet_req.schema_hash - << ", version:" << recover_tablet_req.version << "-" - << recover_tablet_req.version_hash; - } else { - LOG(WARNING) << "succeed to recover tablet." - << "signature:" << agent_task_req.signature - << ", table:" << recover_tablet_req.tablet_id << "." - << recover_tablet_req.schema_hash - << ", version:" << recover_tablet_req.version << "-" - << recover_tablet_req.version_hash; - } - - task_status.__set_status_code(status_code); - task_status.__set_error_msgs(error_msgs); - - TFinishTaskRequest finish_task_request; - finish_task_request.__set_backend(worker_pool_this->_backend); - finish_task_request.__set_task_type(agent_task_req.task_type); - finish_task_request.__set_signature(agent_task_req.signature); - finish_task_request.__set_task_status(task_status); - - worker_pool_this->_finish_task(finish_task_request); - worker_pool_this->_remove_task_info(agent_task_req.task_type, agent_task_req.signature); - } - return (void*)0; -} - } // namespace doris diff --git a/be/src/agent/task_worker_pool.h b/be/src/agent/task_worker_pool.h index 32b78b9f1b4641..c7500b5f6adde9 100644 --- a/be/src/agent/task_worker_pool.h +++ b/be/src/agent/task_worker_pool.h @@ -109,7 +109,6 @@ class TaskWorkerPool { static void* _make_snapshot_thread_callback(void* arg_this); static void* _release_snapshot_thread_callback(void* arg_this); static void* _move_dir_thread_callback(void* arg_this); - static void* _recover_tablet_thread_callback(void* arg_this); static void* _update_tablet_meta_worker_thread_callback(void* arg_this); void _alter_tablet( diff --git a/be/src/common/config.h b/be/src/common/config.h index beaae0f0e94f77..145d9b38e5ecd3 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -446,9 +446,6 @@ namespace config { // result buffer cancelled time (unit: second) CONF_mInt32(result_buffer_cancelled_interval_time, "300"); - // can perform recovering tablet - CONF_Bool(force_recovery, "false"); - // the increased frequency of 
priority for remaining tasks in BlockingPriorityQueue CONF_mInt32(priority_queue_remaining_tasks_increased_frequency, "512"); diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index b65652b9df5f84..dc90d237876b25 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -832,16 +832,6 @@ OLAPStatus StorageEngine::create_tablet(const TCreateTabletReq& request) { return _tablet_manager->create_tablet(request, stores); } -OLAPStatus StorageEngine::recover_tablet_until_specfic_version( - const TRecoverTabletReq& recover_tablet_req) { - TabletSharedPtr tablet = _tablet_manager->get_tablet(recover_tablet_req.tablet_id, - recover_tablet_req.schema_hash); - if (tablet == nullptr) { return OLAP_ERR_TABLE_NOT_FOUND; } - RETURN_NOT_OK(tablet->recover_tablet_until_specfic_version(recover_tablet_req.version, - recover_tablet_req.version_hash)); - return OLAP_SUCCESS; -} - OLAPStatus StorageEngine::obtain_shard_path( TStorageMedium::type storage_medium, std::string* shard_path, DataDir** store) { LOG(INFO) << "begin to process obtain root path. storage_medium=" << storage_medium; diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index 56ac12fd528c10..f7e9aa384b6208 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -119,8 +119,6 @@ class StorageEngine { void start_delete_unused_rowset(); void add_unused_rowset(RowsetSharedPtr rowset); - OLAPStatus recover_tablet_until_specfic_version(const TRecoverTabletReq& recover_tablet_req); - // Obtain shard path for new tablet. 
// // @param [out] shard_path choose an available root_path to clone new tablet diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 1498bfabf74936..a73c98b6ec3acc 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -639,11 +639,6 @@ OLAPStatus Tablet::set_alter_state(AlterTabletState state) { return _tablet_meta->set_alter_state(state); } -OLAPStatus Tablet::recover_tablet_until_specfic_version(const int64_t& spec_version, - const int64_t& version_hash) { - return OLAP_SUCCESS; -} - bool Tablet::can_do_compaction() { // 如果table正在做schema change,则通过选路判断数据是否转换完成 // 如果选路成功,则转换完成,可以进行compaction diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index 36576158541480..b83e1eddd00544 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -179,11 +179,6 @@ class Tablet : public BaseTablet { uint64_t request_block_row_count, vector* ranges); - // operation for recover tablet - // Deprected, remove it later - OLAPStatus recover_tablet_until_specfic_version(const int64_t& spec_version, - const int64_t& version_hash); - void set_bad(bool is_bad) { _is_bad = is_bad; } int64_t last_cumu_compaction_failure_time() { return _last_cumu_compaction_failure_millis; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java index 26fccc0c42541f..1623c3ed04159f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java @@ -1208,4 +1208,14 @@ public class Config extends ConfigBase { @ConfField(mutable = true, masterOnly = true) public static int max_allowed_in_element_num_of_delete = 1024; + /** + * In some cases, some tablets may have all replicas damaged or lost. + * At this time, the data has been lost, and the damaged tablets + * will cause the entire query to fail, and the remaining healthy tablets cannot be queried. + * In this case, you can set this configuration to true. 
+ * The system will replace damaged tablets with empty tablets to ensure that the query + * can be executed. (but at this time the data has been lost, so the query results may be inaccurate) + */ + @ConfField(mutable = true, masterOnly = true) + public static boolean recover_with_empty_tablet = false; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java index 46a90edb88cb71..45c5033bc85d65 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java @@ -36,6 +36,7 @@ import org.apache.doris.catalog.TabletMeta; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.load.AsyncDeleteJob; +import org.apache.doris.load.DeleteJob; import org.apache.doris.load.LoadJob; import org.apache.doris.load.loadv2.SparkLoadJob; import org.apache.doris.persist.ReplicaPersistInfo; @@ -48,7 +49,6 @@ import org.apache.doris.task.CloneTask; import org.apache.doris.task.CreateReplicaTask; import org.apache.doris.task.CreateRollupTask; -import org.apache.doris.load.DeleteJob; import org.apache.doris.task.DirMoveTask; import org.apache.doris.task.DownloadTask; import org.apache.doris.task.PublishVersionTask; @@ -68,12 +68,12 @@ import org.apache.doris.thrift.TTabletInfo; import org.apache.doris.thrift.TTaskType; -import com.google.common.base.Preconditions; - import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.thrift.TException; +import com.google.common.base.Preconditions; + import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -248,9 +248,19 @@ private void finishCreateReplica(AgentTask task, TFinishTaskRequest request) { long tabletId = createReplicaTask.getTabletId(); if (request.isSetFinish_tablet_infos()) { - Replica replica = Catalog.getCurrentInvertedIndex().getReplica(createReplicaTask.getTabletId(), - 
createReplicaTask.getBackendId()); - replica.setPathHash(request.getFinish_tablet_infos().get(0).getPath_hash()); + Replica replica = Catalog.getCurrentInvertedIndex().getReplica(createReplicaTask.getTabletId(), + createReplicaTask.getBackendId()); + replica.setPathHash(request.getFinish_tablet_infos().get(0).getPath_hash()); + + if (createReplicaTask.isRecoverTask()) { + /** + * This create replica task may be generated by recovery(See comment of Config.recover_with_empty_tablet) + * So we set replica back to good. + */ + replica.setBad(false); + LOG.info("finish recover create replica task. set replica to good. tablet {}, replica {}, backend {}", + tabletId, task.getBackendId(), replica.getId()); + } } // this should be called before 'countDownLatch()' diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index 580bb1fde2f088..30624e46812aaf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -17,9 +17,6 @@ package org.apache.doris.master; -import com.google.common.collect.Sets; -import org.apache.commons.lang3.tuple.ImmutableTriple; -import org.apache.commons.lang3.tuple.Triple; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; @@ -57,7 +54,6 @@ import org.apache.doris.task.MasterTask; import org.apache.doris.task.PublishVersionTask; import org.apache.doris.task.PushTask; -import org.apache.doris.task.RecoverTabletTask; import org.apache.doris.task.StorageMediaMigrationTask; import org.apache.doris.task.UpdateTabletMetaInfoTask; import org.apache.doris.thrift.TBackend; @@ -71,20 +67,23 @@ import org.apache.doris.thrift.TStorageMedium; import org.apache.doris.thrift.TStorageType; import org.apache.doris.thrift.TTablet; -import org.apache.doris.thrift.TTabletMetaType; import 
org.apache.doris.thrift.TTabletInfo; +import org.apache.doris.thrift.TTabletMetaType; import org.apache.doris.thrift.TTaskType; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.tuple.ImmutableTriple; +import org.apache.commons.lang3.tuple.Triple; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.thrift.TException; + import com.google.common.collect.LinkedListMultimap; import com.google.common.collect.ListMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Queues; - -import org.apache.commons.lang.StringUtils; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.thrift.TException; +import com.google.common.collect.Sets; import java.util.HashMap; import java.util.HashSet; @@ -113,7 +112,7 @@ public TMasterResult handleReport(TReportRequest request) throws TException { TMasterResult result = new TMasterResult(); TStatus tStatus = new TStatus(TStatusCode.OK); result.setStatus(tStatus); - + // get backend TBackend tBackend = request.getBackend(); String host = tBackend.getHost(); @@ -126,12 +125,11 @@ public TMasterResult handleReport(TReportRequest request) throws TException { tStatus.setError_msgs(errorMsgs); return result; } - + long beId = backend.getId(); Map> tasks = null; Map disks = null; Map tablets = null; - boolean forceRecovery = false; long reportVersion = -1; String reportType = ""; @@ -139,12 +137,12 @@ public TMasterResult handleReport(TReportRequest request) throws TException { tasks = request.getTasks(); reportType += "task"; } - + if (request.isSetDisks()) { disks = request.getDisks(); reportType += "disk"; } - + if (request.isSetTablets()) { tablets = request.getTablets(); reportVersion = request.getReport_version(); @@ -155,16 +153,12 @@ public TMasterResult handleReport(TReportRequest request) throws TException { reportVersion = 
request.getReport_version(); reportType += "tablet"; } - - if (request.isSetForce_recovery()) { - forceRecovery = request.isForce_recovery(); - } - + if (request.isSetTablet_max_compaction_score()) { backend.setTabletMaxCompactionScore(request.getTablet_max_compaction_score()); } - ReportTask reportTask = new ReportTask(beId, tasks, disks, tablets, reportVersion, forceRecovery); + ReportTask reportTask = new ReportTask(beId, tasks, disks, tablets, reportVersion); try { putToQueue(reportTask); } catch (Exception e) { @@ -175,7 +169,7 @@ public TMasterResult handleReport(TReportRequest request) throws TException { tStatus.setError_msgs(errorMsgs); return result; } - + LOG.info("receive report from be {}. type: {}, current queue size: {}", backend.getId(), reportType, reportQueue.size()); return result; @@ -184,9 +178,9 @@ public TMasterResult handleReport(TReportRequest request) throws TException { private void putToQueue(ReportTask reportTask) throws Exception { int currentSize = reportQueue.size(); if (currentSize > Config.report_queue_size) { - LOG.warn("the report queue size exceeds the limit: {}. current: {}", Config.report_queue_size, currentSize); + LOG.warn("the report queue size exceeds the limit: {}. current: {}", Config.report_queue_size, currentSize); throw new Exception( - "the report queue size exceeds the limit: " + Config.report_queue_size + ". current: " + currentSize); + "the report queue size exceeds the limit: " + Config.report_queue_size + ". 
current: " + currentSize); } reportQueue.put(reportTask); } @@ -210,18 +204,15 @@ private class ReportTask extends MasterTask { private Map disks; private Map tablets; private long reportVersion; - private boolean forceRecovery = false; public ReportTask(long beId, Map> tasks, - Map disks, - Map tablets, long reportVersion, - boolean forceRecovery) { + Map disks, + Map tablets, long reportVersion) { this.beId = beId; this.tasks = tasks; this.disks = disks; this.tablets = tablets; this.reportVersion = reportVersion; - this.forceRecovery = forceRecovery; } @Override @@ -236,19 +227,18 @@ protected void exec() { long backendReportVersion = Catalog.getCurrentSystemInfo().getBackendReportVersion(beId); if (reportVersion < backendReportVersion) { LOG.warn("out of date report version {} from backend[{}]. current report version[{}]", - reportVersion, beId, backendReportVersion); + reportVersion, beId, backendReportVersion); } else { - ReportHandler.tabletReport(beId, tablets, reportVersion, forceRecovery); + ReportHandler.tabletReport(beId, tablets, reportVersion); } } } } - private static void tabletReport(long backendId, Map backendTablets, long backendReportVersion, - boolean forceRecovery) { + private static void tabletReport(long backendId, Map backendTablets, long backendReportVersion) { long start = System.currentTimeMillis(); LOG.info("backend[{}] reports {} tablet(s). 
report version: {}", - backendId, backendTablets.size(), backendReportVersion); + backendId, backendTablets.size(), backendReportVersion); // storage medium map HashMap storageMediumMap = Catalog.getCurrentCatalog().getPartitionIdToStorageMediumMap(); @@ -267,7 +257,7 @@ private static void tabletReport(long backendId, Map backendTable // dbid -> txn id -> [partition info] Map> transactionsToPublish = Maps.newHashMap(); ListMultimap transactionsToClear = LinkedListMultimap.create(); - + // db id -> tablet id ListMultimap tabletRecoveryMap = LinkedListMultimap.create(); @@ -275,38 +265,38 @@ private static void tabletReport(long backendId, Map backendTable // 1. do the diff. find out (intersection) / (be - meta) / (meta - be) Catalog.getCurrentInvertedIndex().tabletReport(backendId, backendTablets, storageMediumMap, - tabletSyncMap, - tabletDeleteFromMeta, - foundTabletsWithValidSchema, - foundTabletsWithInvalidSchema, - tabletMigrationMap, - transactionsToPublish, - transactionsToClear, - tabletRecoveryMap, - tabletWithoutPartitionId); + tabletSyncMap, + tabletDeleteFromMeta, + foundTabletsWithValidSchema, + foundTabletsWithInvalidSchema, + tabletMigrationMap, + transactionsToPublish, + transactionsToClear, + tabletRecoveryMap, + tabletWithoutPartitionId); // 2. sync sync(backendTablets, tabletSyncMap, backendId, backendReportVersion); // 3. delete (meta - be) // BE will automatically drop defective tablets. these tablets should also be dropped in catalog - deleteFromMeta(tabletDeleteFromMeta, backendId, backendReportVersion, forceRecovery); - + deleteFromMeta(tabletDeleteFromMeta, backendId, backendReportVersion); + // 4. handle (be - meta) deleteFromBackend(backendTablets, foundTabletsWithValidSchema, foundTabletsWithInvalidSchema, backendId); - + // 5. migration (ssd <-> hdd) handleMigration(tabletMigrationMap, backendId); - + // 6. send clear transactions to be handleClearTransactions(transactionsToClear, backendId); - + // 7. 
send publish version request to be handleRepublishVersionInfo(transactionsToPublish, backendId); - + // 8. send recover request to be - handleRecoverTablet(tabletRecoveryMap, backendTablets, backendId, forceRecovery); - + handleRecoverTablet(tabletRecoveryMap, backendTablets, backendId); + // 9. send set tablet partition info to be handleSetTabletPartitionId(backendId, tabletWithoutPartitionId); @@ -366,7 +356,7 @@ private static void taskReport(long backendId, Map> running AgentTaskExecutor.submit(batchTask); } LOG.info("finished to handle task report from backend {}, diff task num: {}. cost: {} ms", - backendId, batchTask.getTaskNum(), (System.currentTimeMillis() - start)); + backendId, batchTask.getTaskNum(), (System.currentTimeMillis() - start)); } private static void diskReport(long backendId, Map backendDisks) { @@ -377,10 +367,10 @@ private static void diskReport(long backendId, Map backendDisks) LOG.warn("backend doesn't exist. id: " + backendId); return; } - + backend.updateDisks(backendDisks); LOG.info("finished to handle disk report from backend {}, cost: {} ms", - backendId, (System.currentTimeMillis() - start)); + backendId, (System.currentTimeMillis() - start)); } private static void sync(Map backendTablets, ListMultimap tabletSyncMap, @@ -396,7 +386,7 @@ private static void sync(Map backendTablets, ListMultimap tabletIds = tabletSyncMap.get(dbId); LOG.info("before sync tablets in db[{}]. report num: {}. 
backend[{}]", - dbId, tabletIds.size(), backendId); + dbId, tabletIds.size(), backendId); List tabletMetaList = invertedIndex.getTabletMetaList(tabletIds); for (int i = 0; i < tabletMetaList.size(); i++) { TabletMeta tabletMeta = tabletMetaList.get(i); @@ -463,7 +453,7 @@ private static void sync(Map backendTablets, ListMultimap backendTablets, ListMultimap backendTablets, ListMultimap tabletDeleteFromMeta, long backendId, - long backendReportVersion, boolean forceRecovery) { + long backendReportVersion) { AgentBatchTask createReplicaBatchTask = new AgentBatchTask(); TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); for (Long dbId : tabletDeleteFromMeta.keySet()) { @@ -532,7 +522,7 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta continue; } long tabletId = tabletIds.get(i); - long tableId = tabletMeta.getTableId(); + long tableId = tabletMeta.getTableId(); OlapTable olapTable = (OlapTable) db.getTable(tableId); if (olapTable == null) { continue; @@ -566,7 +556,7 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta if (replica == null) { continue; } - + // check report version again long currentBackendReportVersion = Catalog.getCurrentSystemInfo().getBackendReportVersion(backendId); if (backendReportVersion < currentBackendReportVersion) { @@ -580,17 +570,17 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta // so we do not delete it. List replicas = tablet.getReplicas(); if (replicas.size() <= 1) { - LOG.error("backend [{}] invalid situation. tablet[{}] has few replica[{}], " - + "replica num setting is [{}]", + LOG.error("backend [{}] invalid situation. tablet[{}] has few replica[{}], " + + "replica num setting is [{}]", backendId, tabletId, replicas.size(), replicationNum); // there is a replica in FE, but not in BE and there is only one replica in this tablet // in this case, it means data is lost. // should generate a create replica request to BE to create a replica forcibly. 
if (replicas.size() == 1) { - if (forceRecovery) { + if (Config.recover_with_empty_tablet) { // only create this task if force recovery is true LOG.warn("tablet {} has only one replica {} on backend {}" - + "and it is lost. create an empty replica to recover it", + + " and it is lost. create an empty replica to recover it", tabletId, replica.getId(), backendId); MaterializedIndexMeta indexMeta = olapTable.getIndexMetaByIndexId(indexId); Set bfColumns = olapTable.getCopiedBfColumns(); @@ -604,12 +594,13 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta olapTable.getCopiedIndexes(), olapTable.isInMemory(), olapTable.getPartitionInfo().getTabletType(partitionId)); + createReplicaTask.setIsRecoverTask(true); createReplicaBatchTask.addTask(createReplicaTask); } else { // just set this replica as bad if (replica.setBad(true)) { LOG.warn("tablet {} has only one replica {} on backend {}" - + "and it is lost, set it as bad", + + " and it is lost, set it as bad", tabletId, replica.getId(), backendId); BackendTabletsInfo tabletsInfo = new BackendTabletsInfo(backendId); tabletsInfo.setBad(true); @@ -617,7 +608,6 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta olapTable.getSchemaHashByIndexId(indexId)); Catalog.getCurrentCatalog().getEditLog().logBackendTabletsInfo(tabletsInfo); } - } } continue; @@ -625,20 +615,20 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta tablet.deleteReplicaByBackendId(backendId); ++deleteCounter; - + // remove replica related tasks AgentTaskQueue.removeReplicaRelatedTasks(backendId, tabletId); // write edit log ReplicaPersistInfo info = ReplicaPersistInfo.createForDelete(dbId, tableId, partitionId, - indexId, tabletId, backendId); + indexId, tabletId, backendId); Catalog.getCurrentCatalog().getEditLog().logDeleteReplica(info); LOG.warn("delete replica[{}] in tablet[{}] from meta. 
backend[{}], report version: {}" - + ", current report version: {}", + + ", current report version: {}", replica.getId(), tabletId, backendId, backendReportVersion, currentBackendReportVersion); - + // check for clone replicas = tablet.getReplicas(); if (replicas.size() == 0) { @@ -652,7 +642,11 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta } } // end for dbs - if (forceRecovery && createReplicaBatchTask.getTaskNum() > 0) { + if (Config.recover_with_empty_tablet && createReplicaBatchTask.getTaskNum() > 0) { + // must add to queue, so that when task finish report, the task can be found in queue. + // the task will be eventually removed from queue by task report, so no need to worry + // about the residuals. + AgentTaskQueue.addBatchTask(createReplicaBatchTask); AgentTaskExecutor.submit(createReplicaBatchTask); } } @@ -709,7 +703,7 @@ private static void deleteFromBackend(Map backendTablets, } } // end for backendTabletIds AgentTaskExecutor.submit(batchTask); - + LOG.info("delete {} tablet(s) from backend[{}]", deleteFromBackendCounter, backendId); LOG.info("add {} replica(s) to meta. 
backend[{}]", addToMetaCounter, backendId); } @@ -746,8 +740,8 @@ private static void handleMigration(ListMultimap tabletMet AgentTaskExecutor.submit(batchTask); } - private static void handleRepublishVersionInfo(Map> transactionsToPublish, - long backendId) { + private static void handleRepublishVersionInfo(Map> transactionsToPublish, + long backendId) { AgentBatchTask batchTask = new AgentBatchTask(); long createPublishVersionTaskTime = System.currentTimeMillis(); for (Long dbId : transactionsToPublish.keySet()) { @@ -761,9 +755,9 @@ private static void handleRepublishVersionInfo(Map tabletRecoveryMap, - Map backendTablets, long backendId, boolean forceRecovery) { + Map backendTablets, long backendId) { if (tabletRecoveryMap.isEmpty()) { return; } @@ -773,130 +767,105 @@ private static void handleRecoverTablet(ListMultimap tabletRecoveryM tabletRecoveryMap.size(), backendId); TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); - if (!forceRecovery) { - LOG.warn("force recovery is disable. 
try reset the tablets' version" - + " or set it as bad, and waiting clone"); - - BackendTabletsInfo backendTabletsInfo = new BackendTabletsInfo(backendId); - backendTabletsInfo.setBad(true); - for (Long dbId : tabletRecoveryMap.keySet()) { - Database db = Catalog.getCurrentCatalog().getDb(dbId); - if (db == null) { - continue; - } - db.writeLock(); - try { - List tabletIds = tabletRecoveryMap.get(dbId); - List tabletMetaList = invertedIndex.getTabletMetaList(tabletIds); - for (int i = 0; i < tabletMetaList.size(); i++) { - TabletMeta tabletMeta = tabletMetaList.get(i); - if (tabletMeta == TabletInvertedIndex.NOT_EXIST_TABLET_META) { - continue; - } - long tabletId = tabletIds.get(i); - long tableId = tabletMeta.getTableId(); - OlapTable olapTable = (OlapTable) db.getTable(tableId); - if (olapTable == null) { - continue; - } + BackendTabletsInfo backendTabletsInfo = new BackendTabletsInfo(backendId); + backendTabletsInfo.setBad(true); + for (Long dbId : tabletRecoveryMap.keySet()) { + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db == null) { + continue; + } + db.writeLock(); + try { + List tabletIds = tabletRecoveryMap.get(dbId); + List tabletMetaList = invertedIndex.getTabletMetaList(tabletIds); + for (int i = 0; i < tabletMetaList.size(); i++) { + TabletMeta tabletMeta = tabletMetaList.get(i); + if (tabletMeta == TabletInvertedIndex.NOT_EXIST_TABLET_META) { + continue; + } + long tabletId = tabletIds.get(i); + long tableId = tabletMeta.getTableId(); + OlapTable olapTable = (OlapTable) db.getTable(tableId); + if (olapTable == null) { + continue; + } - long partitionId = tabletMeta.getPartitionId(); - Partition partition = olapTable.getPartition(partitionId); - if (partition == null) { - continue; - } + long partitionId = tabletMeta.getPartitionId(); + Partition partition = olapTable.getPartition(partitionId); + if (partition == null) { + continue; + } - long indexId = tabletMeta.getIndexId(); - MaterializedIndex index = partition.getIndex(indexId); 
- if (index == null) { - continue; - } + long indexId = tabletMeta.getIndexId(); + MaterializedIndex index = partition.getIndex(indexId); + if (index == null) { + continue; + } - int schemaHash = olapTable.getSchemaHashByIndexId(indexId); + int schemaHash = olapTable.getSchemaHashByIndexId(indexId); - Tablet tablet = index.getTablet(tabletId); - if (tablet == null) { - continue; - } + Tablet tablet = index.getTablet(tabletId); + if (tablet == null) { + continue; + } - Replica replica = tablet.getReplicaByBackendId(backendId); - if (replica == null) { - continue; - } + Replica replica = tablet.getReplicaByBackendId(backendId); + if (replica == null) { + continue; + } - for (TTabletInfo tTabletInfo : backendTablets.get(tabletId).getTablet_infos()) { - if (tTabletInfo.getSchema_hash() == schemaHash) { - if (tTabletInfo.isSetUsed() && !tTabletInfo.isUsed()) { - if (replica.setBad(true)) { - LOG.warn("set bad for replica {} of tablet {} on backend {}", - replica.getId(), tabletId, backendId); - backendTabletsInfo.addTabletWithSchemaHash(tabletId, schemaHash); - } - break; + for (TTabletInfo tTabletInfo : backendTablets.get(tabletId).getTablet_infos()) { + if (tTabletInfo.getSchema_hash() == schemaHash) { + if (tTabletInfo.isSetUsed() && !tTabletInfo.isUsed()) { + if (replica.setBad(true)) { + LOG.warn("set bad for replica {} of tablet {} on backend {}", + replica.getId(), tabletId, backendId); + backendTabletsInfo.addTabletWithSchemaHash(tabletId, schemaHash); } + break; + } - if (replica.getVersion() > tTabletInfo.getVersion()) { - LOG.warn("recover for replica {} of tablet {} on backend {}", - replica.getId(), tabletId, backendId); - if (replica.getVersion() == tTabletInfo.getVersion() + 1) { - // this missing version is the last version of this replica - replica.updateVersionInfoForRecovery( - tTabletInfo.getVersion(), /* set version to BE report version */ - -1, /* BE report version hash is meaningless here */ - replica.getVersion(), /* set LFV to current FE version 
*/ - replica.getVersionHash(), /* set LFV hash to current FE version hash */ - tTabletInfo.getVersion(), /* set LSV to BE report version */ - -1 /* LSV hash is unknown */); - } else { - // this missing version is a hole - replica.updateVersionInfoForRecovery( - tTabletInfo.getVersion(), /* set version to BE report version */ - -1, /* BE report version hash is meaningless here */ - tTabletInfo.getVersion() + 1, /* LFV */ - -1, /* LFV hash is unknown */ - /* remain LSV unchanged, which should be equal to replica.version */ - replica.getLastSuccessVersion(), - replica.getLastSuccessVersionHash()); - } - // no need to write edit log, if FE crashed, this will be recovered again - break; + if (replica.getVersion() > tTabletInfo.getVersion()) { + LOG.warn("recover for replica {} of tablet {} on backend {}", + replica.getId(), tabletId, backendId); + if (replica.getVersion() == tTabletInfo.getVersion() + 1) { + // this missing version is the last version of this replica + replica.updateVersionInfoForRecovery( + tTabletInfo.getVersion(), /* set version to BE report version */ + -1, /* BE report version hash is meaningless here */ + replica.getVersion(), /* set LFV to current FE version */ + replica.getVersionHash(), /* set LFV hash to current FE version hash */ + tTabletInfo.getVersion(), /* set LSV to BE report version */ + -1 /* LSV hash is unknown */); + } else { + // this missing version is a hole + replica.updateVersionInfoForRecovery( + tTabletInfo.getVersion(), /* set version to BE report version */ + -1, /* BE report version hash is meaningless here */ + tTabletInfo.getVersion() + 1, /* LFV */ + -1, /* LFV hash is unknown */ + /* remain LSV unchanged, which should be equal to replica.version */ + replica.getLastSuccessVersion(), + replica.getLastSuccessVersionHash()); } + // no need to write edit log, if FE crashed, this will be recovered again + break; } } } - } finally { - db.writeUnlock(); } - } // end for recovery map - - if (!backendTabletsInfo.isEmpty()) { - 
// need to write edit log the sync the bad info to other FEs - Catalog.getCurrentCatalog().getEditLog().logBackendTabletsInfo(backendTabletsInfo); - } - } else { - LOG.warn("force recovery is enable. use recovery tablet task to recover"); - AgentBatchTask batchTask = new AgentBatchTask(); - for (long tabletId : tabletRecoveryMap.values()) { - Replica replica = invertedIndex.getReplica(tabletId, backendId); - - RecoverTabletTask recoverTask = new RecoverTabletTask(backendId, - tabletId, replica.getVersion(), replica.getVersionHash(), - backendTablets.get(tabletId).getTablet_infos().get(0).getSchema_hash()); - LOG.warn("recover replica {} of tablet {} on backend {}, schema hash: {}" - + ", version: {}-{}", - replica.getId(), tabletId, backendId, - backendTablets.get(tabletId).getTablet_infos().get(0).getSchema_hash(), - replica.getVersion(), replica.getVersionHash()); - - batchTask.addTask(recoverTask); - AgentTaskQueue.addTask(recoverTask); + } finally { + db.writeUnlock(); } + } // end for recovery map - AgentTaskExecutor.submit(batchTask); + if (!backendTabletsInfo.isEmpty()) { + // need to write edit log the sync the bad info to other FEs + Catalog.getCurrentCatalog().getEditLog().logBackendTabletsInfo(backendTabletsInfo); } } - private static void handleSetTabletPartitionId(long backendId, Set>tabletWithoutPartitionId) { + private static void handleSetTabletPartitionId(long backendId, Set> tabletWithoutPartitionId) { LOG.info("find [{}] tablets without partition id, try to set them", tabletWithoutPartitionId.size()); if (tabletWithoutPartitionId.size() < 1) { return; @@ -958,15 +927,15 @@ private static void handleSetTabletInMemory(long backendId, Map b AgentTaskExecutor.submit(batchTask); } } - + private static void handleClearTransactions(ListMultimap transactionsToClear, long backendId) { AgentBatchTask batchTask = new AgentBatchTask(); for (Long transactionId : transactionsToClear.keySet()) { - ClearTransactionTask clearTransactionTask = new 
ClearTransactionTask(backendId, + ClearTransactionTask clearTransactionTask = new ClearTransactionTask(backendId, transactionId, transactionsToClear.get(transactionId)); batchTask.addTask(clearTransactionTask); } - + AgentTaskExecutor.submit(batchTask); } @@ -980,7 +949,7 @@ private static void addReplica(long tabletId, TTabletInfo backendTabletInfo, lon long tableId = tabletMeta != null ? tabletMeta.getTableId() : TabletInvertedIndex.NOT_EXIST_VALUE; long partitionId = tabletMeta != null ? tabletMeta.getPartitionId() : TabletInvertedIndex.NOT_EXIST_VALUE; long indexId = tabletMeta != null ? tabletMeta.getIndexId() : TabletInvertedIndex.NOT_EXIST_VALUE; - + int schemaHash = backendTabletInfo.getSchema_hash(); long version = backendTabletInfo.getVersion(); long versionHash = backendTabletInfo.getVersion_hash(); @@ -1022,7 +991,7 @@ private static void addReplica(long tabletId, TTabletInfo backendTabletInfo, lon throw new MetaNotFoundException("version is invalid. tablet[" + version + "-" + versionHash + "]" + ", visible[" + visibleVersion + "-" + visibleVersionHash + "]"); } - + // check schema hash if (schemaHash != olapTable.getSchemaHashByIndexId(indexId)) { throw new MetaNotFoundException("schema hash is diff[" + schemaHash + "-" @@ -1039,16 +1008,16 @@ private static void addReplica(long tabletId, TTabletInfo backendTabletInfo, lon Pair status = tablet.getHealthStatusWithPriority(infoService, db.getClusterName(), visibleVersion, visibleVersionHash, replicationNum, aliveBeIdsInCluster); - + if (status.first == TabletStatus.VERSION_INCOMPLETE || status.first == TabletStatus.REPLICA_MISSING) { long lastFailedVersion = -1L; long lastFailedVersionHash = 0L; boolean initPartitionCreateByOldVersionDoris = partition.getVisibleVersion() == Partition.PARTITION_INIT_VERSION && - partition.getVisibleVersionHash() == Partition.PARTITION_INIT_VERSION_HASH && - version == 2 && - versionHash == 0; + partition.getVisibleVersionHash() == Partition.PARTITION_INIT_VERSION_HASH && 
+ version == 2 && + versionHash == 0; if (initPartitionCreateByOldVersionDoris) { // For some partition created by old version's Doris @@ -1066,10 +1035,10 @@ private static void addReplica(long tabletId, TTabletInfo backendTabletInfo, lon long replicaId = Catalog.getCurrentCatalog().getNextId(); Replica replica = new Replica(replicaId, backendId, version, versionHash, schemaHash, - dataSize, rowCount, ReplicaState.NORMAL, - lastFailedVersion, lastFailedVersionHash, version, versionHash); + dataSize, rowCount, ReplicaState.NORMAL, + lastFailedVersion, lastFailedVersionHash, version, versionHash); tablet.addReplica(replica); - + // write edit log ReplicaPersistInfo info = ReplicaPersistInfo.createForAdd(dbId, tableId, partitionId, indexId, tabletId, backendId, replicaId, diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java index 3303ca752f81ab..911c897da5627f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java @@ -27,12 +27,12 @@ import org.apache.doris.thrift.TCreateTabletReq; import org.apache.doris.thrift.TOlapTableIndex; import org.apache.doris.thrift.TStatusCode; +import org.apache.doris.thrift.TStorageFormat; import org.apache.doris.thrift.TStorageMedium; import org.apache.doris.thrift.TStorageType; import org.apache.doris.thrift.TTabletSchema; import org.apache.doris.thrift.TTabletType; import org.apache.doris.thrift.TTaskType; -import org.apache.doris.thrift.TStorageFormat; import org.apache.commons.collections.CollectionUtils; import org.apache.logging.log4j.LogManager; @@ -79,6 +79,9 @@ public class CreateReplicaTask extends AgentTask { private TStorageFormat storageFormat = null; + // true if this task is created by recover request(See comment of Config.recover_with_empty_tablet) + private boolean isRecoverTask = false; + public 
CreateReplicaTask(long backendId, long dbId, long tableId, long partitionId, long indexId, long tabletId, short shortKeyColumnCount, int schemaHash, long version, long versionHash, KeysType keysType, TStorageType storageType, @@ -110,7 +113,15 @@ public CreateReplicaTask(long backendId, long dbId, long tableId, long partition this.isInMemory = isInMemory; this.tabletType = tabletType; } - + + public void setIsRecoverTask(boolean isRecoverTask) { + this.isRecoverTask = isRecoverTask; + } + + public boolean isRecoverTask() { + return isRecoverTask; + } + public void countDownLatch(long backendId, long tabletId) { if (this.latch != null) { if (latch.markedCountDown(backendId, tabletId)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/RecoverTabletTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/RecoverTabletTask.java deleted file mode 100644 index eb2e12aebeafd8..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/task/RecoverTabletTask.java +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.task; - -import org.apache.doris.thrift.TRecoverTabletReq; -import org.apache.doris.thrift.TTaskType; - -public class RecoverTabletTask extends AgentTask { - - private long version; - private long versionHash; - private int schemaHash; - - public RecoverTabletTask(long backendId, long tabletId, long version, long versionHash, int schemaHash) { - super(null, backendId, TTaskType.RECOVER_TABLET, -1L, -1L, -1L, -1L, tabletId, tabletId); - this.version = version; - this.versionHash = versionHash; - this.schemaHash = schemaHash; - } - - public TRecoverTabletReq toThrift() { - TRecoverTabletReq recoverTabletReq = new TRecoverTabletReq(); - recoverTabletReq.setTablet_id(tabletId); - recoverTabletReq.setVersion(version); - recoverTabletReq.setVersion_hash(versionHash); - recoverTabletReq.setSchema_hash(schemaHash); - return recoverTabletReq; - } -} \ No newline at end of file diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index abdd52d7f448f0..837616475e2658 100644 --- a/gensrc/thrift/Types.thrift +++ b/gensrc/thrift/Types.thrift @@ -163,7 +163,7 @@ enum TTaskType { PUBLISH_VERSION, CLEAR_ALTER_TASK, CLEAR_TRANSACTION_TASK, - RECOVER_TABLET, + RECOVER_TABLET, // deprecated STREAM_LOAD, UPDATE_TABLET_META_INFO, // this type of task will replace both ROLLUP and SCHEMA_CHANGE From d1b2aade19ac930e428d3f0cd53a63ffc11f965f Mon Sep 17 00:00:00 2001 From: morningman Date: Wed, 5 Aug 2020 10:52:45 +0800 Subject: [PATCH 2/4] add doc --- docs/.vuepress/sidebar/en.js | 1 + .../operation/tablet-restore-tool.md | 110 ++++++++++++------ 2 files changed, 77 insertions(+), 34 deletions(-) diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index e24b6075d0c495..a9421dd22abb23 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -95,6 +95,7 @@ module.exports = [ "multi-tenant", "tablet-meta-tool", "tablet-repair-and-balance", + "tablet-restore-tool", { title: "Metrics", directoryPath: 
"monitor-metrics/", diff --git a/docs/zh-CN/administrator-guide/operation/tablet-restore-tool.md b/docs/zh-CN/administrator-guide/operation/tablet-restore-tool.md index 784366361dd09b..da7270a95b3d0c 100644 --- a/docs/zh-CN/administrator-guide/operation/tablet-restore-tool.md +++ b/docs/zh-CN/administrator-guide/operation/tablet-restore-tool.md @@ -1,6 +1,6 @@ --- { - "title": "BE Tablet数据恢复工具", + "title": "Tablet 恢复工具", "language": "zh-CN" } --- @@ -24,9 +24,9 @@ specific language governing permissions and limitations under the License. --> -# BE Tablet数据恢复工具 +# Tablet 恢复工具 -## 背景 +## 从 BE 回收站中恢复数据 用户在使用Doris的过程中,可能会发生因为一些误操作或者线上bug,导致一些有效的tablet被删除(包括元数据和数据)。为了防止在这些异常情况出现数据丢失,Doris提供了回收站机制,来保护用户数据。用户删除的tablet数据不会被直接删除,会被放在回收站中存储一段时间,在一段时间之后会有定时清理机制将过期的数据删除。回收站中的数据包括:tablet的data文件(.dat),tablet的索引文件(.idx)和tablet的元数据文件(.hdr)。数据将会存放在如下格式的路径: @@ -45,56 +45,98 @@ BE提供http接口和 `restore_tablet_tool.sh` 脚本实现这个功能,支持 * 在single mode下,支持单个tablet的数据恢复。 * 在batch mode下,支持批量tablet的数据恢复。 -## 操作 +### 操作 + +#### single mode + +1. http请求方式 + + BE中提供单个tablet数据恢复的http接口,接口如下: + + ``` + curl -X POST "http://be_host:be_webserver_port/api/restore_tablet?tablet_id=11111\&schema_hash=12345" + ``` + + 成功的结果如下: + + ``` + {"status": "Success", "msg": "OK"} + ``` + + 失败的话,会返回相应的失败原因,一种可能的结果如下: + + ``` + {"status": "Failed", "msg": "create link path failed"} + ``` + +2. 
脚本方式 + + `restore_tablet_tool.sh` 可用来实现单tablet数据恢复的功能。 + + ``` + sh tools/restore_tablet_tool.sh -b "http://127.0.0.1:8040" -t 12345 -s 11111 + sh tools/restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --tablet_id 12345 --schema_hash 11111 + ``` + +#### batch mode -### single mode +批量恢复模式用于实现恢复多个tablet数据的功能。 -#### http请求方式 +使用的时候需要预先将恢复的tablet id和schema hash按照逗号分隔的格式放在一个文件中,一个tablet一行。 -BE中提供单个tablet数据恢复的http接口,接口如下: +格式如下: ``` -curl -X POST "http://be_host:be_webserver_port/api/restore_tablet?tablet_id=11111\&schema_hash=12345" +12345,11111 +12346,11111 +12347,11111 ``` -成功的结果如下: +然后使用如下的命令进行恢复(假设文件名为:`tablets.txt`): ``` -{"status": "Success", "msg": "OK"} +sh restore_tablet_tool.sh -b "http://127.0.0.1:8040" -f tablets.txt +sh restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --file tablets.txt ``` -失败的话,会返回相应的失败原因,一种可能的结果如下: +## 修复缺失或损坏的 Tablet -``` -{"status": "Failed", "msg": "create link path failed"} -``` +在某些极特殊情况下,如代码BUG、或人为误操作等,可能导致部分分片的全部副本都丢失。这种情况下,数据已经实质性的丢失。但是在某些场景下,业务依然希望能够在即使有数据丢失的情况下,保证查询正常不报错,降低用户层的感知程度。此时,我们可以通过使用空白Tablet填充丢失副本的功能,来保证查询能够正常执行。 -#### 脚本方式 +**注:该操作仅用于规避查询因无法找到可查询副本导致报错的问题,无法恢复已经实质性丢失的数据** -`restore_tablet_tool.sh` 可用来实现单tablet数据恢复的功能。 +1. 查看 Master FE 日志 `fe.log` -``` -sh tools/restore_tablet_tool.sh -b "http://127.0.0.1:8040" -t 12345 -s 11111 -sh tools/restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --tablet_id 12345 --schema_hash 11111 -``` + 如果出现数据丢失的情况,则日志中会有类似如下日志: + + ``` + backend [10001] invalid situation. tablet[20000] has few replica[1], replica num setting is [3] + ``` -### batch mode + 这个日志表示,Tablet 20000 的所有副本已损坏或丢失。 + +2. 使用空白副本填补缺失副本 -批量恢复模式用于实现恢复多个tablet数据的功能。 + 当确认数据已经无法恢复后,可以通过执行以下命令,生成空白副本。 + + ``` + ADMIN SET FRONTEND CONFIG ("recover_with_empty_tablet" = "true"); + ``` -使用的时候需要预先将恢复的tablet id和schema hash按照逗号分隔的格式放在一个文件中,一个tablet一行。 + * 注:可以先通过 `ADMIN SHOW FRONTEND CONFIG;` 命令查看当前版本是否支持该参数。 -格式如下: +3.
设置完成几分钟后,应该会在 Master FE 日志 `fe.log` 中看到如下日志: -``` -12345,11111 -12346,11111 -12347,11111 -``` + ``` + tablet 20000 has only one replica 20001 on backend 10001 and it is lost. create an empty replica to recover it. + ``` -然后如下的命令进行恢复(假设文件名为:`tablets.txt`): + 该日志表示系统已经创建了一个空白 Tablet 用于填补缺失副本。 + +4. 通过查询来判断是否已经修复成功。 -``` -sh restore_tablet_tool.sh -b "http://127.0.0.1:8040" -f tablets.txt -sh restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --file tablets.txt -``` +5. 全部修复成功后,通过以下命令关闭 `recover_with_empty_tablet` 参数: + + ``` + ADMIN SET FRONTEND CONFIG ("recover_with_empty_tablet" = "false"); + ``` From a9387996f1b2ec301be72c3625a063be1d6d4e79 Mon Sep 17 00:00:00 2001 From: morningman Date: Wed, 5 Aug 2020 11:23:38 +0800 Subject: [PATCH 3/4] add doc2 --- .../administrator-guide/config/fe_config.md | 8 ++ .../operation/tablet-restore-tool.md | 136 ++++++++++++++++++ .../administrator-guide/config/fe_config.md | 12 ++ 3 files changed, 156 insertions(+) create mode 100644 docs/en/administrator-guide/operation/tablet-restore-tool.md diff --git a/docs/en/administrator-guide/config/fe_config.md b/docs/en/administrator-guide/config/fe_config.md index 0b3fe4a972c15a..9bcb9c0d3e5ae0 100644 --- a/docs/en/administrator-guide/config/fe_config.md +++ b/docs/en/administrator-guide/config/fe_config.md @@ -674,3 +674,11 @@ The time interval of the latest partitioned version of the table refers to the t ### `cache_result_max_row_count` In order to avoid occupying too much memory, the maximum number of rows that can be cached is 2000 by default. If this threshold is exceeded, the cache cannot be set. + +### `recover_with_empty_tablet` + +In some very special circumstances, such as code bugs, or human misoperation, etc., all replicas of some tablets may be lost. In this case, the data has been substantially lost. 
However, in some scenarios, the business still hopes to ensure that the query will not report errors even if there is data loss, and reduce the perception of the user layer. At this point, we can use the blank Tablet to fill the missing replica to ensure that the query can be executed normally. + +Set to true so that Doris will automatically use blank replicas to fill tablets whose replicas have all been damaged or lost. + +Default is false. diff --git a/docs/en/administrator-guide/operation/tablet-restore-tool.md b/docs/en/administrator-guide/operation/tablet-restore-tool.md new file mode 100644 index 00000000000000..2012c73f4d8ec0 --- /dev/null +++ b/docs/en/administrator-guide/operation/tablet-restore-tool.md @@ -0,0 +1,136 @@ +--- +{ + "title": "Tablet Restore Tool", + "language": "en" +} +--- + + + +# Tablet Restore Tool + +## Restore data from BE Recycle Bin + +During the user's use of Doris, some valid tablets (including metadata and data) may be deleted due to some misoperations or online bugs. In order to prevent data loss in these abnormal situations, Doris provides a recycle bin mechanism to protect user data. Tablet data deleted by users will not be deleted directly, but will be stored in the recycle bin for a period of time. After a period of time, there will be a regular cleaning mechanism to delete expired data. The data in the recycle bin includes: tablet data file (.dat), tablet index file (.idx) and tablet metadata file (.hdr). The data will be stored in a path in the following format: + +``` +/root_path/trash/time_label/tablet_id/schema_hash/ +``` + +* `root_path`: a data root directory corresponding to the BE node. +* `trash`: The directory of the recycle bin. +* `time_label`: Time label, for the uniqueness of the data directory in the recycle bin, while recording the data time, use the time label as a subdirectory. + +When a user finds that online data has been deleted by mistake, he needs to recover the deleted tablet from the recycle bin.
This tablet data recovery function is needed. + +BE provides http interface and `restore_tablet_tool.sh` script to achieve this function, and supports single tablet operation (single mode) and batch operation mode (batch mode). + +* In single mode, data recovery of a single tablet is supported. +* In batch mode, support batch tablet data recovery. + +### Operation + +#### single mode + +1. http request method + + BE provides an http interface for single tablet data recovery, the interface is as follows: + + ``` + curl -X POST "http://be_host:be_webserver_port/api/restore_tablet?tablet_id=11111\&schema_hash=12345" + ``` + + The successful results are as follows: + + ``` + {"status": "Success", "msg": "OK"} + ``` + + If it fails, the corresponding failure reason will be returned. One possible result is as follows: + + ``` + {"status": "Failed", "msg": "create link path failed"} + ``` + +2. Script mode + + `restore_tablet_tool.sh` can be used to realize the function of single tablet data recovery. + + ``` + sh tools/restore_tablet_tool.sh -b "http://127.0.0.1:8040" -t 12345 -s 11111 + sh tools/restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --tablet_id 12345 --schema_hash 11111 + ``` + +#### batch mode + +The batch recovery mode is used to realize the function of recovering multiple tablet data. + +When using, you need to put the restored tablet id and schema hash in a file in a comma-separated format in advance, one tablet per line. + +The format is as follows: + +``` +12345,11111 +12346,11111 +12347,11111 +``` + +Then perform the recovery with the following command (assuming the file name is: `tablets.txt`): + +``` +sh restore_tablet_tool.sh -b "http://127.0.0.1:8040" -f tablets.txt +sh restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --file tablets.txt +``` + +## Repair missing or damaged Tablet + +In some very special circumstances, such as code bugs, or human misoperation, etc., all replicas of some tablets may be lost. 
In this case, the data has been substantially lost. However, in some scenarios, the business still hopes to ensure that the query will not report errors even if there is data loss, and reduce the perception of the user layer. At this point, we can use the blank Tablet to fill the missing replica to ensure that the query can be executed normally. + +**Note: This operation is only used to avoid the problem of error reporting due to the inability to find a queryable replica, and it is impossible to recover the data that has been substantially lost.** + +1. View Master FE log `fe.log` + + If there is data loss, there will be a log similar to the following in the log: + + ``` + backend [10001] invalid situation. tablet[20000] has few replica[1], replica num setting is [3] + ``` + + This log indicates that all replicas of tablet 20000 have been damaged or lost. + +2. Use blank replicas to fill in missing replicas + + After confirming that the data cannot be recovered, you can execute the following command to generate blank replicas. + + ``` + ADMIN SET FRONTEND CONFIG ("recover_with_empty_tablet" = "true"); + ``` + + * Note: You can first check whether the current version supports this parameter through the `ADMIN SHOW FRONTEND CONFIG;` command. + +3. A few minutes after the setup is complete, you should see the following log in the Master FE log `fe.log`: + + ``` + tablet 20000 has only one replica 20001 on backend 10001 and it is lost. create an empty replica to recover it. + ``` + + The log indicates that the system has created a blank tablet to fill in the missing replica. + +4. Run a query to verify that the repair succeeded.
\ No newline at end of file diff --git a/docs/zh-CN/administrator-guide/config/fe_config.md b/docs/zh-CN/administrator-guide/config/fe_config.md index e172a688001a32..3f8ef1fc94d6d1 100644 --- a/docs/zh-CN/administrator-guide/config/fe_config.md +++ b/docs/zh-CN/administrator-guide/config/fe_config.md @@ -670,3 +670,15 @@ thrift_client_timeout_ms 的值被设置为大于0来避免线程卡在java.net. ### `cache_result_max_row_count` 为了避免过多占用内存,能够被缓存最大的行数,默认2000,超过这个阈值将不能缓存置。 + +### `recover_with_empty_tablet` + +在某些极特殊情况下,如代码BUG、或人为误操作等,可能导致部分分片的全部副本都丢失。这种情况下,数据已经实质性的丢失。但是在某些场景下,业务依然希望能够在即使有数据丢失的情况下,保证查询正常不报错,降低用户层的感知程度。此时,我们可以通过使用空白Tablet填充丢失副本的功能,来保证查询能够正常执行。 + +将此参数设置为 true,则 Doris 会自动使用空白副本填充所有副本都已损坏或丢失的 Tablet。 + +默认为 false。 + + + + From 3c21dde243a6bff0d00eecc992ab80e1c04a7d74 Mon Sep 17 00:00:00 2001 From: morningman Date: Thu, 6 Aug 2020 20:09:05 +0800 Subject: [PATCH 4/4] fix set bad bug --- .../org/apache/doris/catalog/Catalog.java | 59 ++++++++++++++---- .../org/apache/doris/load/TabletLoadInfo.java | 1 + .../apache/doris/master/ReportHandler.java | 23 +++---- .../doris/persist/BackendTabletsInfo.java | 30 +++++++++- .../doris/persist/ReplicaPersistInfo.java | 60 +++++++++++-------- .../org/apache/doris/task/AgentBatchTask.java | 10 ---- 6 files changed, 124 insertions(+), 59 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java index d269ce10299177..3b498bd5da7902 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java @@ -210,6 +210,8 @@ import org.apache.doris.thrift.TTaskType; import org.apache.doris.transaction.GlobalTransactionMgr; import org.apache.doris.transaction.PublishVersionDaemon; +import org.apache.doris.transaction.UpdateDbUsedDataQuotaDaemon; + import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.base.Strings; @@ -225,7 +227,6
@@ import com.sleepycat.je.rep.NetworkRestoreConfig; import org.apache.commons.collections.CollectionUtils; -import org.apache.doris.transaction.UpdateDbUsedDataQuotaDaemon; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codehaus.jackson.map.ObjectMapper; @@ -6470,20 +6471,56 @@ public void setConfig(AdminSetConfigStmt stmt) throws DdlException { public void replayBackendTabletsInfo(BackendTabletsInfo backendTabletsInfo) { List> tabletsWithSchemaHash = backendTabletsInfo.getTabletSchemaHash(); - for (Pair tabletInfo : tabletsWithSchemaHash) { - Replica replica = tabletInvertedIndex.getReplica(tabletInfo.first, - backendTabletsInfo.getBackendId()); - if (replica == null) { - LOG.warn("replica does not found when replay. tablet {}, backend {}", - tabletInfo.first, backendTabletsInfo.getBackendId()); - continue; + if (!tabletsWithSchemaHash.isEmpty()) { + // In previous version, we save replica info in `tabletsWithSchemaHash`, + // but it is wrong because we can not get replica from `tabletInvertedIndex` when doing checkpoint, + // because when doing checkpoint, the tabletInvertedIndex is not initialized at all. + // + // So we can only discard this information, in this case, it is equivalent to losing the record of these operations. + // But it doesn't matter, these records are currently only used to record whether a replica is in a bad state. + // This state has little effect on the system, and it can be restored after the system has processed the bad state replica. + for (Pair tabletInfo : tabletsWithSchemaHash) { + LOG.warn("find an old backendTabletsInfo for tablet {}, ignore it", tabletInfo.first); } + return; + } - if (replica.getSchemaHash() != tabletInfo.second) { + // in new version, replica info is saved here. + // but we need to get replica from db->tbl->partition->... 
+ List replicaPersistInfos = backendTabletsInfo.getReplicaPersistInfos(); + for (ReplicaPersistInfo info : replicaPersistInfos) { + long dbId = info.getDbId(); + Database db = getDb(dbId); + if (db == null) { continue; } - - replica.setBad(backendTabletsInfo.isBad()); + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(info.getTableId()); + if (tbl == null) { + continue; + } + Partition partition = tbl.getPartition(info.getPartitionId()); + if (partition == null) { + continue; + } + MaterializedIndex mindex = partition.getIndex(info.getIndexId()); + if (mindex == null) { + continue; + } + Tablet tablet = mindex.getTablet(info.getTabletId()); + if (tablet == null) { + continue; + } + Replica replica = tablet.getReplicaById(info.getReplicaId()); + if (replica != null) { + replica.setBad(true); + LOG.debug("get replica {} of tablet {} on backend {} to bad when replaying", + info.getReplicaId(), info.getTabletId(), info.getBackendId()); + } + } finally { + db.writeUnlock(); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/TabletLoadInfo.java b/fe/fe-core/src/main/java/org/apache/doris/load/TabletLoadInfo.java index c795a812ad3680..66a1a8a70594eb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/TabletLoadInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/TabletLoadInfo.java @@ -69,6 +69,7 @@ public void write(DataOutput out) throws IOException { out.writeLong(fileSize); } } + public void readFields(DataInput in) throws IOException { if (in.readBoolean()) { filePath = Text.readString(in).intern(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index 30624e46812aaf..fdbf3ceff4ed3e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -71,13 +71,6 @@ import org.apache.doris.thrift.TTabletMetaType; import 
org.apache.doris.thrift.TTaskType; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang3.tuple.ImmutableTriple; -import org.apache.commons.lang3.tuple.Triple; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.thrift.TException; - import com.google.common.collect.LinkedListMultimap; import com.google.common.collect.ListMultimap; import com.google.common.collect.Lists; @@ -85,6 +78,13 @@ import com.google.common.collect.Queues; import com.google.common.collect.Sets; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.tuple.ImmutableTriple; +import org.apache.commons.lang3.tuple.Triple; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.thrift.TException; + import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -604,8 +604,9 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta tabletId, replica.getId(), backendId); BackendTabletsInfo tabletsInfo = new BackendTabletsInfo(backendId); tabletsInfo.setBad(true); - tabletsInfo.addTabletWithSchemaHash(tabletId, - olapTable.getSchemaHashByIndexId(indexId)); + ReplicaPersistInfo replicaPersistInfo = ReplicaPersistInfo.createForReport( + dbId, tableId, partitionId, indexId, tabletId, backendId, replica.getId()); + tabletsInfo.addReplicaInfo(replicaPersistInfo); Catalog.getCurrentCatalog().getEditLog().logBackendTabletsInfo(tabletsInfo); } } @@ -820,7 +821,9 @@ private static void handleRecoverTablet(ListMultimap tabletRecoveryM if (replica.setBad(true)) { LOG.warn("set bad for replica {} of tablet {} on backend {}", replica.getId(), tabletId, backendId); - backendTabletsInfo.addTabletWithSchemaHash(tabletId, schemaHash); + ReplicaPersistInfo replicaPersistInfo = ReplicaPersistInfo.createForReport( + dbId, tableId, partitionId, indexId, tabletId, backendId, replica.getId()); + backendTabletsInfo.addReplicaInfo(replicaPersistInfo); 
} break; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/BackendTabletsInfo.java b/fe/fe-core/src/main/java/org/apache/doris/persist/BackendTabletsInfo.java index 2cf9b7f771fd95..5182436e8f4c19 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/BackendTabletsInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/BackendTabletsInfo.java @@ -31,10 +31,14 @@ public class BackendTabletsInfo implements Writable { private long backendId; // tablet id , schema hash + // this structure is deprecated and be replaced by 'replicaPersistInfos' + @Deprecated private List> tabletSchemaHash = Lists.newArrayList(); private boolean bad; + private List replicaPersistInfos = Lists.newArrayList(); + private BackendTabletsInfo() { } @@ -43,8 +47,12 @@ public BackendTabletsInfo(long backendId) { this.backendId = backendId; } - public void addTabletWithSchemaHash(long tabletId, int schemaHash) { - tabletSchemaHash.add(Pair.create(tabletId, schemaHash)); + public void addReplicaInfo(ReplicaPersistInfo info) { + replicaPersistInfos.add(info); + } + + public List getReplicaPersistInfos() { + return replicaPersistInfos; } public long getBackendId() { @@ -64,7 +72,7 @@ public boolean isBad() { } public boolean isEmpty() { - return tabletSchemaHash.isEmpty(); + return tabletSchemaHash.isEmpty() && replicaPersistInfos.isEmpty(); } public static BackendTabletsInfo read(DataInput in) throws IOException { @@ -84,6 +92,12 @@ public void write(DataOutput out) throws IOException { out.writeBoolean(bad); + // this is for further extension + out.writeBoolean(true); + out.writeInt(replicaPersistInfos.size()); + for (ReplicaPersistInfo info : replicaPersistInfos) { + info.write(out); + } // this is for further extension out.writeBoolean(false); } @@ -100,6 +114,16 @@ public void readFields(DataInput in) throws IOException { bad = in.readBoolean(); + if (in.readBoolean()) { + size = in.readInt(); + for (int i = 0; i < size; i++) { + ReplicaPersistInfo 
replicaPersistInfo = ReplicaPersistInfo.read(in); + replicaPersistInfos.add(replicaPersistInfo); + } + } else { + replicaPersistInfos = Lists.newArrayList(); + } + if (in.readBoolean()) { } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/ReplicaPersistInfo.java b/fe/fe-core/src/main/java/org/apache/doris/persist/ReplicaPersistInfo.java index a3594f100c65a5..ed9935afb99a43 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/ReplicaPersistInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/ReplicaPersistInfo.java @@ -40,7 +40,8 @@ public enum ReplicaOperationType { // the old image and old persist log does not have op field, so the op field is null when upgrate to fe meta 45 // then fe will dump image and want to write op type to image, op type is null and then throw null pointer exception // add the default op, when read from image and op type == null ,set op type to default op to skip the exception - DEFAULT_OP(8); + DEFAULT_OP(8), + TABLET_INFO(9); private final int value; @@ -51,30 +52,32 @@ private ReplicaOperationType(int value) { public int getValue() { return value; } - - public static ReplicaOperationType findByValue(int value) { - switch (value) { - case 0: - return ADD; - case 1: - return CROND_DELETE; - case 2: - return DELETE; - case 3: - return CLONE; - case 4: - return LOAD; - case 5: - return ROLLUP; - case 6: - return SCHEMA_CHANGE; - case 7: - return CLEAR_ROLLUPINFO; - case 8: - return DEFAULT_OP; - default: - return null; - } + + public static ReplicaOperationType findByValue(int value) { + switch (value) { + case 0: + return ADD; + case 1: + return CROND_DELETE; + case 2: + return DELETE; + case 3: + return CLONE; + case 4: + return LOAD; + case 5: + return ROLLUP; + case 6: + return SCHEMA_CHANGE; + case 7: + return CLEAR_ROLLUPINFO; + case 8: + return DEFAULT_OP; + case 9: + return TABLET_INFO; + default: + return null; + } } } @@ -193,6 +196,13 @@ public static ReplicaPersistInfo 
createForClearRollupInfo(long dbId, long tableI dbId, tableId, partitionId, indexId, -1L, -1L, -1L, -1L, -1L, -1, -1L, -1L, -1L, 0L, -1L, 0L); } + public static ReplicaPersistInfo createForReport(long dbId, long tblId, long partitionId, long indexId, long tabletId, + long backendId, long replicaId) { + return new ReplicaPersistInfo(ReplicaOperationType.TABLET_INFO, dbId, tblId, partitionId, indexId, tabletId, backendId, replicaId, + -1L, -1L, -1, -1L, -1L, -1L, 0L, -1L, 0L); + } + + private ReplicaPersistInfo() { } diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/AgentBatchTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/AgentBatchTask.java index 2beed65dc2f562..9bff8ed3a60cf5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/AgentBatchTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/AgentBatchTask.java @@ -37,7 +37,6 @@ import org.apache.doris.thrift.TPublishVersionRequest; import org.apache.doris.thrift.TPushReq; import org.apache.doris.thrift.TPushType; -import org.apache.doris.thrift.TRecoverTabletReq; import org.apache.doris.thrift.TReleaseSnapshotRequest; import org.apache.doris.thrift.TSnapshotRequest; import org.apache.doris.thrift.TStorageMediumMigrateReq; @@ -343,15 +342,6 @@ private TAgentTaskRequest toAgentTaskRequest(AgentTask task) { tAgentTaskRequest.setMove_dir_req(request); return tAgentTaskRequest; } - case RECOVER_TABLET: { - RecoverTabletTask recoverTabletTask = (RecoverTabletTask) task; - TRecoverTabletReq request = recoverTabletTask.toThrift(); - if (LOG.isDebugEnabled()) { - LOG.debug(request.toString()); - } - tAgentTaskRequest.setRecover_tablet_req(request); - return tAgentTaskRequest; - } case UPDATE_TABLET_META_INFO: { UpdateTabletMetaInfoTask updateTabletMetaInfoTask = (UpdateTabletMetaInfoTask) task; TUpdateTabletMetaInfoReq request = updateTabletMetaInfoTask.toThrift();