From 3fef840bd9eb0abbdf5f3be22783137ca4bd9418 Mon Sep 17 00:00:00 2001 From: qijianliang Date: Thu, 10 Jun 2021 22:07:15 +0800 Subject: [PATCH 1/7] [Bug]Fix the bug data balance causes tablet loss --- be/src/agent/task_worker_pool.cpp | 35 ++++++-- be/src/exec/olap_scan_node.cpp | 2 +- be/src/exec/olap_scanner.cpp | 2 +- be/src/http/action/compaction_action.cpp | 6 +- be/src/http/action/meta_action.cpp | 2 +- be/src/http/action/restore_tablet_action.cpp | 2 +- .../http/action/tablet_migration_action.cpp | 2 +- be/src/olap/base_tablet.h | 13 ++- be/src/olap/data_dir.cpp | 6 +- be/src/olap/delta_writer.cpp | 2 +- be/src/olap/schema_change.cpp | 4 +- be/src/olap/snapshot_manager.cpp | 2 +- be/src/olap/storage_engine.cpp | 4 +- be/src/olap/tablet.cpp | 9 ++ be/src/olap/tablet.h | 4 + be/src/olap/tablet_manager.cpp | 88 ++++++++++--------- be/src/olap/tablet_manager.h | 16 ++-- be/src/olap/tablet_meta.cpp | 28 +++++- be/src/olap/tablet_meta.h | 21 ++++- be/src/olap/task/engine_batch_load_task.cpp | 6 +- be/src/olap/task/engine_checksum_task.cpp | 2 +- be/src/olap/task/engine_clone_task.cpp | 37 ++++++-- .../olap/task/engine_publish_version_task.cpp | 2 +- .../task/engine_storage_migration_task.cpp | 2 +- be/src/runtime/snapshot_loader.cpp | 2 +- .../cumulative_compaction_policy_test.cpp | 4 +- be/test/olap/delete_handler_test.cpp | 14 +-- be/test/olap/delta_writer_test.cpp | 10 +-- be/test/olap/memory/mem_tablet_test.cpp | 2 +- be/test/olap/tablet_meta_test.cpp | 2 +- be/test/olap/tablet_mgr_test.cpp | 24 ++--- be/test/olap/tablet_test.cpp | 2 +- .../olap/test_data/header_without_inc_rs.txt | 3 +- .../org/apache/doris/alter/RollupJobV2.java | 3 +- .../apache/doris/alter/SchemaChangeJobV2.java | 3 +- .../org/apache/doris/backup/RestoreJob.java | 2 +- .../org/apache/doris/catalog/Catalog.java | 6 +- .../apache/doris/clone/TabletSchedCtx.java | 15 ++-- .../apache/doris/clone/TabletScheduler.java | 6 +- .../apache/doris/master/ReportHandler.java | 11 ++- 
.../java/org/apache/doris/task/AgentTask.java | 2 +- .../java/org/apache/doris/task/CloneTask.java | 11 ++- .../apache/doris/task/CreateReplicaTask.java | 5 +- .../apache/doris/task/DropReplicaTask.java | 9 +- .../org/apache/doris/task/AgentTaskTest.java | 8 +- gensrc/proto/olap_file.proto | 2 + gensrc/thrift/AgentService.thrift | 4 + gensrc/thrift/MasterService.thrift | 1 + gensrc/thrift/Types.thrift | 1 + 49 files changed, 300 insertions(+), 149 deletions(-) diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 1fb5bb30c452db..994222197c0e2e 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -373,7 +373,7 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() { ++_s_report_version; // get path hash of the created tablet TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - create_tablet_req.tablet_id, create_tablet_req.tablet_schema.schema_hash); + create_tablet_req.tablet_id, create_tablet_req.replica_id, create_tablet_req.tablet_schema.schema_hash); DCHECK(tablet != nullptr); TTabletInfo tablet_info; tablet_info.tablet_id = tablet->table_id(); @@ -426,10 +426,16 @@ void TaskWorkerPool::_drop_tablet_worker_thread_callback() { TStatus task_status; string err; TabletSharedPtr dropped_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - drop_tablet_req.tablet_id, drop_tablet_req.schema_hash, false, &err); + drop_tablet_req.tablet_id, drop_tablet_req.replica_id, drop_tablet_req.schema_hash, false, &err); if (dropped_tablet != nullptr) { + if (dropped_tablet->clone_mode()) { + LOG(WARNING) << "drop table cancelled as tablet is in clone mode! 
signature: " << agent_task_req.signature; + error_msgs.push_back("drop table cancelled!"); + status_code = TStatusCode::CANCELLED; + } + OLAPStatus drop_status = StorageEngine::instance()->tablet_manager()->drop_tablet( - drop_tablet_req.tablet_id, drop_tablet_req.schema_hash); + drop_tablet_req.tablet_id, drop_tablet_req.replica_id, drop_tablet_req.schema_hash); if (drop_status != OLAP_SUCCESS) { LOG(WARNING) << "drop table failed! signature: " << agent_task_req.signature; error_msgs.push_back("drop table failed!"); @@ -837,7 +843,7 @@ void TaskWorkerPool::_update_tablet_meta_worker_thread_callback() { for (auto tablet_meta_info : update_tablet_meta_req.tabletMetaInfos) { TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - tablet_meta_info.tablet_id, tablet_meta_info.schema_hash); + tablet_meta_info.tablet_id, tablet_meta_info.replica_id, tablet_meta_info.schema_hash); if (tablet == nullptr) { LOG(WARNING) << "could not find tablet when update partition id" << " tablet_id=" << tablet_meta_info.tablet_id @@ -901,6 +907,14 @@ void TaskWorkerPool::_clone_worker_thread_callback() { DorisMetrics::instance()->clone_requests_total->increment(1); LOG(INFO) << "get clone task. 
signature:" << agent_task_req.signature; + // check tablet with the same tabletId existance, if exist, set tablet in clone mode + string err; + TabletSharedPtr exist_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( + clone_req.tablet_id, 0 /*replica_id*/, clone_req.schema_hash, &err); + if (exist_tablet != nullptr) { + exist_tablet->set_clone_mode(true); + } + std::vector error_msgs; std::vector tablet_infos; EngineCloneTask engine_task(clone_req, _master_info, agent_task_req.signature, &error_msgs, @@ -928,6 +942,14 @@ void TaskWorkerPool::_clone_worker_thread_callback() { task_status.__set_error_msgs(error_msgs); finish_task_request.__set_task_status(task_status); + // clone done, set clone mode false + // Retrieve once again to prevent tablet from being dropped + exist_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( + clone_req.tablet_id, 0 /*replica_id*/, clone_req.schema_hash, &err); + if (exist_tablet != nullptr) { + exist_tablet->set_clone_mode(false); + } + _finish_task(finish_task_request); _remove_task_info(agent_task_req.task_type, agent_task_req.signature); } @@ -991,7 +1013,8 @@ OLAPStatus TaskWorkerPool::_check_migrate_requset(const TStorageMediumMigrateReq TabletSharedPtr& tablet, DataDir** dest_store) { int64_t tablet_id = req.tablet_id; int32_t schema_hash = req.schema_hash; - tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + // tablet migration no need to know replica_id + tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); if (tablet == nullptr) { LOG(WARNING) << "can't find tablet. 
tablet_id= " << tablet_id << " schema_hash=" << schema_hash; @@ -1572,7 +1595,7 @@ AgentStatus TaskWorkerPool::_move_dir(const TTabletId tablet_id, const TSchemaHa const std::string& src, int64_t job_id, bool overwrite, std::vector* error_msgs) { TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); if (tablet == nullptr) { LOG(INFO) << "failed to get tablet. tablet_id:" << tablet_id << ", schema hash:" << schema_hash; diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index b2d90e8bdf25d4..abb077210b8bbf 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -665,7 +665,7 @@ Status OlapScanNode::get_hints(const TPaloScanRange& scan_range, int block_row_c int32_t schema_hash = strtoul(scan_range.schema_hash.c_str(), NULL, 10); std::string err; TabletSharedPtr table = StorageEngine::instance()->tablet_manager()->get_tablet( - tablet_id, schema_hash, true, &err); + tablet_id, 0 /*replica_id*/, schema_hash, true, &err); if (table == nullptr) { std::stringstream ss; ss << "failed to get tablet: " << tablet_id << " with schema hash: " << schema_hash diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 9e1a9923035bab..1145a4c757868f 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -75,7 +75,7 @@ Status OlapScanner::prepare( _version = strtoul(scan_range.version.c_str(), nullptr, 10); { std::string err; - _tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash, + _tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash, true, &err); if (_tablet.get() == nullptr) { std::stringstream ss; diff --git a/be/src/http/action/compaction_action.cpp b/be/src/http/action/compaction_action.cpp index 16f8e91db35f58..da117a9f9071e4 100644 --- 
a/be/src/http/action/compaction_action.cpp +++ b/be/src/http/action/compaction_action.cpp @@ -69,7 +69,7 @@ Status CompactionAction::_handle_show_compaction(HttpRequest* req, std::string* "check param failed"); TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); if (tablet == nullptr) { return Status::NotFound( strings::Substitute("Tablet not found. tablet_id=$0, schema_hash=$1", @@ -98,7 +98,7 @@ Status CompactionAction::_handle_run_compaction(HttpRequest* req, std::string* j // 2. fetch the tablet by tablet_id and schema_hash TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); if (tablet == nullptr) { return Status::NotFound( strings::Substitute("Tablet not found. tablet_id=$0, schema_hash=$1", @@ -160,7 +160,7 @@ Status CompactionAction::_handle_run_status_compaction(HttpRequest* req, std::st } else { // fetch the tablet by tablet_id and schema_hash TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); if (tablet == nullptr) { LOG(WARNING) << "invalid argument.tablet_id:" << tablet_id diff --git a/be/src/http/action/meta_action.cpp b/be/src/http/action/meta_action.cpp index 945c7477398ff2..920c2637854b44 100644 --- a/be/src/http/action/meta_action.cpp +++ b/be/src/http/action/meta_action.cpp @@ -60,7 +60,7 @@ Status MetaAction::_handle_header(HttpRequest* req, std::string* json_meta) { } TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); if 
(tablet == nullptr) { LOG(WARNING) << "no tablet for tablet_id:" << tablet_id << " schema hash:" << schema_hash; return Status::InternalError("no tablet exist"); diff --git a/be/src/http/action/restore_tablet_action.cpp b/be/src/http/action/restore_tablet_action.cpp index d207ca54c85d36..71849fb7ccd03a 100644 --- a/be/src/http/action/restore_tablet_action.cpp +++ b/be/src/http/action/restore_tablet_action.cpp @@ -83,7 +83,7 @@ Status RestoreTabletAction::_handle(HttpRequest* req) { LOG(INFO) << "get restore tablet action request: " << tablet_id << "-" << schema_hash; TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); if (tablet != nullptr) { LOG(WARNING) << "find tablet. tablet_id=" << tablet_id << " schema_hash=" << schema_hash; return Status::InternalError("tablet already exists, can not restore."); diff --git a/be/src/http/action/tablet_migration_action.cpp b/be/src/http/action/tablet_migration_action.cpp index ed7ea2197607a7..31556514a85f1c 100644 --- a/be/src/http/action/tablet_migration_action.cpp +++ b/be/src/http/action/tablet_migration_action.cpp @@ -186,7 +186,7 @@ Status TabletMigrationAction::_check_param(HttpRequest* req, int64_t& tablet_id, Status TabletMigrationAction::_check_migrate_request(int64_t tablet_id, int32_t schema_hash, string dest_disk, TabletSharedPtr& tablet, DataDir** dest_store) { - tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); if (tablet == nullptr) { LOG(WARNING) << "no tablet for tablet_id:" << tablet_id << " schema hash:" << schema_hash; return Status::NotFound("Tablet not found"); diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index 3e1386b4609cb6..0839f0306572ef 100644 --- a/be/src/olap/base_tablet.h 
+++ b/be/src/olap/base_tablet.h @@ -55,11 +55,12 @@ class BaseTablet : public std::enable_shared_from_this { inline const std::string full_name() const; inline int64_t partition_id() const; inline int64_t tablet_id() const; + inline int64_t replica_id() const; inline int32_t schema_hash() const; inline int16_t shard_id(); inline const int64_t creation_time() const; inline void set_creation_time(int64_t creation_time); - inline bool equal(int64_t tablet_id, int32_t schema_hash); + inline bool equal(int64_t tablet_id, int64_t replica_id, int32_t schema_hash); // properties encapsulated in TabletSchema inline const TabletSchema& tablet_schema() const; @@ -125,6 +126,10 @@ inline int64_t BaseTablet::tablet_id() const { return _tablet_meta->tablet_id(); } +inline int64_t BaseTablet::replica_id() const { + return _tablet_meta->replica_id(); +} + inline int32_t BaseTablet::schema_hash() const { return _tablet_meta->schema_hash(); } @@ -141,8 +146,10 @@ inline void BaseTablet::set_creation_time(int64_t creation_time) { _tablet_meta->set_creation_time(creation_time); } -inline bool BaseTablet::equal(int64_t id, int32_t hash) { - return (tablet_id() == id) && (schema_hash() == hash); +inline bool BaseTablet::equal(int64_t id, int64_t r_id, int32_t hash) { + // For compatibility with older data, there is no replica id in the old version of the tablet meta + // For new data with replica_id in the meta, there are some tasks that do not need to check the replica_id + return (tablet_id() == id) && ((replica_id() == 0 || r_id == 0) ? true : (replica_id() == r_id)) && (schema_hash() == hash); } inline const TabletSchema& BaseTablet::tablet_schema() const { diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index e48392ae16c7c4..b0457eb7c13523 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -581,7 +581,7 @@ OLAPStatus DataDir::load() { // 2. 
add visible rowset to tablet // ignore any errors when load tablet or rowset, because fe will repair them after report for (auto rowset_meta : dir_rowset_metas) { - TabletSharedPtr tablet = _tablet_manager->get_tablet(rowset_meta->tablet_id(), + TabletSharedPtr tablet = _tablet_manager->get_tablet(rowset_meta->tablet_id(), 0 /*replica_id*/, rowset_meta->tablet_schema_hash()); // tablet maybe dropped, but not drop related rowset meta if (tablet == nullptr) { @@ -679,7 +679,7 @@ void DataDir::perform_path_gc_by_tablet() { << ", path=" << path; continue; } - TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, schema_hash); + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); if (tablet != nullptr) { // could find the tablet, then skip check it continue; @@ -729,7 +729,7 @@ void DataDir::perform_path_gc_by_rowsetid() { RowsetId rowset_id; bool is_rowset_file = TabletManager::get_rowset_id_from_path(path, &rowset_id); if (is_rowset_file) { - TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, schema_hash); + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); if (tablet != nullptr) { if (!tablet->check_rowset_id(rowset_id) && !StorageEngine::instance()->check_rowset_id_in_unused_rowsets(rowset_id)) { diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 3868e8806e56b8..1b539dd03fb1a4 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -94,7 +94,7 @@ void DeltaWriter::_garbage_collection() { OLAPStatus DeltaWriter::init() { TabletManager* tablet_mgr = _storage_engine->tablet_manager(); - _tablet = tablet_mgr->get_tablet(_req.tablet_id, _req.schema_hash); + _tablet = tablet_mgr->get_tablet(_req.tablet_id, 0 /*replica_id*/, _req.schema_hash); if (_tablet == nullptr) { LOG(WARNING) << "fail to find tablet. 
tablet_id=" << _req.tablet_id << ", schema_hash=" << _req.schema_hash; diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 49a7f961f49193..7ac20731da1f09 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -1426,7 +1426,7 @@ OLAPStatus SchemaChangeHandler::process_alter_tablet_v2(const TAlterTabletReqV2& OLAPStatus SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2& request) { OLAPStatus res = OLAP_SUCCESS; TabletSharedPtr base_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - request.base_tablet_id, request.base_schema_hash); + request.base_tablet_id, 0 /*replica_id*/, request.base_schema_hash); if (base_tablet == nullptr) { LOG(WARNING) << "fail to find base tablet. base_tablet=" << request.base_tablet_id << ", base_schema_hash=" << request.base_schema_hash; @@ -1435,7 +1435,7 @@ OLAPStatus SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletRe // new tablet has to exist TabletSharedPtr new_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - request.new_tablet_id, request.new_schema_hash); + request.new_tablet_id, 0 /*replica_id*/, request.new_schema_hash); if (new_tablet == nullptr) { LOG(WARNING) << "fail to find new tablet." << " new_tablet=" << request.new_tablet_id diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index 9db0fa73d8a0aa..f9ded4f42279b4 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -77,7 +77,7 @@ OLAPStatus SnapshotManager::make_snapshot( } TabletSharedPtr ref_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - request.tablet_id, request.schema_hash); + request.tablet_id, 0 /*replica_id*/, request.schema_hash); if (ref_tablet == nullptr) { LOG(WARNING) << "failed to get tablet. 
tablet=" << request.tablet_id << " schema_hash=" << request.schema_hash; diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 8e6aab6fffed14..1acd27de2ec289 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -998,7 +998,7 @@ OLAPStatus StorageEngine::execute_task(EngineTask* task) { std::vector related_tablets; for (TabletInfo& tablet_info : tablet_infos) { TabletSharedPtr tablet = - _tablet_manager->get_tablet(tablet_info.tablet_id, tablet_info.schema_hash); + _tablet_manager->get_tablet(tablet_info.tablet_id, 0 /*replica_id*/, tablet_info.schema_hash); if (tablet != nullptr) { related_tablets.push_back(tablet); tablet->obtain_header_wrlock(); @@ -1034,7 +1034,7 @@ OLAPStatus StorageEngine::execute_task(EngineTask* task) { std::vector related_tablets; for (TabletInfo& tablet_info : tablet_infos) { TabletSharedPtr tablet = - _tablet_manager->get_tablet(tablet_info.tablet_id, tablet_info.schema_hash); + _tablet_manager->get_tablet(tablet_info.tablet_id, 0 /*replica_id*/, tablet_info.schema_hash); if (tablet != nullptr) { related_tablets.push_back(tablet); tablet->obtain_header_wrlock(); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 7a77448706da62..cf177f11529dfb 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -720,6 +720,14 @@ bool Tablet::can_do_compaction(size_t path_hash, CompactionType compaction_type) return true; } +bool Tablet::clone_mode() { + return _tablet_meta->in_clone_mode(); +} + +void Tablet::set_clone_mode(bool clone_mode) { + _tablet_meta->set_in_clone_mode(clone_mode); +} + uint32_t Tablet::calc_compaction_score( CompactionType compaction_type, std::shared_ptr cumulative_compaction_policy) { @@ -1279,6 +1287,7 @@ void Tablet::build_tablet_report_info(TTabletInfo* tablet_info) { tablet_info->__set_version_count(_tablet_meta->version_count()); tablet_info->__set_path_hash(_data_dir->path_hash()); 
tablet_info->__set_is_in_memory(_tablet_meta->tablet_schema().is_in_memory()); + tablet_info->__set_replica_id(_tablet_meta->replica_id()); } // should use this method to get a copy of current tablet meta diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index 437c13662cba38..36249c595e36d5 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -134,6 +134,10 @@ class Tablet : public BaseTablet { bool version_for_delete_predicate(const Version& version); bool version_for_load_deletion(const Version& version); + // message for clone task + bool clone_mode(); + void set_clone_mode(bool clone_mode); + // meta lock inline void obtain_header_rdlock() { _meta_lock.rdlock(); } inline void obtain_header_wrlock() { _meta_lock.wrlock(); } diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 9054cdb56f6203..a0952e3977e982 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -92,25 +92,27 @@ TabletManager::~TabletManager() { DEREGISTER_HOOK_METRIC(tablet_meta_mem_consumption); } -OLAPStatus TabletManager::_add_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash, +OLAPStatus TabletManager::_add_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, const TabletSharedPtr& tablet, bool update_meta, bool force) { OLAPStatus res = OLAP_SUCCESS; VLOG_NOTICE << "begin to add tablet to TabletManager. 
" - << "tablet_id=" << tablet_id << ", schema_hash=" << schema_hash + << "tablet_id=" << tablet_id + << ", replica_id=" << replica_id + << ", schema_hash=" << schema_hash << ", force=" << force; TabletSharedPtr existed_tablet = nullptr; tablet_map_t& tablet_map = _get_tablet_map(tablet_id); for (TabletSharedPtr item : tablet_map[tablet_id].table_arr) { - if (item->equal(tablet_id, schema_hash)) { + if (item->equal(tablet_id, replica_id, schema_hash)) { existed_tablet = item; break; } } if (existed_tablet == nullptr) { - return _add_tablet_to_map_unlocked(tablet_id, schema_hash, tablet, update_meta, + return _add_tablet_to_map_unlocked(tablet_id, replica_id, schema_hash, tablet, update_meta, false /*keep_files*/, false /*drop_old*/); } @@ -159,7 +161,7 @@ OLAPStatus TabletManager::_add_tablet_unlocked(TTabletId tablet_id, SchemaHash s if (force || (new_version > old_version || (new_version == old_version && new_time > old_time))) { // check if new tablet's meta is in store and add new tablet's meta to meta store - res = _add_tablet_to_map_unlocked(tablet_id, schema_hash, tablet, update_meta, keep_files, + res = _add_tablet_to_map_unlocked(tablet_id, replica_id, schema_hash, tablet, update_meta, keep_files, true /*drop_old*/); } else { res = OLAP_ERR_ENGINE_INSERT_OLD_TABLET; @@ -174,7 +176,7 @@ OLAPStatus TabletManager::_add_tablet_unlocked(TTabletId tablet_id, SchemaHash s return res; } -OLAPStatus TabletManager::_add_tablet_to_map_unlocked(TTabletId tablet_id, SchemaHash schema_hash, +OLAPStatus TabletManager::_add_tablet_to_map_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, const TabletSharedPtr& tablet, bool update_meta, bool keep_files, bool drop_old) { @@ -187,10 +189,10 @@ OLAPStatus TabletManager::_add_tablet_to_map_unlocked(TTabletId tablet_id, Schem if (drop_old) { // If the new tablet is fresher than the existing one, then replace // the existing tablet with the new one. 
- RETURN_NOT_OK_LOG(_drop_tablet_unlocked(tablet_id, schema_hash, keep_files), + RETURN_NOT_OK_LOG(_drop_tablet_unlocked(tablet_id, replica_id, schema_hash, keep_files), strings::Substitute("failed to drop old tablet when add new tablet. " - "tablet_id=$0, schema_hash=$1", - tablet_id, schema_hash)); + "tablet_id=$0, replica_id=$1, schema_hash=$2", + tablet_id, replica_id, schema_hash)); } // Register tablet into DataDir, so that we can manage tablet from // the perspective of root path. @@ -227,9 +229,10 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request, DorisMetrics::instance()->create_tablet_requests_total->increment(1); int64_t tablet_id = request.tablet_id; + int64_t replica_id = request.replica_id; int32_t schema_hash = request.tablet_schema.schema_hash; LOG(INFO) << "begin to create tablet. tablet_id=" << tablet_id - << ", schema_hash=" << schema_hash; + << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash; WriteLock wlock(_get_tablets_shard_lock(tablet_id)); TRACE("got tablets shard lock"); @@ -241,7 +244,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request, // tablet_id exist but with different schema_hash, return an error(report task will // eventually trigger its deletion). if (_check_tablet_id_exist_unlocked(tablet_id)) { - TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, schema_hash); + TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, replica_id, schema_hash); if (tablet != nullptr) { LOG(INFO) << "success to create tablet. tablet already exist. 
tablet_id=" << tablet_id; return OLAP_SUCCESS; @@ -258,7 +261,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request, // If the CreateTabletReq has base_tablet_id then it is a alter-tablet request if (request.__isset.base_tablet_id && request.base_tablet_id > 0) { is_schema_change = true; - base_tablet = _get_tablet_unlocked(request.base_tablet_id, request.base_schema_hash); + base_tablet = _get_tablet_unlocked(request.base_tablet_id, 0 /*replica_id*/, request.base_schema_hash); if (base_tablet == nullptr) { LOG(WARNING) << "fail to create tablet(change schema), base tablet does not exist. " << "new_tablet_id=" << tablet_id << ", new_schema_hash=" << schema_hash @@ -285,7 +288,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request, } TRACE("succeed to create tablet"); - LOG(INFO) << "success to create tablet. tablet_id=" << tablet_id + LOG(INFO) << "success to create tablet. tablet_id=" << tablet_id << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash; return OLAP_SUCCESS; } @@ -308,6 +311,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( TRACE("create tablet meta"); int64_t new_tablet_id = request.tablet_id; + int64_t new_replica_id = request.replica_id; int32_t new_schema_hash = request.tablet_schema.schema_hash; // should remove the tablet's pending_id no matter create-tablet success or not @@ -362,7 +366,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( TRACE("update schema change info"); } // Add tablet to StorageEngine will make it visible to user - res = _add_tablet_unlocked(new_tablet_id, new_schema_hash, tablet, true, false); + res = _add_tablet_unlocked(new_tablet_id, new_replica_id, new_schema_hash, tablet, true, false); if (res != OLAP_SUCCESS) { LOG(WARNING) << "fail to add tablet to StorageEngine. 
res=" << res; break; @@ -371,7 +375,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( // TODO(lingbin): The following logic seems useless, can be removed? // Because if _add_tablet_unlocked() return OK, we must can get it from map. - TabletSharedPtr tablet_ptr = _get_tablet_unlocked(new_tablet_id, new_schema_hash); + TabletSharedPtr tablet_ptr = _get_tablet_unlocked(new_tablet_id, new_replica_id, new_schema_hash); if (tablet_ptr == nullptr) { res = OLAP_ERR_TABLE_NOT_FOUND; LOG(WARNING) << "fail to get tablet. res=" << res; @@ -385,7 +389,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( } // something is wrong, we need clear environment if (is_tablet_added) { - OLAPStatus status = _drop_tablet_unlocked(new_tablet_id, new_schema_hash, false); + OLAPStatus status = _drop_tablet_unlocked(new_tablet_id, new_replica_id, new_schema_hash, false); if (status != OLAP_SUCCESS) { LOG(WARNING) << "fail to drop tablet when create tablet failed. res=" << res; } @@ -455,10 +459,10 @@ TabletSharedPtr TabletManager::_create_tablet_meta_and_dir_unlocked( return nullptr; } -OLAPStatus TabletManager::drop_tablet(TTabletId tablet_id, SchemaHash schema_hash, +OLAPStatus TabletManager::drop_tablet(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool keep_files) { WriteLock wlock(_get_tablets_shard_lock(tablet_id)); - return _drop_tablet_unlocked(tablet_id, schema_hash, keep_files); + return _drop_tablet_unlocked(tablet_id, replica_id, schema_hash, keep_files); } // Drop specified tablet, the main logical is as follows: @@ -469,13 +473,13 @@ OLAPStatus TabletManager::drop_tablet(TTabletId tablet_id, SchemaHash schema_has // base-tablet cannot be dropped; // b. other cases: // drop specified tablet directly and clear schema change info. 
-OLAPStatus TabletManager::_drop_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash, +OLAPStatus TabletManager::_drop_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool keep_files) { - LOG(INFO) << "begin drop tablet. tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; + LOG(INFO) << "begin drop tablet. tablet_id=" << tablet_id << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash; DorisMetrics::instance()->drop_tablet_requests_total->increment(1); // Fetch tablet which need to be dropped - TabletSharedPtr to_drop_tablet = _get_tablet_unlocked(tablet_id, schema_hash); + TabletSharedPtr to_drop_tablet = _get_tablet_unlocked(tablet_id, replica_id, schema_hash); if (to_drop_tablet == nullptr) { LOG(WARNING) << "fail to drop tablet because it does not exist. " << "tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; @@ -506,7 +510,8 @@ OLAPStatus TabletManager::drop_tablets_on_error_root_path( TSchemaHash schema_hash = tablet_info.schema_hash; VLOG_NOTICE << "drop_tablet begin. tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; - TabletSharedPtr dropped_tablet = _get_tablet_unlocked(tablet_id, schema_hash); + // clear tablets in unused data dirs, there is no need to compare tablet_replica_id + TabletSharedPtr dropped_tablet = _get_tablet_unlocked(tablet_id, 0 /*replica_id*/, schema_hash); if (dropped_tablet == nullptr) { LOG(WARNING) << "dropping tablet not exist. 
" << " tablet=" << tablet_id << " schema_hash=" << schema_hash; @@ -514,8 +519,9 @@ OLAPStatus TabletManager::drop_tablets_on_error_root_path( } else { tablet_map_t& tablet_map = _get_tablet_map(tablet_id); for (list::iterator it = tablet_map[tablet_id].table_arr.begin(); - it != tablet_map[tablet_id].table_arr.end();) { - if ((*it)->equal(tablet_id, schema_hash)) { + it != tablet_map[tablet_id].table_arr.end();) { + // clear tablets in unused data dirs, there is no need to compare tablet_replica_id + if ((*it)->equal(tablet_id, 0 /*replica_id*/, schema_hash)) { // We should first remove tablet from partition_map to avoid iterator // becoming invalid. _remove_tablet_from_partition(*(*it)); @@ -530,16 +536,16 @@ OLAPStatus TabletManager::drop_tablets_on_error_root_path( return res; } -TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, SchemaHash schema_hash, +TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool include_deleted, string* err) { ReadLock rlock(_get_tablets_shard_lock(tablet_id)); - return _get_tablet_unlocked(tablet_id, schema_hash, include_deleted, err); + return _get_tablet_unlocked(tablet_id, replica_id, schema_hash, include_deleted, err); } -TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash, +TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool include_deleted, string* err) { TabletSharedPtr tablet; - tablet = _get_tablet_unlocked(tablet_id, schema_hash); + tablet = _get_tablet_unlocked(tablet_id, replica_id, schema_hash); if (tablet == nullptr && include_deleted) { ReadLock rlock(&_shutdown_tablets_lock); for (auto& deleted_tablet : _shutdown_tablets) { @@ -573,7 +579,7 @@ TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, SchemaH TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, 
bool include_deleted, string* err) { ReadLock rlock(_get_tablets_shard_lock(tablet_id)); - TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, schema_hash, include_deleted, err); + TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, 0 /*replica_id*/, schema_hash, include_deleted, err); if (tablet != nullptr && tablet->tablet_uid() == tablet_uid) { return tablet; } @@ -791,7 +797,7 @@ OLAPStatus TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tab strings::Substitute("tablet init failed. tablet=$0", tablet->full_name())); WriteLock wlock(_get_tablets_shard_lock(tablet_id)); - RETURN_NOT_OK_LOG(_add_tablet_unlocked(tablet_id, schema_hash, tablet, update_meta, force), + RETURN_NOT_OK_LOG(_add_tablet_unlocked(tablet_id, tablet_meta->replica_id(), schema_hash, tablet, update_meta, force), strings::Substitute("fail to add tablet. tablet=$0", tablet->full_name())); return OLAP_SUCCESS; @@ -862,7 +868,7 @@ OLAPStatus TabletManager::report_tablet_info(TTabletInfo* tablet_info) { OLAPStatus res = OLAP_SUCCESS; - TabletSharedPtr tablet = get_tablet(tablet_info->tablet_id, tablet_info->schema_hash); + TabletSharedPtr tablet = get_tablet(tablet_info->tablet_id, 0 /*replica_id*/, tablet_info->schema_hash); if (tablet == nullptr) { LOG(WARNING) << "can't find tablet. " << " tablet=" << tablet_info->tablet_id @@ -1327,19 +1333,19 @@ OLAPStatus TabletManager::_create_tablet_meta_unlocked(const TCreateTabletReq& r return res; } -OLAPStatus TabletManager::_drop_tablet_directly_unlocked(TTabletId tablet_id, +OLAPStatus TabletManager::_drop_tablet_directly_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool keep_files) { - TabletSharedPtr dropped_tablet = _get_tablet_unlocked(tablet_id, schema_hash); + TabletSharedPtr dropped_tablet = _get_tablet_unlocked(tablet_id, replica_id, schema_hash); if (dropped_tablet == nullptr) { LOG(WARNING) << "fail to drop tablet because it does not exist. 
" - << " tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; + << " tablet_id=" << tablet_id << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash; return OLAP_ERR_TABLE_NOT_FOUND; } tablet_map_t& tablet_map = _get_tablet_map(tablet_id); list& candidate_tablets = tablet_map[tablet_id].table_arr; list::iterator it = candidate_tablets.begin(); while (it != candidate_tablets.end()) { - if (!(*it)->equal(tablet_id, schema_hash)) { + if (!(*it)->equal(tablet_id, replica_id, schema_hash)) { ++it; continue; } @@ -1351,7 +1357,7 @@ OLAPStatus TabletManager::_drop_tablet_directly_unlocked(TTabletId tablet_id, // drop tablet will update tablet meta, should lock WriteLock wrlock(tablet->get_header_lock_ptr()); LOG(INFO) << "set tablet to shutdown state and remove it from memory. " - << "tablet_id=" << tablet_id << ", schema_hash=" << schema_hash + << "tablet_id=" << tablet_id << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash << ", tablet_path=" << dropped_tablet->tablet_path(); // NOTE: has to update tablet here, but must not update tablet meta directly. // because other thread may hold the tablet object, they may save meta too. @@ -1372,16 +1378,16 @@ OLAPStatus TabletManager::_drop_tablet_directly_unlocked(TTabletId tablet_id, return OLAP_SUCCESS; } -TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash) { - VLOG_NOTICE << "begin to get tablet. tablet_id=" << tablet_id +TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash) { + VLOG_NOTICE << "begin to get tablet. tablet_id=" << tablet_id << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash; tablet_map_t& tablet_map = _get_tablet_map(tablet_id); tablet_map_t::iterator it = tablet_map.find(tablet_id); if (it != tablet_map.end()) { for (TabletSharedPtr tablet : it->second.table_arr) { CHECK(tablet != nullptr) << "tablet is nullptr. 
tablet_id=" << tablet_id; - if (tablet->equal(tablet_id, schema_hash)) { - VLOG_NOTICE << "get tablet success. tablet_id=" << tablet_id + if (tablet->equal(tablet_id, replica_id, schema_hash)) { + VLOG_NOTICE << "get tablet success. tablet_id=" << tablet_id << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash; return tablet; } @@ -1464,7 +1470,7 @@ void TabletManager::get_tablets_distribution_on_different_disks( for (; tablet_info_iter != (partition_iter->second).end(); ++tablet_info_iter) { // get_tablet() will hold 'tablet_shard_lock' TabletSharedPtr tablet = - get_tablet(tablet_info_iter->tablet_id, tablet_info_iter->schema_hash); + get_tablet(tablet_info_iter->tablet_id, 0 /*replica_id*/, tablet_info_iter->schema_hash); if (tablet == nullptr) { continue; } diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h index db905c7b6f7729..91265541e03eaf 100644 --- a/be/src/olap/tablet_manager.h +++ b/be/src/olap/tablet_manager.h @@ -66,7 +66,7 @@ class TabletManager { // Return OLAP_SUCCESS, if run ok // OLAP_ERR_TABLE_DELETE_NOEXIST_ERROR, if tablet not exist // OLAP_ERR_NOT_INITED, if not inited - OLAPStatus drop_tablet(TTabletId tablet_id, SchemaHash schema_hash, bool keep_files = false); + OLAPStatus drop_tablet(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool keep_files = false); OLAPStatus drop_tablets_on_error_root_path(const std::vector& tablet_info_vec); @@ -75,7 +75,7 @@ class TabletManager { const std::unordered_set& tablet_submitted_compaction, uint32_t* score, std::shared_ptr cumulative_compaction_policy); - TabletSharedPtr get_tablet(TTabletId tablet_id, SchemaHash schema_hash, + TabletSharedPtr get_tablet(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool include_deleted = false, std::string* err = nullptr); TabletSharedPtr get_tablet(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, @@ -150,23 +150,23 @@ class TabletManager { // Return OLAP_SUCCESS, if run 
ok // OLAP_ERR_TABLE_INSERT_DUPLICATION_ERROR, if find duplication // OLAP_ERR_NOT_INITED, if not inited - OLAPStatus _add_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash, + OLAPStatus _add_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, const TabletSharedPtr& tablet, bool update_meta, bool force); - OLAPStatus _add_tablet_to_map_unlocked(TTabletId tablet_id, SchemaHash schema_hash, + OLAPStatus _add_tablet_to_map_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, const TabletSharedPtr& tablet, bool update_meta, bool keep_files, bool drop_old); bool _check_tablet_id_exist_unlocked(TTabletId tablet_id); OLAPStatus _create_initial_rowset_unlocked(const TCreateTabletReq& request, Tablet* tablet); - OLAPStatus _drop_tablet_directly_unlocked(TTabletId tablet_id, TSchemaHash schema_hash, + OLAPStatus _drop_tablet_directly_unlocked(TTabletId tablet_id, TReplicaId replica_id, TSchemaHash schema_hash, bool keep_files = false); - OLAPStatus _drop_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash, bool keep_files); + OLAPStatus _drop_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool keep_files); - TabletSharedPtr _get_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash); - TabletSharedPtr _get_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash, + TabletSharedPtr _get_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash); + TabletSharedPtr _get_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool include_deleted, std::string* err); TabletSharedPtr _internal_create_tablet_unlocked(const TCreateTabletReq& request, diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 4ede471c812dfa..577d7287b52a12 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -39,7 +39,7 @@ OLAPStatus TabletMeta::create(const TCreateTabletReq& request, 
const TabletUid& const unordered_map& col_ordinal_to_unique_id, TabletMetaSharedPtr* tablet_meta) { tablet_meta->reset(new TabletMeta( - request.table_id, request.partition_id, request.tablet_id, + request.table_id, request.partition_id, request.tablet_id, request.replica_id, request.tablet_schema.schema_hash, shard_id, request.tablet_schema, next_unique_id, col_ordinal_to_unique_id, tablet_uid, request.__isset.tablet_type ? request.tablet_type : TTabletType::TABLET_TYPE_DISK)); @@ -48,7 +48,7 @@ OLAPStatus TabletMeta::create(const TCreateTabletReq& request, const TabletUid& TabletMeta::TabletMeta() : _tablet_uid(0, 0), _schema(new TabletSchema) {} -TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, +TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, int64_t replica_id, int32_t schema_hash, uint64_t shard_id, const TTabletSchema& tablet_schema, uint32_t next_unique_id, const std::unordered_map& col_ordinal_to_unique_id, @@ -58,6 +58,7 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id tablet_meta_pb.set_table_id(table_id); tablet_meta_pb.set_partition_id(partition_id); tablet_meta_pb.set_tablet_id(tablet_id); + tablet_meta_pb.set_replica_id(replica_id); tablet_meta_pb.set_schema_hash(schema_hash); tablet_meta_pb.set_shard_id(shard_id); tablet_meta_pb.set_creation_time(time(NULL)); @@ -248,6 +249,26 @@ OLAPStatus TabletMeta::reset_tablet_uid(const string& header_file) { return res; } +OLAPStatus TabletMeta::reset_tablet_replica_id(const string& header_file, int64_t replica_id) { + OLAPStatus res = OLAP_SUCCESS; + TabletMeta tmp_tablet_meta; + if ((res = tmp_tablet_meta.create_from_file(header_file)) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load tablet meta from file" + << ", meta_file=" << header_file; + return res; + } + TabletMetaPB tmp_tablet_meta_pb; + tmp_tablet_meta.to_meta_pb(&tmp_tablet_meta_pb); + tmp_tablet_meta_pb.set_replica_id(replica_id); + res = 
save(header_file, tmp_tablet_meta_pb); + if (res != OLAP_SUCCESS) { + LOG(FATAL) << "fail to save tablet meta pb to " + << " meta_file=" << header_file; + return res; + } + return res; +} + std::string TabletMeta::construct_header_file_path(const string& schema_hash_path, int64_t tablet_id) { std::stringstream header_name_stream; @@ -335,6 +356,7 @@ void TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) { _table_id = tablet_meta_pb.table_id(); _partition_id = tablet_meta_pb.partition_id(); _tablet_id = tablet_meta_pb.tablet_id(); + _replica_id = tablet_meta_pb.replica_id(); _schema_hash = tablet_meta_pb.schema_hash(); _shard_id = tablet_meta_pb.shard_id(); _creation_time = tablet_meta_pb.creation_time(); @@ -400,6 +422,7 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) { tablet_meta_pb->set_table_id(table_id()); tablet_meta_pb->set_partition_id(partition_id()); tablet_meta_pb->set_tablet_id(tablet_id()); + tablet_meta_pb->set_replica_id(replica_id()); tablet_meta_pb->set_schema_hash(schema_hash()); tablet_meta_pb->set_shard_id(shard_id()); tablet_meta_pb->set_creation_time(creation_time()); @@ -637,6 +660,7 @@ bool operator==(const TabletMeta& a, const TabletMeta& b) { if (a._table_id != b._table_id) return false; if (a._partition_id != b._partition_id) return false; if (a._tablet_id != b._tablet_id) return false; + if (a._replica_id != b._replica_id) return false; if (a._schema_hash != b._schema_hash) return false; if (a._shard_id != b._shard_id) return false; if (a._creation_time != b._creation_time) return false; diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 069507edbba737..66c20c567bb3a6 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -78,7 +78,7 @@ class TabletMeta { TabletMetaSharedPtr* tablet_meta); TabletMeta(); - TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, int32_t schema_hash, + TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, int64_t 
replica_id, int32_t schema_hash, uint64_t shard_id, const TTabletSchema& tablet_schema, uint32_t next_unique_id, const std::unordered_map& col_ordinal_to_unique_id, TabletUid tablet_uid, TTabletType::type tabletType); @@ -92,6 +92,7 @@ class TabletMeta { OLAPStatus save(const std::string& file_path); static OLAPStatus save(const std::string& file_path, const TabletMetaPB& tablet_meta_pb); static OLAPStatus reset_tablet_uid(const std::string& file_path); + static OLAPStatus reset_tablet_replica_id(const std::string& file_path, int64_t replica_id); static std::string construct_header_file_path(const std::string& schema_hash_path, int64_t tablet_id); OLAPStatus save_meta(DataDir* data_dir); @@ -109,6 +110,7 @@ class TabletMeta { inline int64_t table_id() const; inline int64_t partition_id() const; inline int64_t tablet_id() const; + inline int64_t replica_id() const; inline int32_t schema_hash() const; inline int16_t shard_id() const; inline void set_shard_id(int32_t shard_id); @@ -129,6 +131,9 @@ class TabletMeta { inline bool in_restore_mode() const; inline void set_in_restore_mode(bool in_restore_mode); + bool in_clone_mode() const; + void set_in_clone_mode(bool in_clone_mode); + inline const TabletSchema& tablet_schema() const; inline TabletSchema* mutable_tablet_schema(); @@ -179,6 +184,7 @@ class TabletMeta { int64_t _table_id = 0; int64_t _partition_id = 0; int64_t _tablet_id = 0; + int64_t _replica_id = 0; int32_t _schema_hash = 0; int32_t _shard_id = 0; int64_t _creation_time = 0; @@ -199,6 +205,7 @@ class TabletMeta { DelPredicateArray _del_pred_array; bool _in_restore_mode = false; + bool _in_clone_mode = false; RowsetTypePB _preferred_rowset_type = BETA_ROWSET; RWMutex _meta_lock; @@ -222,6 +229,10 @@ inline int64_t TabletMeta::tablet_id() const { return _tablet_id; } +inline int64_t TabletMeta::replica_id() const { + return _replica_id; +} + inline int32_t TabletMeta::schema_hash() const { return _schema_hash; } @@ -286,6 +297,14 @@ inline void 
TabletMeta::set_in_restore_mode(bool in_restore_mode) { _in_restore_mode = in_restore_mode; } +inline bool TabletMeta::in_clone_mode() const { + return _in_clone_mode; +} + +inline void TabletMeta::set_in_clone_mode(bool in_clone_mode) { + _in_clone_mode = in_clone_mode; +} + inline const TabletSchema& TabletMeta::tablet_schema() const { return *_schema; } diff --git a/be/src/olap/task/engine_batch_load_task.cpp b/be/src/olap/task/engine_batch_load_task.cpp index 8d1b6d2a1737ae..3335d82c519a65 100644 --- a/be/src/olap/task/engine_batch_load_task.cpp +++ b/be/src/olap/task/engine_batch_load_task.cpp @@ -106,7 +106,7 @@ AgentStatus EngineBatchLoadTask::_init() { // Check replica exist TabletSharedPtr tablet; - tablet = StorageEngine::instance()->tablet_manager()->get_tablet(_push_req.tablet_id, + tablet = StorageEngine::instance()->tablet_manager()->get_tablet(_push_req.tablet_id, 0 /*replica_id*/, _push_req.schema_hash); if (tablet == nullptr) { LOG(WARNING) << "get tables failed. " @@ -293,7 +293,7 @@ OLAPStatus EngineBatchLoadTask::_push(const TPushReq& request, } TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - request.tablet_id, request.schema_hash); + request.tablet_id, 0 /*replica_id*/, request.schema_hash); if (tablet == nullptr) { LOG(WARNING) << "false to find tablet. tablet=" << request.tablet_id << ", schema_hash=" << request.schema_hash; @@ -355,7 +355,7 @@ OLAPStatus EngineBatchLoadTask::_delete_data(const TPushReq& request, // 1. Get all tablets with same tablet_id TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - request.tablet_id, request.schema_hash); + request.tablet_id, 0 /*replica_id*/, request.schema_hash); if (tablet == nullptr) { LOG(WARNING) << "can't find tablet. 
tablet=" << request.tablet_id << ", schema_hash=" << request.schema_hash; diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp index b27550ec852b6e..31b69cf1f002d1 100644 --- a/be/src/olap/task/engine_checksum_task.cpp +++ b/be/src/olap/task/engine_checksum_task.cpp @@ -48,7 +48,7 @@ OLAPStatus EngineChecksumTask::_compute_checksum() { } TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(_tablet_id, _schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(_tablet_id, 0 /*replica_id*/, _schema_hash); if (NULL == tablet.get()) { OLAP_LOG_WARNING("can't find tablet. [tablet_id=%ld schema_hash=%d]", _tablet_id, _schema_hash); diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index edb421d87693b6..ba43776b1185a5 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -70,9 +70,21 @@ OLAPStatus EngineCloneTask::_do_clone() { AgentStatus status = DORIS_SUCCESS; string src_file_path; TBackend src_host; + // There will be 3 cases: + // + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // tablet + old_version_tablet + explaination + need_reset_replica_id + // + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // null + null + totally new tablet, just clone a new one. + true + // null + not null + old version tablet files exist, drop and clone+ true + // not null + not null + the same version tablet exist, need repair. 
+ false + // not null + null + this case cannot occur + ----- + // Check local tablet exist or not TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - _clone_req.tablet_id, _clone_req.schema_hash); + _clone_req.tablet_id, _clone_req.replica_id, _clone_req.schema_hash); + // for tablet with same tablet id, but different replica id + TabletSharedPtr old_version_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( + _clone_req.tablet_id, 0 /*replica_id*/, _clone_req.schema_hash); bool is_new_tablet = tablet == nullptr; // try to repair a tablet with missing version if (tablet != nullptr) { @@ -157,15 +169,26 @@ OLAPStatus EngineCloneTask::_do_clone() { string header_path = TabletMeta::construct_header_file_path( schema_hash_path_stream.str(), _clone_req.tablet_id); OLAPStatus reset_id_status = TabletMeta::reset_tablet_uid(header_path); - if (reset_id_status != OLAP_SUCCESS) { - LOG(WARNING) << "errors while set tablet uid: '" << header_path; - _error_msgs->push_back("errors while set tablet uid."); + // reset replica_id here,
before loading the tablet into tablet_manager + OLAPStatus reset_replica_id_status = TabletMeta::reset_tablet_replica_id(header_path, _clone_req.replica_id); + if (reset_id_status != OLAP_SUCCESS || reset_replica_id_status != OLAP_SUCCESS) { + LOG(WARNING) << "errors while set tablet uid or replica id: '" << header_path; + _error_msgs->push_back("errors while set tablet uid/replica_id."); status = DORIS_ERROR; } else { - OLAPStatus load_header_status = - StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( + OLAPStatus load_header_status; + if (old_version_tablet != nullptr) { + // drop old version tablet first, then add the new tablet + load_header_status = StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( + store, _clone_req.tablet_id, _clone_req.schema_hash, + schema_hash_path_stream.str(), true); + } else { + // just create and add a new tablet + load_header_status = StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( store, _clone_req.tablet_id, _clone_req.schema_hash, schema_hash_path_stream.str(), false); + } + if (load_header_status != OLAP_SUCCESS) { LOG(WARNING) << "load header failed. local_shard_root_path: '" << local_shard_root_path @@ -237,7 +260,7 @@ void EngineCloneTask::_set_tablet_info(AgentStatus status, bool is_new_tablet) { << ", signature:" << _signature << ", version:" << tablet_info.version << ", expected_version: " << _clone_req.committed_version; OLAPStatus drop_status = StorageEngine::instance()->tablet_manager()->drop_tablet( - _clone_req.tablet_id, _clone_req.schema_hash); + _clone_req.tablet_id, _clone_req.replica_id, _clone_req.schema_hash); if (drop_status != OLAP_SUCCESS && drop_status != OLAP_ERR_TABLE_NOT_FOUND) { // just log LOG(WARNING) << "drop stale cloned table failed! 
tablet id: " diff --git a/be/src/olap/task/engine_publish_version_task.cpp b/be/src/olap/task/engine_publish_version_task.cpp index 51ad517ca5dd6a..c0086ec21358d7 100644 --- a/be/src/olap/task/engine_publish_version_task.cpp +++ b/be/src/olap/task/engine_publish_version_task.cpp @@ -121,7 +121,7 @@ OLAPStatus EnginePublishVersionTask::finish() { break; } TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - tablet_info.tablet_id, tablet_info.schema_hash); + tablet_info.tablet_id, 0 /*replica_id*/, tablet_info.schema_hash); if (tablet == nullptr) { _error_tablet_ids->push_back(tablet_info.tablet_id); } else { diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp index 78944235c0af30..3c26832f36698e 100644 --- a/be/src/olap/task/engine_storage_migration_task.cpp +++ b/be/src/olap/task/engine_storage_migration_task.cpp @@ -159,7 +159,7 @@ OLAPStatus EngineStorageMigrationTask::_migrate() { // if old tablet finished schema change, then the schema change status of the new tablet is DONE // else the schema change status of the new tablet is FAILED TabletSharedPtr new_tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); if (new_tablet == nullptr) { LOG(WARNING) << "tablet not found. 
tablet_id=" << tablet_id << " schema_hash=" << schema_hash; diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index e3f5f3536b80ff..e140e857ed58ad 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -226,7 +226,7 @@ Status SnapshotLoader::download(const std::map& src_to } TabletSharedPtr tablet = - _env->storage_engine()->tablet_manager()->get_tablet(local_tablet_id, schema_hash); + _env->storage_engine()->tablet_manager()->get_tablet(local_tablet_id, 0 /*replica_id*/, schema_hash); if (tablet == nullptr) { std::stringstream ss; ss << "failed to get local tablet: " << local_tablet_id; diff --git a/be/test/olap/cumulative_compaction_policy_test.cpp b/be/test/olap/cumulative_compaction_policy_test.cpp index bc61071bec1f03..090abd8ccc0740 100644 --- a/be/test/olap/cumulative_compaction_policy_test.cpp +++ b/be/test/olap/cumulative_compaction_policy_test.cpp @@ -32,7 +32,7 @@ class TestNumBasedCumulativeCompactionPolicy : public testing::Test { TestNumBasedCumulativeCompactionPolicy() {} void SetUp() { _tablet_meta = static_cast( - new TabletMeta(1, 2, 15673, 4, 5, TTabletSchema(), 6, {{7, 8}}, UniqueId(9, 10), + new TabletMeta(1, 2, 15673, 15674, 4, 5, TTabletSchema(), 6, {{7, 8}}, UniqueId(9, 10), TTabletType::TABLET_TYPE_DISK)); _json_rowset_meta = R"({ @@ -322,7 +322,7 @@ class TestSizeBasedCumulativeCompactionPolicy : public testing::Test { config::cumulative_size_based_compaction_lower_size_mbytes = 64; _tablet_meta = static_cast( - new TabletMeta(1, 2, 15673, 4, 5, TTabletSchema(), 6, {{7, 8}}, UniqueId(9, 10), + new TabletMeta(1, 2, 15673, 15674, 4, 5, TTabletSchema(), 6, {{7, 8}}, UniqueId(9, 10), TTabletType::TABLET_TYPE_DISK)); _json_rowset_meta = R"({ diff --git a/be/test/olap/delete_handler_test.cpp b/be/test/olap/delete_handler_test.cpp index 98823cc59474cd..366c2a9cae81d2 100644 --- a/be/test/olap/delete_handler_test.cpp +++ b/be/test/olap/delete_handler_test.cpp @@ -257,7 +257,7 
@@ class TestDeleteConditionHandler : public testing::Test { set_default_create_tablet_request(&_create_tablet); res = k_engine->create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, + tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, 0 /*replica_id*/, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(tablet.get() != NULL); _tablet_path = tablet->tablet_path(); @@ -266,7 +266,7 @@ class TestDeleteConditionHandler : public testing::Test { res = k_engine->create_tablet(_create_dup_tablet); ASSERT_EQ(OLAP_SUCCESS, res); dup_tablet = k_engine->tablet_manager()->get_tablet( - _create_dup_tablet.tablet_id, _create_dup_tablet.tablet_schema.schema_hash); + _create_dup_tablet.tablet_id, 0 /*replica_id*/, _create_dup_tablet.tablet_schema.schema_hash); ASSERT_TRUE(dup_tablet.get() != NULL); _dup_tablet_path = tablet->tablet_path(); } @@ -276,7 +276,7 @@ class TestDeleteConditionHandler : public testing::Test { tablet.reset(); dup_tablet.reset(); StorageEngine::instance()->tablet_manager()->drop_tablet( - _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); + _create_tablet.tablet_id, _create_tablet.replica_id, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(FileUtils::remove_all(config::storage_root_path).ok()); } @@ -428,7 +428,7 @@ class TestDeleteConditionHandler2 : public testing::Test { set_default_create_tablet_request(&_create_tablet); res = k_engine->create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, + tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, 0 /*replica_id*/, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(tablet.get() != NULL); _tablet_path = tablet->tablet_path(); @@ -438,7 +438,7 @@ class TestDeleteConditionHandler2 : public testing::Test { // Remove all dir. 
tablet.reset(); StorageEngine::instance()->tablet_manager()->drop_tablet( - _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); + _create_tablet.tablet_id, _create_tablet.replica_id, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(FileUtils::remove_all(config::storage_root_path).ok()); } @@ -795,7 +795,7 @@ class TestDeleteHandler : public testing::Test { set_default_create_tablet_request(&_create_tablet); res = k_engine->create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, + tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, 0 /*replica_id*/, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(tablet != nullptr); _tablet_path = tablet->tablet_path(); @@ -809,7 +809,7 @@ class TestDeleteHandler : public testing::Test { tablet.reset(); _delete_handler.finalize(); StorageEngine::instance()->tablet_manager()->drop_tablet( - _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); + _create_tablet.tablet_id, _create_tablet.replica_id, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(FileUtils::remove_all(config::storage_root_path).ok()); } diff --git a/be/test/olap/delta_writer_test.cpp b/be/test/olap/delta_writer_test.cpp index a0573b569730f3..f1e67ecbfa2f32 100644 --- a/be/test/olap/delta_writer_test.cpp +++ b/be/test/olap/delta_writer_test.cpp @@ -383,7 +383,7 @@ TEST_F(TestDeltaWriter, open) { TDropTabletReq drop_request; auto tablet_id = 10003; auto schema_hash = 270068375; - res = k_engine->tablet_manager()->drop_tablet(tablet_id, schema_hash); + res = k_engine->tablet_manager()->drop_tablet(tablet_id, 0, schema_hash); ASSERT_EQ(OLAP_SUCCESS, res); } @@ -477,7 +477,7 @@ TEST_F(TestDeltaWriter, write) { // publish version success TabletSharedPtr tablet = - k_engine->tablet_manager()->get_tablet(write_req.tablet_id, write_req.schema_hash); + k_engine->tablet_manager()->get_tablet(write_req.tablet_id, 0 
/*replica_id*/, write_req.schema_hash); std::cout << "before publish, tablet row nums:" << tablet->num_rows() << std::endl; OlapMeta* meta = tablet->data_dir()->get_meta(); Version version; @@ -507,7 +507,7 @@ TEST_F(TestDeltaWriter, write) { auto tablet_id = 10003; auto schema_hash = 270068375; - res = k_engine->tablet_manager()->drop_tablet(tablet_id, schema_hash); + res = k_engine->tablet_manager()->drop_tablet(tablet_id, 0, schema_hash); ASSERT_EQ(OLAP_SUCCESS, res); delete delta_writer; } @@ -557,7 +557,7 @@ TEST_F(TestDeltaWriter, sequence_col) { // publish version success TabletSharedPtr tablet = - k_engine->tablet_manager()->get_tablet(write_req.tablet_id, write_req.schema_hash); + k_engine->tablet_manager()->get_tablet(write_req.tablet_id, 0 /*replica_id*/, write_req.schema_hash); std::cout << "before publish, tablet row nums:" << tablet->num_rows() << std::endl; OlapMeta* meta = tablet->data_dir()->get_meta(); Version version; @@ -587,7 +587,7 @@ TEST_F(TestDeltaWriter, sequence_col) { auto tablet_id = 10005; auto schema_hash = 270068377; - res = k_engine->tablet_manager()->drop_tablet(tablet_id, schema_hash); + res = k_engine->tablet_manager()->drop_tablet(tablet_id, 0, schema_hash); ASSERT_EQ(OLAP_SUCCESS, res); delete delta_writer; } diff --git a/be/test/olap/memory/mem_tablet_test.cpp b/be/test/olap/memory/mem_tablet_test.cpp index ad4b2975c03028..80a01559db7f4a 100644 --- a/be/test/olap/memory/mem_tablet_test.cpp +++ b/be/test/olap/memory/mem_tablet_test.cpp @@ -68,7 +68,7 @@ TEST(MemTablet, writescan) { tschema.__set_is_in_memory(false); tschema.__set_schema_hash(1); TabletMetaSharedPtr tablet_meta( - new TabletMeta(1, 1, 1, 1, 1, tschema, static_cast(sc->cid_size()), + new TabletMeta(1, 1, 1, 1, 1, 1, tschema, static_cast(sc->cid_size()), col_idx_to_unique_id, TabletUid(1, 1), TTabletType::TABLET_TYPE_MEMORY)); std::shared_ptr tablet = MemTablet::create_tablet_from_meta(tablet_meta, nullptr); ASSERT_TRUE(tablet->init().ok()); diff --git 
a/be/test/olap/tablet_meta_test.cpp b/be/test/olap/tablet_meta_test.cpp index aea3e84e160ba9..4d5cb2dc96c9a4 100644 --- a/be/test/olap/tablet_meta_test.cpp +++ b/be/test/olap/tablet_meta_test.cpp @@ -26,7 +26,7 @@ namespace doris { TEST(TabletMetaTest, SaveAndParse) { std::string meta_path = "./be/test/olap/test_data/tablet_meta_test.hdr"; - TabletMeta old_tablet_meta(1, 2, 3, 4, 5, TTabletSchema(), 6, {{7, 8}}, UniqueId(9, 10), + TabletMeta old_tablet_meta(1, 2, 3, 4, 5, 6, TTabletSchema(), 6, {{7, 8}}, UniqueId(9, 10), TTabletType::TABLET_TYPE_DISK); ASSERT_EQ(OLAP_SUCCESS, old_tablet_meta.save(meta_path)); diff --git a/be/test/olap/tablet_mgr_test.cpp b/be/test/olap/tablet_mgr_test.cpp index 9a5eb86157f154..c689b5aeb19569 100644 --- a/be/test/olap/tablet_mgr_test.cpp +++ b/be/test/olap/tablet_mgr_test.cpp @@ -105,7 +105,7 @@ TEST_F(TabletMgrTest, CreateTablet) { data_dirs.push_back(_data_dir); OLAPStatus create_st = _tablet_mgr->create_tablet(create_tablet_req, data_dirs); ASSERT_TRUE(create_st == OLAP_SUCCESS); - TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 3333); + TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 0, 3333); ASSERT_TRUE(tablet != nullptr); // check dir exist bool dir_exist = FileUtils::check_exist(tablet->tablet_path()); @@ -125,7 +125,7 @@ TEST_F(TabletMgrTest, CreateTablet) { create_st = _tablet_mgr->create_tablet(create_tablet_req, data_dirs); ASSERT_TRUE(create_st == OLAP_ERR_CE_TABLET_ID_EXIST); - OLAPStatus drop_st = _tablet_mgr->drop_tablet(111, 3333, false); + OLAPStatus drop_st = _tablet_mgr->drop_tablet(111, 0, 3333, false); ASSERT_TRUE(drop_st == OLAP_SUCCESS); tablet.reset(); OLAPStatus trash_st = _tablet_mgr->start_trash_sweep(); @@ -171,7 +171,7 @@ TEST_F(TabletMgrTest, CreateTabletWithSequence) { OLAPStatus create_st = _tablet_mgr->create_tablet(create_tablet_req, data_dirs); ASSERT_TRUE(create_st == OLAP_SUCCESS); - TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 3333); + TabletSharedPtr tablet = 
_tablet_mgr->get_tablet(111, 0, 3333); ASSERT_TRUE(tablet != nullptr); // check dir exist bool dir_exist = FileUtils::check_exist(tablet->tablet_path()); @@ -181,7 +181,7 @@ TEST_F(TabletMgrTest, CreateTabletWithSequence) { OLAPStatus check_meta_st = TabletMetaManager::get_meta(_data_dir, 111, 3333, new_tablet_meta); ASSERT_TRUE(check_meta_st == OLAP_SUCCESS); - OLAPStatus drop_st = _tablet_mgr->drop_tablet(111, 3333, false); + OLAPStatus drop_st = _tablet_mgr->drop_tablet(111, 0, 3333, false); ASSERT_TRUE(drop_st == OLAP_SUCCESS); tablet.reset(); OLAPStatus trash_st = _tablet_mgr->start_trash_sweep(); @@ -212,21 +212,21 @@ TEST_F(TabletMgrTest, DropTablet) { data_dirs.push_back(_data_dir); OLAPStatus create_st = _tablet_mgr->create_tablet(create_tablet_req, data_dirs); ASSERT_TRUE(create_st == OLAP_SUCCESS); - TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 3333); + TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 0, 3333); ASSERT_TRUE(tablet != nullptr); // drop unexist tablet will be success - OLAPStatus drop_st = _tablet_mgr->drop_tablet(111, 4444, false); + OLAPStatus drop_st = _tablet_mgr->drop_tablet(111, 0, 4444, false); ASSERT_TRUE(drop_st == OLAP_SUCCESS); - tablet = _tablet_mgr->get_tablet(111, 3333); + tablet = _tablet_mgr->get_tablet(111, 0, 3333); ASSERT_TRUE(tablet != nullptr); // drop exist tablet will be success - drop_st = _tablet_mgr->drop_tablet(111, 3333, false); + drop_st = _tablet_mgr->drop_tablet(111, 0, 3333, false); ASSERT_TRUE(drop_st == OLAP_SUCCESS); - tablet = _tablet_mgr->get_tablet(111, 3333); + tablet = _tablet_mgr->get_tablet(111, 0, 3333); ASSERT_TRUE(tablet == nullptr); - tablet = _tablet_mgr->get_tablet(111, 3333, true); + tablet = _tablet_mgr->get_tablet(111, 0, 3333, true); ASSERT_TRUE(tablet != nullptr); // check dir exist @@ -238,7 +238,7 @@ TEST_F(TabletMgrTest, DropTablet) { // because tablet ptr referenced it OLAPStatus trash_st = _tablet_mgr->start_trash_sweep(); ASSERT_TRUE(trash_st == OLAP_SUCCESS); - tablet 
= _tablet_mgr->get_tablet(111, 3333, true); + tablet = _tablet_mgr->get_tablet(111, 0, 3333, true); ASSERT_TRUE(tablet != nullptr); dir_exist = FileUtils::check_exist(tablet_path); ASSERT_TRUE(dir_exist); @@ -247,7 +247,7 @@ TEST_F(TabletMgrTest, DropTablet) { tablet.reset(); trash_st = _tablet_mgr->start_trash_sweep(); ASSERT_TRUE(trash_st == OLAP_SUCCESS); - tablet = _tablet_mgr->get_tablet(111, 3333, true); + tablet = _tablet_mgr->get_tablet(111, 0, 3333, true); ASSERT_TRUE(tablet == nullptr); dir_exist = FileUtils::check_exist(tablet_path); ASSERT_TRUE(!dir_exist); diff --git a/be/test/olap/tablet_test.cpp b/be/test/olap/tablet_test.cpp index 0e5d57b49b157b..77c4cbe945dbfe 100644 --- a/be/test/olap/tablet_test.cpp +++ b/be/test/olap/tablet_test.cpp @@ -36,7 +36,7 @@ class TestTablet : public testing::Test { virtual void SetUp() { _tablet_meta = static_cast( - new TabletMeta(1, 2, 15673, 4, 5, TTabletSchema(), 6, {{7, 8}}, UniqueId(9, 10), + new TabletMeta(1, 2, 15673, 15674, 4, 5, TTabletSchema(), 6, {{7, 8}}, UniqueId(9, 10), TTabletType::TABLET_TYPE_DISK)); _json_rowset_meta = R"({ "rowset_id": 540081, diff --git a/be/test/olap/test_data/header_without_inc_rs.txt b/be/test/olap/test_data/header_without_inc_rs.txt index edd7d03628a5c1..127a117ecae53c 100644 --- a/be/test/olap/test_data/header_without_inc_rs.txt +++ b/be/test/olap/test_data/header_without_inc_rs.txt @@ -141,5 +141,6 @@ "lo": 10 }, "preferred_rowset_type": "BETA_ROWSET", - "tablet_type": "TABLET_TYPE_DISK" + "tablet_type": "TABLET_TYPE_DISK", + "replica_id": 0 } diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java index 28fbd796e3df68..ace9264ad0c76e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java @@ -222,12 +222,13 @@ protected void runPendingJob() throws AlterCancelException { List rollupReplicas = 
rollupTablet.getReplicas(); for (Replica rollupReplica : rollupReplicas) { long backendId = rollupReplica.getBackendId(); + long rollupReplicaId = rollupReplica.getId(); Preconditions.checkNotNull(tabletIdMap.get(rollupTabletId)); // baseTabletId countDownLatch.addMark(backendId, rollupTabletId); // create replica with version 1. // version will be updated by following load process, or when rollup task finished. CreateReplicaTask createReplicaTask = new CreateReplicaTask( - backendId, dbId, tableId, partitionId, rollupIndexId, rollupTabletId, + backendId, dbId, tableId, partitionId, rollupIndexId, rollupTabletId, rollupReplicaId, rollupShortKeyColumnCount, rollupSchemaHash, Partition.PARTITION_INIT_VERSION, Partition.PARTITION_INIT_VERSION_HASH, rollupKeysType, TStorageType.COLUMN, storageMedium, diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java index 1ad14934bdeb57..3d1c49cef19d27 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java @@ -250,9 +250,10 @@ protected void runPendingJob() throws AlterCancelException { List shadowReplicas = shadowTablet.getReplicas(); for (Replica shadowReplica : shadowReplicas) { long backendId = shadowReplica.getBackendId(); + long shadowReplicaId = shadowReplica.getId(); countDownLatch.addMark(backendId, shadowTabletId); CreateReplicaTask createReplicaTask = new CreateReplicaTask( - backendId, dbId, tableId, partitionId, shadowIdxId, shadowTabletId, + backendId, dbId, tableId, partitionId, shadowIdxId, shadowTabletId, shadowReplicaId, shadowShortKeyColumnCount, shadowSchemaHash, Partition.PARTITION_INIT_VERSION, Partition.PARTITION_INIT_VERSION_HASH, originKeysType, TStorageType.COLUMN, storageMedium, diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java 
b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index 8e92395472a8f5..6c96dd0d921d35 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -935,7 +935,7 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc Catalog.getCurrentInvertedIndex().addReplica(restoreTablet.getId(), restoreReplica); CreateReplicaTask task = new CreateReplicaTask(restoreReplica.getBackendId(), dbId, localTbl.getId(), restorePart.getId(), restoredIdx.getId(), - restoreTablet.getId(), indexMeta.getShortKeyColumnCount(), + restoreTablet.getId(), restoreReplica.getId(), indexMeta.getShortKeyColumnCount(), indexMeta.getSchemaHash(), restoreReplica.getVersion(), restoreReplica.getVersionHash(), indexMeta.getKeysType(), TStorageType.COLUMN, TStorageMedium.HDD /* all restored replicas will be saved to HDD */, diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java index eb04db8e094392..108fc20b1bcccd 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java @@ -3557,9 +3557,10 @@ private Partition createPartitionWithIndices(String clusterName, long dbId, long long tabletId = tablet.getId(); for (Replica replica : tablet.getReplicas()) { long backendId = replica.getBackendId(); + long replicaId = replica.getId(); countDownLatch.addMark(backendId, tabletId); CreateReplicaTask task = new CreateReplicaTask(backendId, dbId, tableId, - partitionId, indexId, tabletId, + partitionId, indexId, tabletId, replicaId, shortKeyColumnCount, schemaHash, version, versionHash, keysType, @@ -7140,7 +7141,8 @@ public void onEraseOlapTable(OlapTable olapTable, boolean isReplay) { List replicas = tablet.getReplicas(); for (Replica replica : replicas) { long backendId = replica.getBackendId(); - 
DropReplicaTask dropTask = new DropReplicaTask(backendId, tabletId, schemaHash); + long replicaId = replica.getId(); + DropReplicaTask dropTask = new DropReplicaTask(backendId, tabletId, replicaId, schemaHash); batchTask.addTask(dropTask); } // end for replicas } // end for tablets diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java index d86c2c5ae8e0d8..7dee79a171a098 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java @@ -749,11 +749,7 @@ public CloneTask createCloneReplicaAndTask() throws SchedException { // That is, we may need to use 2 clone tasks to create a new replica. It is inefficient, // but there is no other way now. TBackend tSrcBe = new TBackend(srcBe.getHost(), srcBe.getBePort(), srcBe.getHttpPort()); - cloneTask = new CloneTask(destBackendId, dbId, tblId, partitionId, indexId, - tabletId, schemaHash, Lists.newArrayList(tSrcBe), storageMedium, - visibleVersion, visibleVersionHash, (int) (taskTimeoutMs / 1000)); - cloneTask.setPathHash(srcPathHash, destPathHash); - + // if this is a balance task, or this is a repair task with REPLICA_MISSING/REPLICA_RELOCATING or REPLICA_MISSING_IN_CLUSTER, // we create a new replica with state CLONE if (tabletStatus == TabletStatus.REPLICA_MISSING || tabletStatus == TabletStatus.REPLICA_MISSING_IN_CLUSTER @@ -766,6 +762,9 @@ public CloneTask createCloneReplicaAndTask() throws SchedException { ReplicaState.CLONE, committedVersion, committedVersionHash, /* use committed version as last failed version */ -1 /* last success version */, 0 /* last success version hash */); + cloneTask = new CloneTask(destBackendId, dbId, tblId, partitionId, indexId, + tabletId, cloneReplica.getId(), schemaHash, Lists.newArrayList(tSrcBe), storageMedium, + visibleVersion, visibleVersionHash, (int) (taskTimeoutMs / 1000)); // addReplica() 
method will add this replica to tablet inverted index too. tablet.addReplica(cloneReplica); @@ -781,8 +780,12 @@ public CloneTask createCloneReplicaAndTask() throws SchedException { throw new SchedException(Status.SCHEDULE_FAILED, "dest replica's path hash is changed. " + "current: " + replica.getPathHash() + ", scheduled: " + destPathHash); } + cloneTask = new CloneTask(destBackendId, dbId, tblId, partitionId, indexId, + tabletId, replica.getId(), schemaHash, Lists.newArrayList(tSrcBe), storageMedium, + visibleVersion, visibleVersionHash, (int) (taskTimeoutMs / 1000)); } - + + cloneTask.setPathHash(srcPathHash, destPathHash); this.state = State.RUNNING; return cloneTask; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index 7aca236ad88681..93bed0d111a51c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -1006,7 +1006,7 @@ private void deleteReplicaInternal(TabletSchedCtx tabletCtx, Replica replica, St // NOTICE: only delete the replica from meta may not work. sometimes we can depends on tablet report // to delete these replicas, but in FORCE_REDUNDANT case, replica may be added to meta again in report // process. 
- sendDeleteReplicaTask(replica.getBackendId(), tabletCtx.getTabletId(), tabletCtx.getSchemaHash()); + sendDeleteReplicaTask(replica.getBackendId(), tabletCtx.getTabletId(),replica.getId(), tabletCtx.getSchemaHash()); } // write edit log @@ -1023,8 +1023,8 @@ private void deleteReplicaInternal(TabletSchedCtx tabletCtx, Replica replica, St tabletCtx.getTabletId(), replica.getBackendId(), reason, force); } - private void sendDeleteReplicaTask(long backendId, long tabletId, int schemaHash) { - DropReplicaTask task = new DropReplicaTask(backendId, tabletId, schemaHash); + private void sendDeleteReplicaTask(long backendId, long tabletId, long replicaId, int schemaHash) { + DropReplicaTask task = new DropReplicaTask(backendId, tabletId, replicaId, schemaHash); AgentBatchTask batchTask = new AgentBatchTask(); batchTask.addTask(task); AgentTaskExecutor.submit(batchTask); diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index 3775afbbf9bc61..2354b085004008 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -608,7 +608,7 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta Set bfColumns = olapTable.getCopiedBfColumns(); double bfFpp = olapTable.getBfFpp(); CreateReplicaTask createReplicaTask = new CreateReplicaTask(backendId, dbId, - tableId, partitionId, indexId, tabletId, indexMeta.getShortKeyColumnCount(), + tableId, partitionId, indexId, tabletId, replica.getId(), indexMeta.getShortKeyColumnCount(), indexMeta.getSchemaHash(), partition.getVisibleVersion(), partition.getVisibleVersionHash(), indexMeta.getKeysType(), TStorageType.COLUMN, @@ -686,7 +686,8 @@ private static void deleteFromBackend(Map backendTablets, for (Long tabletId : foundTabletsWithInvalidSchema.keySet()) { // this tablet is found in meta but with invalid schema hash. delete it. 
int schemaHash = foundTabletsWithInvalidSchema.get(tabletId).getSchemaHash(); - DropReplicaTask task = new DropReplicaTask(backendId, tabletId, schemaHash); + long replicaId = foundTabletsWithInvalidSchema.get(tabletId).getReplicaId(); + DropReplicaTask task = new DropReplicaTask(backendId, tabletId, replicaId, schemaHash); batchTask.addTask(task); LOG.warn("delete tablet[" + tabletId + " - " + schemaHash + "] from backend[" + backendId + "] because invalid schema hash"); @@ -696,7 +697,8 @@ private static void deleteFromBackend(Map backendTablets, for (Long tabletId : backendTablets.keySet()) { if (foundTabletsWithInvalidSchema.containsKey(tabletId)) { int schemaHash = foundTabletsWithInvalidSchema.get(tabletId).getSchemaHash(); - DropReplicaTask task = new DropReplicaTask(backendId, tabletId, schemaHash); + long replicaId = foundTabletsWithInvalidSchema.get(tabletId).getReplicaId(); + DropReplicaTask task = new DropReplicaTask(backendId, tabletId, replicaId, schemaHash); batchTask.addTask(task); LOG.warn("delete tablet[" + tabletId + " - " + schemaHash + "] from backend[" + backendId + "] because invalid schema hash"); @@ -727,7 +729,8 @@ private static void deleteFromBackend(Map backendTablets, if (needDelete) { // drop replica - DropReplicaTask task = new DropReplicaTask(backendId, tabletId, backendTabletInfo.getSchemaHash()); + DropReplicaTask task = new DropReplicaTask(backendId, tabletId, backendTabletInfo.getReplicaId(), + backendTabletInfo.getSchemaHash()); batchTask.addTask(task); LOG.warn("delete tablet[" + tabletId + " - " + backendTabletInfo.getSchemaHash() + "] from backend[" + backendId + "] because not found in meta"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/AgentTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/AgentTask.java index 77d38073ffa755..abef257c55e2c1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/AgentTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/AgentTask.java @@ -59,7 +59,7 @@ 
public AgentTask(TResourceInfo resourceInfo, long backendId, TTaskType taskType, this.failedTimes = 0; this.createTime = createTime; } - + public AgentTask(TResourceInfo resourceInfo, long backendId, TTaskType taskType, long dbId, long tableId, long partitionId, long indexId, long tabletId) { this(resourceInfo, backendId, taskType, dbId, tableId, partitionId, indexId, tabletId, tabletId, -1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/CloneTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/CloneTask.java index 6bb927aa8ec570..17e72dffb057d4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/CloneTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/CloneTask.java @@ -30,6 +30,7 @@ public class CloneTask extends AgentTask { public static final int VERSION_2 = 2; private int schemaHash; + private long replicaId; private List srcBackends; private TStorageMedium storageMedium; @@ -44,10 +45,11 @@ public class CloneTask extends AgentTask { private int taskVersion = VERSION_1; public CloneTask(long backendId, long dbId, long tableId, long partitionId, long indexId, - long tabletId, int schemaHash, List srcBackends, TStorageMedium storageMedium, + long tabletId, long replicaId, int schemaHash, List srcBackends, TStorageMedium storageMedium, long visibleVersion, long visibleVersionHash, int timeoutS) { super(null, backendId, TTaskType.CLONE, dbId, tableId, partitionId, indexId, tabletId); this.schemaHash = schemaHash; + this.replicaId = replicaId; this.srcBackends = srcBackends; this.storageMedium = storageMedium; this.visibleVersion = visibleVersion; @@ -59,6 +61,10 @@ public int getSchemaHash() { return schemaHash; } + public long getReplicaId() { + return replicaId; + } + public TStorageMedium getStorageMedium() { return storageMedium; } @@ -87,6 +93,7 @@ public TCloneReq toThrift() { request.setCommittedVersion(visibleVersion); request.setCommittedVersionHash(visibleVersionHash); request.setTaskVersion(taskVersion); + 
request.setReplicaId(replicaId); if (taskVersion == VERSION_2) { request.setSrcPathHash(srcPathHash); request.setDestPathHash(destPathHash); @@ -99,7 +106,7 @@ public TCloneReq toThrift() { @Override public String toString() { StringBuilder sb = new StringBuilder(); - sb.append("tablet id: ").append(tabletId).append(", schema hash: ").append(schemaHash); + sb.append("tablet id: ").append(tabletId).append(", replica id: ").append(replicaId).append(", schema hash: ").append(schemaHash); sb.append(", storageMedium: ").append(storageMedium.name()); sb.append(", visible version(hash): ").append(visibleVersion).append("-").append(visibleVersionHash); sb.append(", src backend: ").append(srcBackends.get(0).getHost()).append(", src path hash: ").append(srcPathHash); diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java index 04055d7d845b90..67979d9bdea5e5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java @@ -47,6 +47,7 @@ public class CreateReplicaTask extends AgentTask { private short shortKeyColumnCount; private int schemaHash; + private long replicaId; private long version; private long versionHash; @@ -83,7 +84,7 @@ public class CreateReplicaTask extends AgentTask { private boolean isRecoverTask = false; public CreateReplicaTask(long backendId, long dbId, long tableId, long partitionId, long indexId, long tabletId, - short shortKeyColumnCount, int schemaHash, long version, long versionHash, + long replicaId, short shortKeyColumnCount, int schemaHash, long version, long versionHash, KeysType keysType, TStorageType storageType, TStorageMedium storageMedium, List columns, Set bfColumns, double bfFpp, MarkedCountDownLatch latch, @@ -94,6 +95,7 @@ public CreateReplicaTask(long backendId, long dbId, long tableId, long partition this.shortKeyColumnCount = 
shortKeyColumnCount; this.schemaHash = schemaHash; + this.replicaId = replicaId; this.version = version; this.versionHash = versionHash; @@ -216,6 +218,7 @@ public TCreateTabletReq toThrift() { createTabletReq.setInRestoreMode(true); } createTabletReq.setTableId(tableId); + createTabletReq.setReplicaId(replicaId); createTabletReq.setPartitionId(partitionId); if (baseTabletId != -1) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/DropReplicaTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/DropReplicaTask.java index cddbe118f39c24..b4dcd8a6ad3cf9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/DropReplicaTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/DropReplicaTask.java @@ -22,14 +22,17 @@ public class DropReplicaTask extends AgentTask { private int schemaHash; // set -1L as unknown + private long replicaId; - public DropReplicaTask(long backendId, long tabletId, int schemaHash) { + public DropReplicaTask(long backendId, long tabletId, long replicaId, int schemaHash) { super(null, backendId, TTaskType.DROP, -1L, -1L, -1L, -1L, tabletId); this.schemaHash = schemaHash; + this.replicaId = replicaId; } public TDropTabletReq toThrift() { TDropTabletReq request = new TDropTabletReq(tabletId); + request.setReplicaId(replicaId); if (this.schemaHash != -1) { request.setSchemaHash(schemaHash); } @@ -39,4 +42,8 @@ public TDropTabletReq toThrift() { public int getSchemaHash() { return schemaHash; } + + public long getReplicaId() { + return replicaId; + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java index 3065a1fdad64a3..4d4b53e1bebdf6 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java @@ -110,14 +110,14 @@ public void setUp() throws AnalysisException { // create createReplicaTask = new CreateReplicaTask(backendId1, dbId, 
tableId, partitionId, - indexId1, tabletId1, shortKeyNum, schemaHash1, + indexId1, tabletId1, replicaId1, shortKeyNum, schemaHash1, version, versionHash, KeysType.AGG_KEYS, storageType, TStorageMedium.SSD, columns, null, 0, latch, null, false, TTabletType.TABLET_TYPE_DISK); // drop - dropTask = new DropReplicaTask(backendId1, tabletId1, schemaHash1); + dropTask = new DropReplicaTask(backendId1, tabletId1, replicaId1, schemaHash1); // push pushTask = @@ -127,7 +127,7 @@ public void setUp() throws AnalysisException { // clone cloneTask = - new CloneTask(backendId1, dbId, tableId, partitionId, indexId1, tabletId1, schemaHash1, + new CloneTask(backendId1, dbId, tableId, partitionId, indexId1, tabletId1, replicaId1, schemaHash1, Arrays.asList(new TBackend("host1", 8290, 8390)), TStorageMedium.HDD, -1, -1, 3600); // rollup @@ -264,7 +264,7 @@ public void failedAgentTaskTest() { Assert.assertEquals(1, AgentTaskQueue.getTaskNum(backendId1, TTaskType.DROP, true)); dropTask.failed(); - DropReplicaTask dropTask2 = new DropReplicaTask(backendId2, tabletId1, schemaHash1); + DropReplicaTask dropTask2 = new DropReplicaTask(backendId2, tabletId1, replicaId1, schemaHash1); AgentTaskQueue.addTask(dropTask2); dropTask2.failed(); Assert.assertEquals(1, AgentTaskQueue.getTaskNum(backendId1, TTaskType.DROP, true)); diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 622db02fb33e06..06ccc47376de23 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -220,6 +220,7 @@ message OLAPHeaderMessage { optional int64 tablet_id = 20; // TabletMetaPB.tablet_id optional int32 schema_hash = 21; // TabletMetaPB.schema_hash? int32 vs int64 optional uint64 shard_id = 22; // TabletMetaPB.shard_id? 
int64 vs int32 + optional uint64 replica_id = 23; // TabletMetaPB.replica_id } enum AlterTabletState { @@ -312,6 +313,7 @@ message TabletMetaPB { optional RowsetTypePB preferred_rowset_type = 16; optional TabletTypePB tablet_type = 17; repeated RowsetMetaPB stale_rs_metas = 18; + optional int64 replica_id = 19; // OlapHeaderMessage.replica_id } message OLAPIndexHeaderMessage { diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index a18d6fd21efd5b..1494e1d35e8c92 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -85,11 +85,13 @@ struct TCreateTabletReq { 12: optional bool is_eco_mode 13: optional TStorageFormat storage_format 14: optional TTabletType tablet_type + 15: optional Types.TReplicaId replica_id } struct TDropTabletReq { 1: required Types.TTabletId tablet_id 2: optional Types.TSchemaHash schema_hash + 3: optional Types.TReplicaId replica_id } struct TAlterTabletReq { @@ -156,6 +158,7 @@ struct TCloneReq { 8: optional i64 src_path_hash; 9: optional i64 dest_path_hash; 10: optional i32 timeout_s; + 11: optional Types.TReplicaId replica_id } struct TStorageMediumMigrateReq { @@ -275,6 +278,7 @@ struct TTabletMetaInfo { 3: optional Types.TPartitionId partition_id 4: optional TTabletMetaType meta_type 5: optional bool is_in_memory + 6: optional Types.TReplicaId replica_id } struct TUpdateTabletMetaInfoReq { diff --git a/gensrc/thrift/MasterService.thrift b/gensrc/thrift/MasterService.thrift index ded383b1b64638..e49d34aacf874e 100644 --- a/gensrc/thrift/MasterService.thrift +++ b/gensrc/thrift/MasterService.thrift @@ -38,6 +38,7 @@ struct TTabletInfo { 12: optional bool used 13: optional Types.TPartitionId partition_id 14: optional bool is_in_memory + 15: optional Types.TReplicaId replica_id } struct TFinishTaskRequest { diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index efa657a5592e43..96d220c51603b8 100644 --- a/gensrc/thrift/Types.thrift +++ 
b/gensrc/thrift/Types.thrift @@ -24,6 +24,7 @@ typedef i32 TPlanNodeId typedef i32 TTupleId typedef i32 TSlotId typedef i64 TTableId +typedef i64 TReplicaId typedef i64 TTabletId typedef i64 TVersion typedef i64 TVersionHash From 25194fa3ad111b5df9b36be944111788fecb77da Mon Sep 17 00:00:00 2001 From: qijianliang01 Date: Mon, 28 Jun 2021 12:48:16 +0800 Subject: [PATCH 2/7] set replica_id when restore snapshot Change-Id: Ibfb98ccf52966f6995f55f7ccb0e470abe347e29 --- be/src/olap/snapshot_manager.cpp | 3 ++- be/src/olap/snapshot_manager.h | 2 +- be/src/olap/task/engine_clone_task.cpp | 2 +- be/src/olap/task/engine_storage_migration_task.cpp | 3 ++- be/src/runtime/snapshot_loader.cpp | 3 ++- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index f9ded4f42279b4..56863cdaa506d8 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -122,7 +122,7 @@ OLAPStatus SnapshotManager::release_snapshot(const string& snapshot_path) { // TODO support beta rowset // For now, alpha and beta rowset meta have same fields, so we can just use // AlphaRowsetMeta here. 
-OLAPStatus SnapshotManager::convert_rowset_ids(const string& clone_dir, int64_t tablet_id, +OLAPStatus SnapshotManager::convert_rowset_ids(const string& clone_dir, int64_t tablet_id, int64_t replica_id, const int32_t& schema_hash) { OLAPStatus res = OLAP_SUCCESS; // check clone dir existed @@ -154,6 +154,7 @@ OLAPStatus SnapshotManager::convert_rowset_ids(const string& clone_dir, int64_t // equal to tablet id in meta new_tablet_meta_pb.set_tablet_id(tablet_id); new_tablet_meta_pb.set_schema_hash(schema_hash); + new_tablet_meta_pb.set_replica_id(replica_id); TabletSchema tablet_schema; tablet_schema.init_from_pb(new_tablet_meta_pb.schema()); diff --git a/be/src/olap/snapshot_manager.h b/be/src/olap/snapshot_manager.h index d3685a4f03a19f..7f9e158183d292 100644 --- a/be/src/olap/snapshot_manager.h +++ b/be/src/olap/snapshot_manager.h @@ -61,7 +61,7 @@ class SnapshotManager { static SnapshotManager* instance(); - OLAPStatus convert_rowset_ids(const string& clone_dir, int64_t tablet_id, + OLAPStatus convert_rowset_ids(const string& clone_dir, int64_t tablet_id, int64_t replica_id, const int32_t& schema_hash); private: diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index ba43776b1185a5..27877517298dc7 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -346,7 +346,7 @@ AgentStatus EngineCloneTask::_make_and_download_snapshots(DataDir& data_dir, con if (status == DORIS_SUCCESS) { // change all rowset ids because they maybe its id same with local rowset auto olap_st = SnapshotManager::instance()->convert_rowset_ids( - local_path, _clone_req.tablet_id, _clone_req.schema_hash); + local_path, _clone_req.tablet_id, _clone_req.replica_id, _clone_req.schema_hash); if (olap_st != OLAP_SUCCESS) { LOG(WARNING) << "fail to convert rowset ids, path=" << local_path << ", tablet_id=" << _clone_req.tablet_id diff --git a/be/src/olap/task/engine_storage_migration_task.cpp 
b/be/src/olap/task/engine_storage_migration_task.cpp index 3c26832f36698e..2f9ad4b4a0a05a 100644 --- a/be/src/olap/task/engine_storage_migration_task.cpp +++ b/be/src/olap/task/engine_storage_migration_task.cpp @@ -34,6 +34,7 @@ OLAPStatus EngineStorageMigrationTask::execute() { OLAPStatus EngineStorageMigrationTask::_migrate() { int64_t tablet_id = _tablet->tablet_id(); + int64_t replica_id = _tablet->replica_id(); int32_t schema_hash = _tablet->schema_hash(); LOG(INFO) << "begin to process tablet migrate. " << "tablet_id=" << tablet_id << ", dest_store=" << _dest_store->path(); @@ -141,7 +142,7 @@ OLAPStatus EngineStorageMigrationTask::_migrate() { } // it will change rowset id and its create time // rowset create time is useful when load tablet from meta to check which tablet is the tablet to load - res = SnapshotManager::instance()->convert_rowset_ids(full_path, tablet_id, schema_hash); + res = SnapshotManager::instance()->convert_rowset_ids(full_path, tablet_id, replica_id, schema_hash); if (res != OLAP_SUCCESS) { LOG(WARNING) << "failed to convert rowset id when do storage migration" << " path = " << full_path; diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index e140e857ed58ad..5ce644b755bc10 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -362,6 +362,7 @@ Status SnapshotLoader::move(const std::string& snapshot_path, TabletSharedPtr ta bool overwrite) { std::string tablet_path = tablet->tablet_path(); std::string store_path = tablet->data_dir()->path(); + int64_t replica_id = tablet->replica_id(); LOG(INFO) << "begin to move snapshot files. 
from: " << snapshot_path << ", to: " << tablet_path << ", store: " << store_path << ", job: " << _job_id << ", task id: " << _task_id; @@ -412,7 +413,7 @@ Status SnapshotLoader::move(const std::string& snapshot_path, TabletSharedPtr ta // rename the rowset ids and tabletid info in rowset meta OLAPStatus convert_status = - SnapshotManager::instance()->convert_rowset_ids(snapshot_path, tablet_id, schema_hash); + SnapshotManager::instance()->convert_rowset_ids(snapshot_path, tablet_id, replica_id, schema_hash); if (convert_status != OLAP_SUCCESS) { std::stringstream ss; ss << "failed to convert rowsetids in snapshot: " << snapshot_path From 3edec7f968cda6000ac07b96bf3552baebd74be2 Mon Sep 17 00:00:00 2001 From: Jianliang Qi Date: Wed, 27 Oct 2021 20:44:04 +0800 Subject: [PATCH 3/7] fix reviews --- be/src/agent/task_worker_pool.cpp | 2 +- be/src/olap/tablet_manager.cpp | 2 +- be/src/olap/task/engine_clone_task.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 994222197c0e2e..1a6531d0244556 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -430,7 +430,7 @@ void TaskWorkerPool::_drop_tablet_worker_thread_callback() { if (dropped_tablet != nullptr) { if (dropped_tablet->clone_mode()) { LOG(WARNING) << "drop table cancelled as tablet is in clone mode! signature: " << agent_task_req.signature; - error_msgs.push_back("drop table cancelled!"); + error_msgs.push_back("drop table cancelled as tablet is in clone mode! 
signature: " + agent_task_req.signature); status_code = TStatusCode::CANCELLED; } diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index a0952e3977e982..ca64f02b49d7c2 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -486,7 +486,7 @@ OLAPStatus TabletManager::_drop_tablet_unlocked(TTabletId tablet_id, TReplicaId return OLAP_SUCCESS; } - return _drop_tablet_directly_unlocked(tablet_id, schema_hash, keep_files); + return _drop_tablet_directly_unlocked(tablet_id, replica_id, schema_hash, keep_files); } OLAPStatus TabletManager::drop_tablets_on_error_root_path( diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 27877517298dc7..c63123efc369b4 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -178,7 +178,7 @@ OLAPStatus EngineCloneTask::_do_clone() { } else { OLAPStatus load_header_status; if (old_version_tablet != nullptr) { - // drop old version tablet first, then and new tablet + // drop old version tablet first, then add new tablet load_header_status = StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( store, _clone_req.tablet_id, _clone_req.schema_hash, schema_hash_path_stream.str(), true); From b7915fc4fb06919262a9c65e07566da419201392 Mon Sep 17 00:00:00 2001 From: Jianliang Qi Date: Fri, 5 Nov 2021 16:45:54 +0800 Subject: [PATCH 4/7] set replica_id default value to 0; add agentTask req replica_id check --- be/src/agent/task_worker_pool.cpp | 33 ++++++++++++++----- be/src/exec/olap_scan_node.cpp | 2 +- be/src/exec/olap_scanner.cpp | 2 +- be/src/http/action/compaction_action.cpp | 6 ++-- be/src/http/action/meta_action.cpp | 2 +- be/src/http/action/restore_tablet_action.cpp | 2 +- .../http/action/tablet_migration_action.cpp | 2 +- be/src/olap/data_dir.cpp | 6 ++-- be/src/olap/delta_writer.cpp | 2 +- be/src/olap/schema_change.cpp | 4 +-- be/src/olap/snapshot_manager.cpp | 2 +- 
be/src/olap/storage_engine.cpp | 4 +-- be/src/olap/tablet_manager.cpp | 24 +++++++------- be/src/olap/tablet_manager.h | 4 +-- be/src/olap/task/engine_batch_load_task.cpp | 6 ++-- be/src/olap/task/engine_checksum_task.cpp | 2 +- be/src/olap/task/engine_clone_task.cpp | 4 +-- .../olap/task/engine_publish_version_task.cpp | 2 +- .../task/engine_storage_migration_task.cpp | 2 +- be/src/runtime/snapshot_loader.cpp | 2 +- be/test/olap/delete_handler_test.cpp | 8 ++--- be/test/olap/delta_writer_test.cpp | 4 +-- be/test/olap/tablet_mgr_test.cpp | 16 ++++----- 23 files changed, 78 insertions(+), 63 deletions(-) diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 1a6531d0244556..b5321807ce7009 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -371,9 +371,13 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() { status_code = TStatusCode::RUNTIME_ERROR; } else { ++_s_report_version; + TReplicaId replica_id = 0; + if (create_tablet_req.__isset.replica_id) { + replica_id = create_tablet_req.replica_id; + } // get path hash of the created tablet TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - create_tablet_req.tablet_id, create_tablet_req.replica_id, create_tablet_req.tablet_schema.schema_hash); + create_tablet_req.tablet_id, create_tablet_req.tablet_schema.schema_hash, replica_id); DCHECK(tablet != nullptr); TTabletInfo tablet_info; tablet_info.tablet_id = tablet->table_id(); @@ -425,8 +429,12 @@ void TaskWorkerPool::_drop_tablet_worker_thread_callback() { std::vector error_msgs; TStatus task_status; string err; + TReplicaId replica_id = 0; + if (drop_tablet_req.__isset.replica_id) { + replica_id = drop_tablet_req.replica_id; + } TabletSharedPtr dropped_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - drop_tablet_req.tablet_id, drop_tablet_req.replica_id, drop_tablet_req.schema_hash, false, &err); + drop_tablet_req.tablet_id, 
drop_tablet_req.schema_hash, replica_id, false, &err); if (dropped_tablet != nullptr) { if (dropped_tablet->clone_mode()) { LOG(WARNING) << "drop table cancelled as tablet is in clone mode! signature: " << agent_task_req.signature; @@ -435,7 +443,7 @@ void TaskWorkerPool::_drop_tablet_worker_thread_callback() { } OLAPStatus drop_status = StorageEngine::instance()->tablet_manager()->drop_tablet( - drop_tablet_req.tablet_id, drop_tablet_req.replica_id, drop_tablet_req.schema_hash); + drop_tablet_req.tablet_id, replica_id, drop_tablet_req.schema_hash); if (drop_status != OLAP_SUCCESS) { LOG(WARNING) << "drop table failed! signature: " << agent_task_req.signature; error_msgs.push_back("drop table failed!"); @@ -842,8 +850,12 @@ void TaskWorkerPool::_update_tablet_meta_worker_thread_callback() { TStatus task_status; for (auto tablet_meta_info : update_tablet_meta_req.tabletMetaInfos) { + TReplicaId replica_id = 0; + if (tablet_meta_info.__isset.replica_id) { + replica_id = tablet_meta_info.replica_id; + } TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - tablet_meta_info.tablet_id, tablet_meta_info.replica_id, tablet_meta_info.schema_hash); + tablet_meta_info.tablet_id, tablet_meta_info.schema_hash, replica_id); if (tablet == nullptr) { LOG(WARNING) << "could not find tablet when update partition id" << " tablet_id=" << tablet_meta_info.tablet_id @@ -907,10 +919,13 @@ void TaskWorkerPool::_clone_worker_thread_callback() { DorisMetrics::instance()->clone_requests_total->increment(1); LOG(INFO) << "get clone task. 
signature:" << agent_task_req.signature; + TReplicaId replica_id = 0; + if (clone_req.__isset.replica_id) { + replica_id = clone_req.replica_id; + } // check tablet with the same tabletId existance, if exist, set tablet in clone mode - string err; TabletSharedPtr exist_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - clone_req.tablet_id, 0 /*replica_id*/, clone_req.schema_hash, &err); + clone_req.tablet_id, clone_req.schema_hash, replica_id); if (exist_tablet != nullptr) { exist_tablet->set_clone_mode(true); } @@ -945,7 +960,7 @@ void TaskWorkerPool::_clone_worker_thread_callback() { // clone done, set clone mode false // Retrieve once again to prevent tablet from being dropped exist_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - clone_req.tablet_id, 0 /*replica_id*/, clone_req.schema_hash, &err); + clone_req.tablet_id, clone_req.schema_hash, replica_id); if (exist_tablet != nullptr) { exist_tablet->set_clone_mode(false); } @@ -1014,7 +1029,7 @@ OLAPStatus TaskWorkerPool::_check_migrate_requset(const TStorageMediumMigrateReq int64_t tablet_id = req.tablet_id; int32_t schema_hash = req.schema_hash; // tablet migration no need to know replica_id - tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); + tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); if (tablet == nullptr) { LOG(WARNING) << "can't find tablet. tablet_id= " << tablet_id << " schema_hash=" << schema_hash; @@ -1595,7 +1610,7 @@ AgentStatus TaskWorkerPool::_move_dir(const TTabletId tablet_id, const TSchemaHa const std::string& src, int64_t job_id, bool overwrite, std::vector* error_msgs) { TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); if (tablet == nullptr) { LOG(INFO) << "failed to get tablet. 
tablet_id:" << tablet_id << ", schema hash:" << schema_hash; diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index abb077210b8bbf..ef09529b84a38d 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -665,7 +665,7 @@ Status OlapScanNode::get_hints(const TPaloScanRange& scan_range, int block_row_c int32_t schema_hash = strtoul(scan_range.schema_hash.c_str(), NULL, 10); std::string err; TabletSharedPtr table = StorageEngine::instance()->tablet_manager()->get_tablet( - tablet_id, 0 /*replica_id*/, schema_hash, true, &err); + tablet_id, schema_hash, 0 /*replica_id*/, true, &err); if (table == nullptr) { std::stringstream ss; ss << "failed to get tablet: " << tablet_id << " with schema hash: " << schema_hash diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 1145a4c757868f..f3b3cb409ecb7a 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -75,7 +75,7 @@ Status OlapScanner::prepare( _version = strtoul(scan_range.version.c_str(), nullptr, 10); { std::string err; - _tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash, + _tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash, 0 /*replica_id*/, true, &err); if (_tablet.get() == nullptr) { std::stringstream ss; diff --git a/be/src/http/action/compaction_action.cpp b/be/src/http/action/compaction_action.cpp index da117a9f9071e4..16f8e91db35f58 100644 --- a/be/src/http/action/compaction_action.cpp +++ b/be/src/http/action/compaction_action.cpp @@ -69,7 +69,7 @@ Status CompactionAction::_handle_show_compaction(HttpRequest* req, std::string* "check param failed"); TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); if (tablet == nullptr) { return Status::NotFound( 
strings::Substitute("Tablet not found. tablet_id=$0, schema_hash=$1", @@ -98,7 +98,7 @@ Status CompactionAction::_handle_run_compaction(HttpRequest* req, std::string* j // 2. fetch the tablet by tablet_id and schema_hash TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); if (tablet == nullptr) { return Status::NotFound( strings::Substitute("Tablet not found. tablet_id=$0, schema_hash=$1", @@ -160,7 +160,7 @@ Status CompactionAction::_handle_run_status_compaction(HttpRequest* req, std::st } else { // fetch the tablet by tablet_id and schema_hash TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); if (tablet == nullptr) { LOG(WARNING) << "invalid argument.tablet_id:" << tablet_id diff --git a/be/src/http/action/meta_action.cpp b/be/src/http/action/meta_action.cpp index 920c2637854b44..945c7477398ff2 100644 --- a/be/src/http/action/meta_action.cpp +++ b/be/src/http/action/meta_action.cpp @@ -60,7 +60,7 @@ Status MetaAction::_handle_header(HttpRequest* req, std::string* json_meta) { } TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); if (tablet == nullptr) { LOG(WARNING) << "no tablet for tablet_id:" << tablet_id << " schema hash:" << schema_hash; return Status::InternalError("no tablet exist"); diff --git a/be/src/http/action/restore_tablet_action.cpp b/be/src/http/action/restore_tablet_action.cpp index 71849fb7ccd03a..d207ca54c85d36 100644 --- a/be/src/http/action/restore_tablet_action.cpp +++ b/be/src/http/action/restore_tablet_action.cpp @@ -83,7 +83,7 @@ Status 
RestoreTabletAction::_handle(HttpRequest* req) { LOG(INFO) << "get restore tablet action request: " << tablet_id << "-" << schema_hash; TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); if (tablet != nullptr) { LOG(WARNING) << "find tablet. tablet_id=" << tablet_id << " schema_hash=" << schema_hash; return Status::InternalError("tablet already exists, can not restore."); diff --git a/be/src/http/action/tablet_migration_action.cpp b/be/src/http/action/tablet_migration_action.cpp index 31556514a85f1c..ed7ea2197607a7 100644 --- a/be/src/http/action/tablet_migration_action.cpp +++ b/be/src/http/action/tablet_migration_action.cpp @@ -186,7 +186,7 @@ Status TabletMigrationAction::_check_param(HttpRequest* req, int64_t& tablet_id, Status TabletMigrationAction::_check_migrate_request(int64_t tablet_id, int32_t schema_hash, string dest_disk, TabletSharedPtr& tablet, DataDir** dest_store) { - tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); + tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); if (tablet == nullptr) { LOG(WARNING) << "no tablet for tablet_id:" << tablet_id << " schema hash:" << schema_hash; return Status::NotFound("Tablet not found"); diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index b0457eb7c13523..e48392ae16c7c4 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -581,7 +581,7 @@ OLAPStatus DataDir::load() { // 2. 
add visible rowset to tablet // ignore any errors when load tablet or rowset, because fe will repair them after report for (auto rowset_meta : dir_rowset_metas) { - TabletSharedPtr tablet = _tablet_manager->get_tablet(rowset_meta->tablet_id(), 0 /*replica_id*/, + TabletSharedPtr tablet = _tablet_manager->get_tablet(rowset_meta->tablet_id(), rowset_meta->tablet_schema_hash()); // tablet maybe dropped, but not drop related rowset meta if (tablet == nullptr) { @@ -679,7 +679,7 @@ void DataDir::perform_path_gc_by_tablet() { << ", path=" << path; continue; } - TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, schema_hash); if (tablet != nullptr) { // could find the tablet, then skip check it continue; @@ -729,7 +729,7 @@ void DataDir::perform_path_gc_by_rowsetid() { RowsetId rowset_id; bool is_rowset_file = TabletManager::get_rowset_id_from_path(path, &rowset_id); if (is_rowset_file) { - TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, schema_hash); if (tablet != nullptr) { if (!tablet->check_rowset_id(rowset_id) && !StorageEngine::instance()->check_rowset_id_in_unused_rowsets(rowset_id)) { diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 1b539dd03fb1a4..3868e8806e56b8 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -94,7 +94,7 @@ void DeltaWriter::_garbage_collection() { OLAPStatus DeltaWriter::init() { TabletManager* tablet_mgr = _storage_engine->tablet_manager(); - _tablet = tablet_mgr->get_tablet(_req.tablet_id, 0 /*replica_id*/, _req.schema_hash); + _tablet = tablet_mgr->get_tablet(_req.tablet_id, _req.schema_hash); if (_tablet == nullptr) { LOG(WARNING) << "fail to find tablet. 
tablet_id=" << _req.tablet_id << ", schema_hash=" << _req.schema_hash; diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 7ac20731da1f09..49a7f961f49193 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -1426,7 +1426,7 @@ OLAPStatus SchemaChangeHandler::process_alter_tablet_v2(const TAlterTabletReqV2& OLAPStatus SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2& request) { OLAPStatus res = OLAP_SUCCESS; TabletSharedPtr base_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - request.base_tablet_id, 0 /*replica_id*/, request.base_schema_hash); + request.base_tablet_id, request.base_schema_hash); if (base_tablet == nullptr) { LOG(WARNING) << "fail to find base tablet. base_tablet=" << request.base_tablet_id << ", base_schema_hash=" << request.base_schema_hash; @@ -1435,7 +1435,7 @@ OLAPStatus SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletRe // new tablet has to exist TabletSharedPtr new_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - request.new_tablet_id, 0 /*replica_id*/, request.new_schema_hash); + request.new_tablet_id, request.new_schema_hash); if (new_tablet == nullptr) { LOG(WARNING) << "fail to find new tablet." << " new_tablet=" << request.new_tablet_id diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index 56863cdaa506d8..bccd8035a63f1d 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -77,7 +77,7 @@ OLAPStatus SnapshotManager::make_snapshot( } TabletSharedPtr ref_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - request.tablet_id, 0 /*replica_id*/, request.schema_hash); + request.tablet_id, request.schema_hash); if (ref_tablet == nullptr) { LOG(WARNING) << "failed to get tablet. 
tablet=" << request.tablet_id << " schema_hash=" << request.schema_hash; diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 1acd27de2ec289..8e6aab6fffed14 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -998,7 +998,7 @@ OLAPStatus StorageEngine::execute_task(EngineTask* task) { std::vector related_tablets; for (TabletInfo& tablet_info : tablet_infos) { TabletSharedPtr tablet = - _tablet_manager->get_tablet(tablet_info.tablet_id, 0 /*replica_id*/, tablet_info.schema_hash); + _tablet_manager->get_tablet(tablet_info.tablet_id, tablet_info.schema_hash); if (tablet != nullptr) { related_tablets.push_back(tablet); tablet->obtain_header_wrlock(); @@ -1034,7 +1034,7 @@ OLAPStatus StorageEngine::execute_task(EngineTask* task) { std::vector related_tablets; for (TabletInfo& tablet_info : tablet_infos) { TabletSharedPtr tablet = - _tablet_manager->get_tablet(tablet_info.tablet_id, 0 /*replica_id*/, tablet_info.schema_hash); + _tablet_manager->get_tablet(tablet_info.tablet_id, tablet_info.schema_hash); if (tablet != nullptr) { related_tablets.push_back(tablet); tablet->obtain_header_wrlock(); diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index ca64f02b49d7c2..9046c60ff9152c 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -244,7 +244,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request, // tablet_id exist but with different schema_hash, return an error(report task will // eventually trigger its deletion). if (_check_tablet_id_exist_unlocked(tablet_id)) { - TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, replica_id, schema_hash); + TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, schema_hash, replica_id); if (tablet != nullptr) { LOG(INFO) << "success to create tablet. tablet already exist. 
tablet_id=" << tablet_id; return OLAP_SUCCESS; @@ -261,7 +261,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request, // If the CreateTabletReq has base_tablet_id then it is a alter-tablet request if (request.__isset.base_tablet_id && request.base_tablet_id > 0) { is_schema_change = true; - base_tablet = _get_tablet_unlocked(request.base_tablet_id, 0 /*replica_id*/, request.base_schema_hash); + base_tablet = _get_tablet_unlocked(request.base_tablet_id, request.base_schema_hash); if (base_tablet == nullptr) { LOG(WARNING) << "fail to create tablet(change schema), base tablet does not exist. " << "new_tablet_id=" << tablet_id << ", new_schema_hash=" << schema_hash @@ -375,7 +375,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( // TODO(lingbin): The following logic seems useless, can be removed? // Because if _add_tablet_unlocked() return OK, we must can get it from map. - TabletSharedPtr tablet_ptr = _get_tablet_unlocked(new_tablet_id, new_replica_id, new_schema_hash); + TabletSharedPtr tablet_ptr = _get_tablet_unlocked(new_tablet_id, new_schema_hash, new_replica_id); if (tablet_ptr == nullptr) { res = OLAP_ERR_TABLE_NOT_FOUND; LOG(WARNING) << "fail to get tablet. res=" << res; @@ -479,7 +479,7 @@ OLAPStatus TabletManager::_drop_tablet_unlocked(TTabletId tablet_id, TReplicaId DorisMetrics::instance()->drop_tablet_requests_total->increment(1); // Fetch tablet which need to be dropped - TabletSharedPtr to_drop_tablet = _get_tablet_unlocked(tablet_id, replica_id, schema_hash); + TabletSharedPtr to_drop_tablet = _get_tablet_unlocked(tablet_id, schema_hash, replica_id); if (to_drop_tablet == nullptr) { LOG(WARNING) << "fail to drop tablet because it does not exist. " << "tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; @@ -511,7 +511,7 @@ OLAPStatus TabletManager::drop_tablets_on_error_root_path( VLOG_NOTICE << "drop_tablet begin. 
tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; // clear tablets in unused data dirs, there is no need to compare tablet_replica_id - TabletSharedPtr dropped_tablet = _get_tablet_unlocked(tablet_id, 0 /*replica_id*/, schema_hash); + TabletSharedPtr dropped_tablet = _get_tablet_unlocked(tablet_id, schema_hash); if (dropped_tablet == nullptr) { LOG(WARNING) << "dropping tablet not exist. " << " tablet=" << tablet_id << " schema_hash=" << schema_hash; @@ -536,7 +536,7 @@ OLAPStatus TabletManager::drop_tablets_on_error_root_path( return res; } -TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, +TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, SchemaHash schema_hash, TReplicaId replica_id, bool include_deleted, string* err) { ReadLock rlock(_get_tablets_shard_lock(tablet_id)); return _get_tablet_unlocked(tablet_id, replica_id, schema_hash, include_deleted, err); @@ -545,7 +545,7 @@ TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, TReplicaId replic TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool include_deleted, string* err) { TabletSharedPtr tablet; - tablet = _get_tablet_unlocked(tablet_id, replica_id, schema_hash); + tablet = _get_tablet_unlocked(tablet_id, schema_hash, replica_id); if (tablet == nullptr && include_deleted) { ReadLock rlock(&_shutdown_tablets_lock); for (auto& deleted_tablet : _shutdown_tablets) { @@ -579,7 +579,7 @@ TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, TReplic TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, bool include_deleted, string* err) { ReadLock rlock(_get_tablets_shard_lock(tablet_id)); - TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, 0 /*replica_id*/, schema_hash, include_deleted, err); + TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, schema_hash, 0 
/*replica_id*/, include_deleted, err); if (tablet != nullptr && tablet->tablet_uid() == tablet_uid) { return tablet; } @@ -868,7 +868,7 @@ OLAPStatus TabletManager::report_tablet_info(TTabletInfo* tablet_info) { OLAPStatus res = OLAP_SUCCESS; - TabletSharedPtr tablet = get_tablet(tablet_info->tablet_id, 0 /*replica_id*/, tablet_info->schema_hash); + TabletSharedPtr tablet = get_tablet(tablet_info->tablet_id, tablet_info->schema_hash); if (tablet == nullptr) { LOG(WARNING) << "can't find tablet. " << " tablet=" << tablet_info->tablet_id @@ -1335,7 +1335,7 @@ OLAPStatus TabletManager::_create_tablet_meta_unlocked(const TCreateTabletReq& r OLAPStatus TabletManager::_drop_tablet_directly_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool keep_files) { - TabletSharedPtr dropped_tablet = _get_tablet_unlocked(tablet_id, replica_id, schema_hash); + TabletSharedPtr dropped_tablet = _get_tablet_unlocked(tablet_id, schema_hash, replica_id); if (dropped_tablet == nullptr) { LOG(WARNING) << "fail to drop tablet because it does not exist. " << " tablet_id=" << tablet_id << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash; @@ -1378,7 +1378,7 @@ OLAPStatus TabletManager::_drop_tablet_directly_unlocked(TTabletId tablet_id, TR return OLAP_SUCCESS; } -TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash) { +TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash, TReplicaId replica_id) { VLOG_NOTICE << "begin to get tablet. 
tablet_id=" << tablet_id << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash; tablet_map_t& tablet_map = _get_tablet_map(tablet_id); @@ -1470,7 +1470,7 @@ void TabletManager::get_tablets_distribution_on_different_disks( for (; tablet_info_iter != (partition_iter->second).end(); ++tablet_info_iter) { // get_tablet() will hold 'tablet_shard_lock' TabletSharedPtr tablet = - get_tablet(tablet_info_iter->tablet_id, 0 /*replica_id*/, tablet_info_iter->schema_hash); + get_tablet(tablet_info_iter->tablet_id, tablet_info_iter->schema_hash); if (tablet == nullptr) { continue; } diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h index 91265541e03eaf..85ae8dedce957a 100644 --- a/be/src/olap/tablet_manager.h +++ b/be/src/olap/tablet_manager.h @@ -75,7 +75,7 @@ class TabletManager { const std::unordered_set& tablet_submitted_compaction, uint32_t* score, std::shared_ptr cumulative_compaction_policy); - TabletSharedPtr get_tablet(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, + TabletSharedPtr get_tablet(TTabletId tablet_id, SchemaHash schema_hash, TReplicaId replica_id = 0, bool include_deleted = false, std::string* err = nullptr); TabletSharedPtr get_tablet(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, @@ -165,7 +165,7 @@ class TabletManager { OLAPStatus _drop_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool keep_files); - TabletSharedPtr _get_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash); + TabletSharedPtr _get_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash, TReplicaId replica_id = 0); TabletSharedPtr _get_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool include_deleted, std::string* err); diff --git a/be/src/olap/task/engine_batch_load_task.cpp b/be/src/olap/task/engine_batch_load_task.cpp index 3335d82c519a65..8d1b6d2a1737ae 100644 --- 
a/be/src/olap/task/engine_batch_load_task.cpp +++ b/be/src/olap/task/engine_batch_load_task.cpp @@ -106,7 +106,7 @@ AgentStatus EngineBatchLoadTask::_init() { // Check replica exist TabletSharedPtr tablet; - tablet = StorageEngine::instance()->tablet_manager()->get_tablet(_push_req.tablet_id, 0 /*replica_id*/, + tablet = StorageEngine::instance()->tablet_manager()->get_tablet(_push_req.tablet_id, _push_req.schema_hash); if (tablet == nullptr) { LOG(WARNING) << "get tables failed. " @@ -293,7 +293,7 @@ OLAPStatus EngineBatchLoadTask::_push(const TPushReq& request, } TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - request.tablet_id, 0 /*replica_id*/, request.schema_hash); + request.tablet_id, request.schema_hash); if (tablet == nullptr) { LOG(WARNING) << "false to find tablet. tablet=" << request.tablet_id << ", schema_hash=" << request.schema_hash; @@ -355,7 +355,7 @@ OLAPStatus EngineBatchLoadTask::_delete_data(const TPushReq& request, // 1. Get all tablets with same tablet_id TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - request.tablet_id, 0 /*replica_id*/, request.schema_hash); + request.tablet_id, request.schema_hash); if (tablet == nullptr) { LOG(WARNING) << "can't find tablet. tablet=" << request.tablet_id << ", schema_hash=" << request.schema_hash; diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp index 31b69cf1f002d1..b27550ec852b6e 100644 --- a/be/src/olap/task/engine_checksum_task.cpp +++ b/be/src/olap/task/engine_checksum_task.cpp @@ -48,7 +48,7 @@ OLAPStatus EngineChecksumTask::_compute_checksum() { } TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(_tablet_id, 0 /*replica_id*/, _schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(_tablet_id, _schema_hash); if (NULL == tablet.get()) { OLAP_LOG_WARNING("can't find tablet. 
[tablet_id=%ld schema_hash=%d]", _tablet_id, _schema_hash); diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index c63123efc369b4..21270f705fd3c5 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -81,10 +81,10 @@ OLAPStatus EngineCloneTask::_do_clone() { // Check local tablet exist or not TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - _clone_req.tablet_id, _clone_req.replica_id, _clone_req.schema_hash); + _clone_req.tablet_id, _clone_req.schema_hash, _clone_req.replica_id); // for tablet with same tablet id, but diff replica id TabletSharedPtr old_version_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - _clone_req.tablet_id, 0 /*replica_id*/, _clone_req.schema_hash); + _clone_req.tablet_id, _clone_req.schema_hash); bool is_new_tablet = tablet == nullptr; // try to repair a tablet with missing version if (tablet != nullptr) { diff --git a/be/src/olap/task/engine_publish_version_task.cpp b/be/src/olap/task/engine_publish_version_task.cpp index c0086ec21358d7..51ad517ca5dd6a 100644 --- a/be/src/olap/task/engine_publish_version_task.cpp +++ b/be/src/olap/task/engine_publish_version_task.cpp @@ -121,7 +121,7 @@ OLAPStatus EnginePublishVersionTask::finish() { break; } TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - tablet_info.tablet_id, 0 /*replica_id*/, tablet_info.schema_hash); + tablet_info.tablet_id, tablet_info.schema_hash); if (tablet == nullptr) { _error_tablet_ids->push_back(tablet_info.tablet_id); } else { diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp index 2f9ad4b4a0a05a..f55de6356dc298 100644 --- a/be/src/olap/task/engine_storage_migration_task.cpp +++ b/be/src/olap/task/engine_storage_migration_task.cpp @@ -160,7 +160,7 @@ OLAPStatus EngineStorageMigrationTask::_migrate() { // if old tablet finished schema 
change, then the schema change status of the new tablet is DONE // else the schema change status of the new tablet is FAILED TabletSharedPtr new_tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, 0 /*replica_id*/, schema_hash); + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); if (new_tablet == nullptr) { LOG(WARNING) << "tablet not found. tablet_id=" << tablet_id << " schema_hash=" << schema_hash; diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index 5ce644b755bc10..9c7421266f2dd8 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -226,7 +226,7 @@ Status SnapshotLoader::download(const std::map& src_to } TabletSharedPtr tablet = - _env->storage_engine()->tablet_manager()->get_tablet(local_tablet_id, 0 /*replica_id*/, schema_hash); + _env->storage_engine()->tablet_manager()->get_tablet(local_tablet_id, schema_hash); if (tablet == nullptr) { std::stringstream ss; ss << "failed to get local tablet: " << local_tablet_id; diff --git a/be/test/olap/delete_handler_test.cpp b/be/test/olap/delete_handler_test.cpp index 366c2a9cae81d2..d79d73fc7ac603 100644 --- a/be/test/olap/delete_handler_test.cpp +++ b/be/test/olap/delete_handler_test.cpp @@ -257,7 +257,7 @@ class TestDeleteConditionHandler : public testing::Test { set_default_create_tablet_request(&_create_tablet); res = k_engine->create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, 0 /*replica_id*/, + tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(tablet.get() != NULL); _tablet_path = tablet->tablet_path(); @@ -266,7 +266,7 @@ class TestDeleteConditionHandler : public testing::Test { res = k_engine->create_tablet(_create_dup_tablet); ASSERT_EQ(OLAP_SUCCESS, res); dup_tablet = k_engine->tablet_manager()->get_tablet( - 
_create_dup_tablet.tablet_id, 0 /*replica_id*/, _create_dup_tablet.tablet_schema.schema_hash); + _create_dup_tablet.tablet_id, _create_dup_tablet.tablet_schema.schema_hash); ASSERT_TRUE(dup_tablet.get() != NULL); _dup_tablet_path = tablet->tablet_path(); } @@ -428,7 +428,7 @@ class TestDeleteConditionHandler2 : public testing::Test { set_default_create_tablet_request(&_create_tablet); res = k_engine->create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, 0 /*replica_id*/, + tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(tablet.get() != NULL); _tablet_path = tablet->tablet_path(); @@ -795,7 +795,7 @@ class TestDeleteHandler : public testing::Test { set_default_create_tablet_request(&_create_tablet); res = k_engine->create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, 0 /*replica_id*/, + tablet = k_engine->tablet_manager()->get_tablet(_create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(tablet != nullptr); _tablet_path = tablet->tablet_path(); diff --git a/be/test/olap/delta_writer_test.cpp b/be/test/olap/delta_writer_test.cpp index f1e67ecbfa2f32..96b7a6523884f2 100644 --- a/be/test/olap/delta_writer_test.cpp +++ b/be/test/olap/delta_writer_test.cpp @@ -477,7 +477,7 @@ TEST_F(TestDeltaWriter, write) { // publish version success TabletSharedPtr tablet = - k_engine->tablet_manager()->get_tablet(write_req.tablet_id, 0 /*replica_id*/, write_req.schema_hash); + k_engine->tablet_manager()->get_tablet(write_req.tablet_id, write_req.schema_hash); std::cout << "before publish, tablet row nums:" << tablet->num_rows() << std::endl; OlapMeta* meta = tablet->data_dir()->get_meta(); Version version; @@ -557,7 +557,7 @@ TEST_F(TestDeltaWriter, sequence_col) { // publish version success TabletSharedPtr tablet = 
- k_engine->tablet_manager()->get_tablet(write_req.tablet_id, 0 /*replica_id*/, write_req.schema_hash); + k_engine->tablet_manager()->get_tablet(write_req.tablet_id, write_req.schema_hash); std::cout << "before publish, tablet row nums:" << tablet->num_rows() << std::endl; OlapMeta* meta = tablet->data_dir()->get_meta(); Version version; diff --git a/be/test/olap/tablet_mgr_test.cpp b/be/test/olap/tablet_mgr_test.cpp index c689b5aeb19569..6b796dc80ad6b8 100644 --- a/be/test/olap/tablet_mgr_test.cpp +++ b/be/test/olap/tablet_mgr_test.cpp @@ -105,7 +105,7 @@ TEST_F(TabletMgrTest, CreateTablet) { data_dirs.push_back(_data_dir); OLAPStatus create_st = _tablet_mgr->create_tablet(create_tablet_req, data_dirs); ASSERT_TRUE(create_st == OLAP_SUCCESS); - TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 0, 3333); + TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 3333); ASSERT_TRUE(tablet != nullptr); // check dir exist bool dir_exist = FileUtils::check_exist(tablet->tablet_path()); @@ -171,7 +171,7 @@ TEST_F(TabletMgrTest, CreateTabletWithSequence) { OLAPStatus create_st = _tablet_mgr->create_tablet(create_tablet_req, data_dirs); ASSERT_TRUE(create_st == OLAP_SUCCESS); - TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 0, 3333); + TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 3333); ASSERT_TRUE(tablet != nullptr); // check dir exist bool dir_exist = FileUtils::check_exist(tablet->tablet_path()); @@ -212,21 +212,21 @@ TEST_F(TabletMgrTest, DropTablet) { data_dirs.push_back(_data_dir); OLAPStatus create_st = _tablet_mgr->create_tablet(create_tablet_req, data_dirs); ASSERT_TRUE(create_st == OLAP_SUCCESS); - TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 0, 3333); + TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 3333); ASSERT_TRUE(tablet != nullptr); // drop unexist tablet will be success OLAPStatus drop_st = _tablet_mgr->drop_tablet(111, 0, 4444, false); ASSERT_TRUE(drop_st == OLAP_SUCCESS); - tablet = _tablet_mgr->get_tablet(111, 0, 3333); 
+ tablet = _tablet_mgr->get_tablet(111, 3333); ASSERT_TRUE(tablet != nullptr); // drop exist tablet will be success drop_st = _tablet_mgr->drop_tablet(111, 0, 3333, false); ASSERT_TRUE(drop_st == OLAP_SUCCESS); - tablet = _tablet_mgr->get_tablet(111, 0, 3333); + tablet = _tablet_mgr->get_tablet(111, 3333); ASSERT_TRUE(tablet == nullptr); - tablet = _tablet_mgr->get_tablet(111, 0, 3333, true); + tablet = _tablet_mgr->get_tablet(111, 3333, 0, true); ASSERT_TRUE(tablet != nullptr); // check dir exist @@ -238,7 +238,7 @@ TEST_F(TabletMgrTest, DropTablet) { // because tablet ptr referenced it OLAPStatus trash_st = _tablet_mgr->start_trash_sweep(); ASSERT_TRUE(trash_st == OLAP_SUCCESS); - tablet = _tablet_mgr->get_tablet(111, 0, 3333, true); + tablet = _tablet_mgr->get_tablet(111, 3333, 0, true); ASSERT_TRUE(tablet != nullptr); dir_exist = FileUtils::check_exist(tablet_path); ASSERT_TRUE(dir_exist); @@ -247,7 +247,7 @@ TEST_F(TabletMgrTest, DropTablet) { tablet.reset(); trash_st = _tablet_mgr->start_trash_sweep(); ASSERT_TRUE(trash_st == OLAP_SUCCESS); - tablet = _tablet_mgr->get_tablet(111, 0, 3333, true); + tablet = _tablet_mgr->get_tablet(111, 3333, 0, true); ASSERT_TRUE(tablet == nullptr); dir_exist = FileUtils::check_exist(tablet_path); ASSERT_TRUE(!dir_exist); From 5787a9db2dc76de8f49913c14c8893a504ac2ea3 Mon Sep 17 00:00:00 2001 From: Jianliang Qi Date: Fri, 5 Nov 2021 17:29:46 +0800 Subject: [PATCH 5/7] add replica_id status --- be/src/olap/base_tablet.h | 4 ++-- be/src/olap/tablet_manager.cpp | 16 ++++++++-------- be/src/olap/tablet_manager.h | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index 0839f0306572ef..b28a3d14c6fe35 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -60,7 +60,7 @@ class BaseTablet : public std::enable_shared_from_this { inline int16_t shard_id(); inline const int64_t creation_time() const; inline void set_creation_time(int64_t 
creation_time); - inline bool equal(int64_t tablet_id, int64_t replica_id, int32_t schema_hash); + inline bool equal(int64_t tablet_id, int32_t schema_hash, int64_t replica_id = 0); // properties encapsulated in TabletSchema inline const TabletSchema& tablet_schema() const; @@ -146,7 +146,7 @@ inline void BaseTablet::set_creation_time(int64_t creation_time) { _tablet_meta->set_creation_time(creation_time); } -inline bool BaseTablet::equal(int64_t id, int64_t r_id, int32_t hash) { +inline bool BaseTablet::equal(int64_t id, int32_t hash, int64_t r_id) { // For compatibility with older data, there is no replica id in the old version of the tablet meta // For new data with replica_id in the meta, there are some tasks that do not need to check the replica_id return (tablet_id() == id) && ((replica_id() == 0 || r_id == 0) ? true : (replica_id() == r_id)) && (schema_hash() == hash); diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 9046c60ff9152c..48473856aff0ec 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -105,7 +105,7 @@ OLAPStatus TabletManager::_add_tablet_unlocked(TTabletId tablet_id, TReplicaId r TabletSharedPtr existed_tablet = nullptr; tablet_map_t& tablet_map = _get_tablet_map(tablet_id); for (TabletSharedPtr item : tablet_map[tablet_id].table_arr) { - if (item->equal(tablet_id, replica_id, schema_hash)) { + if (item->equal(tablet_id, schema_hash, replica_id)) { existed_tablet = item; break; } @@ -521,7 +521,7 @@ OLAPStatus TabletManager::drop_tablets_on_error_root_path( for (list::iterator it = tablet_map[tablet_id].table_arr.begin(); it != tablet_map[tablet_id].table_arr.end();) { // clear tablets in unused data dirs, there is no need to compare tablet_replica_id - if ((*it)->equal(tablet_id, 0 /*replica_id*/, schema_hash)) { + if ((*it)->equal(tablet_id, schema_hash)) { // We should first remove tablet from partition_map to avoid iterator // becoming invalid. 
_remove_tablet_from_partition(*(*it)); @@ -539,11 +539,11 @@ OLAPStatus TabletManager::drop_tablets_on_error_root_path( TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, SchemaHash schema_hash, TReplicaId replica_id, bool include_deleted, string* err) { ReadLock rlock(_get_tablets_shard_lock(tablet_id)); - return _get_tablet_unlocked(tablet_id, replica_id, schema_hash, include_deleted, err); + return _get_tablet_unlocked(tablet_id, schema_hash, include_deleted, err, replica_id); } -TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, - bool include_deleted, string* err) { +TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash, + bool include_deleted, string* err, TReplicaId replica_id) { TabletSharedPtr tablet; tablet = _get_tablet_unlocked(tablet_id, schema_hash, replica_id); if (tablet == nullptr && include_deleted) { @@ -579,7 +579,7 @@ TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, TReplic TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, bool include_deleted, string* err) { ReadLock rlock(_get_tablets_shard_lock(tablet_id)); - TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, schema_hash, 0 /*replica_id*/, include_deleted, err); + TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, schema_hash, include_deleted, err); if (tablet != nullptr && tablet->tablet_uid() == tablet_uid) { return tablet; } @@ -1345,7 +1345,7 @@ OLAPStatus TabletManager::_drop_tablet_directly_unlocked(TTabletId tablet_id, TR list& candidate_tablets = tablet_map[tablet_id].table_arr; list::iterator it = candidate_tablets.begin(); while (it != candidate_tablets.end()) { - if (!(*it)->equal(tablet_id, replica_id, schema_hash)) { + if (!(*it)->equal(tablet_id, schema_hash, replica_id)) { ++it; continue; } @@ -1386,7 +1386,7 @@ TabletSharedPtr 
TabletManager::_get_tablet_unlocked(TTabletId tablet_id, SchemaH if (it != tablet_map.end()) { for (TabletSharedPtr tablet : it->second.table_arr) { CHECK(tablet != nullptr) << "tablet is nullptr. tablet_id=" << tablet_id; - if (tablet->equal(tablet_id, replica_id, schema_hash)) { + if (tablet->equal(tablet_id, schema_hash, replica_id)) { VLOG_NOTICE << "get tablet success. tablet_id=" << tablet_id << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash; return tablet; diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h index 85ae8dedce957a..f56bf50e5a5baf 100644 --- a/be/src/olap/tablet_manager.h +++ b/be/src/olap/tablet_manager.h @@ -166,8 +166,8 @@ class TabletManager { OLAPStatus _drop_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, bool keep_files); TabletSharedPtr _get_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash, TReplicaId replica_id = 0); - TabletSharedPtr _get_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, SchemaHash schema_hash, - bool include_deleted, std::string* err); + TabletSharedPtr _get_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash, + bool include_deleted, std::string* err, TReplicaId replica_id = 0); TabletSharedPtr _internal_create_tablet_unlocked(const TCreateTabletReq& request, const bool is_schema_change, From 9b923ff03b9c1838aa28ff0e1d578eb1bc75c7cb Mon Sep 17 00:00:00 2001 From: Jianliang Qi Date: Mon, 8 Nov 2021 19:37:58 +0800 Subject: [PATCH 6/7] optimize replica_id check and logs --- be/src/agent/task_worker_pool.cpp | 22 +++++-------------- be/src/olap/base_tablet.h | 1 + be/src/olap/tablet_manager.cpp | 27 ++++++++++++++++-------- be/src/olap/tablet_meta.cpp | 3 ++- be/src/olap/task/engine_clone_task.cpp | 29 ++++++++++++++++++-------- gensrc/thrift/AgentService.thrift | 2 +- 6 files changed, 47 insertions(+), 37 deletions(-) diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 
b5321807ce7009..2494ad80050839 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -371,10 +371,7 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() { status_code = TStatusCode::RUNTIME_ERROR; } else { ++_s_report_version; - TReplicaId replica_id = 0; - if (create_tablet_req.__isset.replica_id) { - replica_id = create_tablet_req.replica_id; - } + TReplicaId replica_id = create_tablet_req.__isset.replica_id ? create_tablet_req.replica_id : 0; // get path hash of the created tablet TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( create_tablet_req.tablet_id, create_tablet_req.tablet_schema.schema_hash, replica_id); @@ -387,6 +384,7 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() { tablet_info.row_count = 0; tablet_info.data_size = 0; tablet_info.__set_path_hash(tablet->data_dir()->path_hash()); + tablet_info.replica_id = tablet->replica_id(); finish_tablet_infos.push_back(tablet_info); } TRACE("StorageEngine create tablet finish, status: $0", create_status); @@ -429,10 +427,7 @@ void TaskWorkerPool::_drop_tablet_worker_thread_callback() { std::vector error_msgs; TStatus task_status; string err; - TReplicaId replica_id = 0; - if (drop_tablet_req.__isset.replica_id) { - replica_id = drop_tablet_req.replica_id; - } + TReplicaId replica_id = drop_tablet_req.__isset.replica_id ? 
drop_tablet_req.replica_id : 0; TabletSharedPtr dropped_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( drop_tablet_req.tablet_id, drop_tablet_req.schema_hash, replica_id, false, &err); if (dropped_tablet != nullptr) { @@ -850,12 +845,8 @@ void TaskWorkerPool::_update_tablet_meta_worker_thread_callback() { TStatus task_status; for (auto tablet_meta_info : update_tablet_meta_req.tabletMetaInfos) { - TReplicaId replica_id = 0; - if (tablet_meta_info.__isset.replica_id) { - replica_id = tablet_meta_info.replica_id; - } TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - tablet_meta_info.tablet_id, tablet_meta_info.schema_hash, replica_id); + tablet_meta_info.tablet_id, tablet_meta_info.schema_hash); if (tablet == nullptr) { LOG(WARNING) << "could not find tablet when update partition id" << " tablet_id=" << tablet_meta_info.tablet_id @@ -919,10 +910,7 @@ void TaskWorkerPool::_clone_worker_thread_callback() { DorisMetrics::instance()->clone_requests_total->increment(1); LOG(INFO) << "get clone task. signature:" << agent_task_req.signature; - TReplicaId replica_id = 0; - if (clone_req.__isset.replica_id) { - replica_id = clone_req.replica_id; - } + TReplicaId replica_id = clone_req.__isset.replica_id ? 
clone_req.replica_id : 0; // check tablet with the same tabletId existance, if exist, set tablet in clone mode TabletSharedPtr exist_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( clone_req.tablet_id, clone_req.schema_hash, replica_id); diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index b28a3d14c6fe35..d02714799adee7 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -149,6 +149,7 @@ inline void BaseTablet::set_creation_time(int64_t creation_time) { inline bool BaseTablet::equal(int64_t id, int32_t hash, int64_t r_id) { // For compatibility with older data, there is no replica id in the old version of the tablet meta // For new data with replica_id in the meta, there are some tasks that do not need to check the replica_id + // Only check replica_id in create/drop/clone tablet tasks return (tablet_id() == id) && ((replica_id() == 0 || r_id == 0) ? true : (replica_id() == r_id)) && (schema_hash() == hash); } diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 48473856aff0ec..de001f234a12c0 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -167,7 +167,9 @@ OLAPStatus TabletManager::_add_tablet_unlocked(TTabletId tablet_id, TReplicaId r res = OLAP_ERR_ENGINE_INSERT_OLD_TABLET; } LOG(WARNING) << "add duplicated tablet. 
force=" << force << ", res=" << res - << ", tablet_id=" << tablet_id << ", schema_hash=" << schema_hash + << ", tablet_id=" << tablet_id + << ", replica_id=" << replica_id + << ", schema_hash=" << schema_hash << ", old_version=" << old_version << ", new_version=" << new_version << ", old_time=" << old_time << ", new_time=" << new_time << ", old_tablet_path=" << existed_tablet->tablet_path() @@ -229,7 +231,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request, DorisMetrics::instance()->create_tablet_requests_total->increment(1); int64_t tablet_id = request.tablet_id; - int64_t replica_id = request.replica_id; + int64_t replica_id = request.__isset.replica_id ? request.replica_id : 0; int32_t schema_hash = request.tablet_schema.schema_hash; LOG(INFO) << "begin to create tablet. tablet_id=" << tablet_id << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash; @@ -261,12 +263,15 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request, // If the CreateTabletReq has base_tablet_id then it is a alter-tablet request if (request.__isset.base_tablet_id && request.base_tablet_id > 0) { is_schema_change = true; - base_tablet = _get_tablet_unlocked(request.base_tablet_id, request.base_schema_hash); + int64_t base_replica_id = request.__isset.base_replica_id ? request.base_replica_id : 0; + base_tablet = _get_tablet_unlocked(request.base_tablet_id, request.base_schema_hash, base_replica_id); if (base_tablet == nullptr) { LOG(WARNING) << "fail to create tablet(change schema), base tablet does not exist. 
" << "new_tablet_id=" << tablet_id << ", new_schema_hash=" << schema_hash + << ", new_replica_id=" << replica_id << ", base_tablet_id=" << request.base_tablet_id - << ", base_schema_hash=" << request.base_schema_hash; + << ", base_schema_hash=" << request.base_schema_hash + << ", base_replica_id=" << base_replica_id; DorisMetrics::instance()->create_tablet_requests_failed->increment(1); return OLAP_ERR_TABLE_CREATE_META_ERROR; } @@ -311,7 +316,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( TRACE("create tablet meta"); int64_t new_tablet_id = request.tablet_id; - int64_t new_replica_id = request.replica_id; + int64_t new_replica_id = request.__isset.replica_id ? request.replica_id : 0; int32_t new_schema_hash = request.tablet_schema.schema_hash; // should remove the tablet's pending_id no matter create-tablet success or not @@ -482,7 +487,9 @@ OLAPStatus TabletManager::_drop_tablet_unlocked(TTabletId tablet_id, TReplicaId TabletSharedPtr to_drop_tablet = _get_tablet_unlocked(tablet_id, schema_hash, replica_id); if (to_drop_tablet == nullptr) { LOG(WARNING) << "fail to drop tablet because it does not exist. " - << "tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; + << "tablet_id=" << tablet_id + << ", replica_id=" << replica_id + << ", schema_hash=" << schema_hash; return OLAP_SUCCESS; } @@ -864,14 +871,16 @@ OLAPStatus TabletManager::report_tablet_info(TTabletInfo* tablet_info) { DorisMetrics::instance()->report_tablet_requests_total->increment(1); LOG(INFO) << "begin to process report tablet info." << "tablet_id=" << tablet_info->tablet_id + << "replica_id=" << tablet_info->replica_id << ", schema_hash=" << tablet_info->schema_hash; OLAPStatus res = OLAP_SUCCESS; - TabletSharedPtr tablet = get_tablet(tablet_info->tablet_id, tablet_info->schema_hash); + TabletSharedPtr tablet = get_tablet(tablet_info->tablet_id, tablet_info->schema_hash, tablet_info->replica_id); if (tablet == nullptr) { LOG(WARNING) << "can't find tablet. 
" - << " tablet=" << tablet_info->tablet_id + << " tablet_id=" << tablet_info->tablet_id + << " replica_id=" << tablet_info->replica_id << " schema_hash=" << tablet_info->schema_hash; return OLAP_ERR_TABLE_NOT_FOUND; } @@ -1394,7 +1403,7 @@ TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, SchemaH } } - VLOG_NOTICE << "fail to get tablet. tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; + VLOG_NOTICE << "fail to get tablet. tablet_id=" << tablet_id << ", replica_id=" << replica_id << ", schema_hash=" << schema_hash; // Return nullptr tablet if fail TabletSharedPtr tablet; return tablet; diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 577d7287b52a12..3d1fba3eff91c5 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -39,7 +39,8 @@ OLAPStatus TabletMeta::create(const TCreateTabletReq& request, const TabletUid& const unordered_map& col_ordinal_to_unique_id, TabletMetaSharedPtr* tablet_meta) { tablet_meta->reset(new TabletMeta( - request.table_id, request.partition_id, request.tablet_id, request.replica_id, + request.table_id, request.partition_id, request.tablet_id, + request.__isset.replica_id ? request.replica_id : 0, request.tablet_schema.schema_hash, shard_id, request.tablet_schema, next_unique_id, col_ordinal_to_unique_id, tablet_uid, request.__isset.tablet_type ? request.tablet_type : TTabletType::TABLET_TYPE_DISK)); diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 21270f705fd3c5..d72c8621953d5e 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -79,10 +79,12 @@ OLAPStatus EngineCloneTask::_do_clone() { // not null + not null + the same version tablet exist, need repair. 
+ false // not null + null + can not exist this case + ----- - // Check local tablet exist or not + // Check local tablet exist or not, get replica with checking replica_id + TReplicaId replica_id = _clone_req.__isset.replica_id ? _clone_req.replica_id : 0; TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - _clone_req.tablet_id, _clone_req.schema_hash, _clone_req.replica_id); - // for tablet with same tablet id, but diff replica id + _clone_req.tablet_id, _clone_req.schema_hash, replica_id); + + // For tablet with same tablet id, but diff replica id. Get tablet without checking replica_id TabletSharedPtr old_version_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( _clone_req.tablet_id, _clone_req.schema_hash); bool is_new_tablet = tablet == nullptr; @@ -118,6 +120,7 @@ OLAPStatus EngineCloneTask::_do_clone() { LOG(INFO) << "tablet exist with number of missing version: " << missed_versions.size() << ", try to incremental clone succeed: " << allow_incremental_clone << ", signature: " << _signature << ", tablet id: " << _clone_req.tablet_id + << ", replica id: " << replica_id << ", schema hash: " << _clone_req.schema_hash << ", clone version: " << _clone_req.committed_version << ", download snapshot: " << status; @@ -170,7 +173,7 @@ OLAPStatus EngineCloneTask::_do_clone() { schema_hash_path_stream.str(), _clone_req.tablet_id); OLAPStatus reset_id_status = TabletMeta::reset_tablet_uid(header_path); // reset_replica_id here. 
before load tablet to tablet_manager - OLAPStatus reset_replica_id_status = TabletMeta::reset_tablet_replica_id(header_path, _clone_req.replica_id); + OLAPStatus reset_replica_id_status = TabletMeta::reset_tablet_replica_id(header_path, replica_id); if (reset_id_status != OLAP_SUCCESS || reset_replica_id_status != OLAP_SUCCESS) { LOG(WARNING) << "errors while set tablet uid or replica id: '" << header_path; _error_msgs->push_back("errors while set tablet uid/replica_id."); @@ -178,15 +181,15 @@ OLAPStatus EngineCloneTask::_do_clone() { } else { OLAPStatus load_header_status; if (old_version_tablet != nullptr) { - // drop old version tablet first, then add new tablet + // drop old version tablet first(force = true), then add new tablet load_header_status = StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( store, _clone_req.tablet_id, _clone_req.schema_hash, - schema_hash_path_stream.str(), true); + schema_hash_path_stream.str(), true /*force*/); } else { // just create and add a new tablet load_header_status = StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( store, _clone_req.tablet_id, _clone_req.schema_hash, - schema_hash_path_stream.str(), false); + schema_hash_path_stream.str(), false /*force*/); } if (load_header_status != OLAP_SUCCESS) { @@ -232,13 +235,16 @@ void EngineCloneTask::_set_tablet_info(AgentStatus status, bool is_new_tablet) { // Get clone tablet info if (status == DORIS_SUCCESS || status == DORIS_CREATE_TABLE_EXIST) { TTabletInfo tablet_info; + int64_t replica_id = _clone_req.__isset.replica_id ? _clone_req.replica_id : 0; tablet_info.__set_tablet_id(_clone_req.tablet_id); + tablet_info.__set_replica_id(replica_id); tablet_info.__set_schema_hash(_clone_req.schema_hash); OLAPStatus get_tablet_info_status = StorageEngine::instance()->tablet_manager()->report_tablet_info(&tablet_info); if (get_tablet_info_status != OLAP_SUCCESS) { LOG(WARNING) << "clone success, but get tablet info failed." 
<< " tablet id: " << _clone_req.tablet_id + << " replica id: " << replica_id << " schema hash: " << _clone_req.schema_hash << " signature: " << _signature; _error_msgs->push_back("clone success, but get tablet info failed."); @@ -246,6 +252,7 @@ void EngineCloneTask::_set_tablet_info(AgentStatus status, bool is_new_tablet) { } else if (_clone_req.__isset.committed_version && tablet_info.version < _clone_req.committed_version) { LOG(WARNING) << "failed to clone tablet. tablet_id:" << _clone_req.tablet_id + << ", replica_id:" << replica_id << ", schema_hash:" << _clone_req.schema_hash << ", signature:" << _signature << ", version:" << tablet_info.version << ", expected_version: " << _clone_req.committed_version; @@ -256,11 +263,12 @@ void EngineCloneTask::_set_tablet_info(AgentStatus status, bool is_new_tablet) { // if not, maybe this is a stale remaining table which is waiting for drop. // we drop it. LOG(WARNING) << "begin to drop the stale tablet. tablet_id:" << _clone_req.tablet_id + << ", replica_id:" << replica_id << ", schema_hash:" << _clone_req.schema_hash << ", signature:" << _signature << ", version:" << tablet_info.version << ", expected_version: " << _clone_req.committed_version; OLAPStatus drop_status = StorageEngine::instance()->tablet_manager()->drop_tablet( - _clone_req.tablet_id, _clone_req.replica_id, _clone_req.schema_hash); + _clone_req.tablet_id, replica_id, _clone_req.schema_hash); if (drop_status != OLAP_SUCCESS && drop_status != OLAP_ERR_TABLE_NOT_FOUND) { // just log LOG(WARNING) << "drop stale cloned table failed! tablet id: " @@ -270,6 +278,7 @@ void EngineCloneTask::_set_tablet_info(AgentStatus status, bool is_new_tablet) { status = DORIS_ERROR; } else { LOG(INFO) << "clone get tablet info success. 
tablet_id:" << _clone_req.tablet_id + << ", replica_id:" << replica_id << ", schema_hash:" << _clone_req.schema_hash << ", signature:" << _signature << ", version:" << tablet_info.version; _tablet_infos->push_back(tablet_info); @@ -346,7 +355,9 @@ AgentStatus EngineCloneTask::_make_and_download_snapshots(DataDir& data_dir, con if (status == DORIS_SUCCESS) { // change all rowset ids because they maybe its id same with local rowset auto olap_st = SnapshotManager::instance()->convert_rowset_ids( - local_path, _clone_req.tablet_id, _clone_req.replica_id, _clone_req.schema_hash); + local_path, _clone_req.tablet_id, + _clone_req.__isset.replica_id ? _clone_req.replica_id : 0, + _clone_req.schema_hash); if (olap_st != OLAP_SUCCESS) { LOG(WARNING) << "fail to convert rowset ids, path=" << local_path << ", tablet_id=" << _clone_req.tablet_id diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index 1494e1d35e8c92..70cf1a9a35b582 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -86,6 +86,7 @@ struct TCreateTabletReq { 13: optional TStorageFormat storage_format 14: optional TTabletType tablet_type 15: optional Types.TReplicaId replica_id + 16: optional Types.TReplicaId base_replica_id } struct TDropTabletReq { @@ -278,7 +279,6 @@ struct TTabletMetaInfo { 3: optional Types.TPartitionId partition_id 4: optional TTabletMetaType meta_type 5: optional bool is_in_memory - 6: optional Types.TReplicaId replica_id } struct TUpdateTabletMetaInfoReq { From 92a2606deb05118ec6a2baf56033d664704a6a05 Mon Sep 17 00:00:00 2001 From: Jianliang Qi Date: Mon, 8 Nov 2021 20:20:09 +0800 Subject: [PATCH 7/7] merge reset tablet_uid and replica_id function to one --- be/src/olap/tablet_meta.cpp | 3 ++- be/src/olap/tablet_meta.h | 2 +- be/src/olap/task/engine_clone_task.cpp | 7 +++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 
3d1fba3eff91c5..58489689675c6e 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -250,7 +250,7 @@ OLAPStatus TabletMeta::reset_tablet_uid(const string& header_file) { return res; } -OLAPStatus TabletMeta::reset_tablet_replica_id(const string& header_file, int64_t replica_id) { +OLAPStatus TabletMeta::reset_tablet_replica_id_and_uid(const string& header_file, int64_t replica_id) { OLAPStatus res = OLAP_SUCCESS; TabletMeta tmp_tablet_meta; if ((res = tmp_tablet_meta.create_from_file(header_file)) != OLAP_SUCCESS) { @@ -261,6 +261,7 @@ OLAPStatus TabletMeta::reset_tablet_replica_id(const string& header_file, int64_ TabletMetaPB tmp_tablet_meta_pb; tmp_tablet_meta.to_meta_pb(&tmp_tablet_meta_pb); tmp_tablet_meta_pb.set_replica_id(replica_id); + *(tmp_tablet_meta_pb.mutable_tablet_uid()) = TabletUid::gen_uid().to_proto(); res = save(header_file, tmp_tablet_meta_pb); if (res != OLAP_SUCCESS) { LOG(FATAL) << "fail to save tablet meta pb to " diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 66c20c567bb3a6..6eaefa9fdfe7cf 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -92,7 +92,7 @@ class TabletMeta { OLAPStatus save(const std::string& file_path); static OLAPStatus save(const std::string& file_path, const TabletMetaPB& tablet_meta_pb); static OLAPStatus reset_tablet_uid(const std::string& file_path); - static OLAPStatus reset_tablet_replica_id(const std::string& file_path, int64_t replica_id); + static OLAPStatus reset_tablet_replica_id_and_uid(const std::string& file_path, int64_t replica_id); static std::string construct_header_file_path(const std::string& schema_hash_path, int64_t tablet_id); OLAPStatus save_meta(DataDir* data_dir); diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index d72c8621953d5e..8894097bb6b7df 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -171,10 +171,9 @@ OLAPStatus 
EngineCloneTask::_do_clone() { << _clone_req.schema_hash; string header_path = TabletMeta::construct_header_file_path( schema_hash_path_stream.str(), _clone_req.tablet_id); - OLAPStatus reset_id_status = TabletMeta::reset_tablet_uid(header_path); - // reset_replica_id here. before load tablet to tablet_manager - OLAPStatus reset_replica_id_status = TabletMeta::reset_tablet_replica_id(header_path, replica_id); - if (reset_id_status != OLAP_SUCCESS || reset_replica_id_status != OLAP_SUCCESS) { + // reset_replica_id and tablet_uid here. before load tablet to tablet_manager + OLAPStatus reset_status = TabletMeta::reset_tablet_replica_id_and_uid(header_path, replica_id); + if (reset_status != OLAP_SUCCESS) { LOG(WARNING) << "errors while set tablet uid or replica id: '" << header_path; _error_msgs->push_back("errors while set tablet uid/replica_id."); status = DORIS_ERROR;