-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[Bug] Fix the bug where data balance causes tablet loss #6063
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3fef840
25194fa
3edec7f
b7915fc
5787a9d
9b923ff
92a2606
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -371,9 +371,10 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() { | |
| status_code = TStatusCode::RUNTIME_ERROR; | ||
| } else { | ||
| ++_s_report_version; | ||
| TReplicaId replica_id = create_tablet_req.__isset.replica_id ? create_tablet_req.replica_id : 0; | ||
| // get path hash of the created tablet | ||
| TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( | ||
| create_tablet_req.tablet_id, create_tablet_req.tablet_schema.schema_hash); | ||
| create_tablet_req.tablet_id, create_tablet_req.tablet_schema.schema_hash, replica_id); | ||
| DCHECK(tablet != nullptr); | ||
| TTabletInfo tablet_info; | ||
| tablet_info.tablet_id = tablet->table_id(); | ||
|
|
@@ -383,6 +384,7 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() { | |
| tablet_info.row_count = 0; | ||
| tablet_info.data_size = 0; | ||
| tablet_info.__set_path_hash(tablet->data_dir()->path_hash()); | ||
| tablet_info.replica_id = tablet->replica_id(); | ||
| finish_tablet_infos.push_back(tablet_info); | ||
| } | ||
| TRACE("StorageEngine create tablet finish, status: $0", create_status); | ||
|
|
@@ -425,11 +427,18 @@ void TaskWorkerPool::_drop_tablet_worker_thread_callback() { | |
| std::vector<string> error_msgs; | ||
| TStatus task_status; | ||
| string err; | ||
| TReplicaId replica_id = drop_tablet_req.__isset.replica_id ? drop_tablet_req.replica_id : 0; | ||
| TabletSharedPtr dropped_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( | ||
| drop_tablet_req.tablet_id, drop_tablet_req.schema_hash, false, &err); | ||
| drop_tablet_req.tablet_id, drop_tablet_req.schema_hash, replica_id, false, &err); | ||
| if (dropped_tablet != nullptr) { | ||
| if (dropped_tablet->clone_mode()) { | ||
| LOG(WARNING) << "drop table cancelled as tablet is in clone mode! signature: " << agent_task_req.signature; | ||
| error_msgs.push_back("drop table cancelled as tablet is in clone mode! signature: " + agent_task_req.signature); | ||
| status_code = TStatusCode::CANCELLED; | ||
| } | ||
|
|
||
| OLAPStatus drop_status = StorageEngine::instance()->tablet_manager()->drop_tablet( | ||
| drop_tablet_req.tablet_id, drop_tablet_req.schema_hash); | ||
| drop_tablet_req.tablet_id, replica_id, drop_tablet_req.schema_hash); | ||
| if (drop_status != OLAP_SUCCESS) { | ||
| LOG(WARNING) << "drop table failed! signature: " << agent_task_req.signature; | ||
| error_msgs.push_back("drop table failed!"); | ||
|
|
@@ -901,6 +910,14 @@ void TaskWorkerPool::_clone_worker_thread_callback() { | |
| DorisMetrics::instance()->clone_requests_total->increment(1); | ||
| LOG(INFO) << "get clone task. signature:" << agent_task_req.signature; | ||
|
|
||
| TReplicaId replica_id = clone_req.__isset.replica_id ? clone_req.replica_id : 0; | ||
| // check tablet with the same tabletId existance, if exist, set tablet in clone mode | ||
| TabletSharedPtr exist_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we can do this in |
||
| clone_req.tablet_id, clone_req.schema_hash, replica_id); | ||
| if (exist_tablet != nullptr) { | ||
| exist_tablet->set_clone_mode(true); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why not check the replica id here? |
||
| } | ||
|
|
||
| std::vector<string> error_msgs; | ||
| std::vector<TTabletInfo> tablet_infos; | ||
| EngineCloneTask engine_task(clone_req, _master_info, agent_task_req.signature, &error_msgs, | ||
|
|
@@ -928,6 +945,14 @@ void TaskWorkerPool::_clone_worker_thread_callback() { | |
| task_status.__set_error_msgs(error_msgs); | ||
| finish_task_request.__set_task_status(task_status); | ||
|
|
||
| // clone done, set clone mode false | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do this in the clone task. |
||
| // Retrieve once again to prevent tablet from being dropped | ||
| exist_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( | ||
| clone_req.tablet_id, clone_req.schema_hash, replica_id); | ||
| if (exist_tablet != nullptr) { | ||
| exist_tablet->set_clone_mode(false); | ||
| } | ||
|
|
||
| _finish_task(finish_task_request); | ||
| _remove_task_info(agent_task_req.task_type, agent_task_req.signature); | ||
| } | ||
|
|
@@ -991,6 +1016,7 @@ OLAPStatus TaskWorkerPool::_check_migrate_requset(const TStorageMediumMigrateReq | |
| TabletSharedPtr& tablet, DataDir** dest_store) { | ||
| int64_t tablet_id = req.tablet_id; | ||
| int32_t schema_hash = req.schema_hash; | ||
| // tablet migration no need to know replica_id | ||
| tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); | ||
| if (tablet == nullptr) { | ||
| LOG(WARNING) << "can't find tablet. tablet_id= " << tablet_id | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not just do this check in `tablet_manager()->drop_tablet`?