-
Notifications
You must be signed in to change notification settings - Fork 3.7k
add garbage collect by rowsetid #1374
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -932,6 +932,52 @@ void DataDir::perform_path_gc() { | |
| LOG(INFO) << "finished one time path gc."; | ||
| } | ||
|
|
||
| void DataDir::perform_path_gc_by_rowsetid() { | ||
| // Walks every path in _all_check_paths and passes stale rowset files to _process_garbage_path. | ||
| // Blocks on cv until _all_check_paths is non-empty; NOTE(review): lck is never released inside this function, so it stays held across the usleep calls below. | ||
| std::unique_lock<std::mutex> lck(_check_path_mutex); | ||
| cv.wait(lck, [this]{return _all_check_paths.size() > 0;}); | ||
| LOG(INFO) << "start to path gc by rowsetid."; | ||
| int counter = 0; | ||
| for (auto& path : _all_check_paths) { | ||
| ++counter; | ||
| if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) { /* throttle: pause after every path_gc_check_step paths */ | ||
| usleep(config::path_gc_check_step_interval_ms * 1000); | ||
| } | ||
| TTabletId tablet_id = -1; | ||
| TSchemaHash schema_hash = -1; | ||
| bool is_valid = _tablet_manager->get_tablet_id_and_schema_hash_from_path(path, | ||
| &tablet_id, &schema_hash); | ||
| if (!is_valid) { | ||
| LOG(WARNING) << "unknown path:" << path; | ||
| continue; | ||
| } | ||
| if (tablet_id > 0 && schema_hash > 0) { | ||
| // Here the path is either a tablet schema-hash path or a rowset file path. | ||
| // The GC thread must look the tablet up including deleted tablets (presumably the `true` argument to get_tablet below), | ||
| // otherwise it would remove rowset files before the tablet itself has been garbage collected. | ||
| RowsetId rowset_id = -1; | ||
| bool is_rowset_file = _tablet_manager->get_rowset_id_from_path(path, &rowset_id); | ||
| if (is_rowset_file) { | ||
| TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, schema_hash, true); | ||
| if (tablet != nullptr) { | ||
| bool valid = tablet->check_rowset_id(rowset_id); | ||
| if (!valid) { | ||
| // The tablet does not recognise this rowset id. Hand the path to _process_garbage_path only when the id is | ||
| // below the tablet's initial end rowset id and the storage engine does not report it among its unused rowsets. | ||
| if (rowset_id < tablet->initial_end_rowset_id() | ||
| && !StorageEngine::instance()->check_rowset_id_in_unused_rowsets(rowset_id)) { | ||
| _process_garbage_path(path); | ||
| } | ||
| } | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sleep every 1000 path?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. every path_gc_check_step paths, I will sleep 10ms, it is a config. |
||
| } | ||
| } | ||
| } | ||
| _all_check_paths.clear(); /* every path was visited; emptying the container makes the next cv.wait predicate false until it is refilled */ | ||
| LOG(INFO) << "finished one time path gc by rowsetid."; | ||
| } | ||
|
|
||
| // path producer | ||
| void DataDir::perform_path_scan() { | ||
| { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -839,6 +839,28 @@ bool Tablet::check_path(const std::string& path_to_check) { | |
| return true; | ||
| } | ||
| } | ||
| for (auto& inc_version_rowset : _inc_rs_version_map) { | ||
| bool ret = inc_version_rowset.second->check_path(path_to_check); | ||
| if (ret) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe only check rowset id not the full path?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, I will add check_rowset_id to do this |
||
| return true; | ||
| } | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| bool Tablet::check_rowset_id(RowsetId rowset_id) { /* returns true when rowset_id equals the id of a rowset in _rs_version_map or _inc_rs_version_map, false otherwise */ | ||
| ReadLock rdlock(&_meta_lock); /* takes _meta_lock through ReadLock before either map is scanned */ | ||
| for (auto& version_rowset : _rs_version_map) { | ||
| if (version_rowset.second->rowset_id() == rowset_id) { | ||
| return true; | ||
| } | ||
| } | ||
|
|
||
| for (auto& inc_version_rowset : _inc_rs_version_map) { /* second pass: the incremental rowset map */ | ||
| if (inc_version_rowset.second->rowset_id() == rowset_id) { | ||
| return true; | ||
| } | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should get_rowset_id_from_path be in the rowset class?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think so, because this is a path pattern. Moreover, there is no rowset object available here from which to get the rowset id.