From e281c5b52d7ded7302036f164b177d50d9efba58 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Tue, 25 Jun 2019 12:46:10 +0800 Subject: [PATCH 1/3] add garbase collect by rowsetid --- be/src/olap/data_dir.cpp | 46 ++++++++++++++++++++++++++++++++++ be/src/olap/data_dir.h | 1 + be/src/olap/storage_engine.cpp | 22 +++++++++++++--- be/src/olap/storage_engine.h | 2 ++ be/src/olap/tablet.cpp | 6 +++++ be/src/olap/tablet.h | 5 ++++ be/src/olap/tablet_meta.cpp | 1 + be/src/olap/tablet_meta.h | 5 ++++ 8 files changed, 84 insertions(+), 4 deletions(-) diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 75cfdcf558e1a5..121287dc459e95 100755 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -932,6 +932,52 @@ void DataDir::perform_path_gc() { LOG(INFO) << "finished one time path gc."; } +void DataDir::perform_path_gc_by_rowsetid() { + // init the set of valid path + // validate the path in data dir + std::unique_lock lck(_check_path_mutex); + cv.wait(lck, [this]{return _all_check_paths.size() > 0;}); + LOG(INFO) << "start to path gc by rowsetid."; + int counter = 0; + for (auto& path : _all_check_paths) { + ++counter; + if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) { + usleep(config::path_gc_check_step_interval_ms * 1000); + } + TTabletId tablet_id = -1; + TSchemaHash schema_hash = -1; + bool is_valid = _tablet_manager->get_tablet_id_and_schema_hash_from_path(path, + &tablet_id, &schema_hash); + if (!is_valid) { + LOG(WARNING) << "unknown path:" << path; + continue; + } + if (tablet_id > 0 && schema_hash > 0) { + // tablet schema hash path or rowset file path + // gc thread should get tablet include deleted tablet + // or it will delete rowset file before tablet is garbage collected + RowsetId rowset_id = -1; + bool is_rowset_file = _tablet_manager->get_rowset_id_from_path(path, &rowset_id); + if (is_rowset_file) { + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, schema_hash, true); + if (tablet != nullptr) { + bool valid = tablet->check_path(path); + if (!valid) { + // if the rowset id is less than tablet's initial end rowset id + // and the path is not in unused_rowsets, delete the path. + if (rowset_id < tablet->initial_end_rowset_id() + && !StorageEngine::instance()->check_path_in_unused_rowsets(path)) { + _process_garbage_path(path); + } + } + } + } + } + } + _all_check_paths.clear(); + LOG(INFO) << "finished one time path gc by rowsetid."; +} + // path producer void DataDir::perform_path_scan() { { diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h index 40a7f084527f14..44528279cf19ce 100644 --- a/be/src/olap/data_dir.h +++ b/be/src/olap/data_dir.h @@ -113,6 +113,7 @@ class DataDir { // this function will collect garbage paths scaned by last function void perform_path_gc(); + void perform_path_gc_by_rowsetid(); OLAPStatus remove_old_meta_and_files(); diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index f4871bd4db92a8..f15278f870ab0d 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -944,6 +944,20 @@ OLAPStatus StorageEngine::execute_task(EngineTask* task) { } } +bool StorageEngine::check_path_in_unused_rowsets(const string& path) { + _gc_mutex.lock(); + for (auto& _unused_rowset_pair : _unused_rowsets) { + if (_unused_rowset_pair.second->check_path(path)) { + LOG(INFO) << "path is found in unused rowsets, path:" << path; + _gc_mutex.unlock(); + return true; + } + } + LOG(INFO) << "path is not found in unused rowsets, path:" << path; + _gc_mutex.unlock(); + return false; +} + void* StorageEngine::_path_gc_thread_callback(void* arg) { #ifdef GOOGLE_PROFILER ProfilerRegisterThread(); @@ -959,12 +973,12 @@ void* StorageEngine::_path_gc_thread_callback(void* arg) { while (true) { LOG(INFO) << "try to perform path gc!"; - // TODO(ygl): stop gc temp because could not define all pending dirs currently - // ((DataDir*)arg)->perform_path_gc(); + // perform path gc by rowset id + ((DataDir*)arg)->perform_path_gc_by_rowsetid(); usleep(interval * 1000000); } - return NULL; + return nullptr; } void* StorageEngine::_path_scan_thread_callback(void* arg) { @@ -986,7 +1000,7 @@ void* StorageEngine::_path_scan_thread_callback(void* arg) { usleep(interval * 1000000); } - return NULL; + return nullptr; } } // namespace doris diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index fc5c1475401f95..a083feb57b25ad 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -193,6 +193,8 @@ class StorageEngine { TabletManager* tablet_manager() { return _tablet_manager.get(); } TxnManager* txn_manager() { return _txn_manager.get(); } + bool check_path_in_unused_rowsets(const string& path); + private: OLAPStatus check_all_root_path_cluster_id(); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 996eb77021113e..76f90a9a28d740 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -839,6 +839,12 @@ bool Tablet::check_path(const std::string& path_to_check) { return true; } } + for (auto& inc_version_rowset : _inc_rs_version_map) { + bool ret = inc_version_rowset.second->check_path(path_to_check); + if (ret) { + return true; + } + } return false; } diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index ca3d7fa9f59c36..05232302d39666 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -224,6 +224,10 @@ class Tablet : public std::enable_shared_from_this { OLAPStatus set_partition_id(int64_t partition_id); + RowsetId initial_end_rowset_id() { + return _tablet_meta->initial_end_rowset_id(); + } + private: void _print_missed_versions(const std::vector& missed_versions) const; OLAPStatus _check_added_rowset(const RowsetSharedPtr& rowset); @@ -248,6 +252,7 @@ class Tablet : public std::enable_shared_from_this { std::atomic _is_bad; // if this tablet is broken, set to true. default is false std::atomic _last_compaction_failure_time; // timestamp of last compaction failure + RowsetId _start_rowset_id; DISALLOW_COPY_AND_ASSIGN(Tablet); }; diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index b99de7135a6a9b..fdc79ec8963983 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -339,6 +339,7 @@ OLAPStatus TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) { _cumulative_layer_point = tablet_meta_pb.cumulative_layer_point(); _tablet_uid = TabletUid(tablet_meta_pb.tablet_uid()); _end_rowset_id = tablet_meta_pb.end_rowset_id(); + _initial_end_rowset_id = tablet_meta_pb.end_rowset_id(); _next_rowset_id = _end_rowset_id + 1; // init _tablet_state diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 3f11d6c181168d..9fbd8cbeb4bbf2 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -191,6 +191,10 @@ class TabletMeta { OLAPStatus set_partition_id(int64_t partition_id); + RowsetId initial_end_rowset_id() { + return _initial_end_rowset_id; + } + private: OLAPStatus _save_meta(DataDir* data_dir); @@ -205,6 +209,7 @@ class TabletMeta { TabletUid _tablet_uid; RowsetId _next_rowset_id = 10000; RowsetId _end_rowset_id; + RowsetId _initial_end_rowset_id; RowsetId _batch_interval = 10000; From f9b43085d39fdb2760273acc29007b046f6fdb4c Mon Sep 17 00:00:00 2001 From: huangkangping Date: Tue, 25 Jun 2019 14:49:57 +0800 Subject: [PATCH 2/3] check by rowset id --- be/src/common/config.h | 2 +- be/src/olap/data_dir.cpp | 4 ++-- be/src/olap/storage_engine.cpp | 9 +++++---- be/src/olap/storage_engine.h | 2 +- be/src/olap/tablet.cpp | 18 ++++++++++++++++++ be/src/olap/tablet.h | 4 +++- 6 files changed, 30 insertions(+), 9 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index 2b4ee0c82cff13..4f6b98578e7d07 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -415,7 +415,7 @@ namespace config { // path gc CONF_Bool(path_gc_check, "true"); CONF_Int32(path_gc_check_interval_second, "1800"); - CONF_Int32(path_gc_check_step, "-1"); + CONF_Int32(path_gc_check_step, "1000"); CONF_Int32(path_gc_check_step_interval_ms, "10"); CONF_Int32(path_scan_interval_second, "1800"); } // namespace config diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 121287dc459e95..4c4d9052840ca7 100755 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -961,12 +961,12 @@ void DataDir::perform_path_gc_by_rowsetid() { if (is_rowset_file) { TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, schema_hash, true); if (tablet != nullptr) { - bool valid = tablet->check_path(path); + bool valid = tablet->check_rowset_id(rowset_id); if (!valid) { // if the rowset id is less than tablet's initial end rowset id // and the path is not in unused_rowsets, delete the path. if (rowset_id < tablet->initial_end_rowset_id() - && !StorageEngine::instance()->check_path_in_unused_rowsets(path)) { + && !StorageEngine::instance()->check_rowset_id_in_unused_rowsets(rowset_id)) { _process_garbage_path(path); } } diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index f15278f870ab0d..bf1d9314cc068f 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -944,16 +944,17 @@ OLAPStatus StorageEngine::execute_task(EngineTask* task) { } } -bool StorageEngine::check_path_in_unused_rowsets(const string& path) { +// check whether any unused rowsets's id equal to rowset_id +bool StorageEngine::check_rowset_id_in_unused_rowsets(RowsetId rowset_id) { _gc_mutex.lock(); for (auto& _unused_rowset_pair : _unused_rowsets) { - if (_unused_rowset_pair.second->check_path(path)) { - LOG(INFO) << "path is found in unused rowsets, path:" << path; + if (_unused_rowset_pair.second->rowset_id() == rowset_id) { + LOG(INFO) << "rowset is found in unused rowsets, rowset_id:" << rowset_id; _gc_mutex.unlock(); return true; } } - LOG(INFO) << "path is not found in unused rowsets, path:" << path; + LOG(INFO) << "rowset is not found in unused rowsets, rowset_id:" << rowset_id; _gc_mutex.unlock(); return false; } diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index a083feb57b25ad..99cd6aa307c94a 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -193,7 +193,7 @@ class StorageEngine { TabletManager* tablet_manager() { return _tablet_manager.get(); } TxnManager* txn_manager() { return _txn_manager.get(); } - bool check_path_in_unused_rowsets(const string& path); + bool check_rowset_id_in_unused_rowsets(RowsetId rowset_id); private: OLAPStatus check_all_root_path_cluster_id(); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 76f90a9a28d740..32c66fdbaebb92 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -848,6 +848,24 @@ bool Tablet::check_path(const std::string& path_to_check) { return false; } +bool Tablet::check_rowset_id(RowsetId rowset_id) { + bool ret = false; + for (auto& version_rowset : _rs_version_map) { + ret = version_rowset.second->rowset_id() == rowset_id; + if (ret) { + return true; + } + } + + for (auto& inc_version_rowset : _inc_rs_version_map) { + ret = inc_version_rowset.second->rowset_id() == rowset_id; + if (ret) { + return true; + } + } + return false; +} + // lock here, function that call next_rowset_id should not have meta lock OLAPStatus Tablet::next_rowset_id(RowsetId* id) { WriteLock wrlock(&_meta_lock); diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index 05232302d39666..fb34e1c381a8f1 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -219,6 +219,9 @@ class Tablet : public std::enable_shared_from_this { bool check_path(const std::string& check_path); + // check rowset_id is valid + bool check_rowset_id(RowsetId rowset_id); + OLAPStatus next_rowset_id(RowsetId* id); OLAPStatus set_next_rowset_id(RowsetId new_rowset_id); @@ -252,7 +255,6 @@ class Tablet : public std::enable_shared_from_this { std::atomic _is_bad; // if this tablet is broken, set to true. default is false std::atomic _last_compaction_failure_time; // timestamp of last compaction failure - RowsetId _start_rowset_id; DISALLOW_COPY_AND_ASSIGN(Tablet); }; From af3ff7a7a0c7ff83eb78d29cb6949c6192c60b67 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Tue, 25 Jun 2019 15:48:19 +0800 Subject: [PATCH 3/3] fix pr problem --- be/src/olap/storage_engine.cpp | 2 -- be/src/olap/tablet.cpp | 8 +++----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index bf1d9314cc068f..c0e74945743619 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -949,12 +949,10 @@ bool StorageEngine::check_rowset_id_in_unused_rowsets(RowsetId rowset_id) { _gc_mutex.lock(); for (auto& _unused_rowset_pair : _unused_rowsets) { if (_unused_rowset_pair.second->rowset_id() == rowset_id) { - LOG(INFO) << "rowset is found in unused rowsets, rowset_id:" << rowset_id; _gc_mutex.unlock(); return true; } } - LOG(INFO) << "rowset is not found in unused rowsets, rowset_id:" << rowset_id; _gc_mutex.unlock(); return false; } diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 32c66fdbaebb92..a39a8742543d92 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -849,17 +849,15 @@ bool Tablet::check_path(const std::string& path_to_check) { } bool Tablet::check_rowset_id(RowsetId rowset_id) { - bool ret = false; + ReadLock rdlock(&_meta_lock); for (auto& version_rowset : _rs_version_map) { - ret = version_rowset.second->rowset_id() == rowset_id; - if (ret) { + if (version_rowset.second->rowset_id() == rowset_id) { return true; } } for (auto& inc_version_rowset : _inc_rs_version_map) { - ret = inc_version_rowset.second->rowset_id() == rowset_id; - if (ret) { + if (inc_version_rowset.second->rowset_id() == rowset_id) { return true; } }