From f55064622f56d347a9b369772575c6d6be3b6eaa Mon Sep 17 00:00:00 2001 From: yiguolei Date: Thu, 20 Aug 2020 13:11:41 +0800 Subject: [PATCH 1/3] [bugfix] Some deleted tablets are not recycled on BE and occupy too much disk space! #4400 --- be/src/olap/data_dir.cpp | 61 +++++++++++++++++++++----- be/src/olap/data_dir.h | 5 ++- be/src/olap/olap_server.cpp | 5 ++- be/src/olap/tablet_manager.cpp | 54 +++++++++++++++++++++++ be/src/olap/tablet_manager.h | 9 ++++ be/src/olap/task/engine_clone_task.cpp | 8 ++++ be/src/olap/task/engine_clone_task.h | 2 + 7 files changed, 130 insertions(+), 14 deletions(-) diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 1ecd24e3079945..9fedead06690a8 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -759,6 +759,55 @@ void DataDir::remove_pending_ids(const std::string& id) { _pending_path_ids.erase(id); } +// gc unused tablet schemahash dir +void DataDir::perform_path_gc_by_tablet() { + std::unique_lock lck(_check_path_mutex); + _cv.wait(lck, [this] { return _stop_bg_worker || !_all_tablet_schemahash_paths.empty(); }); + if (_stop_bg_worker) { + return; + } + LOG(INFO) << "start to path gc by tablet schemahash."; + int counter = 0; + for (auto& path : _all_tablet_schemahash_paths) { + ++counter; + if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) { + SleepFor(MonoDelta::FromMilliseconds(config::path_gc_check_step_interval_ms)); + } + TTabletId tablet_id = -1; + TSchemaHash schema_hash = -1; + bool is_valid = _tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id, + &schema_hash); + if (!is_valid) { + LOG(WARNING) << "unknown path:" << path; + continue; + } + // should not happen, because already check it is a valid tablet schema hash path in previous step + // so that log fatal here + if (tablet_id < 1 || schema_hash < 1) { + LOG(WARN) << "invalid tablet id " << tablet_id << " or schema hash " << schema_hash + << ", path=" << path; + continue; + } + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, schema_hash); + if (tablet != nullptr) { + // could find the tablet, then skip check it + continue; + } + boost::filesystem::path tablet_path(path); + boost::filesystem::path data_dir_path = + tablet_path.parent_path().parent_path().parent_path().parent_path(); + std::string data_dir_string = data_dir_path.string(); + DataDir* data_dir = StorageEngine::instance()->get_store(data_dir_string); + if (data_dir == nullptr) { + LOG(WARNING) << "could not find data dir for tablet path " << path; + continue; + } + _tablet_manager->try_delete_unused_tablet_path(data_dir, tablet_id, schema_hash, path); + } + _all_tablet_schemahash_paths.clear(); + LOG(INFO) << "finished one time path gc by tablet."; +} + void DataDir::perform_path_gc_by_rowsetid() { // init the set of valid path // validate the path in data dir @@ -833,7 +882,6 @@ void DataDir::perform_path_scan() { } for (const auto& tablet_id : tablet_ids) { std::string tablet_id_path = shard_path + "/" + tablet_id; - _all_check_paths.insert(tablet_id_path); std::set schema_hashes; ret = FileUtils::list_dirs_files(tablet_id_path, &schema_hashes, nullptr, Env::Default()); @@ -844,7 +892,7 @@ void DataDir::perform_path_scan() { } for (const auto& schema_hash : schema_hashes) { std::string tablet_schema_hash_path = tablet_id_path + "/" + schema_hash; - _all_check_paths.insert(tablet_schema_hash_path); + _all_tablet_schemahash_paths.insert(tablet_schema_hash_path); std::set rowset_files; ret = FileUtils::list_dirs_files(tablet_schema_hash_path, nullptr, @@ -879,15 +927,6 @@ bool DataDir::_check_pending_ids(const std::string& id) { return _pending_path_ids.find(id) != _pending_path_ids.end(); } -void DataDir::_remove_check_paths_no_lock(const std::set& paths) { - for (const auto& path : paths) { - auto path_iter = _all_check_paths.find(path); - if (path_iter != _all_check_paths.end()) { - _all_check_paths.erase(path_iter); - } - } -} - Status DataDir::update_capacity() { try { boost::filesystem::path path_name(_path); diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h index e70083210394cf..d151fdd6a28573 100644 --- a/be/src/olap/data_dir.h +++ b/be/src/olap/data_dir.h @@ -106,6 +106,8 @@ class DataDir { void perform_path_gc_by_rowsetid(); + void perform_path_gc_by_tablet(); + OLAPStatus remove_old_meta_and_files(); bool convert_old_data_success(); @@ -138,8 +140,6 @@ class DataDir { OLAPStatus _clean_unfinished_converting_data(); OLAPStatus _convert_old_tablet(); - void _remove_check_paths_no_lock(const std::set& paths); - void _process_garbage_path(const std::string& path); void _remove_check_paths(const std::set& paths); @@ -182,6 +182,7 @@ class DataDir { std::mutex _check_path_mutex; std::condition_variable _cv; std::set _all_check_paths; + std::set _all_tablet_schemahash_paths; RWMutex _pending_path_mutex; std::set _pending_path_ids; diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index dac000af51757d..8948e7ab986257 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -371,7 +371,10 @@ void* StorageEngine::_path_gc_thread_callback(void* arg) { LOG(INFO) << "try to start path gc thread!"; while (!_stop_bg_worker) { - LOG(INFO) << "try to perform path gc!"; + LOG(INFO) << "try to perform path gc by tablet!"; + ((DataDir*)arg)->perform_path_gc_by_tablet(); + + LOG(INFO) << "try to perform path gc by rowsetid!"; // perform path gc by rowset id ((DataDir*)arg)->perform_path_gc_by_rowsetid(); diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 96af2c3a38b563..7df1e5dcc74b31 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -635,6 +635,8 @@ TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, SchemaHash schema bool TabletManager::get_tablet_id_and_schema_hash_from_path( const string& path, TTabletId* tablet_id, TSchemaHash* schema_hash) { static re2::RE2 normal_re("/data/\\d+/(\\d+)/(\\d+)($|/)"); + // match tablet schema hash data path, for example, the path is /data/1/16791/29998 + // 1 is shard id , 16791 is tablet id, 29998 is schema hash if (RE2::PartialMatch(path, normal_re, tablet_id, schema_hash)) { return true; } @@ -812,6 +814,12 @@ OLAPStatus TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tab return OLAP_ERR_TABLE_CREATE_FROM_HEADER_ERROR; } + // check if the tablet path exists since the path maybe deleted by gc thread + if (!Env::Default()->path_exists(tablet->tablet_path()).ok()) { + LOG(WARNING) << "tablet path not exists, create tablet failed, path=" << tablet->tablet_path(); + return OLAP_ERR_TABLE_ALREADY_DELETED_ERROR; + } + if (tablet_meta->tablet_state() == TABLET_SHUTDOWN) { LOG(INFO) << "fail to load tablet because it is to be deleted. tablet_id=" << tablet_id << " schema_hash=" << schema_hash << ", path=" << data_dir->path(); @@ -1086,6 +1094,52 @@ OLAPStatus TabletManager::start_trash_sweep() { return OLAP_SUCCESS; } // start_trash_sweep +void TabletManager::register_clone_tablet(int64_t tablet_id) { + RWMutex& tablet_map_lock = _get_tablet_map_lock(tablet_id); + ReadLock rlock(&tablet_map_lock); + _tablets_under_clone.insert(tablet_id); +} + +void TabletManager::unregister_clone_tablet(int64_t tablet_id) { + RWMutex& tablet_map_lock = _get_tablet_map_lock(tablet_id); + ReadLock rlock(&tablet_map_lock); + _tablets_under_clone.erase(tablet_id); +} + +void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId tablet_id, + SchemaHash schema_hash, const string& schema_hash_path) { + // acquire the read lock, so that there is no creating tablet or load tablet from meta tasks + // create tablet and load tablet task should check whether the dir exists + RWMutex& tablet_map_lock = _get_tablet_map_lock(tablet_id); + ReadLock rlock(&tablet_map_lock); + + // check if meta already exists + TabletMetaSharedPtr tablet_meta(new TabletMeta()); + OLAPStatus check_st = TabletMetaManager::get_meta(data_dir, tablet_id, + schema_hash, tablet_meta); + if (check_st == OLAP_SUCCESS) { + LOG(INFO) << "tablet meta exist is meta store, skip delete the path " << schema_hash_path; + return; + } + + if (_tablets_under_clone.count(tablet_id) > 0) { + LOG(INFO) << "tablet is under clone, skip delete the path " << schema_hash_path; + return; + } + + // TODO(ygl): may do other checks in the future + if (Env::Default()->path_exists(schema_hash_path).ok()) { + LOG(INFO) << "start to move tablet to trash. tablet_path = " << schema_hash_path; + OLAPStatus rm_st = move_to_trash(schema_hash_path, schema_hash_path); + if (rm_st != OLAP_SUCCESS) { + LOG(WARNING) << "fail to move dir to trash. dir=" << schema_hash_path; + } else { + LOG(INFO) << "move path " << schema_hash_path << " to trash successfully"; + } + } + return; +} + bool TabletManager::try_schema_change_lock(TTabletId tablet_id) { bool res = false; VLOG(3) << "try_schema_change_lock begin. tablet_id=" << tablet_id; diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h index d556377f4f9b4a..ea4de8c69e96c7 100644 --- a/be/src/olap/tablet_manager.h +++ b/be/src/olap/tablet_manager.h @@ -125,6 +125,9 @@ class TabletManager { // Prevent schema change executed concurrently. bool try_schema_change_lock(TTabletId tablet_id); + void try_delete_unused_tablet_path(DataDir* data_dir, TTabletId tablet_id, + SchemaHash schema_hash, const string& schema_hash_path); + void update_root_path_info(std::map* path_map, size_t* tablet_counter); @@ -134,6 +137,9 @@ class TabletManager { void obtain_all_tablets(vector &tablets_info); + void register_clone_tablet(int64_t tablet_id); + void unregister_clone_tablet(int64_t tablet_id); + private: // Add a tablet pointer to StorageEngine // If force, drop the existing tablet add this new one @@ -221,6 +227,9 @@ class TabletManager { int64_t _last_update_stat_ms; inline tablet_map_t& _get_tablet_map(TTabletId tablet_id); + + std::set _tablets_under_clone; + std::set _tablets_under_restore; }; inline RWMutex& TabletManager::_get_tablet_map_lock(TTabletId tabletId) { diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 9f951e396d9f07..ba4a6c753f5c8d 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -63,6 +63,14 @@ EngineCloneTask::EngineCloneTask(const TCloneReq& clone_req, _master_info(master_info) {} OLAPStatus EngineCloneTask::execute() { + // register the tablet to avoid it is deleted by gc thread during clone process + StorageEngine::instance()->tablet_manager()->register_clone_tablet(_clone_req.tablet_id); + OLAPStatus st = _do_clone(); + StorageEngine::instance()->tablet_manager()->unregister_clone_tablet(_clone_req.tablet_id); + return st; +} + +OLAPStatus EngineCloneTask::_do_clone() { AgentStatus status = DORIS_SUCCESS; string src_file_path; TBackend src_host; diff --git a/be/src/olap/task/engine_clone_task.h b/be/src/olap/task/engine_clone_task.h index 52d1d1ccba97d8..57a8e2eb9b05e5 100644 --- a/be/src/olap/task/engine_clone_task.h +++ b/be/src/olap/task/engine_clone_task.h @@ -45,6 +45,8 @@ class EngineCloneTask : public EngineTask { private: + OLAPStatus _do_clone(); + virtual OLAPStatus _finish_clone(Tablet* tablet, const std::string& clone_dir, int64_t committed_version, bool is_incremental_clone); From e5f25f55089c0523dc8f2d420f50cef35bb5bf0e Mon Sep 17 00:00:00 2001 From: yiguolei Date: Thu, 20 Aug 2020 13:11:41 +0800 Subject: [PATCH 2/3] [bugfix] Some deleted tablets are not recycled on BE and occupy too much disk space! #4400 --- be/src/olap/tablet_manager.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 7df1e5dcc74b31..2290cf205855b1 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -1096,13 +1096,13 @@ OLAPStatus TabletManager::start_trash_sweep() { void TabletManager::register_clone_tablet(int64_t tablet_id) { RWMutex& tablet_map_lock = _get_tablet_map_lock(tablet_id); - ReadLock rlock(&tablet_map_lock); + WriteLock wlock(&tablet_map_lock); _tablets_under_clone.insert(tablet_id); } void TabletManager::unregister_clone_tablet(int64_t tablet_id) { RWMutex& tablet_map_lock = _get_tablet_map_lock(tablet_id); - ReadLock rlock(&tablet_map_lock); + WriteLock wlock(&tablet_map_lock); _tablets_under_clone.erase(tablet_id); } From 8b7a0a09b8738b30980c519ff97866a157d44921 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Wed, 26 Aug 2020 13:11:06 +0800 Subject: [PATCH 3/3] Some deleted tablets are not recycled on BE and occupy too much disk space! #4400 --- be/src/olap/data_dir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 9fedead06690a8..f24567793baa15 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -784,7 +784,7 @@ void DataDir::perform_path_gc_by_tablet() { // should not happen, because already check it is a valid tablet schema hash path in previous step // so that log fatal here if (tablet_id < 1 || schema_hash < 1) { - LOG(WARN) << "invalid tablet id " << tablet_id << " or schema hash " << schema_hash + LOG(WARNING) << "invalid tablet id " << tablet_id << " or schema hash " << schema_hash << ", path=" << path; continue; }