From 35263c7013ea11a1c5fbba1a3768a04a10acaec1 Mon Sep 17 00:00:00 2001 From: zhengyu Date: Fri, 29 Nov 2024 10:40:01 +0800 Subject: [PATCH 1/2] [fix](cloud) serialize cache init to avoid unstable cache pick (#44429) The original parallel cache init causes an unstable pick of the cache base path, because the choice depends on the order of init, which can differ after each BE reboot. This leads to cache misses and duplicate cache blocks across multiple caches (wasted disk space). This commit serializes the init process of multiple caches and uses a fixed order, i.e. the order explicitly declared in the BE conf: file_cache_path. Signed-off-by: zhengyu --- be/src/runtime/exec_env_init.cpp | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index b91adb4de9836e..d70bedbfe8ad6a 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -422,33 +422,24 @@ void ExecEnv::init_file_cache_factory(std::vector& cache_paths << ", reason=" << rest.msg(); exit(-1); } - std::vector file_cache_init_threads; - std::list cache_status; + doris::Status cache_status; for (auto& cache_path : cache_paths) { if (cache_path_set.find(cache_path.path) != cache_path_set.end()) { LOG(WARNING) << fmt::format("cache path {} is duplicate", cache_path.path); continue; } - file_cache_init_threads.emplace_back([&, status = &cache_status.emplace_back()]() { - *status = doris::io::FileCacheFactory::instance()->create_file_cache( - cache_path.path, cache_path.init_settings()); - }); - - cache_path_set.emplace(cache_path.path); - } - - for (std::thread& thread : file_cache_init_threads) { - if (thread.joinable()) { - thread.join(); - } - } - for (const auto& status : cache_status) { - if (!status.ok()) { - LOG(FATAL) << "failed to init file cache, err: " << status; - exit(-1); + cache_status = doris::io::FileCacheFactory::instance()->create_file_cache( 
cache_path.path, cache_path.init_settings()); + if (!cache_status.ok()) { + if (!doris::config::ignore_broken_disk) { + LOG(FATAL) << "failed to init file cache, err: " << cache_status; + exit(-1); + } + LOG(WARNING) << "failed to init file cache, err: " << cache_status; } + cache_path_set.emplace(cache_path.path); } } From a46d5c6c5c6e505bcbfa9f4c3b1a9d60e5d40538 Mon Sep 17 00:00:00 2001 From: zhengyu Date: Tue, 3 Dec 2024 20:34:16 +0800 Subject: [PATCH 2/2] response to the reviewer Signed-off-by: zhengyu --- be/src/runtime/exec_env_init.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index d70bedbfe8ad6a..93e6719e1ec3ff 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -434,10 +434,12 @@ void ExecEnv::init_file_cache_factory(std::vector& cache_paths cache_path.path, cache_path.init_settings()); if (!cache_status.ok()) { if (!doris::config::ignore_broken_disk) { - LOG(FATAL) << "failed to init file cache, err: " << cache_status; + LOG(FATAL) << "failed to init file cache, path: " << cache_path.path + << " err: " << cache_status; exit(-1); } - LOG(WARNING) << "failed to init file cache, err: " << cache_status; + LOG(WARNING) << "failed to init file cache, path: " << cache_path.path + << " err: " << cache_status; } cache_path_set.emplace(cache_path.path); }