From 91fd95160c03d4050e647802bce9e6babed8aab6 Mon Sep 17 00:00:00 2001 From: Yukang-Lian Date: Mon, 1 Apr 2024 20:59:27 +0800 Subject: [PATCH 1/4] Problem: When the process stops, a heap-use-after-free error occurs in the WAL manager. Reason: During the startup process, if the storage engine does not initialize successfully and the main program directly returns 0, the WAL manager, which was created during initialization and started a thread to periodically check disk space, will encounter an issue. When the program exits and returns 0, local variables are destroyed before the thread has been properly terminated. If the thread attempts to access those local variables at this point, it leads to a heap-use-after-free error. Solution: Ensure that the thread for periodically checking disk space does not begin its periodic checks until the storage engine has been successfully initialized, by having it wait for ExecEnv::ready() first. --- be/src/olap/wal/wal_manager.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/be/src/olap/wal/wal_manager.cpp b/be/src/olap/wal/wal_manager.cpp index 06937a32b81dc5..1bd083992b73b9 100644 --- a/be/src/olap/wal/wal_manager.cpp +++ b/be/src/olap/wal/wal_manager.cpp @@ -474,6 +474,9 @@ Status WalManager::update_wal_dir_estimated_wal_bytes(const std::string& wal_dir } Status WalManager::_update_wal_dir_info_thread() { + while (!ExecEnv::ready()) { + sleep(1); + } while (!_stop.load()) { static_cast(_wal_dirs_info->update_all_wal_dir_limit()); static_cast(_wal_dirs_info->update_all_wal_dir_used()); From 338a79580c8c92a5ae9e8036aa76196a09fede02 Mon Sep 17 00:00:00 2001 From: Yukang-Lian Date: Tue, 2 Apr 2024 15:15:00 +0800 Subject: [PATCH 2/4] 2 --- be/src/olap/wal/wal_manager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/olap/wal/wal_manager.cpp b/be/src/olap/wal/wal_manager.cpp index 1bd083992b73b9..02176b7c064d14 100644 --- a/be/src/olap/wal/wal_manager.cpp +++ b/be/src/olap/wal/wal_manager.cpp @@ -475,7 +475,8 @@ Status 
WalManager::update_wal_dir_estimated_wal_bytes(const std::string& wal_dir Status WalManager::_update_wal_dir_info_thread() { while (!ExecEnv::ready()) { - sleep(1); + LOG(INFO) << "Sleep 1s to wait storage engine init."; + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); } while (!_stop.load()) { static_cast(_wal_dirs_info->update_all_wal_dir_limit()); From d0ceb0854232a5965275ee77a5537adfebe81a47 Mon Sep 17 00:00:00 2001 From: Yukang-Lian Date: Thu, 4 Apr 2024 22:13:19 +0800 Subject: [PATCH 3/4] 3 --- be/src/olap/wal/wal_manager.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/be/src/olap/wal/wal_manager.cpp b/be/src/olap/wal/wal_manager.cpp index 02176b7c064d14..ae3e72024c3715 100644 --- a/be/src/olap/wal/wal_manager.cpp +++ b/be/src/olap/wal/wal_manager.cpp @@ -474,10 +474,16 @@ Status WalManager::update_wal_dir_estimated_wal_bytes(const std::string& wal_dir } Status WalManager::_update_wal_dir_info_thread() { - while (!ExecEnv::ready()) { - LOG(INFO) << "Sleep 1s to wait storage engine init."; + int wait_time = 0; + while (!ExecEnv::ready() && !_stop.load()) { + LOG(INFO) << "Sleep 1s to wait for storage engine init."; std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + wait_time++; + if (wait_time == 5) { + return Status::TimedOut("Init update wal dir info thread timeout."); + } } + while (!_stop.load()) { static_cast(_wal_dirs_info->update_all_wal_dir_limit()); static_cast(_wal_dirs_info->update_all_wal_dir_used()); From d9946a8732a8b3a4ff24ab5a93a023182d7885c7 Mon Sep 17 00:00:00 2001 From: Yukang-Lian Date: Tue, 13 Aug 2024 15:25:07 +0800 Subject: [PATCH 4/4] 4 --- be/src/olap/wal/wal_manager.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/be/src/olap/wal/wal_manager.cpp b/be/src/olap/wal/wal_manager.cpp index ae3e72024c3715..a7e33e7383f597 100644 --- a/be/src/olap/wal/wal_manager.cpp +++ b/be/src/olap/wal/wal_manager.cpp @@ -474,17 +474,12 @@ Status 
WalManager::update_wal_dir_estimated_wal_bytes(const std::string& wal_dir } Status WalManager::_update_wal_dir_info_thread() { - int wait_time = 0; - while (!ExecEnv::ready() && !_stop.load()) { - LOG(INFO) << "Sleep 1s to wait for storage engine init."; - std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - wait_time++; - if (wait_time == 5) { - return Status::TimedOut("Init update wal dir info thread timeout."); - } - } - while (!_stop.load()) { + if (!ExecEnv::ready()) { + LOG(INFO) << "Sleep 1s to wait for storage engine init."; + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + continue; + } static_cast(_wal_dirs_info->update_all_wal_dir_limit()); static_cast(_wal_dirs_info->update_all_wal_dir_used()); LOG_EVERY_N(INFO, 100) << "Scheduled(every 10s) WAL info: " << get_wal_dirs_info_string();