From a779eede37777202c706953112a9ec82dc501c08 Mon Sep 17 00:00:00 2001 From: hui lai <1353307710@qq.com> Date: Thu, 25 Jul 2024 09:44:32 +0800 Subject: [PATCH] [fix](multi table) fix single stream multi table memory leak (#38255) We hit OOM when using single-stream multi-table load: ![image](https://github.com/user-attachments/assets/748e9914-d591-4f41-8b28-412d3cecc841) There is a memory leak; the heap profile looks like this: ![image](https://github.com/user-attachments/assets/af30c593-88ea-44f6-bba1-82436b13f99f) The stream load context is not released under some error conditions, such as when the plan fails because high concurrency causes a timeout while obtaining the read lock. The leak was introduced by https://github.com/apache/doris/pull/35458 The effect of the fix is shown in the following figure: the load now runs stably with a small amount of memory. ![image](https://github.com/user-attachments/assets/4483e0a5-6c0c-4cdc-b8ed-3408da6a86b2) --- be/src/io/fs/multi_table_pipe.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/be/src/io/fs/multi_table_pipe.cpp b/be/src/io/fs/multi_table_pipe.cpp index fa38b6440c1b1a..4469174211e9e8 100644 --- a/be/src/io/fs/multi_table_pipe.cpp +++ b/be/src/io/fs/multi_table_pipe.cpp @@ -324,6 +324,19 @@ void MultiTablePipe::_handle_consumer_finished() { _ctx->number_filtered_rows = _number_filtered_rows; _ctx->number_unselected_rows = _number_unselected_rows; _ctx->commit_infos = _tablet_commit_infos; + + // remove ctx to avoid memory leak. + for (const auto& pair : _planned_tables) { + if (pair.second) { + doris::ExecEnv::GetInstance()->new_load_stream_mgr()->remove(pair.second->id); + } + } + for (const auto& pair : _unplanned_tables) { + if (pair.second) { + doris::ExecEnv::GetInstance()->new_load_stream_mgr()->remove(pair.second->id); + } + } + LOG(INFO) << "all plan for multi-table load complete. number_total_rows=" << _ctx->number_total_rows << " number_loaded_rows=" << _ctx->number_loaded_rows << " number_filtered_rows=" << _ctx->number_filtered_rows