From 1d6c53a90438dc73fb44be358d09eee118e811d1 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Tue, 15 Jul 2025 00:19:48 +0800 Subject: [PATCH 1/8] [fix](inverted index) fix index memeory leak for inverted index --- .../rowset/segment_v2/index_storage_format_v2.cpp | 3 +-- .../segment_v2/inverted_index_fs_directory.cpp | 7 ++++--- .../rowset/segment_v2/inverted_index_fs_directory.h | 2 +- .../rowset/segment_v2/inverted_index_searcher.cpp | 12 +++++++----- conf/lsan_suppr.conf | 6 ------ 5 files changed, 13 insertions(+), 17 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/index_storage_format_v2.cpp b/be/src/olap/rowset/segment_v2/index_storage_format_v2.cpp index e9db5f2345564e..779d96c23987ba 100644 --- a/be/src/olap/rowset/segment_v2/index_storage_format_v2.cpp +++ b/be/src/olap/rowset/segment_v2/index_storage_format_v2.cpp @@ -187,8 +187,7 @@ IndexStorageFormatV2::create_output_stream() { DCHECK(_index_file_writer->_idx_v2_writer != nullptr) << "inverted index file writer v2 is nullptr"; - auto compound_file_output = std::unique_ptr( - out_dir->createOutputV2(_index_file_writer->_idx_v2_writer.get())); + auto compound_file_output = out_dir->createOutputV2(_index_file_writer->_idx_v2_writer.get()); return {std::move(out_dir_ptr), std::move(compound_file_output)}; } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp index 856c899943902a..3cd5b96ff86ae5 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp @@ -693,10 +693,11 @@ lucene::store::IndexOutput* DorisFSDirectory::createOutput(const char* name) { return ret; } -lucene::store::IndexOutput* DorisFSDirectory::createOutputV2(io::FileWriter* file_writer) { - auto* ret = _CLNEW FSIndexOutputV2(); +std::unique_ptr DorisFSDirectory::createOutputV2( + io::FileWriter* file_writer) { + auto ret = std::make_unique(); ret->init(file_writer); - return ret; + return std::move(ret); } std::string DorisFSDirectory::toString() const { diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h index 348db0bd297a45..f4704615fcdd47 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h @@ -79,7 +79,7 @@ class CLUCENE_EXPORT DorisFSDirectory : public lucene::store::Directory { void renameFile(const char* from, const char* to) override; void touchFile(const char* name) override; lucene::store::IndexOutput* createOutput(const char* name) override; - lucene::store::IndexOutput* createOutputV2(io::FileWriter* file_writer); + std::unique_ptr createOutputV2(io::FileWriter* file_writer); void close() override; std::string toString() const override; static const char* getClassName(); diff --git a/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp b/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp index 8d56b913b31c67..914069a476649b 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp @@ -21,6 +21,7 @@ #include #include "common/config.h" +#include "olap/rowset/segment_v2/inverted_index_common.h" #include "olap/rowset/segment_v2/inverted_index_compound_reader.h" #include "olap/rowset/segment_v2/inverted_index_desc.h" #include "olap/rowset/segment_v2/inverted_index_fs_directory.h" @@ -29,10 +30,11 @@ namespace doris::segment_v2 { Status FulltextIndexSearcherBuilder::build(lucene::store::Directory* directory, OptionalIndexSearcherPtr& output_searcher) { auto close_directory = true; - lucene::index::IndexReader* reader = nullptr; + std::unique_ptr reader; + std::unique_ptr directory_ptr(directory); try { - reader = lucene::index::IndexReader::open( - directory, config::inverted_index_read_buffer_size, close_directory); + reader = std::unique_ptr(lucene::index::IndexReader::open( + directory, config::inverted_index_read_buffer_size, close_directory)); } catch (const CLuceneError& e) { std::vector file_names; directory->list(&file_names); @@ -44,7 +46,8 @@ Status FulltextIndexSearcherBuilder::build(lucene::store::Directory* directory, return Status::Error(msg); } bool close_reader = true; - auto index_searcher = std::make_shared(reader, close_reader); + auto index_searcher = + std::make_shared(reader.release(), close_reader); if (!index_searcher) { output_searcher = std::nullopt; return Status::Error( @@ -53,7 +56,6 @@ Status FulltextIndexSearcherBuilder::build(lucene::store::Directory* directory, reader_size = reader->getTermInfosRAMUsed(); // NOTE: need to cl_refcount-- here, so that directory will be deleted when // index_searcher is destroyed - _CLDECDELETE(directory) output_searcher = index_searcher; return Status::OK(); } diff --git a/conf/lsan_suppr.conf b/conf/lsan_suppr.conf index 953cad02f1c1a8..34de582ebe5a56 100644 --- a/conf/lsan_suppr.conf +++ b/conf/lsan_suppr.conf @@ -20,9 +20,3 @@ leak:brpc leak:libjvm leak:libzip leak:*_dl_map_object_deps* - -# Known leaks about index -leak:lucene::index::IndexReader::open -leak:lucene::index::IndexWriter::IndexWriter -leak:doris::segment_v2::IndexFileReader::open -leak:doris::segment_v2::IndexFileWriter::close From 9a8b85ede95fe94ca02f8decdcabe45b960bc2a1 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Tue, 15 Jul 2025 11:18:54 +0800 Subject: [PATCH 2/8] [fix](inverted index) fix index memeory leak and refact some code --- be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp b/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp index 914069a476649b..38978d01b2722c 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp @@ -46,6 +46,7 @@ Status FulltextIndexSearcherBuilder::build(lucene::store::Directory* directory, return Status::Error(msg); } bool close_reader = true; + reader_size = reader->getTermInfosRAMUsed(); auto index_searcher = std::make_shared(reader.release(), close_reader); if (!index_searcher) { @@ -53,7 +54,6 @@ Status FulltextIndexSearcherBuilder::build(lucene::store::Directory* directory, return Status::Error( "FulltextIndexSearcherBuilder build index_searcher error."); } - reader_size = reader->getTermInfosRAMUsed(); // NOTE: need to cl_refcount-- here, so that directory will be deleted when // index_searcher is destroyed output_searcher = index_searcher; From 38c8044efb9d5f28a24ed58a38fe74c8bac7f61d Mon Sep 17 00:00:00 2001 From: airborne12 Date: Tue, 15 Jul 2025 11:24:21 +0800 Subject: [PATCH 3/8] [fix](inverted index) fix index memeory leak and refact some code --- .../rowset/segment_v2/inverted_index_fs_directory_test.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/be/test/olap/rowset/segment_v2/inverted_index_fs_directory_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index_fs_directory_test.cpp index ac47888ac33657..99ac4d34b41fbc 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index_fs_directory_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index_fs_directory_test.cpp @@ -556,7 +556,7 @@ TEST_F(DorisFSDirectoryTest, FSIndexOutputV2FlushBufferError) { Status s = _fs->create_file(file_path, &writer); EXPECT_TRUE(s.ok()); - auto* output = _directory->createOutputV2(writer.get()); + auto output = _directory->createOutputV2(writer.get()); // Write small chunks to fill the buffer and trigger flush // BufferedIndexOutput buffer size is 1024 bytes @@ -573,7 +573,6 @@ TEST_F(DorisFSDirectoryTest, FSIndexOutputV2FlushBufferError) { } catch (...) { // Ignore close errors in cleanup } - delete output; } // Test 38: FSIndexOutputV2 flushBuffer with null writer From e8be39e3bd348db1f591d2494be02fad6e9bf864 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Tue, 15 Jul 2025 11:43:15 +0800 Subject: [PATCH 4/8] [fix](inverted index) fix index memeory leak and refact some code --- be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp index 3cd5b96ff86ae5..66b37c5f087d2e 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp @@ -697,7 +697,7 @@ std::unique_ptr DorisFSDirectory::createOutputV2( io::FileWriter* file_writer) { auto ret = std::make_unique(); ret->init(file_writer); - return std::move(ret); + return ret; } std::string DorisFSDirectory::toString() const { From 7ee62e8175cd423e27a3834b07a73ee9ab50184c Mon Sep 17 00:00:00 2001 From: airborne12 Date: Tue, 15 Jul 2025 15:15:17 +0800 Subject: [PATCH 5/8] fix heap use after free --- .../olap/rowset/segment_v2/inverted_index_searcher.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp b/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp index 38978d01b2722c..4c1b2e6c817c32 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp @@ -31,7 +31,6 @@ Status FulltextIndexSearcherBuilder::build(lucene::store::Directory* directory, OptionalIndexSearcherPtr& output_searcher) { auto close_directory = true; std::unique_ptr reader; - std::unique_ptr directory_ptr(directory); try { reader = std::unique_ptr(lucene::index::IndexReader::open( directory, config::inverted_index_read_buffer_size, close_directory)); @@ -54,8 +53,7 @@ Status FulltextIndexSearcherBuilder::build(lucene::store::Directory* directory, return Status::Error( "FulltextIndexSearcherBuilder build index_searcher error."); } - // NOTE: need to cl_refcount-- here, so that directory will be deleted when - // index_searcher is destroyed + // NOTE: IndexSearcher takes ownership of the reader, and directory cleanup is handled by caller output_searcher = index_searcher; return Status::OK(); } @@ -106,13 +104,13 @@ Result> IndexSearcherBuilder::create_index Result IndexSearcherBuilder::get_index_searcher( lucene::store::Directory* directory) { OptionalIndexSearcherPtr result; - auto st = build(directory, result); + std::unique_ptr directory_ptr(directory); + + auto st = build(directory_ptr.get(), result); if (!st.ok()) { - _CLDECDELETE(directory) return ResultError(st); } if (!result.has_value()) { - _CLDECDELETE(directory) return ResultError(Status::Error( "InvertedIndexSearcherCache build error.")); } From 351ccc0f13e5ee996fa76162f40c495c1d9530b9 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Tue, 15 Jul 2025 17:55:27 +0800 Subject: [PATCH 6/8] fix heap use after free --- be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp b/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp index 4c1b2e6c817c32..a8538dabab8756 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp @@ -69,7 +69,6 @@ Status BKDIndexSearcherBuilder::build(lucene::store::Directory* directory, } reader_size = bkd_reader->ram_bytes_used(); output_searcher = bkd_reader; - _CLDECDELETE(directory) return Status::OK(); } catch (const CLuceneError& e) { return Status::Error( From 13254a57d0f1e4316c95c4cb317d1a6b300af4c4 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Tue, 15 Jul 2025 23:38:15 +0800 Subject: [PATCH 7/8] fix memory leak --- .../segment_v2/inverted_index_fs_directory.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp index 66b37c5f087d2e..4b397539d38d9d 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp @@ -696,7 +696,20 @@ lucene::store::IndexOutput* DorisFSDirectory::createOutput(const char* name) { std::unique_ptr DorisFSDirectory::createOutputV2( io::FileWriter* file_writer) { auto ret = std::make_unique(); - ret->init(file_writer); + ErrorContext error_context; + try { + ret->init(file_writer); + } catch (CLuceneError& err) { + error_context.eptr = std::current_exception(); + error_context.err_msg.append("FSIndexOutputV2 init error: "); + error_context.err_msg.append(err.what()); + LOG(ERROR) << error_context.err_msg; + } + FINALLY_EXCEPTION({ + if (error_context.eptr) { + FINALLY_CLOSE(ret); + } + }) return ret; } From 3cc9b237b95230ae69c15a30f6c77675cea6fadb Mon Sep 17 00:00:00 2001 From: airborne12 Date: Wed, 16 Jul 2025 10:50:48 +0800 Subject: [PATCH 8/8] [fix](index reader) fix memory leak after exception in infos read --- be/src/clucene | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/clucene b/be/src/clucene index ace9ba07a880a0..75126ebce823d3 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit ace9ba07a880a01d0161ee4a03418c30ba048fb9 +Subproject commit 75126ebce823d3c7a61396973330f132120a31ec