From 752ab520dfd3a109518d59654afc6ce89fe162fb Mon Sep 17 00:00:00 2001 From: csun5285 Date: Thu, 27 Jun 2024 10:43:51 +0800 Subject: [PATCH 1/5] remove file exists --- .../segment_v2/inverted_index_cache.cpp | 19 ++++---- .../inverted_index_compound_directory.cpp | 39 +++++----------- .../inverted_index_compound_reader.cpp | 6 --- .../segment_v2/inverted_index_reader.cpp | 44 +++++++++---------- 4 files changed, 41 insertions(+), 67 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp b/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp index 035e28efabd4c3..41e3b134a32cd0 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp @@ -136,18 +136,17 @@ Status InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr& std::unique_ptr(new MemTracker("InvertedIndexSearcherCacheWithRead")); #ifndef BE_TEST { - bool exists = false; - { - SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer); - RETURN_IF_ERROR(fs->exists(file_path, &exists)); - } - if (!exists) { - return Status::Error( - "inverted index path: {} not exist.", file_path); - } SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer); SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get()); - index_searcher = build_index_searcher(fs, index_dir, file_name); + try { + index_searcher = build_index_searcher(fs, index_dir, file_name); + } catch (CLuceneError& err) { + if (err.number() == CL_ERR_FileNotFound) { + return Status::Error( + "inverted index path: {} not exist.", file_path); + } + throw err; + } } #endif diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp index 0eb7e31a02706d..3ee1d9a6e6cf98 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp @@ -266,30 +266,26 @@ bool DorisCompoundDirectory::FSIndexInput::open(const io::FileSystemSPtr& fs, co io::FileBlockCachePathPolicy cache_policy; auto type = config::enable_file_cache ? config::file_cache_type : ""; io::FileReaderOptions reader_options(io::cache_type_from_string(type), cache_policy); - if (!fs->open_file(fd, reader_options, &h->_reader).ok()) { - error.set(CL_ERR_IO, "open file error"); + auto st = fs->open_file(fd, reader_options, &h->_reader); + if (st.is_not_found()) { + error.set(CL_ERR_FileNotFound, "File does not exist"); + } else if (st.is_io_error()) { + error.set(CL_ERR_IO, "File open io error"); + } else if (st.code() == ErrorCode::PERMISSION_DENIED) { + error.set(CL_ERR_IO, "File Access denied"); + } else { + error.set(CL_ERR_IO, "Could not open file"); } //Check if a valid handle was retrieved - if (h->_reader) { + if (st.ok() && h->_reader) { //Store the file length h->_length = h->_reader->size(); h->_fpos = 0; ret = _CLNEW FSIndexInput(h, buffer_size); return true; - - } else { - int err = errno; - if (err == ENOENT) { - error.set(CL_ERR_IO, "File does not exist"); - } else if (err == EACCES) { - error.set(CL_ERR_IO, "File Access denied"); - } else if (err == EMFILE) { - error.set(CL_ERR_IO, "Too many open files"); - } else { - error.set(CL_ERR_IO, "Could not open file"); - } } + delete h->_shared_lock; _CLDECDELETE(h) return false; @@ -532,19 +528,6 @@ void DorisCompoundDirectory::init(const io::FileSystemSPtr& _fs, const char* _pa } lucene::store::Directory::setLockFactory(lock_factory); - - // It's fail checking directory existence in S3. - if (fs->type() == io::FileSystemType::S3) { - return; - } - bool exists = false; - LOG_AND_THROW_IF_ERROR(fs->exists(directory, &exists), - "Doris compound directory init IO error"); - if (!exists) { - auto e = "Doris compound directory init error: " + directory + " is not a directory"; - LOG(WARNING) << e; - _CLTHROWA(CL_ERR_IO, e.c_str()); - } } void DorisCompoundDirectory::priv_getFN(char* buffer, const char* name) const { diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp index 6673845b89ca44..f1095712580244 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp @@ -132,12 +132,6 @@ DorisCompoundReader::DorisCompoundReader(lucene::store::Directory* d, const char entries(_CLNEW EntriesType(true, true)) { bool success = false; try { - if (dir->fileLength(name) == 0) { - LOG(WARNING) << "CompoundReader open failed, index file " << name << " is empty."; - _CLTHROWA(CL_ERR_IO, - fmt::format("CompoundReader open failed, index file {} is empty", name) - .c_str()); - } stream = dir->openInput(name, readBufferSize); int32_t count = stream->readVInt(); diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 7db21a96c653b1..157d746f54fb6f 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -210,21 +210,18 @@ Status InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats, return Status::OK(); } - bool exists = false; - { - SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer); - RETURN_IF_ERROR(_fs->exists(index_file_path, &exists)); - } - if (!exists) { - LOG(WARNING) << "inverted index: " << index_file_path.native() << " not exist."; - return Status::Error( - "inverted index path: {} not exist.", index_file_path.native()); - } - if (!dir) { - dir = new DorisCompoundReader( - DorisCompoundDirectoryFactory::getDirectory(_fs, index_dir.c_str()), - index_file_name.c_str(), config::inverted_index_read_buffer_size); + try { + dir = new DorisCompoundReader( + DorisCompoundDirectoryFactory::getDirectory(_fs, index_dir.c_str()), + index_file_name.c_str(), config::inverted_index_read_buffer_size); + } catch (CLuceneError& err) { + if (err.number() == CL_ERR_FileNotFound) { + return Status::Error( + "inverted index path: {} not exist.", index_file_path.native()); + } + throw err; + } owned_dir = true; } @@ -763,17 +760,18 @@ BkdIndexReader::BkdIndexReader(io::FileSystemSPtr fs, const std::string& path, auto index_dir = io_path.parent_path(); auto index_file_name = InvertedIndexDescriptor::get_index_file_name(io_path.filename(), index_meta->index_id()); - - // check index file existence auto index_file = index_dir / index_file_name; - if (!indexExists(index_file)) { - LOG(WARNING) << "bkd index: " << index_file.string() << " not exist."; - return; - } _file_full_path = index_file; - _compoundReader = std::make_unique( - DorisCompoundDirectoryFactory::getDirectory(fs, index_dir.c_str()), - index_file_name.c_str(), config::inverted_index_read_buffer_size); + try { + _compoundReader = std::make_unique( + DorisCompoundDirectoryFactory::getDirectory(fs, index_dir.c_str()), + index_file_name.c_str(), config::inverted_index_read_buffer_size); + } catch (CLuceneError& err) { + if (err.number() == CL_ERR_FileNotFound) { + LOG(WARNING) << "bkd index: " << index_file.string() << " not exist."; + return; + } + } } Status BkdIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState* runtime_state, From a0594ebaaf437e917d88444ee2e7d2ea89770d3c Mon Sep 17 00:00:00 2001 From: csun5285 Date: Thu, 27 Jun 2024 11:02:14 +0800 Subject: [PATCH 2/5] fix comment --- be/src/olap/olap_common.h | 1 - .../rowset/segment_v2/inverted_index_compound_reader.cpp | 6 ++++++ be/src/olap/rowset/segment_v2/inverted_index_reader.cpp | 7 ------- be/src/vec/exec/scan/new_olap_scan_node.cpp | 2 -- be/src/vec/exec/scan/new_olap_scan_node.h | 1 - be/src/vec/exec/scan/new_olap_scanner.cpp | 2 -- 6 files changed, 6 insertions(+), 13 deletions(-) diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index e5f029d2a29841..811e77590f98b8 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -357,7 +357,6 @@ struct OlapReaderStatistics { int64_t inverted_index_query_timer = 0; int64_t inverted_index_query_cache_hit = 0; int64_t inverted_index_query_cache_miss = 0; - int64_t inverted_index_query_file_exists_timer = 0; int64_t inverted_index_query_null_bitmap_timer = 0; int64_t inverted_index_query_bitmap_copy_timer = 0; int64_t inverted_index_query_bitmap_op_timer = 0; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp index f1095712580244..6673845b89ca44 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp @@ -132,6 +132,12 @@ DorisCompoundReader::DorisCompoundReader(lucene::store::Directory* d, const char entries(_CLNEW EntriesType(true, true)) { bool success = false; try { + if (dir->fileLength(name) == 0) { + LOG(WARNING) << "CompoundReader open failed, index file " << name << " is empty."; + _CLTHROWA(CL_ERR_IO, + fmt::format("CompoundReader open failed, index file {} is empty", name) + .c_str()); + } stream = dir->openInput(name, readBufferSize); int32_t count = stream->readVInt(); diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 157d746f54fb6f..b6b8f9c04410d5 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -99,13 +99,6 @@ bool InvertedIndexReader::_is_match_query(InvertedIndexQueryType query_type) { query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY); } -bool InvertedIndexReader::indexExists(io::Path& index_file_path) { - // SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer); - bool exists = false; - RETURN_IF_ERROR(_fs->exists(index_file_path, &exists)); - return exists; -} - std::unique_ptr InvertedIndexReader::create_analyzer( InvertedIndexCtx* inverted_index_ctx) { std::unique_ptr analyzer; diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index 61147cc77a821a..98a2371fa688e7 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -173,8 +173,6 @@ Status NewOlapScanNode::_init_profile() { _inverted_index_query_timer = ADD_TIMER(_segment_profile, "InvertedIndexQueryTime"); _inverted_index_query_null_bitmap_timer = ADD_TIMER(_segment_profile, "InvertedIndexQueryNullBitmapTime"); - _inverted_index_query_file_exists_timer = - ADD_TIMER(_segment_profile, "InvertedIndexQueryFileExistsTime"); _inverted_index_query_bitmap_copy_timer = ADD_TIMER(_segment_profile, "InvertedIndexQueryBitmapCopyTime"); _inverted_index_query_bitmap_op_timer = diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h index 35b4290ecfa5fd..a8e0f6dde7eb97 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.h +++ b/be/src/vec/exec/scan/new_olap_scan_node.h @@ -191,7 +191,6 @@ class NewOlapScanNode : public VScanNode { RuntimeProfile::Counter* _inverted_index_query_cache_hit_counter = nullptr; RuntimeProfile::Counter* _inverted_index_query_cache_miss_counter = nullptr; RuntimeProfile::Counter* _inverted_index_query_timer = nullptr; - RuntimeProfile::Counter* _inverted_index_query_file_exists_timer = nullptr; RuntimeProfile::Counter* _inverted_index_query_null_bitmap_timer = nullptr; RuntimeProfile::Counter* _inverted_index_query_bitmap_copy_timer = nullptr; RuntimeProfile::Counter* _inverted_index_query_bitmap_op_timer = nullptr; diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index f80802cc2f70cf..ddecaccd1952bb 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -587,8 +587,6 @@ void NewOlapScanner::_update_counters_before_close() { COUNTER_UPDATE(olap_parent->_inverted_index_query_cache_miss_counter, stats.inverted_index_query_cache_miss); COUNTER_UPDATE(olap_parent->_inverted_index_query_timer, stats.inverted_index_query_timer); - COUNTER_UPDATE(olap_parent->_inverted_index_query_file_exists_timer, - stats.inverted_index_query_file_exists_timer); COUNTER_UPDATE(olap_parent->_inverted_index_query_null_bitmap_timer, stats.inverted_index_query_null_bitmap_timer); COUNTER_UPDATE(olap_parent->_inverted_index_query_bitmap_copy_timer, From b020471a5c19f2f3788ed13c8b63e8d6d6b75a2c Mon Sep 17 00:00:00 2001 From: csun5285 Date: Thu, 27 Jun 2024 14:34:37 +0800 Subject: [PATCH 3/5] fix file length --- .../rowset/segment_v2/inverted_index_compound_directory.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp index 3ee1d9a6e6cf98..1c1857f2ac2447 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp @@ -599,7 +599,11 @@ int64_t DorisCompoundDirectory::fileLength(const char* name) const { char buffer[CL_MAX_DIR]; priv_getFN(buffer, name); int64_t size = -1; - LOG_AND_THROW_IF_ERROR(fs->file_size(buffer, &size), "Get file size IO error"); + auto st = fs->file_size(buffer, &size); + if (st.is_not_found()) { + _CLTHROWA(CL_ERR_FileNotFound, "File does not exist"); + } + LOG_AND_THROW_IF_ERROR(st, "Get file size IO error"); return size; } From d3533cd3ae2caa180f2f656ce1d99f309d04153f Mon Sep 17 00:00:00 2001 From: csun5285 Date: Thu, 27 Jun 2024 16:42:46 +0800 Subject: [PATCH 4/5] add cluence --- be/src/clucene | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/clucene b/be/src/clucene index a28adab869f139..51f15724f5bdb7 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit a28adab869f1397aefd7c3636d977c406613617d +Subproject commit 51f15724f5bdb7ae5f6f5e5d7072d43a5bda63f8 From 85bca9f261e382d6fb08e1e434090e50b511f8ed Mon Sep 17 00:00:00 2001 From: csun5285 Date: Thu, 27 Jun 2024 19:24:27 +0800 Subject: [PATCH 5/5] fix bug --- be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h index c084141c656bc4..d9daadb2b04316 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h @@ -54,7 +54,7 @@ class CLUCENE_EXPORT DorisCompoundReader : public lucene::store::Directory { lucene::store::RAMDirectory* ram_dir; std::string directory; std::string file_name; - CL_NS(store)::IndexInput* stream; + CL_NS(store)::IndexInput* stream = nullptr; using EntriesType = lucene::util::CLHashMap