From bbe0bd1f78b975391f3fb600ee00799f79f0feda Mon Sep 17 00:00:00 2001 From: Gavin Chou Date: Thu, 8 Aug 2024 20:37:02 +0800 Subject: [PATCH] [fix](file cache) Fix slow IO for table stats procedure, introduced by #37141 The session variable `disable_file_cache` is interpreted as "use the disposable file cache" in beta_rowset_reader.cpp. ``` if (_read_context->runtime_state != nullptr) { _read_options.io_ctx.query_id = &_read_context->runtime_state->query_id(); _read_options.io_ctx.read_file_cache = _read_context->runtime_state->query_options().enable_file_cache; _read_options.io_ctx.is_disposable = _read_context->runtime_state->query_options().disable_file_cache; } ``` We use the disposable cache to avoid IO amplification and to avoid evicting a large amount of data from the regular cache (the "normal cache"). We cannot set the reader options' cache policy to "no cache", because that may cause IO amplification: every page IO would turn into a remote IO, which is a performance disaster. --- be/src/olap/parallel_scanner_builder.cpp | 7 ++----- be/src/olap/rowset/beta_rowset.cpp | 18 +++++++----------- be/src/olap/rowset/beta_rowset.h | 9 +++------ be/src/olap/rowset/beta_rowset_reader.cpp | 10 +++------- be/src/olap/segment_loader.cpp | 4 ++-- be/src/olap/segment_loader.h | 3 +-- 6 files changed, 18 insertions(+), 33 deletions(-) diff --git a/be/src/olap/parallel_scanner_builder.cpp b/be/src/olap/parallel_scanner_builder.cpp index 6a2503a70e9002..10bd61cd8d5d4b 100644 --- a/be/src/olap/parallel_scanner_builder.cpp +++ b/be/src/olap/parallel_scanner_builder.cpp @@ -182,9 +182,6 @@ Status ParallelScannerBuilder::_load() { bool enable_segment_cache = _state->query_options().__isset.enable_segment_cache ? _state->query_options().enable_segment_cache : true; - bool disable_file_cache = _state->query_options().__isset.disable_file_cache - ? 
_state->query_options().disable_file_cache - : false; for (auto& rowset : rowsets) { RETURN_IF_ERROR(rowset->load()); const auto rowset_id = rowset->rowset_id(); @@ -192,7 +189,7 @@ Status ParallelScannerBuilder::_load() { RETURN_IF_ERROR(SegmentLoader::instance()->load_segments( std::dynamic_pointer_cast(rowset), &segment_cache_handle, - enable_segment_cache, false, disable_file_cache)); + enable_segment_cache, false)); _total_rows += rowset->num_rows(); } } @@ -211,4 +208,4 @@ std::shared_ptr ParallelScannerBuilder::_build_scanner( return NewOlapScanner::create_shared(_parent, std::move(params)); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 5114cc6595a74a..832ca3140887da 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -147,26 +147,23 @@ Status BetaRowset::get_segments_size(std::vector* segments_size) { return Status::OK(); } -Status BetaRowset::load_segments(std::vector* segments, - bool disable_file_cache) { - return load_segments(0, num_segments(), segments, disable_file_cache); +Status BetaRowset::load_segments(std::vector* segments) { + return load_segments(0, num_segments(), segments); } Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end, - std::vector* segments, - bool disable_file_cache) { + std::vector* segments) { int64_t seg_id = seg_id_begin; while (seg_id < seg_id_end) { std::shared_ptr segment; - RETURN_IF_ERROR(load_segment(seg_id, &segment, disable_file_cache)); + RETURN_IF_ERROR(load_segment(seg_id, &segment)); segments->push_back(std::move(segment)); seg_id++; } return Status::OK(); } -Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment, - bool disable_file_cache) { +Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment) { auto fs = _rowset_meta->fs(); if (!fs) { return Status::Error("get fs 
failed"); @@ -175,9 +172,8 @@ Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* se DCHECK(seg_id >= 0); auto seg_path = DORIS_TRY(segment_path(seg_id)); io::FileReaderOptions reader_options { - .cache_type = !disable_file_cache && config::enable_file_cache - ? io::FileCachePolicy::FILE_BLOCK_CACHE - : io::FileCachePolicy::NO_CACHE, + .cache_type = config::enable_file_cache ? io::FileCachePolicy::FILE_BLOCK_CACHE + : io::FileCachePolicy::NO_CACHE, .is_doris_table = true, .cache_base_path = "", .file_size = _rowset_meta->segment_file_size(seg_id), diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h index 59ed6e061feb06..52d5ac5c8a8742 100644 --- a/be/src/olap/rowset/beta_rowset.h +++ b/be/src/olap/rowset/beta_rowset.h @@ -71,15 +71,12 @@ class BetaRowset final : public Rowset { Status check_file_exist() override; - Status load_segments(std::vector* segments, - bool disable_file_cache = false); + Status load_segments(std::vector* segments); Status load_segments(int64_t seg_id_begin, int64_t seg_id_end, - std::vector* segments, - bool disable_file_cache = false); + std::vector* segments); - Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment, - bool disable_file_cache = false); + Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment); Status get_segments_size(std::vector* segments_size); diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 4d953d1dbe37e7..42456bb862502d 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -249,13 +249,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context } // load segments - bool disable_file_cache = false; bool enable_segment_cache = true; auto* state = read_context->runtime_state; if (state != nullptr) { - disable_file_cache = state->query_options().__isset.disable_file_cache - ? 
state->query_options().disable_file_cache - : false; enable_segment_cache = state->query_options().__isset.enable_segment_cache ? state->query_options().enable_segment_cache : true; @@ -264,9 +260,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context bool should_use_cache = use_cache || (_read_context->reader_type == ReaderType::READER_QUERY && enable_segment_cache); SegmentCacheHandle segment_cache_handle; - RETURN_IF_ERROR(SegmentLoader::instance()->load_segments( - _rowset, &segment_cache_handle, should_use_cache, - /*need_load_pk_index_and_bf*/ false, disable_file_cache)); + RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(_rowset, &segment_cache_handle, + should_use_cache, + /*need_load_pk_index_and_bf*/ false)); // create iterator for each segment auto& segments = segment_cache_handle.get_segments(); diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp index 98db0351240901..12ab89af0be283 100644 --- a/be/src/olap/segment_loader.cpp +++ b/be/src/olap/segment_loader.cpp @@ -52,7 +52,7 @@ void SegmentCache::erase(const SegmentCache::CacheKey& key) { Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, bool use_cache, - bool need_load_pk_index_and_bf, bool disable_file_cache) { + bool need_load_pk_index_and_bf) { if (cache_handle->is_inited()) { return Status::OK(); } @@ -62,7 +62,7 @@ Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, continue; } segment_v2::SegmentSharedPtr segment; - RETURN_IF_ERROR(rowset->load_segment(i, &segment, disable_file_cache)); + RETURN_IF_ERROR(rowset->load_segment(i, &segment)); if (need_load_pk_index_and_bf) { RETURN_IF_ERROR(segment->load_pk_index_and_bf()); } diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h index fc2f0d8c03fafe..5bb8fae3c41877 100644 --- a/be/src/olap/segment_loader.h +++ b/be/src/olap/segment_loader.h @@ -118,8 +118,7 @@ class SegmentLoader { // Load 
segments of "rowset", return the "cache_handle" which contains segments. // If use_cache is true, it will be loaded from _cache. Status load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, - bool use_cache = false, bool need_load_pk_index_and_bf = false, - bool disable_file_cache = false); + bool use_cache = false, bool need_load_pk_index_and_bf = false); void erase_segment(const SegmentCache::CacheKey& key);