From a36813e0a5b8edb86852ced73cd468593aa8d01a Mon Sep 17 00:00:00 2001 From: meiyi Date: Thu, 20 Mar 2025 18:47:29 +0800 Subject: [PATCH] [fix](mow) remove rowset cache version (#49295) Problem Summary: introduced by https://github.com/apache/doris/pull/48968, fix the rowset_cache_version is not deleted if _rs_metas or _stale_rs_metas is changed: ``` F20250319 13:41:54.708062 5890 tablet_meta.cpp:955] Check failed: false . tablet: 1742356296291, rowset_cache_version size: 1607, _rs_metas size: 135, _stale_rs_metas size: 707 *** Check failure stack trace: *** @ 0x55aa24363916 google::LogMessage::SendToLog() @ 0x55aa24360360 google::LogMessage::Flush() @ 0x55aa24364159 google::LogMessageFatal::~LogMessageFatal() @ 0x55aa19c476a9 doris::TabletMeta::delete_stale_rs_meta_by_version() @ 0x55aa19bf38b3 doris::Tablet::_delete_stale_rowset_by_version() @ 0x55aa19bf451a doris::Tablet::delete_expired_stale_rowset() @ 0x55aa19c264f8 doris::TabletManager::for_each_tablet() @ 0x55aa19c2a473 doris::TabletManager::start_trash_sweep() @ 0x55aa19bd3fa5 doris::StorageEngine::start_trash_sweep() @ 0x55aa198b9da6 doris::StorageEngine::_garbage_sweeper_thread_callback() ``` --- be/src/olap/tablet_meta.cpp | 70 +++++++++++++++++++++++++++++-------- be/src/olap/tablet_meta.h | 7 ++-- 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index dedff393e12be0..670b48f07d7815 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -856,6 +856,7 @@ void TabletMeta::add_rowsets_unchecked(const std::vector& to_ad void TabletMeta::delete_rs_meta_by_version(const Version& version, std::vector* deleted_rs_metas) { + size_t rowset_cache_version_size = 0; auto it = _rs_metas.begin(); while (it != _rs_metas.end()) { if ((*it)->version() == version) { @@ -863,22 +864,32 @@ void TabletMeta::delete_rs_meta_by_version(const Version& version, deleted_rs_metas->push_back(*it); } + if
(_enable_unique_key_merge_on_write) { + rowset_cache_version_size = + _delete_bitmap->remove_rowset_cache_version((*it)->rowset_id()); + } _rs_metas.erase(it); return; } else { ++it; } } + _check_mow_rowset_cache_version_size(rowset_cache_version_size); } void TabletMeta::modify_rs_metas(const std::vector& to_add, const std::vector& to_delete, bool same_version) { + size_t rowset_cache_version_size = 0; // Remove to_delete rowsets from _rs_metas for (auto rs_to_del : to_delete) { auto it = _rs_metas.begin(); while (it != _rs_metas.end()) { if (rs_to_del->version() == (*it)->version()) { + if (_enable_unique_key_merge_on_write) { + rowset_cache_version_size = + _delete_bitmap->remove_rowset_cache_version((*it)->rowset_id()); + } _rs_metas.erase(it); // there should be only one rowset match the version break; } else { @@ -892,6 +903,7 @@ void TabletMeta::modify_rs_metas(const std::vector& to_add, } // put to_add rowsets in _rs_metas. _rs_metas.insert(_rs_metas.end(), to_add.begin(), to_add.end()); + _check_mow_rowset_cache_version_size(rowset_cache_version_size); } // Use the passing "rs_metas" to replace the rs meta in this tablet meta @@ -899,9 +911,14 @@ void TabletMeta::modify_rs_metas(const std::vector& to_add, // an existing tablet before. Add after revise, only the passing "rs_metas" // is needed.
void TabletMeta::revise_rs_metas(std::vector&& rs_metas) { - std::lock_guard wrlock(_meta_lock); - _rs_metas = std::move(rs_metas); - _stale_rs_metas.clear(); + { + std::lock_guard wrlock(_meta_lock); + _rs_metas = std::move(rs_metas); + _stale_rs_metas.clear(); + } + if (_enable_unique_key_merge_on_write) { + _delete_bitmap->clear_rowset_cache_version(); + } } // This method should call after revise_rs_metas, since new rs_metas might be a subset @@ -942,18 +959,7 @@ void TabletMeta::delete_stale_rs_meta_by_version(const Version& version) { it++; } } - if (_enable_unique_key_merge_on_write && - rowset_cache_version_size > _rs_metas.size() + _stale_rs_metas.size()) { - std::string err_msg = fmt::format( - ". tablet: {}, rowset_cache_version size: {}, " - "_rs_metas size: {}, _stale_rs_metas size: {}", - _tablet_id, rowset_cache_version_size, _rs_metas.size(), _stale_rs_metas.size()); - if (config::enable_mow_get_agg_correctness_check_core) { - CHECK(false) << err_msg; - } else { - DCHECK(false) << err_msg; - } - } + _check_mow_rowset_cache_version_size(rowset_cache_version_size); } RowsetMetaSharedPtr TabletMeta::acquire_rs_meta_by_version(const Version& version) const { @@ -983,6 +989,35 @@ Status TabletMeta::set_partition_id(int64_t partition_id) { return Status::OK(); } +void TabletMeta::clear_stale_rowset() { + _stale_rs_metas.clear(); + if (_enable_unique_key_merge_on_write) { + _delete_bitmap->clear_rowset_cache_version(); + } +} + +void TabletMeta::clear_rowsets() { + _rs_metas.clear(); + if (_enable_unique_key_merge_on_write) { + _delete_bitmap->clear_rowset_cache_version(); + } +} + +void TabletMeta::_check_mow_rowset_cache_version_size(size_t rowset_cache_version_size) { + if (_enable_unique_key_merge_on_write && + rowset_cache_version_size > _rs_metas.size() + _stale_rs_metas.size()) { + std::string err_msg = fmt::format( + ". 
tablet: {}, rowset_cache_version size: {}, " + "_rs_metas size: {}, _stale_rs_metas size: {}", + _tablet_id, rowset_cache_version_size, _rs_metas.size(), _stale_rs_metas.size()); + if (config::enable_mow_get_agg_correctness_check_core) { + CHECK(false) << err_msg; + } else { + DCHECK(false) << err_msg; + } + } +} + bool operator==(const TabletMeta& a, const TabletMeta& b) { if (a._table_id != b._table_id) return false; if (a._index_id != b._index_id) return false; @@ -1292,6 +1327,11 @@ size_t DeleteBitmap::remove_rowset_cache_version(const RowsetId& rowset_id) { return _rowset_cache_version.size(); } +void DeleteBitmap::clear_rowset_cache_version() { + std::lock_guard l(_rowset_cache_version_lock); + _rowset_cache_version.clear(); +} + DeleteBitmap::Version DeleteBitmap::_get_rowset_cache_version(const BitmapKey& bmk) const { std::shared_lock l(_rowset_cache_version_lock); if (auto it = _rowset_cache_version.find(std::get<0>(bmk)); it != _rowset_cache_version.end()) { diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 267605795242d6..4970551ff24acd 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -217,9 +217,9 @@ class TabletMeta : public MetadataAdder { } // used for after tablet cloned to clear stale rowset - void clear_stale_rowset() { _stale_rs_metas.clear(); } + void clear_stale_rowset(); - void clear_rowsets() { _rs_metas.clear(); } + void clear_rowsets(); // MUST hold EXCLUSIVE `_meta_lock` in belonged Tablet // `to_add` MUST NOT have overlapped version with `_rs_metas` in tablet meta. @@ -300,6 +300,7 @@ class TabletMeta : public MetadataAdder { private: Status _save_meta(DataDir* data_dir); + void _check_mow_rowset_cache_version_size(size_t rowset_cache_version_size); // _del_predicates is ignored to compare. 
friend bool operator==(const TabletMeta& a, const TabletMeta& b); @@ -560,6 +561,8 @@ class DeleteBitmap { // return the size of the map size_t remove_rowset_cache_version(const RowsetId& rowset_id); + void clear_rowset_cache_version(); + class AggCachePolicy : public LRUCachePolicy { public: AggCachePolicy(size_t capacity)