diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 69741ce21bd1cb..511756b07fec79 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1213,6 +1213,8 @@ DEFINE_mInt32(mow_publish_max_discontinuous_version_num, "20"); DEFINE_mInt32(publish_version_gap_logging_threshold, "200"); // get agg by cache for mow table DEFINE_mBool(enable_mow_get_agg_by_cache, "true"); +// get agg correctness check for mow table +DEFINE_mBool(enable_mow_get_agg_correctness_check_core, "false"); // The secure path with user files, used in the `local` table function. DEFINE_mString(user_files_secure_path, "${DORIS_HOME}"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 5d66dc3286d88b..c36863f61067e2 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1276,6 +1276,8 @@ DECLARE_mInt32(mow_publish_max_discontinuous_version_num); DECLARE_mInt32(publish_version_gap_logging_threshold); // get agg by cache for mow table DECLARE_mBool(enable_mow_get_agg_by_cache); +// get agg correctness check for mow table +DECLARE_mBool(enable_mow_get_agg_correctness_check_core); // The secure path with user files, used in the `local` table function. DECLARE_mString(user_files_secure_path); diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index b54764bfed25e1..dedff393e12be0 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -941,12 +942,17 @@ void TabletMeta::delete_stale_rs_meta_by_version(const Version& version) { it++; } } - if (_enable_unique_key_merge_on_write) { - DCHECK(rowset_cache_version_size <= _rs_metas.size() + _stale_rs_metas.size()) - << "tablet: " << _tablet_id - << ", rowset_cache_version size: " << rowset_cache_version_size - << ", _rs_metas size: " << _rs_metas.size() - << ", _stale_rs_metas size:" << _stale_rs_metas.size(); + if (_enable_unique_key_merge_on_write && + rowset_cache_version_size > _rs_metas.size() + _stale_rs_metas.size()) { + std::string err_msg = fmt::format( + ". tablet: {}, rowset_cache_version size: {}, " + "_rs_metas size: {}, _stale_rs_metas size: {}", + _tablet_id, rowset_cache_version_size, _rs_metas.size(), _stale_rs_metas.size()); + if (config::enable_mow_get_agg_correctness_check_core) { + CHECK(false) << err_msg; + } else { + DCHECK(false) << err_msg; + } } } @@ -1334,7 +1340,20 @@ std::shared_ptr DeleteBitmap::get_agg(const BitmapKey& bmk) co if (start_version > 0) { Cache::Handle* handle2 = _agg_cache->repr()->lookup( agg_cache_key(_tablet_id, {std::get<0>(bmk), std::get<1>(bmk), start_version})); - if (handle2 == nullptr) { + + DBUG_EXECUTE_IF("DeleteBitmap::get_agg.cache_miss", { + if (handle2 != nullptr) { + auto p = dp->param("percent", 0.3); + std::mt19937 gen {std::random_device {}()}; + std::bernoulli_distribution inject_fault {p}; + if (inject_fault(gen)) { + LOG_INFO("injection DeleteBitmap::get_agg.cache_miss, tablet_id={}", + _tablet_id); + handle2 = nullptr; + } + } + }); + if (handle2 == nullptr || start_version > std::get<2>(bmk)) { start_version = 0; } else { val->bitmap |= @@ -1371,6 +1390,19 @@ std::shared_ptr DeleteBitmap::get_agg(const BitmapKey& bmk) co << ", rowset=" << std::get<0>(bmk).to_string() << ", segment=" << std::get<1>(bmk); } + if (start_version > 0 && config::enable_mow_get_agg_correctness_check_core) { + std::shared_ptr bitmap = get_agg_without_cache(bmk); + if (val->bitmap != *bitmap) { + CHECK(false) << ". get agg correctness check failed for tablet=" << _tablet_id + << ", rowset=" << std::get<0>(bmk).to_string() + << ", segment=" << std::get<1>(bmk) << ", version=" << std::get<2>(bmk) + << ". start_version from cache=" << start_version + << ", delete_bitmap cardinality with cache=" + << val->bitmap.cardinality() + << ", delete_bitmap cardinality without cache=" + << bitmap->cardinality(); + } + } } // It is natural for the cache to reclaim the underlying memory @@ -1378,6 +1410,21 @@ std::shared_ptr DeleteBitmap::get_agg(const BitmapKey& bmk) co &val->bitmap, [this, handle](...) { _agg_cache->repr()->release(handle); }); } +std::shared_ptr DeleteBitmap::get_agg_without_cache(const BitmapKey& bmk) const { + std::shared_ptr bitmap = std::make_shared(); + std::shared_lock l(lock); + DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), 0}; + for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { + auto& [k, bm] = *it; + if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) || + std::get<2>(k) > std::get<2>(bmk)) { + break; + } + *bitmap |= bm; + } + return bitmap; +} + std::atomic DeleteBitmap::AggCache::s_repr {nullptr}; std::string tablet_state_name(TabletState state) { diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 12ac97d665d73e..267605795242d6 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -544,6 +544,7 @@ class DeleteBitmap { * @return shared_ptr to a bitmap, which may be empty */ std::shared_ptr get_agg(const BitmapKey& bmk) const; + std::shared_ptr get_agg_without_cache(const BitmapKey& bmk) const; void remove_sentinel_marks();