From ce05f1cfb4f10f13fcfa9807de8751a9f30987ac Mon Sep 17 00:00:00 2001 From: huanghaibin <284824253@qq.com> Date: Thu, 4 Apr 2024 00:31:16 +0800 Subject: [PATCH] [enhancement](merge-on-write) consider version count on size-based cloud cu compaction policy --- .../cloud/cloud_cumulative_compaction_policy.cpp | 16 +++++++++++++--- .../cloud/cloud_cumulative_compaction_policy.h | 5 ++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/be/src/cloud/cloud_cumulative_compaction_policy.cpp b/be/src/cloud/cloud_cumulative_compaction_policy.cpp index f2e4411897bd45..5875340ec7b476 100644 --- a/be/src/cloud/cloud_cumulative_compaction_policy.cpp +++ b/be/src/cloud/cloud_cumulative_compaction_policy.cpp @@ -34,11 +34,12 @@ namespace doris { CloudSizeBasedCumulativeCompactionPolicy::CloudSizeBasedCumulativeCompactionPolicy( int64_t promotion_size, double promotion_ratio, int64_t promotion_min_size, - int64_t compaction_min_size) + int64_t compaction_min_size, int64_t promotion_version_count) : _promotion_size(promotion_size), _promotion_ratio(promotion_ratio), _promotion_min_size(promotion_min_size), - _compaction_min_size(compaction_min_size) {} + _compaction_min_size(compaction_min_size), + _promotion_version_count(promotion_version_count) {} int64_t CloudSizeBasedCumulativeCompactionPolicy::_level_size(const int64_t size) { if (size < 1024) return 0; @@ -194,11 +195,20 @@ int64_t CloudSizeBasedCumulativeCompactionPolicy::new_cumulative_point( int64_t last_cumulative_point) { TEST_INJECTION_POINT_RETURN_WITH_VALUE("new_cumulative_point", int64_t(0), output_rowset.get(), last_cumulative_point); + // for MoW table, if there are too many versions, the delete bitmap will grow to + // a very big size, which may make the tablet meta too big and the `save_meta` + // operation too slow. + // if the rowset should not be promoted according to its disk size, we should also + // consider its version count here. 
+ bool satisfy_promotion_version = tablet->enable_unique_key_merge_on_write() && + output_rowset->end_version() - output_rowset->start_version() > + _promotion_version_count; // if rowsets have delete version, move to the last directly. // if rowsets have no delete version, check output_rowset total disk size satisfies promotion size. return output_rowset->start_version() == last_cumulative_point && (last_delete_version.first != -1 || - output_rowset->data_disk_size() >= cloud_promotion_size(tablet)) + output_rowset->data_disk_size() >= cloud_promotion_size(tablet) || + satisfy_promotion_version) ? output_rowset->end_version() + 1 : last_cumulative_point; } diff --git a/be/src/cloud/cloud_cumulative_compaction_policy.h b/be/src/cloud/cloud_cumulative_compaction_policy.h index 9ca9a207b9f0a4..dffe6e0cd0fda1 100644 --- a/be/src/cloud/cloud_cumulative_compaction_policy.h +++ b/be/src/cloud/cloud_cumulative_compaction_policy.h @@ -40,7 +40,8 @@ class CloudSizeBasedCumulativeCompactionPolicy { int64_t promotion_size = config::compaction_promotion_size_mbytes * 1024 * 1024, double promotion_ratio = config::compaction_promotion_ratio, int64_t promotion_min_size = config::compaction_promotion_min_size_mbytes * 1024 * 1024, - int64_t compaction_min_size = config::compaction_min_size_mbytes * 1024 * 1024); + int64_t compaction_min_size = config::compaction_min_size_mbytes * 1024 * 1024, + int64_t promotion_version_count = config::compaction_promotion_version_count); ~CloudSizeBasedCumulativeCompactionPolicy() {} @@ -68,6 +69,8 @@ class CloudSizeBasedCumulativeCompactionPolicy { int64_t _promotion_min_size; /// lower bound size to do compaction compaction. int64_t _compaction_min_size; + // cumulative compaction promotion version count, only works for unique key MoW table + int64_t _promotion_version_count; }; } // namespace doris