From 54f67d150344b6ce36de8ce5fc47225e31a5637d Mon Sep 17 00:00:00 2001 From: yixiutt Date: Mon, 17 Oct 2022 14:23:54 +0800 Subject: [PATCH] [improvement](compaction) delete num based compaction policy --- be/src/common/config.h | 10 - be/src/http/action/compaction_action.cpp | 23 +- be/src/http/action/compaction_action.h | 2 - be/src/olap/cumulative_compaction_policy.cpp | 142 +---------- be/src/olap/cumulative_compaction_policy.h | 65 +---- be/src/olap/olap_server.cpp | 21 +- be/src/olap/storage_engine.h | 1 - be/src/olap/tablet.cpp | 3 +- .../cumulative_compaction_policy_test.cpp | 241 +----------------- docs/en/docs/admin-manual/config/be-config.md | 8 - .../http-actions/compaction-action.md | 2 +- .../docs/admin-manual/config/be-config.md | 8 - .../http-actions/compaction-action.md | 2 +- 13 files changed, 14 insertions(+), 514 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index 02f76870b0874c..b53b6e45c34947 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -266,16 +266,6 @@ CONF_Bool(enable_base_compaction_idle_sched, "true"); CONF_Bool(enable_dup_key_base_compaction_skip_big_file, "true"); CONF_mInt64(base_compaction_dup_key_max_file_size_mbytes, "1024"); -// config the cumulative compaction policy -// Valid configs: num_based, size_based -// num_based policy, the original version of cumulative compaction, cumulative version compaction once. -// size_based policy, a optimization version of cumulative compaction, targeting the use cases requiring -// lower write amplification, trading off read amplification and space amplification. -CONF_mString(cumulative_compaction_policy, "size_based"); -CONF_Validator(cumulative_compaction_policy, [](const std::string config) -> bool { - return config == "size_based" || config == "num_based"; -}); - // In size_based policy, output rowset of cumulative compaction total disk size exceed this config size, // this rowset will be given to base compaction, unit is m byte. CONF_mInt64(cumulative_size_based_promotion_size_mbytes, "1024"); diff --git a/be/src/http/action/compaction_action.cpp b/be/src/http/action/compaction_action.cpp index 101e193797c15e..ea51c81e0492a6 100644 --- a/be/src/http/action/compaction_action.cpp +++ b/be/src/http/action/compaction_action.cpp @@ -183,13 +183,10 @@ Status CompactionAction::_execute_compaction_callback(TabletSharedPtr tablet, timer.start(); std::shared_ptr cumulative_compaction_policy = - _create_cumulative_compaction_policy(); - if (tablet->get_cumulative_compaction_policy() == nullptr || - tablet->get_cumulative_compaction_policy()->name() != - cumulative_compaction_policy->name()) { + CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy(); + if (tablet->get_cumulative_compaction_policy() == nullptr) { tablet->set_cumulative_compaction_policy(cumulative_compaction_policy); } - Status res = Status::OK(); if (compaction_type == PARAM_COMPACTION_BASE) { BaseCompaction base_compaction(tablet); @@ -257,20 +254,4 @@ void CompactionAction::handle(HttpRequest* req) { } } -std::shared_ptr -CompactionAction::_create_cumulative_compaction_policy() { - std::string current_policy; - { - std::lock_guard lock(*config::get_mutable_string_config_lock()); - current_policy = config::cumulative_compaction_policy; - } - boost::to_upper(current_policy); - - if (current_policy == CUMULATIVE_SIZE_BASED_POLICY) { - // check size_based cumulative compaction config - StorageEngine::instance()->check_cumulative_compaction_config(); - } - - return CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy(current_policy); -} } // end namespace doris diff --git a/be/src/http/action/compaction_action.h b/be/src/http/action/compaction_action.h index b2659487594cd9..6def386a443050 100644 --- a/be/src/http/action/compaction_action.h +++ b/be/src/http/action/compaction_action.h @@ -59,8 +59,6 @@ class CompactionAction : public HttpHandler { /// check param and fetch tablet_id from req Status _check_param(HttpRequest* req, uint64_t* tablet_id); - std::shared_ptr _create_cumulative_compaction_policy(); - private: CompactionActionType _type; }; diff --git a/be/src/olap/cumulative_compaction_policy.cpp b/be/src/olap/cumulative_compaction_policy.cpp index 874ce60ddf90b7..599fad92be6d67 100644 --- a/be/src/olap/cumulative_compaction_policy.cpp +++ b/be/src/olap/cumulative_compaction_policy.cpp @@ -345,121 +345,6 @@ int SizeBasedCumulativeCompactionPolicy::_level_size(const int64_t size) { return 0; } -void NumBasedCumulativeCompactionPolicy::update_cumulative_point( - Tablet* tablet, const std::vector& input_rowsets, - RowsetSharedPtr _output_rowset, Version& last_delete_version) { - // use the version after end version of the last input rowsets to update cumulative point - int64_t cumulative_point = input_rowsets.back()->end_version() + 1; - tablet->set_cumulative_layer_point(cumulative_point); -} - -int NumBasedCumulativeCompactionPolicy::pick_input_rowsets( - Tablet* tablet, const std::vector& candidate_rowsets, - const int64_t max_compaction_score, const int64_t min_compaction_score, - std::vector* input_rowsets, Version* last_delete_version, - size_t* compaction_score) { - *compaction_score = 0; - int transient_size = 0; - for (size_t i = 0; i < candidate_rowsets.size(); ++i) { - RowsetSharedPtr rowset = candidate_rowsets[i]; - // check whether this rowset is delete version - if (tablet->version_for_delete_predicate(rowset->version())) { - *last_delete_version = rowset->version(); - if (!input_rowsets->empty()) { - // we meet a delete version, and there were other versions before. - // we should compact those version before handling them over to base compaction - break; - } else { - // we meet a delete version, and no other versions before, skip it and continue - input_rowsets->clear(); - transient_size = 0; - *compaction_score = 0; - continue; - } - } - if (*compaction_score >= max_compaction_score) { - // got enough segments - break; - } - *compaction_score += rowset->rowset_meta()->get_compaction_score(); - input_rowsets->push_back(rowset); - transient_size += 1; - } - - if (input_rowsets->empty()) { - return transient_size; - } - - // if we have a sufficient number of segments, - // or have other versions before encountering the delete version, we should process the compaction. - if (last_delete_version->first == -1 && *compaction_score < min_compaction_score) { - input_rowsets->clear(); - } - return transient_size; -} - -void NumBasedCumulativeCompactionPolicy::calc_cumulative_compaction_score( - TabletState state, const std::vector& all_rowsets, - const int64_t current_cumulative_point, uint32_t* score) { - const int64_t point = current_cumulative_point; - for (auto& rs_meta : all_rowsets) { - if (rs_meta->start_version() < point) { - // all_rs_metas() is not sorted, so we use _continue_ other than _break_ here. - continue; - } - *score += rs_meta->get_compaction_score(); - } -} - -void NumBasedCumulativeCompactionPolicy::calculate_cumulative_point( - Tablet* tablet, const std::vector& all_metas, - int64_t current_cumulative_point, int64_t* ret_cumulative_point) { - *ret_cumulative_point = Tablet::K_INVALID_CUMULATIVE_POINT; - if (current_cumulative_point != Tablet::K_INVALID_CUMULATIVE_POINT) { - // only calculate the point once. - // after that, cumulative point will be updated along with compaction process. - return; - } - - std::list existing_rss; - for (auto& rs : all_metas) { - existing_rss.emplace_back(rs); - } - - // sort the existing rowsets by version in ascending order - existing_rss.sort([](const RowsetMetaSharedPtr& a, const RowsetMetaSharedPtr& b) { - // simple because 2 versions are certainly not overlapping - return a->version().first < b->version().first; - }); - - if (tablet->tablet_state() == TABLET_RUNNING) { - int64_t prev_version = -1; - for (const RowsetMetaSharedPtr& rs : existing_rss) { - if (rs->version().first > prev_version + 1) { - // There is a hole, do not continue - break; - } - // break the loop if segments in this rowset is overlapping, or is a singleton. - if (rs->is_segments_overlapping() || rs->is_singleton_delta()) { - *ret_cumulative_point = rs->version().first; - break; - } - - prev_version = rs->version().second; - *ret_cumulative_point = prev_version + 1; - } - } else if (tablet->tablet_state() == TABLET_NOTREADY) { - // tablet under alter process - // we choose version next to the base version as cumulative point - for (const RowsetMetaSharedPtr& rs : existing_rss) { - if (rs->version().first > 0) { - *ret_cumulative_point = rs->version().first; - break; - } - } - } -} - void CumulativeCompactionPolicy::pick_candidate_rowsets( const std::unordered_map& rs_version_map, int64_t cumulative_point, std::vector* candidate_rowsets) { @@ -472,31 +357,8 @@ void CumulativeCompactionPolicy::pick_candidate_rowsets( } std::shared_ptr -CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy(std::string type) { - CompactionPolicy policy_type; - _parse_cumulative_compaction_policy(type, &policy_type); - - if (policy_type == NUM_BASED_POLICY) { - return std::unique_ptr( - new NumBasedCumulativeCompactionPolicy()); - } else if (policy_type == SIZE_BASED_POLICY) { - return std::unique_ptr( - new SizeBasedCumulativeCompactionPolicy()); - } - - return std::shared_ptr(new NumBasedCumulativeCompactionPolicy()); +CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy() { + return std::unique_ptr(new SizeBasedCumulativeCompactionPolicy()); } -void CumulativeCompactionPolicyFactory::_parse_cumulative_compaction_policy( - std::string type, CompactionPolicy* policy_type) { - if (type == CUMULATIVE_NUM_BASED_POLICY) { - *policy_type = NUM_BASED_POLICY; - } else if (type == CUMULATIVE_SIZE_BASED_POLICY) { - *policy_type = SIZE_BASED_POLICY; - } else { - LOG(WARNING) << "parse cumulative compaction policy error " << type << ", default use " - << CUMULATIVE_NUM_BASED_POLICY; - *policy_type = NUM_BASED_POLICY; - } -} } // namespace doris diff --git a/be/src/olap/cumulative_compaction_policy.h b/be/src/olap/cumulative_compaction_policy.h index 079155c6b36eca..4abdd3497a3bcb 100644 --- a/be/src/olap/cumulative_compaction_policy.h +++ b/be/src/olap/cumulative_compaction_policy.h @@ -29,17 +29,8 @@ namespace doris { class Tablet; -/// This CompactionPolicy enum is used to represent the type of compaction policy. -/// Now it has two values, NUM_BASED_POLICY and SIZE_BASED_POLICY. -/// NUM_BASED_POLICY means current compaction policy implemented by num based policy. -/// SIZE_BASED_POLICY means current compaction policy implemented by size_based policy. -enum CompactionPolicy { - NUM_BASED_POLICY = 0, - SIZE_BASED_POLICY = 1, -}; - -const static std::string CUMULATIVE_NUM_BASED_POLICY = "NUM_BASED"; const static std::string CUMULATIVE_SIZE_BASED_POLICY = "SIZE_BASED"; + /// This class CumulativeCompactionPolicy is the base class of cumulative compaction policy. /// It defines the policy to do cumulative compaction. It has different derived classes, which implements /// concrete cumulative compaction algorithm. The policy is configured by conf::cumulative_compaction_policy. @@ -115,52 +106,6 @@ class CumulativeCompactionPolicy { virtual std::string name() = 0; }; -/// Num based cumulative compaction policy implemention. Num based policy which derives CumulativeCompactionPolicy is early -/// basic algorithm. This policy uses linear structure to compact rowsets. The cumulative rowsets compact only once and -/// then the output will do base compaction. It can make segments of rowsets in order and compact small rowsets to a bigger one. -class NumBasedCumulativeCompactionPolicy final : public CumulativeCompactionPolicy { -public: - /// Constructor function of NumBasedCumulativeCompactionPolicy, - /// it needs tablet pointer to access tablet method. - /// param tablet, the shared pointer of tablet - NumBasedCumulativeCompactionPolicy() : CumulativeCompactionPolicy() {} - - /// Destructor function of NumBasedCumulativeCompactionPolicy. - ~NumBasedCumulativeCompactionPolicy() {} - - /// Num based cumulative compaction policy implements pick input rowsets function. - /// Its main policy is picking rowsets from candidate rowsets by comparing accumulative compaction_score and - /// max_cumulative_compaction_num_singleton_deltas or checking whether there is delete version rowset. - int pick_input_rowsets(Tablet* tablet, const std::vector& candidate_rowsets, - const int64_t max_compaction_score, const int64_t min_compaction_score, - std::vector* input_rowsets, - Version* last_delete_version, size_t* compaction_score) override; - - /// Num based cumulative compaction policy implements update cumulative point function. - /// Its main policy is using the last input version to update the cumulative point. It aims that every rowsets only - /// do compact once. - void update_cumulative_point(Tablet* tablet, const std::vector& input_rowsets, - RowsetSharedPtr _output_rowset, - Version& last_delete_version) override; - - /// Num based cumulative compaction policy implements calculate cumulative point function. - /// When the first time the tablet does compact, this calculation is executed. Its main policy is to find first rowset - /// which is segments_overlapping type, it represent this rowset is not compacted and use this version as cumulative point. - void calculate_cumulative_point(Tablet* tablet, - const std::vector& all_rowsets, - int64_t current_cumulative_point, - int64_t* cumulative_point) override; - - /// Num based cumulative compaction policy implements calc cumulative compaction score function. - /// Its main policy is calculating the accumulative compaction score after current cumulative_point in tablet. - void calc_cumulative_compaction_score(TabletState state, - const std::vector& all_rowsets, - int64_t current_cumulative_point, - uint32_t* score) override; - - std::string name() override { return CUMULATIVE_NUM_BASED_POLICY; } -}; - /// SizeBased cumulative compaction policy implemention. SizeBased policy which derives CumulativeCompactionPolicy is a optimized /// version of num based cumulative compaction policy. This policy also uses linear structure to compact rowsets. The cumulative rowsets /// can do compaction when they are in same level size. And when output rowset exceeds the promotion radio of base size or min promotion @@ -248,13 +193,7 @@ class CumulativeCompactionPolicyFactory { public: /// Static factory function. It can product different policy according to the `policy` parameter and use tablet ptr /// to construct the policy. Now it can product size based and num based policies. - static std::shared_ptr create_cumulative_compaction_policy( - std::string policy); - -private: - /// It is a static function to help to check the policy config and convert to CompactionPolicy enum variable - static void _parse_cumulative_compaction_policy(std::string policy, - CompactionPolicy* policy_type); + static std::shared_ptr create_cumulative_compaction_policy(); }; } // namespace doris diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 7a66e49d6fd91d..845031c229393a 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -479,7 +479,6 @@ void StorageEngine::_compaction_tasks_producer_callback() { std::vector StorageEngine::_generate_compaction_tasks( CompactionType compaction_type, std::vector& data_dirs, bool check_score) { _update_cumulative_compaction_policy(); - std::vector tablets_compaction; uint32_t max_compaction_score = 0; @@ -563,21 +562,9 @@ std::vector StorageEngine::_generate_compaction_tasks( } void StorageEngine::_update_cumulative_compaction_policy() { - std::string current_policy = ""; - { - std::lock_guard lock(*config::get_mutable_string_config_lock()); - current_policy = config::cumulative_compaction_policy; - } - boost::to_upper(current_policy); - if (_cumulative_compaction_policy == nullptr || - _cumulative_compaction_policy->name() != current_policy) { - if (current_policy == CUMULATIVE_SIZE_BASED_POLICY) { - // check size_based cumulative compaction config - check_cumulative_compaction_config(); - } + if (_cumulative_compaction_policy == nullptr) { _cumulative_compaction_policy = - CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy( - current_policy); + CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy(); } } @@ -673,9 +660,7 @@ Status StorageEngine::_submit_compaction_task(TabletSharedPtr tablet, Status StorageEngine::submit_compaction_task(TabletSharedPtr tablet, CompactionType compaction_type) { _update_cumulative_compaction_policy(); - if (tablet->get_cumulative_compaction_policy() == nullptr || - tablet->get_cumulative_compaction_policy()->name() != - _cumulative_compaction_policy->name()) { + if (tablet->get_cumulative_compaction_policy() == nullptr) { tablet->set_cumulative_compaction_policy(_cumulative_compaction_policy); } return _submit_compaction_task(tablet, compaction_type); diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index 198aba8aafd2e4..7d0dad4f6e2317 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -258,7 +258,6 @@ class StorageEngine { std::vector _generate_compaction_tasks(CompactionType compaction_type, std::vector& data_dirs, bool check_score); - void _update_cumulative_compaction_policy(); bool _push_tablet_into_submitted_compaction(TabletSharedPtr tablet, diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 1cb03ee2060286..8ca35e89c4d9a3 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -125,8 +125,7 @@ Status Tablet::_init_once_action() { #ifdef BE_TEST // init cumulative compaction policy by type _cumulative_compaction_policy = - CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy( - _cumulative_compaction_type); + CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy(); #endif RowsetVector rowset_vec; diff --git a/be/test/olap/cumulative_compaction_policy_test.cpp b/be/test/olap/cumulative_compaction_policy_test.cpp index 1c25041dd87347..e0425c36b8933b 100644 --- a/be/test/olap/cumulative_compaction_policy_test.cpp +++ b/be/test/olap/cumulative_compaction_policy_test.cpp @@ -27,241 +27,6 @@ namespace doris { -class TestNumBasedCumulativeCompactionPolicy : public testing::Test { -public: - TestNumBasedCumulativeCompactionPolicy() {} - void SetUp() { - _tablet_meta = static_cast(new TabletMeta( - 1, 2, 15673, 15674, 4, 5, TTabletSchema(), 6, {{7, 8}}, UniqueId(9, 10), - TTabletType::TABLET_TYPE_DISK, TCompressionType::LZ4F)); - - _json_rowset_meta = R"({ - "rowset_id": 540081, - "tablet_id": 15673, - "txn_id": 4042, - "tablet_schema_hash": 567997577, - "rowset_type": "BETA_ROWSET", - "rowset_state": "VISIBLE", - "start_version": 2, - "end_version": 2, - "num_rows": 3929, - "total_disk_size": 84699, - "data_disk_size": 84464, - "index_disk_size": 235, - "empty": false, - "load_id": { - "hi": -5350970832824939812, - "lo": -6717994719194512122 - }, - "creation_time": 1553765670, - "num_segments": 3 - })"; - } - void TearDown() {} - - void init_rs_meta(RowsetMetaSharedPtr& pb1, int64_t start, int64_t end) { - pb1->init_from_json(_json_rowset_meta); - pb1->set_start_version(start); - pb1->set_end_version(end); - pb1->set_creation_time(10000); - pb1->set_tablet_schema(_tablet_meta->tablet_schema()); - } - - void init_all_rs_meta(std::vector* rs_metas) { - RowsetMetaSharedPtr ptr1(new RowsetMeta()); - init_rs_meta(ptr1, 0, 0); - rs_metas->push_back(ptr1); - - RowsetMetaSharedPtr ptr2(new RowsetMeta()); - init_rs_meta(ptr2, 1, 1); - rs_metas->push_back(ptr2); - - RowsetMetaSharedPtr ptr3(new RowsetMeta()); - init_rs_meta(ptr3, 2, 2); - rs_metas->push_back(ptr3); - - RowsetMetaSharedPtr ptr4(new RowsetMeta()); - init_rs_meta(ptr4, 3, 3); - rs_metas->push_back(ptr4); - - RowsetMetaSharedPtr ptr5(new RowsetMeta()); - init_rs_meta(ptr5, 4, 4); - rs_metas->push_back(ptr5); - } - - void init_all_rs_meta_cal_point(std::vector* rs_metas) { - RowsetMetaSharedPtr ptr1(new RowsetMeta()); - init_rs_meta(ptr1, 0, 1); - ptr1->set_segments_overlap(NONOVERLAPPING); - rs_metas->push_back(ptr1); - - RowsetMetaSharedPtr ptr2(new RowsetMeta()); - init_rs_meta(ptr2, 2, 3); - ptr2->set_segments_overlap(NONOVERLAPPING); - rs_metas->push_back(ptr2); - - RowsetMetaSharedPtr ptr3(new RowsetMeta()); - init_rs_meta(ptr3, 4, 4); - ptr3->set_segments_overlap(OVERLAPPING); - rs_metas->push_back(ptr3); - - RowsetMetaSharedPtr ptr4(new RowsetMeta()); - init_rs_meta(ptr4, 5, 5); - ptr4->set_segments_overlap(OVERLAPPING); - rs_metas->push_back(ptr4); - } - - void init_all_rs_meta_delete(std::vector* rs_metas) { - RowsetMetaSharedPtr ptr1(new RowsetMeta()); - init_rs_meta(ptr1, 0, 1); - ptr1->set_segments_overlap(NONOVERLAPPING); - rs_metas->push_back(ptr1); - - RowsetMetaSharedPtr ptr2(new RowsetMeta()); - init_rs_meta(ptr2, 2, 3); - ptr2->set_segments_overlap(NONOVERLAPPING); - rs_metas->push_back(ptr2); - - RowsetMetaSharedPtr ptr3(new RowsetMeta()); - init_rs_meta(ptr3, 4, 4); - ptr3->set_segments_overlap(OVERLAPPING); - rs_metas->push_back(ptr3); - - RowsetMetaSharedPtr ptr4(new RowsetMeta()); - init_rs_meta(ptr4, 5, 5); - DeletePredicatePB del; - del.add_sub_predicates("a = 1"); - del.set_version(5); - ptr4->set_delete_predicate(del); - ptr4->set_segments_overlap(OVERLAP_UNKNOWN); - rs_metas->push_back(ptr4); - - RowsetMetaSharedPtr ptr5(new RowsetMeta()); - init_rs_meta(ptr5, 6, 6); - ptr5->set_segments_overlap(OVERLAPPING); - rs_metas->push_back(ptr5); - } - -protected: - std::string _json_rowset_meta; - TabletMetaSharedPtr _tablet_meta; -}; - -TEST_F(TestNumBasedCumulativeCompactionPolicy, calc_cumulative_compaction_score) { - std::vector rs_metas; - init_all_rs_meta(&rs_metas); - - for (auto& rowset : rs_metas) { - _tablet_meta->add_rs_meta(rowset); - } - - TabletSharedPtr _tablet(new Tablet(_tablet_meta, nullptr, CUMULATIVE_NUM_BASED_POLICY)); - _tablet->init(); - std::shared_ptr cumulative_compaction_policy = - CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy( - CUMULATIVE_NUM_BASED_POLICY); - - const uint32_t score = _tablet->calc_compaction_score(CompactionType::CUMULATIVE_COMPACTION, - cumulative_compaction_policy); - - EXPECT_EQ(15, score); -} - -TEST_F(TestNumBasedCumulativeCompactionPolicy, calculate_cumulative_point) { - std::vector rs_metas; - init_all_rs_meta_cal_point(&rs_metas); - - for (auto& rowset : rs_metas) { - _tablet_meta->add_rs_meta(rowset); - } - - TabletSharedPtr _tablet(new Tablet(_tablet_meta, nullptr, CUMULATIVE_NUM_BASED_POLICY)); - _tablet->init(); - _tablet->calculate_cumulative_point(); - - EXPECT_EQ(4, _tablet->cumulative_layer_point()); -} - -TEST_F(TestNumBasedCumulativeCompactionPolicy, pick_candidate_rowsets) { - std::vector rs_metas; - init_all_rs_meta_cal_point(&rs_metas); - - for (auto& rowset : rs_metas) { - _tablet_meta->add_rs_meta(rowset); - } - - TabletSharedPtr _tablet(new Tablet(_tablet_meta, nullptr, CUMULATIVE_NUM_BASED_POLICY)); - _tablet->init(); - _tablet->calculate_cumulative_point(); - - std::vector candidate_rowsets; - std::shared_lock rdlock(_tablet->get_header_lock()); - _tablet->pick_candidate_rowsets_to_cumulative_compaction(&candidate_rowsets, rdlock); - - EXPECT_EQ(2, candidate_rowsets.size()); -} - -TEST_F(TestNumBasedCumulativeCompactionPolicy, pick_input_rowsets_normal) { - std::vector rs_metas; - init_all_rs_meta_cal_point(&rs_metas); - - for (auto& rowset : rs_metas) { - _tablet_meta->add_rs_meta(rowset); - } - - TabletSharedPtr _tablet(new Tablet(_tablet_meta, nullptr, CUMULATIVE_NUM_BASED_POLICY)); - _tablet->init(); - _tablet->calculate_cumulative_point(); - - NumBasedCumulativeCompactionPolicy policy; - std::vector candidate_rowsets; - - std::shared_lock rdlock(_tablet->get_header_lock()); - _tablet->pick_candidate_rowsets_to_cumulative_compaction(&candidate_rowsets, rdlock); - - std::vector input_rowsets; - Version last_delete_version {-1, -1}; - size_t compaction_score = 0; - policy.pick_input_rowsets(_tablet.get(), candidate_rowsets, 10, 5, &input_rowsets, - &last_delete_version, &compaction_score); - - EXPECT_EQ(2, input_rowsets.size()); - EXPECT_EQ(6, compaction_score); - EXPECT_EQ(-1, last_delete_version.first); - EXPECT_EQ(-1, last_delete_version.second); -} - -TEST_F(TestNumBasedCumulativeCompactionPolicy, pick_input_rowsets_delete) { - std::vector rs_metas; - init_all_rs_meta_delete(&rs_metas); - - for (auto& rowset : rs_metas) { - _tablet_meta->add_rs_meta(rowset); - } - - TabletSharedPtr _tablet(new Tablet(_tablet_meta, nullptr, CUMULATIVE_NUM_BASED_POLICY)); - _tablet->init(); - _tablet->calculate_cumulative_point(); - - NumBasedCumulativeCompactionPolicy policy; - std::vector candidate_rowsets; - - std::shared_lock rdlock(_tablet->get_header_lock()); - _tablet->pick_candidate_rowsets_to_cumulative_compaction(&candidate_rowsets, rdlock); - - std::vector input_rowsets; - Version last_delete_version {-1, -1}; - size_t compaction_score = 0; - - policy.pick_input_rowsets(_tablet.get(), candidate_rowsets, 10, 5, &input_rowsets, - &last_delete_version, &compaction_score); - - EXPECT_EQ(1, input_rowsets.size()); - EXPECT_EQ(3, compaction_score); - EXPECT_EQ(5, last_delete_version.first); - EXPECT_EQ(5, last_delete_version.second); -} - class TestSizeBasedCumulativeCompactionPolicy : public testing::Test { public: TestSizeBasedCumulativeCompactionPolicy() {} @@ -567,8 +332,7 @@ TEST_F(TestSizeBasedCumulativeCompactionPolicy, calc_cumulative_compaction_score _tablet->calculate_cumulative_point(); std::shared_ptr cumulative_compaction_policy = - CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy( - CUMULATIVE_SIZE_BASED_POLICY); + CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy(); const uint32_t score = _tablet->calc_compaction_score(CompactionType::CUMULATIVE_COMPACTION, cumulative_compaction_policy); @@ -587,8 +351,7 @@ TEST_F(TestSizeBasedCumulativeCompactionPolicy, calc_cumulative_compaction_score _tablet->init(); _tablet->calculate_cumulative_point(); std::shared_ptr cumulative_compaction_policy = - CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy( - CUMULATIVE_SIZE_BASED_POLICY); + CumulativeCompactionPolicyFactory::create_cumulative_compaction_policy(); const uint32_t score = _tablet->calc_compaction_score(CompactionType::CUMULATIVE_COMPACTION, cumulative_compaction_policy); diff --git a/docs/en/docs/admin-manual/config/be-config.md b/docs/en/docs/admin-manual/config/be-config.md index 687e527e097810..db36e24b8aa752 100644 --- a/docs/en/docs/admin-manual/config/be-config.md +++ b/docs/en/docs/admin-manual/config/be-config.md @@ -351,14 +351,6 @@ Similar to `base_compaction_trace_threshold`. If set to true, the `cumulative_compaction_trace_threshold` and `base_compaction_trace_threshold` won't work and log is disabled. -### `cumulative_compaction_policy` - -* Type: string -* Description: Configure the merge policy of the cumulative compaction stage. Currently, two merge policy have been implemented, num_based and size_based. -* Default value: size_based - -In detail, ordinary is the initial version of the cumulative compaction merge policy. After a cumulative compaction, the base compaction process is directly performed. The size_based policy is an optimized version of the ordinary strategy. Versions are merged only when the disk volume of the rowset is of the same order of magnitude. After the compaction, the output rowset which satisfies the conditions is promoted to the base compaction stage. In the case of a large number of small batch imports: reduce the write magnification of base compact, trade-off between read magnification and space magnification, and reducing file version data. - ### `cumulative_size_based_promotion_size_mbytes` * Type: int64 diff --git a/docs/en/docs/admin-manual/http-actions/compaction-action.md b/docs/en/docs/admin-manual/http-actions/compaction-action.md index f753cea2388ccb..40a7e6b867efb1 100644 --- a/docs/en/docs/admin-manual/http-actions/compaction-action.md +++ b/docs/en/docs/admin-manual/http-actions/compaction-action.md @@ -72,7 +72,7 @@ If the tablet exists, the result is returned in JSON format: ``` { - "cumulative policy type": "NUM_BASED", + "cumulative policy type": "SIZE_BASED", "cumulative point": 50, "last cumulative failure time": "2019-12-16 18:13:43.224", "last base failure time": "2019-12-16 18:13:23.320", diff --git a/docs/zh-CN/docs/admin-manual/config/be-config.md b/docs/zh-CN/docs/admin-manual/config/be-config.md index 8a1557d9b1c66a..b6816b4c0e3fc5 100644 --- a/docs/zh-CN/docs/admin-manual/config/be-config.md +++ b/docs/zh-CN/docs/admin-manual/config/be-config.md @@ -347,14 +347,6 @@ BaseCompaction触发条件之一:Singleton文件大小限制,100MB 如果设置为true,`cumulative_compaction_trace_threshold` 和 `base_compaction_trace_threshold` 将不起作用。并且trace日志将关闭。 -### `cumulative_compaction_policy` - -* 类型:string -* 描述:配置 cumulative compaction 阶段的合并策略,目前实现了两种合并策略,num_based和size_based -* 默认值:size_based - -详细说明,ordinary,是最初版本的cumulative compaction合并策略,做一次cumulative compaction之后直接base compaction流程。size_based,通用策略是ordinary策略的优化版本,仅当rowset的磁盘体积在相同数量级时才进行版本合并。合并之后满足条件的rowset进行晋升到base compaction阶段。能够做到在大量小批量导入的情况下:降低base compact的写入放大率,并在读取放大率和空间放大率之间进行权衡,同时减少了文件版本的数据。 - ### `cumulative_size_based_promotion_size_mbytes` * 类型:int64 diff --git a/docs/zh-CN/docs/admin-manual/http-actions/compaction-action.md b/docs/zh-CN/docs/admin-manual/http-actions/compaction-action.md index 0698069e166874..432fa2fc2a2e63 100644 --- a/docs/zh-CN/docs/admin-manual/http-actions/compaction-action.md +++ b/docs/zh-CN/docs/admin-manual/http-actions/compaction-action.md @@ -72,7 +72,7 @@ curl -X GET http://be_host:webserver_port/api/compaction/show?tablet_id=xxxx ``` { - "cumulative policy type": "NUM_BASED", + "cumulative policy type": "SIZE_BASED", "cumulative point": 50, "last cumulative failure time": "2019-12-16 18:13:43.224", "last base failure time": "2019-12-16 18:13:23.320",