-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[enhancement](compaction) opt compaction task producer and quick compaction #13495
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -253,59 +253,41 @@ CONF_Bool(enable_vectorized_compaction, "true"); | |
| // whether enable vectorized schema change/material-view/rollup task. | ||
| CONF_Bool(enable_vectorized_alter_table, "true"); | ||
|
|
||
| // check the configuration of auto compaction in seconds when auto compaction disabled | ||
| CONF_mInt32(check_auto_compaction_interval_seconds, "5"); | ||
| // This config can be set to limit thread number in compaction thread pool. | ||
| CONF_mInt32(max_base_compaction_threads, "4"); | ||
| CONF_mInt32(max_cumu_compaction_threads, "10"); | ||
|
|
||
| CONF_mInt64(base_compaction_num_cumulative_deltas, "5"); | ||
| CONF_mDouble(base_cumulative_delta_ratio, "0.3"); | ||
| CONF_mInt64(base_compaction_interval_seconds_since_last_operation, "86400"); | ||
| CONF_mInt32(base_compaction_write_mbytes_per_sec, "5"); | ||
| CONF_Bool(enable_base_compaction_idle_sched, "true"); | ||
|
|
||
| // dup key not compaction big files | ||
| CONF_Bool(enable_dup_key_base_compaction_skip_big_file, "true"); | ||
| CONF_mInt64(base_compaction_min_rowset_num, "5"); | ||
| CONF_mDouble(base_compaction_min_data_ratio, "0.3"); | ||
| CONF_mInt64(base_compaction_dup_key_max_file_size_mbytes, "1024"); | ||
|
|
||
| // In size_based policy, output rowset of cumulative compaction total disk size exceed this config size, | ||
| // output rowset of cumulative compaction total disk size exceed this config size, | ||
| // this rowset will be given to base compaction, unit is m byte. | ||
| CONF_mInt64(cumulative_size_based_promotion_size_mbytes, "1024"); | ||
| CONF_mInt64(compaction_promotion_size_mbytes, "1024"); | ||
|
|
||
| // In size_based policy, output rowset of cumulative compaction total disk size exceed this config ratio of | ||
| // output rowset of cumulative compaction total disk size exceed this config ratio of | ||
| // base rowset's total disk size, this rowset will be given to base compaction. The value must be between | ||
| // 0 and 1. | ||
| CONF_mDouble(cumulative_size_based_promotion_ratio, "0.05"); | ||
| CONF_mDouble(compaction_promotion_ratio, "0.05"); | ||
|
|
||
| // In size_based policy, the smallest size of rowset promotion. When the rowset is less than this config, this | ||
| // the smallest size of rowset promotion. When the rowset is less than this config, this | ||
| // rowset will be not given to base compaction. The unit is m byte. | ||
| CONF_mInt64(cumulative_size_based_promotion_min_size_mbytes, "64"); | ||
| CONF_mInt64(compaction_promotion_min_size_mbytes, "64"); | ||
|
|
||
| // The lower bound size to do cumulative compaction. When total disk size of candidate rowsets is less than | ||
| // this size, size_based policy may not do to cumulative compaction. The unit is m byte. | ||
| CONF_mInt64(cumulative_size_based_compaction_lower_size_mbytes, "64"); | ||
| CONF_mInt64(compaction_min_size_mbytes, "64"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If this config is used only for cumulative compaction, we'd better name it cumulative_compaction_min_size_mbytes.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, this config seems strange: if each insert writes only a small amount of data (e.g. 1 row), then none of the rowsets would be compacted until their total size reaches 64MB, yet the version count is limited to 2000? |
||
|
|
||
| // cumulative compaction policy: min and max delta file's number | ||
| CONF_mInt64(min_cumulative_compaction_num_singleton_deltas, "5"); | ||
| CONF_mInt64(max_cumulative_compaction_num_singleton_deltas, "1000"); | ||
|
|
||
| // if compaction of a tablet failed, this tablet should not be chosen to | ||
| // compaction until this interval passes. | ||
| CONF_mInt64(min_compaction_failure_interval_sec, "5"); // 5 seconds | ||
|
|
||
| // This config can be set to limit thread number in compaction thread pool. | ||
| CONF_mInt32(max_base_compaction_threads, "4"); | ||
| CONF_mInt32(max_cumu_compaction_threads, "10"); | ||
|
|
||
| // This config can be set to limit thread number in smallcompaction thread pool. | ||
| CONF_mInt32(quick_compaction_max_threads, "10"); | ||
|
|
||
| // Thread count to do tablet meta checkpoint, -1 means use the data directories count. | ||
| CONF_Int32(max_meta_checkpoint_threads, "-1"); | ||
| CONF_mInt64(cumulative_compaction_min_deltas, "5"); | ||
| CONF_mInt64(cumulative_compaction_max_deltas, "1000"); | ||
|
|
||
|
Comment on lines
+284
to
285
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we rename deltas to segments? |
||
| // The upper limit of "permits" held by all compaction tasks. This config can be set to limit memory consumption for compaction. | ||
| CONF_mInt64(total_permits_for_compaction_score, "10000"); | ||
|
|
||
| // sleep interval in ms after generated compaction tasks | ||
| CONF_mInt32(generate_compaction_tasks_min_interval_ms, "10"); | ||
| CONF_mInt32(generate_compaction_tasks_interval_ms, "10"); | ||
|
|
||
| // Compaction task number per disk. | ||
| // Must be greater than 2, because Base compaction and Cumulative compaction have at least one thread each. | ||
|
|
@@ -319,23 +301,17 @@ CONF_Validator(compaction_task_num_per_fast_disk, | |
| // How many rounds of cumulative compaction for each round of base compaction when compaction tasks generation. | ||
| CONF_mInt32(cumulative_compaction_rounds_for_each_base_compaction_round, "9"); | ||
|
|
||
| // Merge log will be printed for each "row_step_for_compaction_merge_log" rows merged during compaction | ||
| CONF_mInt64(row_step_for_compaction_merge_log, "0"); | ||
|
|
||
| // Threshold to logging compaction trace, in seconds. | ||
| CONF_mInt32(base_compaction_trace_threshold, "60"); | ||
| CONF_mInt32(cumulative_compaction_trace_threshold, "10"); | ||
| CONF_mBool(disable_compaction_trace_log, "true"); | ||
|
|
||
| // Thread count to do tablet meta checkpoint, -1 means use the data directories count. | ||
| CONF_Int32(max_meta_checkpoint_threads, "-1"); | ||
|
|
||
| // Threshold to logging agent task trace, in seconds. | ||
| CONF_mInt32(agent_task_trace_threshold_sec, "2"); | ||
|
|
||
| // time interval to record tablet scan count in second for the purpose of calculating tablet scan frequency | ||
| CONF_mInt64(tablet_scan_frequency_time_node_interval_second, "300"); | ||
| // coefficient for tablet scan frequency and compaction score when finding a tablet for compaction | ||
| CONF_mInt32(compaction_tablet_scan_frequency_factor, "0"); | ||
| CONF_mInt32(compaction_tablet_compaction_score_factor, "1"); | ||
|
|
||
| // This config can be set to limit thread number in tablet migration thread pool. | ||
| CONF_Int32(min_tablet_migration_threads, "1"); | ||
| CONF_Int32(max_tablet_migration_threads, "1"); | ||
|
|
@@ -814,15 +790,6 @@ CONF_mInt32(bloom_filter_predicate_check_row_num, "20480"); | |
|
|
||
| CONF_Bool(enable_decimalv3, "false"); | ||
|
|
||
| //whether turn on quick compaction feature | ||
| CONF_Bool(enable_quick_compaction, "false"); | ||
| // For continuous versions that rows less than quick_compaction_max_rows will trigger compaction quickly | ||
| CONF_Int32(quick_compaction_max_rows, "1000"); | ||
| // min compaction versions | ||
| CONF_Int32(quick_compaction_batch_size, "10"); | ||
| // do compaction min rowsets | ||
| CONF_Int32(quick_compaction_min_rowsets, "10"); | ||
|
|
||
| // cooldown task configs | ||
| CONF_Int32(cooldown_thread_num, "5"); | ||
| CONF_mInt64(generate_cooldown_task_interval_sec, "20"); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -117,10 +117,9 @@ Status CumulativeCompaction::pick_rowsets_to_compact() { | |
|
|
||
| size_t compaction_score = 0; | ||
| int transient_size = _tablet->cumulative_compaction_policy()->pick_input_rowsets( | ||
| _tablet.get(), candidate_rowsets, | ||
| config::max_cumulative_compaction_num_singleton_deltas, | ||
| config::min_cumulative_compaction_num_singleton_deltas, &_input_rowsets, | ||
| &_last_delete_version, &compaction_score); | ||
| _tablet.get(), candidate_rowsets, config::cumulative_compaction_max_deltas, | ||
| config::cumulative_compaction_min_deltas, &_input_rowsets, &_last_delete_version, | ||
| &compaction_score); | ||
|
|
||
| // Cumulative compaction will process with at least 1 rowset. | ||
| // So when there is no rowset being chosen, we should return Status::OLAPInternalError(OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSION): | ||
|
|
@@ -143,8 +142,7 @@ Status CumulativeCompaction::pick_rowsets_to_compact() { | |
| int64_t last_cumu = _tablet->last_cumu_compaction_success_time(); | ||
| int64_t last_base = _tablet->last_base_compaction_success_time(); | ||
| if (last_cumu != 0 || last_base != 0) { | ||
| int64_t interval_threshold = | ||
| config::base_compaction_interval_seconds_since_last_operation * 1000; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should define a constant instead of using the literal 86400 in different places. |
||
| int64_t interval_threshold = 86400 * 1000; | ||
| int64_t cumu_interval = now - last_cumu; | ||
| int64_t base_interval = now - last_base; | ||
| if (cumu_interval > interval_threshold && base_interval > interval_threshold) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can clean up some useless configs, but we'd better not change the config names, for compatibility reasons. @morningman please help review this kind of change.