From 791a9cd7acdbec81ec6b0da54c28a1fefb8bf56d Mon Sep 17 00:00:00 2001 From: weizuo Date: Fri, 13 Nov 2020 11:56:02 +0800 Subject: [PATCH] add metrics for compaction permits and log for compaction merge --- be/src/common/config.h | 3 +++ be/src/olap/compaction_permit_limiter.cpp | 4 ++++ be/src/olap/compaction_permit_limiter.h | 1 + be/src/olap/merger.cpp | 4 ++++ be/src/util/doris_metrics.cpp | 6 ++++++ be/src/util/doris_metrics.h | 5 +++++ docs/en/administrator-guide/config/be_config.md | 14 ++++++++++++++ docs/zh-CN/administrator-guide/config/be_config.md | 14 ++++++++++++++ 8 files changed, 51 insertions(+) diff --git a/be/src/common/config.h b/be/src/common/config.h index 401088d3cd596a..e1fc5357cc105d 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -324,6 +324,9 @@ namespace config { // How many rounds of cumulative compaction for each round of base compaction when compaction tasks generation. CONF_mInt32(cumulative_compaction_rounds_for_each_base_compaction_round, "9"); + // Merge log will be printed for each "row_step_for_compaction_merge_log" rows merged during compaction. If the value is set to 0, merge log will not be printed. + CONF_mInt64(row_step_for_compaction_merge_log, "0"); + // Threshold to logging compaction trace, in seconds. 
CONF_mInt32(base_compaction_trace_threshold, "10"); CONF_mInt32(cumulative_compaction_trace_threshold, "2"); diff --git a/be/src/olap/compaction_permit_limiter.cpp b/be/src/olap/compaction_permit_limiter.cpp index b0a08e6679db94..b675cf89f42fd9 100644 --- a/be/src/olap/compaction_permit_limiter.cpp +++ b/be/src/olap/compaction_permit_limiter.cpp @@ -22,6 +22,7 @@ namespace doris { CompactionPermitLimiter::CompactionPermitLimiter() : _used_permits(0) {} bool CompactionPermitLimiter::request(int64_t permits) { + DorisMetrics::instance()->compaction_waitting_permits->set_value(permits); if (permits > config::total_permits_for_compaction_score) { // when tablet's compaction score is larger than "config::total_permits_for_compaction_score", // it's necessary to do compaction for this tablet because this tablet will not get "permits" @@ -40,11 +41,14 @@ bool CompactionPermitLimiter::request(int64_t permits) { } } _used_permits += permits; + DorisMetrics::instance()->compaction_waitting_permits->set_value(0); + DorisMetrics::instance()->compaction_used_permits->set_value(_used_permits); return true; } void CompactionPermitLimiter::release(int64_t permits) { _used_permits -= permits; _permits_cv.notify_one(); + DorisMetrics::instance()->compaction_used_permits->set_value(_used_permits); } } // namespace doris diff --git a/be/src/olap/compaction_permit_limiter.h b/be/src/olap/compaction_permit_limiter.h index b8a216d361ff33..316eb621291fc9 100644 --- a/be/src/olap/compaction_permit_limiter.h +++ b/be/src/olap/compaction_permit_limiter.h @@ -22,6 +22,7 @@ #include "common/config.h" #include "olap/utils.h" +#include "util/doris_metrics.h" namespace doris { diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp index c8b262c515881c..eccfbe0ca40722 100644 --- a/be/src/olap/merger.cpp +++ b/be/src/olap/merger.cpp @@ -65,6 +65,10 @@ OLAPStatus Merger::merge_rowsets(TabletSharedPtr tablet, RETURN_NOT_OK_LOG(dst_rowset_writer->add_row(row_cursor), "failed to write row when 
merging rowsets of tablet " + tablet->full_name()); output_rows++; + LOG_IF(INFO, config::row_step_for_compaction_merge_log != 0 && output_rows % config::row_step_for_compaction_merge_log == 0) + << "Merge rowsets stay alive. " + << "tablet=" << tablet->full_name() + << ", merged rows=" << output_rows; // the memory allocate by mem pool has been copied, // so we should release memory immediately mem_pool->clear(); diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp index 54cc6af5f6488b..52c9f5af150254 100644 --- a/be/src/util/doris_metrics.cpp +++ b/be/src/util/doris_metrics.cpp @@ -110,6 +110,9 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(process_fd_num_limit_hard, MetricUnit::NOUNIT DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(tablet_cumulative_max_compaction_score, MetricUnit::NOUNIT); DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(tablet_base_max_compaction_score, MetricUnit::NOUNIT); +DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(compaction_used_permits, MetricUnit::NOUNIT); +DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(compaction_waitting_permits, MetricUnit::NOUNIT); + DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(push_request_write_bytes_per_second, MetricUnit::BYTES); DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(query_scan_bytes_per_second, MetricUnit::BYTES); DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_disk_io_util_percent, MetricUnit::PERCENT); @@ -214,6 +217,9 @@ DorisMetrics::DorisMetrics() : _metric_registry(_s_registry_name) { INT_GAUGE_METRIC_REGISTER(_server_metric_entity, tablet_cumulative_max_compaction_score); INT_GAUGE_METRIC_REGISTER(_server_metric_entity, tablet_base_max_compaction_score); + INT_GAUGE_METRIC_REGISTER(_server_metric_entity, compaction_used_permits); + INT_GAUGE_METRIC_REGISTER(_server_metric_entity, compaction_waitting_permits); + INT_GAUGE_METRIC_REGISTER(_server_metric_entity, push_request_write_bytes_per_second); INT_GAUGE_METRIC_REGISTER(_server_metric_entity, query_scan_bytes_per_second); INT_GAUGE_METRIC_REGISTER(_server_metric_entity, 
max_disk_io_util_percent); diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h index 47a893390869df..c7ef56c5d00361 100644 --- a/be/src/util/doris_metrics.h +++ b/be/src/util/doris_metrics.h @@ -133,6 +133,11 @@ class DorisMetrics { IntGauge* tablet_cumulative_max_compaction_score; IntGauge* tablet_base_max_compaction_score; + // permits currently used by all compaction tasks + IntGauge* compaction_used_permits; + // permits required by the compaction task which is waiting for permits + IntGauge* compaction_waitting_permits; + // The following metrics will be calculated // by metric calculator IntGauge* push_request_write_bytes_per_second; diff --git a/docs/en/administrator-guide/config/be_config.md b/docs/en/administrator-guide/config/be_config.md index dcca0c647bceb9..e1d290d6a2b698 100644 --- a/docs/en/administrator-guide/config/be_config.md +++ b/docs/en/administrator-guide/config/be_config.md @@ -655,6 +655,13 @@ Indicates how many tablets in this data directory failed to load. At the same ti ### `row_nums_check` +### `row_step_for_compaction_merge_log` + +* Type: int64 +* Description: Merge log will be printed for each "row_step_for_compaction_merge_log" rows merged during compaction. If the value is set to 0, merge log will not be printed. +* Default value: 0 +* Dynamically modify: true + ### `scan_context_gc_interval_min` ### `scratch_dirs` @@ -802,6 +809,13 @@ If the parameter is `THREADED`, the model is a non-blocking I/O model, If the parameter is `THREAD_POOL`, the model is a blocking I/O model. +### `total_permits_for_compaction_score` + +* Type: int64 +* Description: The upper limit of "permits" held by all compaction tasks. This config can be set to limit memory consumption for compaction. 
+* Default value: 10000 +* Dynamically modify: true + ### `trash_file_expire_time_sec` ### `txn_commit_rpc_timeout_ms` diff --git a/docs/zh-CN/administrator-guide/config/be_config.md b/docs/zh-CN/administrator-guide/config/be_config.md index ad54636ccc6bd8..4deb54877bbc5c 100644 --- a/docs/zh-CN/administrator-guide/config/be_config.md +++ b/docs/zh-CN/administrator-guide/config/be_config.md @@ -654,6 +654,13 @@ load tablets from header failed, failed tablets size: xxx, path=xxx ### `row_nums_check` +### `row_step_for_compaction_merge_log` + +* 类型:int64 +* 描述:Compaction执行过程中,每次合并row_step_for_compaction_merge_log行数据会打印一条LOG。如果该参数被设置为0,表示merge过程中不需要打印LOG。 +* 默认值:0 +* 可动态修改:是 + ### `scan_context_gc_interval_min` ### `scratch_dirs` @@ -799,6 +806,13 @@ Stream Load 一般适用于导入几个GB以内的数据,不适合导入过大 若该参数为`THREAD_POOL`, 该模型为阻塞式I/O模型。 +### `total_permits_for_compaction_score` + +* 类型:int64 +* 描述:被所有的compaction任务所能持有的 "permits" 上限,用来限制compaction占用的内存。 +* 默认值:10000 +* 可动态修改:是 + ### `trash_file_expire_time_sec` ### `txn_commit_rpc_timeout_ms`