diff --git a/docs/en/docs/query-acceleration/statistics.md b/docs/en/docs/query-acceleration/statistics.md index c25054094b6e84..dbb1bf0016b74d 100644 --- a/docs/en/docs/query-acceleration/statistics.md +++ b/docs/en/docs/query-acceleration/statistics.md @@ -88,7 +88,7 @@ This feature has been officially supported since 2.0.3 and is enabled by default The collection jobs for statistics themselves consume a certain amount of system resources. To minimize the overhead, for tables with a large amount of data (default 5 GiB, adjustable with the FE parameter `huge_table_lower_bound_size_in_bytes`), Doris automatically uses sampling to collect statistics. Automatic sampling defaults to sampling 4,194,304 (2^22) rows to reduce the system's burden and complete the collection job as quickly as possible. If you want to sample more rows to obtain a more accurate data distribution, you can increase the sampling row count by adjusting the `huge_table_default_sample_rows` parameter. In addition, for tables with data larger than `huge_table_lower_bound_size_in_bytes` * 5, Doris ensures that the collection time interval is not less than 12 hours (which can be controlled by adjusting the `huge_table_auto_analyze_interval_in_millis` parameter). -If you are concerned about automatic collection jobs interfering with your business, you can specify a time frame for the automatic collection jobs to run during low business loads by setting the `full_auto_analyze_start_time` and `full_auto_analyze_end_time` parameters according to your needs. You can also completely disable this feature by setting the `enable_full_auto_analyze` parameter to `false`. +If you are concerned about automatic collection jobs interfering with your business, you can specify a time frame for the automatic collection jobs to run during low business loads by setting the `auto_analyze_start_time` and `auto_analyze_end_time` parameters according to your needs. 
You can also completely disable this feature by setting the `enable_auto_analyze` parameter to `false`.
@@ -287,9 +287,9 @@ mysql> KILL ANALYZE 52357; | Session Variable | Description | Default Value | | ----------------------------- | -------------------------------------------- | ------------- | -| full_auto_analyze_start_time | Start time for automatic statistics collection | 00:00:00 | -| full_auto_analyze_end_time | End time for automatic statistics collection | 23:59:59 | -| enable_full_auto_analyze | Enable automatic collection functionality | true | +| auto_analyze_start_time | Start time for automatic statistics collection | 00:00:00 | +| auto_analyze_end_time | End time for automatic statistics collection | 23:59:59 | +| enable_auto_analyze | Enable automatic collection functionality | true | | huge_table_default_sample_rows | Sampling rows for large tables | 4194304 | | huge_table_lower_bound_size_in_bytes | Tables with size greater than this value will be automatically sampled during collection of statistics | 5368709120 | | huge_table_auto_analyze_interval_in_millis | Controls the minimum time interval for automatic ANALYZE on large tables. Tables with sizes greater than `huge_table_lower_bound_size_in_bytes * 5` will be ANALYZEed only once within this time interval. 
| 43200000 | diff --git a/docs/zh-CN/docs/query-acceleration/statistics.md b/docs/zh-CN/docs/query-acceleration/statistics.md index 7700ae3db44f02..cccce083ee9658 100644 --- a/docs/zh-CN/docs/query-acceleration/statistics.md +++ b/docs/zh-CN/docs/query-acceleration/statistics.md @@ -91,7 +91,7 @@ ANALYZE TABLE lineitem WITH SAMPLE ROWS 100000; 统计信息的收集作业本身需要占用一定的系统资源,为了尽可能降低开销,对于数据量较大(默认为5GiB,可通过设置FE参数`huge_table_lower_bound_size_in_bytes`来调节此行为)的表,Doris会自动采取采样的方式去收集,自动采样默认采样4194304(2^22)行,以尽可能降低对系统造成的负担并尽快完成收集作业。如果希望采样更多的行以获得更准确的数据分布信息,可通过调整参数`huge_table_default_sample_rows`增大采样行数。另外对于数据量大于`huge_table_lower_bound_size_in_bytes` * 5 的表,Doris保证其收集时间间隔不小于12小时(该时间可通过调整参数`huge_table_auto_analyze_interval_in_millis`控制)。 -如果担心自动收集作业对业务造成干扰,可结合自身需求通过设置参数`full_auto_analyze_start_time`和参数`full_auto_analyze_end_time`指定自动收集作业在业务负载较低的时间段执行。也可以通过设置参数`enable_full_auto_analyze` 为`false`来彻底关闭本功能。 +如果担心自动收集作业对业务造成干扰,可结合自身需求通过设置参数`auto_analyze_start_time`和参数`auto_analyze_end_time`指定自动收集作业在业务负载较低的时间段执行。也可以通过设置参数`enable_auto_analyze` 为`false`来彻底关闭本功能。
@@ -290,9 +290,9 @@ mysql> KILL ANALYZE 52357; |会话变量|说明|默认值| |---|---|---| -|full_auto_analyze_start_time|自动统计信息收集开始时间|00:00:00| -|full_auto_analyze_end_time|自动统计信息收集结束时间|23:59:59| -|enable_full_auto_analyze|开启自动收集功能|true| +|auto_analyze_start_time|自动统计信息收集开始时间|00:00:00| +|auto_analyze_end_time|自动统计信息收集结束时间|23:59:59| +|enable_auto_analyze|开启自动收集功能|true| |huge_table_default_sample_rows|对大表的采样行数|4194304| |huge_table_lower_bound_size_in_bytes|大小超过该值的的表,在自动收集时将会自动通过采样收集统计信息|5368709120| |huge_table_auto_analyze_interval_in_millis|控制对大表的自动ANALYZE的最小时间间隔,在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes * 5的表仅ANALYZE一次|43200000| diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 7a4cef6e40b51e..e4fc5f66e95949 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -419,9 +419,9 @@ public class SessionVariable implements Serializable, Writable { public static final String INVERTED_INDEX_CONJUNCTION_OPT_THRESHOLD = "inverted_index_conjunction_opt_threshold"; - public static final String FULL_AUTO_ANALYZE_START_TIME = "full_auto_analyze_start_time"; + public static final String AUTO_ANALYZE_START_TIME = "auto_analyze_start_time"; - public static final String FULL_AUTO_ANALYZE_END_TIME = "full_auto_analyze_end_time"; + public static final String AUTO_ANALYZE_END_TIME = "auto_analyze_end_time"; public static final String SQL_DIALECT = "sql_dialect"; @@ -429,7 +429,7 @@ public class SessionVariable implements Serializable, Writable { public static final String TEST_QUERY_CACHE_HIT = "test_query_cache_hit"; - public static final String ENABLE_FULL_AUTO_ANALYZE = "enable_full_auto_analyze"; + public static final String ENABLE_AUTO_ANALYZE = "enable_auto_analyze"; public static final String FASTER_FLOAT_CONVERT = "faster_float_convert"; @@ -1260,18 +1260,6 @@ public void 
setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) { + " use a skiplist to optimize the intersection."}) public int invertedIndexConjunctionOptThreshold = 1000; - @VariableMgr.VarAttr(name = FULL_AUTO_ANALYZE_START_TIME, needForward = true, checker = "checkAnalyzeTimeFormat", - description = {"该参数定义自动ANALYZE例程的开始时间", - "This parameter defines the start time for the automatic ANALYZE routine."}, - flag = VariableMgr.GLOBAL) - public String fullAutoAnalyzeStartTime = "00:00:00"; - - @VariableMgr.VarAttr(name = FULL_AUTO_ANALYZE_END_TIME, needForward = true, checker = "checkAnalyzeTimeFormat", - description = {"该参数定义自动ANALYZE例程的结束时间", - "This parameter defines the end time for the automatic ANALYZE routine."}, - flag = VariableMgr.GLOBAL) - public String fullAutoAnalyzeEndTime = "23:59:59"; - @VariableMgr.VarAttr(name = SQL_DIALECT, needForward = true, checker = "checkSqlDialect", description = {"解析sql使用的方言", "The dialect used to parse sql."}) public String sqlDialect = "doris"; @@ -1286,10 +1274,22 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) { options = {"none", "sql_cache", "partition_cache"}) public String testQueryCacheHit = "none"; - @VariableMgr.VarAttr(name = ENABLE_FULL_AUTO_ANALYZE, + @VariableMgr.VarAttr(name = ENABLE_AUTO_ANALYZE, description = {"该参数控制是否开启自动收集", "Set false to disable auto analyze"}, flag = VariableMgr.GLOBAL) - public boolean enableFullAutoAnalyze = true; + public boolean enableAutoAnalyze = true; + + @VariableMgr.VarAttr(name = AUTO_ANALYZE_START_TIME, needForward = true, checker = "checkAnalyzeTimeFormat", + description = {"该参数定义自动ANALYZE例程的开始时间", + "This parameter defines the start time for the automatic ANALYZE routine."}, + flag = VariableMgr.GLOBAL) + public String autoAnalyzeStartTime = "00:00:00"; + + @VariableMgr.VarAttr(name = AUTO_ANALYZE_END_TIME, needForward = true, checker = "checkAnalyzeTimeFormat", + description = {"该参数定义自动ANALYZE例程的结束时间", + "This parameter defines the end time for the automatic 
ANALYZE routine."}, + flag = VariableMgr.GLOBAL) + public String autoAnalyzeEndTime = "23:59:59"; @VariableMgr.VarAttr(name = FASTER_FLOAT_CONVERT, description = {"是否启用更快的浮点数转换算法,注意会影响输出格式", "Set true to enable faster float pointer number convert"}) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 86429f09e24cf0..59fe04339f3de8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -832,14 +832,14 @@ public static boolean inAnalyzeTime(LocalTime now) { private static Pair findConfigFromGlobalSessionVar() { try { String startTime = - findConfigFromGlobalSessionVar(SessionVariable.FULL_AUTO_ANALYZE_START_TIME) - .fullAutoAnalyzeStartTime; + findConfigFromGlobalSessionVar(SessionVariable.AUTO_ANALYZE_START_TIME) + .autoAnalyzeStartTime; // For compatibility if (StringUtils.isEmpty(startTime)) { startTime = StatisticConstants.FULL_AUTO_ANALYZE_START_TIME; } - String endTime = findConfigFromGlobalSessionVar(SessionVariable.FULL_AUTO_ANALYZE_END_TIME) - .fullAutoAnalyzeEndTime; - if (StringUtils.isEmpty(startTime)) { + String endTime = findConfigFromGlobalSessionVar(SessionVariable.AUTO_ANALYZE_END_TIME) + .autoAnalyzeEndTime; + if (StringUtils.isEmpty(endTime)) { endTime = StatisticConstants.FULL_AUTO_ANALYZE_END_TIME; } @@ -859,7 +859,7 @@ protected static SessionVariable findConfigFromGlobalSessionVar(String varName) public static boolean enableAutoAnalyze() { try { - return findConfigFromGlobalSessionVar(SessionVariable.ENABLE_FULL_AUTO_ANALYZE).enableFullAutoAnalyze; + return findConfigFromGlobalSessionVar(SessionVariable.ENABLE_AUTO_ANALYZE).enableAutoAnalyze; } catch (Exception e) { LOG.warn("Fail to get value of enable auto analyze, return false by default", e); } diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index c0c790c9c25dff..107a5f53822fe5 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -81,8 +81,8 @@ public void testInAnalyzeTime1() { @Mock protected SessionVariable findConfigFromGlobalSessionVar(String varName) throws Exception { SessionVariable sessionVariable = new SessionVariable(); - sessionVariable.fullAutoAnalyzeStartTime = "00:00:00"; - sessionVariable.fullAutoAnalyzeEndTime = "02:00:00"; + sessionVariable.autoAnalyzeStartTime = "00:00:00"; + sessionVariable.autoAnalyzeEndTime = "02:00:00"; return sessionVariable; } }; @@ -100,8 +100,8 @@ public void testInAnalyzeTime2() { @Mock protected SessionVariable findConfigFromGlobalSessionVar(String varName) throws Exception { SessionVariable sessionVariable = new SessionVariable(); - sessionVariable.fullAutoAnalyzeStartTime = "00:00:00"; - sessionVariable.fullAutoAnalyzeEndTime = "23:00:00"; + sessionVariable.autoAnalyzeStartTime = "00:00:00"; + sessionVariable.autoAnalyzeEndTime = "23:00:00"; return sessionVariable; } };