# Patch summary: make the partition-sampling limits used by OlapAnalysisTask configurable.
#
# SessionVariable.java
#   - Adds the variable names "partition_sample_count" and "partition_sample_row_count".
#   - Adds the fields partitionSampleCount (int, initial value 30) and partitionSampleRowCount
#     (long, initial value 3_000_000_000L), both declared with flag = VariableMgr.GLOBAL.
# OlapAnalysisTask.java
#   - Removes the constant PARTITION_COUNT_TO_SAMPLE (was 5).
#   - The break check in pickSamplePartition now compares pickedRows against
#     StatisticsUtil.getPartitionSampleRowCount() (previously MAXIMUM_SAMPLE_ROWS) and
#     pickedPartitionCount against StatisticsUtil.getPartitionSampleCount(); the partition-count
#     comparison also changes from ">" to ">=".
#   - The MAXIMUM_SAMPLE_ROWS declaration is left in place as context.
#     NOTE(review): whether it is still referenced elsewhere is not visible here - confirm.
# StatisticConstants.java
#   - Adds PARTITION_SAMPLE_COUNT = 30 and PARTITION_SAMPLE_ROW_COUNT = 3_000_000_000L.
# StatisticsUtil.java
#   - Adds getPartitionSampleCount() and getPartitionSampleRowCount(). Each reads its field from
#     the object returned by findConfigFromGlobalSessionVar(...); on any Exception it logs a
#     warning and returns the matching StatisticConstants value instead.
#
# NOTE(review): the values 30 and 3_000_000_000L are written twice (SessionVariable field
#   initializers and StatisticConstants); keep the two copies in sync when changing either.
# NOTE(review): both new English description strings end with "\n"; the neighbouring descriptions
#   visible in this patch do not - confirm this is intended.
# NOTE(review): this copy of the patch has lost its original line breaks (all hunks sit on three
#   physical lines); restore them before trying to apply it.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index ee44dd65eacdab..3f874077f516a8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -531,6 +531,8 @@ public class SessionVariable implements Serializable, Writable { public static final String HUGE_TABLE_DEFAULT_SAMPLE_ROWS = "huge_table_default_sample_rows"; public static final String HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = "huge_table_lower_bound_size_in_bytes"; + public static final String PARTITION_SAMPLE_COUNT = "partition_sample_count"; + public static final String PARTITION_SAMPLE_ROW_COUNT = "partition_sample_row_count"; // for spill to disk public static final String EXTERNAL_SORT_BYTES_THRESHOLD = "external_sort_bytes_threshold"; @@ -1916,6 +1918,18 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { + "considered outdated."}) public int tableStatsHealthThreshold = 90; + @VariableMgr.VarAttr(name = PARTITION_SAMPLE_COUNT, flag = VariableMgr.GLOBAL, + description = { + "大分区表采样的分区数上限", + "The upper limit of the number of partitions for sampling large partitioned tables.\n"}) + public int partitionSampleCount = 30; + + @VariableMgr.VarAttr(name = PARTITION_SAMPLE_ROW_COUNT, flag = VariableMgr.GLOBAL, + description = { + "大分区表采样的行数上限", + "The upper limit of the number of rows for sampling large partitioned tables.\n"}) + public long partitionSampleRowCount = 3_000_000_000L; + @VariableMgr.VarAttr(name = ENABLE_MATERIALIZED_VIEW_REWRITE, needForward = true, description = {"是否开启基于结构信息的物化视图透明改写", "Whether to enable materialized view rewriting based on struct info"}) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 72f9345841f06d..6ca19e39887eef 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -59,7 +59,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask { private boolean partitionColumnSampleTooManyRows = false; private boolean scanFullTable = false; private static final long MAXIMUM_SAMPLE_ROWS = 1_000_000_000; - private static final int PARTITION_COUNT_TO_SAMPLE = 5; @VisibleForTesting public OlapAnalysisTask() { @@ -336,7 +335,8 @@ protected long pickSamplePartition(List partitions, List picked MaterializedIndex materializedIndex = p.getIndex(indexId); pickedTabletIds.addAll(materializedIndex.getTabletIdsInOrder()); } - if (pickedRows >= MAXIMUM_SAMPLE_ROWS || pickedPartitionCount > PARTITION_COUNT_TO_SAMPLE) { + if (pickedRows >= StatisticsUtil.getPartitionSampleRowCount() + || pickedPartitionCount >= StatisticsUtil.getPartitionSampleCount()) { break; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index cc1508217fb111..f56aeea60a1242 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -99,6 +99,10 @@ public class StatisticConstants { public static final int MSG_LEN_UPPER_BOUND = 1024; + public static final int PARTITION_SAMPLE_COUNT = 30; + + public static final long PARTITION_SAMPLE_ROW_COUNT = 3_000_000_000L; + static { SYSTEM_DBS.add(FeConstants.INTERNAL_DB_NAME); SYSTEM_DBS.add(InfoSchemaDb.DATABASE_NAME); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 10fe21a7abf5bb..d42210dfdcb46b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -858,6 +858,24 @@ public static int getAutoAnalyzeTableWidthThreshold() { return StatisticConstants.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD; } + public static int getPartitionSampleCount() { + try { + return findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_COUNT).partitionSampleCount; + } catch (Exception e) { + LOG.warn("Fail to get value of partition_sample_count, return default", e); + } + return StatisticConstants.PARTITION_SAMPLE_COUNT; + } + + public static long getPartitionSampleRowCount() { + try { + return findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_ROW_COUNT).partitionSampleRowCount; + } catch (Exception e) { + LOG.warn("Fail to get value of partition_sample_row_count, return default", e); + } + return StatisticConstants.PARTITION_SAMPLE_ROW_COUNT; + } + public static String encodeValue(ResultRow row, int index) { if (row == null || row.getValues().size() <= index) { return "NULL";