From afc7f631e4b408c754c7cd3e5baf3ee00d0172b0 Mon Sep 17 00:00:00 2001
From: James
Date: Mon, 24 Feb 2025 10:56:59 +0800
Subject: [PATCH] Add session variable for partition sample count.

---
 .../org/apache/doris/qe/SessionVariable.java  | 14 ++++++++++++++
 .../doris/statistics/OlapAnalysisTask.java    |  4 ++--
 .../doris/statistics/StatisticConstants.java  |  4 ++++
 .../doris/statistics/util/StatisticsUtil.java | 18 ++++++++++++++++++
 4 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index f3d81529513104..03847f48812d10 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -567,6 +567,8 @@ public class SessionVariable implements Serializable, Writable {

     public static final String HUGE_TABLE_DEFAULT_SAMPLE_ROWS = "huge_table_default_sample_rows";
     public static final String HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = "huge_table_lower_bound_size_in_bytes";
+    public static final String PARTITION_SAMPLE_COUNT = "partition_sample_count";
+    public static final String PARTITION_SAMPLE_ROW_COUNT = "partition_sample_row_count";

     // for spill to disk
     public static final String EXTERNAL_SORT_BYTES_THRESHOLD = "external_sort_bytes_threshold";
@@ -2044,6 +2046,18 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) {
                     + "considered outdated."})
     public int tableStatsHealthThreshold = 90;

+    @VariableMgr.VarAttr(name = PARTITION_SAMPLE_COUNT, flag = VariableMgr.GLOBAL,
+            description = {
+                    "大分区表采样的分区数上限",
+                    "The upper limit of the number of partitions for sampling large partitioned tables.\n"})
+    public int partitionSampleCount = 30;
+
+    @VariableMgr.VarAttr(name = PARTITION_SAMPLE_ROW_COUNT, flag = VariableMgr.GLOBAL,
+            description = {
+                    "大分区表采样的行数上限",
+                    "The upper limit of the number of rows for sampling large partitioned tables.\n"})
+    public long partitionSampleRowCount = 3_000_000_000L;
+
     @VariableMgr.VarAttr(name = ENABLE_MATERIALIZED_VIEW_REWRITE, needForward = true,
             description = {"是否开启基于结构信息的物化视图透明改写",
                     "Whether to enable materialized view rewriting based on struct info"})
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index f0a55f9b54ee48..ba2b2770a5d109 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -63,7 +63,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
     private boolean partitionColumnSampleTooManyRows = false;
     private boolean scanFullTable = false;
     private static final long MAXIMUM_SAMPLE_ROWS = 1_000_000_000;
-    private static final int PARTITION_COUNT_TO_SAMPLE = 5;

     @VisibleForTesting
     public OlapAnalysisTask() {
@@ -386,7 +385,8 @@ protected long pickSamplePartition(List partitions, List picked
                 MaterializedIndex materializedIndex = p.getIndex(indexId);
                 pickedTabletIds.addAll(materializedIndex.getTabletIdsInOrder());
             }
-            if (pickedRows >= MAXIMUM_SAMPLE_ROWS || pickedPartitionCount > PARTITION_COUNT_TO_SAMPLE) {
+            if (pickedRows >= StatisticsUtil.getPartitionSampleRowCount()
+                    || pickedPartitionCount >= StatisticsUtil.getPartitionSampleCount()) {
                 break;
             }
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index f6d49ea079bf18..414a590fb772da 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -103,6 +103,10 @@ public class StatisticConstants {

     public static final int MSG_LEN_UPPER_BOUND = 1024;

+    public static final int PARTITION_SAMPLE_COUNT = 30;
+
+    public static final long PARTITION_SAMPLE_ROW_COUNT = 3_000_000_000L;
+
     static {
         SYSTEM_DBS.add(FeConstants.INTERNAL_DB_NAME);
         SYSTEM_DBS.add(InfoSchemaDb.DATABASE_NAME);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 6b50cd32a1e77c..b482e5c225c648 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -943,6 +943,24 @@ public static int getAutoAnalyzeTableWidthThreshold() {
         return StatisticConstants.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD;
     }

+    public static int getPartitionSampleCount() {
+        try {
+            return findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_COUNT).partitionSampleCount;
+        } catch (Exception e) {
+            LOG.warn("Fail to get value of partition_sample_count, return default", e);
+        }
+        return StatisticConstants.PARTITION_SAMPLE_COUNT;
+    }
+
+    public static long getPartitionSampleRowCount() {
+        try {
+            return findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_ROW_COUNT).partitionSampleRowCount;
+        } catch (Exception e) {
+            LOG.warn("Fail to get value of partition_sample_row_count, return default", e);
+        }
+        return StatisticConstants.PARTITION_SAMPLE_ROW_COUNT;
+    }
+
     public static String encodeValue(ResultRow row, int index) {
         if (row == null || row.getValues().size() <= index) {
             return "NULL";