From dea9190f77d832519031c7ff06f1cc1256162bd3 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Wed, 23 Oct 2024 16:51:59 +0800 Subject: [PATCH] [improvement](statistics)Skip auto analyze when table row count is not fully reported. (return -1) (#42209) Skip auto analyze when the table's row count has not been fully reported. "Not fully reported" means OlapTable.getRowCountForIndex(baseIndexId, strict=true) returns -1, which is now named OlapTable.ROW_COUNT_BEFORE_REPORT. The check applies only to OlapTables that would be analyzed with the SAMPLE method; such a table is skipped for this round of auto analysis. --- .../org/apache/doris/catalog/OlapTable.java | 6 ++-- .../statistics/StatisticsAutoCollector.java | 7 ++++ .../StatisticsAutoCollectorTest.java | 32 +++++++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 1e5e1eb0496c0d..9957b93338eb20 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -130,6 +130,8 @@ public enum OlapTableState { WAITING_STABLE } + public static long ROW_COUNT_BEFORE_REPORT = -1; + @SerializedName(value = "state") private volatile OlapTableState state; @@ -1516,10 +1518,10 @@ public long getRowCountForIndex(long indexId, boolean strict) { if (index == null) { LOG.warn("Index {} not exist in partition {}, table {}, {}", indexId, entry.getValue().getName(), id, name); - return -1; + return ROW_COUNT_BEFORE_REPORT; } if (strict && !index.getRowCountReported()) { - return -1; + return ROW_COUNT_BEFORE_REPORT; } rowCount += index.getRowCount() == -1 ? 
0 : index.getRowCount(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index f76037d80cb4a9..a7c2fc6365ba29 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -179,6 +179,13 @@ protected void createAnalyzeJobForTbl(DatabaseIf db, List analysisInfos, TableIf table) { AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes() ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL; + if (table instanceof OlapTable && analysisMethod.equals(AnalysisMethod.SAMPLE)) { + OlapTable ot = (OlapTable) table; + if (ot.getRowCountForIndex(ot.getBaseIndexId(), true) == OlapTable.ROW_COUNT_BEFORE_REPORT) { + LOG.info("Table {} row count is not fully reported, skip auto analyzing this time.", ot.getName()); + return; + } + } long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 
0 : table.getRowCount(); AnalysisInfo jobInfo = new AnalysisInfoBuilder() .setJobId(Env.getCurrentEnv().getNextId()) diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index d3e3130e28a060..267e04772acb04 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -470,4 +470,36 @@ protected boolean canCollect() { sac.analyzeAll(); } + + @Test + public void testCreateAnalyzeJobForTbl() { + StatisticsAutoCollector collector = new StatisticsAutoCollector(); + OlapTable table = new OlapTable(); + new MockUp() { + @Mock + public long getDataSize(boolean singleReplica) { + return 100; + } + + @Mock + public long getRowCountForIndex(long indexId, boolean strict) { + return -1; + } + + @Mock + public boolean isPartitionedTable() { + return false; + } + }; + List infos = Lists.newArrayList(); + collector.createAnalyzeJobForTbl(null, infos, table); + Assertions.assertEquals(0, infos.size()); + new MockUp() { + @Mock + public long getRowCountForIndex(long indexId, boolean strict) { + return 100; + } + }; + Assertions.assertThrows(NullPointerException.class, () -> collector.createAnalyzeJobForTbl(null, infos, table)); + } }