From d3595215b10594a27fef78e60b8e312ec3cc37d2 Mon Sep 17 00:00:00 2001
From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com>
Date: Wed, 23 Oct 2024 16:51:59 +0800
Subject: [PATCH] [improvement](statistics)Skip auto analyze when table row
 count is not fully reported. (return -1) (#42209)

Skip auto analyze when table row count is not fully reported. Not fully
reported means row count is -1.
---
Review notes (this patch is amended relative to commit d3595215b1, so the
blob ids on the `index` lines below are stale):
 - ROW_COUNT_BEFORE_REPORT is declared final and documented.
 - The new test refers to the constant instead of a literal -1 and spells
   out the generic type arguments of MockUp and List.

 .../org/apache/doris/catalog/OlapTable.java   | 10 ++++--
 .../statistics/StatisticsAutoCollector.java   |  7 ++++
 .../StatisticsAutoCollectorTest.java          | 34 +++++++++++++++++++
 3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 5876611b4ec7a5..3d14b92de07371 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -119,6 +119,12 @@ public enum OlapTableState {
         WAITING_STABLE
     }
 
+    /**
+     * Sentinel returned by {@link #getRowCountForIndex(long, boolean)} when an index is missing from a
+     * partition or, in strict mode, when its row count has not been reported yet.
+     */
+    public static final long ROW_COUNT_BEFORE_REPORT = -1;
+
     private volatile OlapTableState state;
 
     // index id -> index meta
@@ -1296,10 +1302,10 @@ public long getRowCountForIndex(long indexId, boolean strict) {
             if (index == null) {
                 LOG.warn("Index {} not exist in partition {}, table {}, {}",
                         indexId, entry.getValue().getName(), id, name);
-                return -1;
+                return ROW_COUNT_BEFORE_REPORT;
             }
             if (strict && !index.getRowCountReported()) {
-                return -1;
+                return ROW_COUNT_BEFORE_REPORT;
             }
             rowCount += index.getRowCount() == -1 ? 0 : index.getRowCount();
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index a04f428aa6605e..137068d4315757 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -179,6 +179,13 @@ protected void createAnalyzeJobForTbl(DatabaseIf db,
             List analysisInfos, TableIf table) {
         AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
                 ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL;
+        if (table instanceof OlapTable && analysisMethod.equals(AnalysisMethod.SAMPLE)) {
+            OlapTable ot = (OlapTable) table;
+            if (ot.getRowCountForIndex(ot.getBaseIndexId(), true) == OlapTable.ROW_COUNT_BEFORE_REPORT) {
+                LOG.info("Table {} row count is not fully reported, skip auto analyzing this time.", ot.getName());
+                return;
+            }
+        }
         long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0 : table.getRowCount();
         AnalysisInfo jobInfo = new AnalysisInfoBuilder()
                 .setJobId(Env.getCurrentEnv().getNextId())
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
index 1ab5178c3b70d1..9f2003535e1f4a 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
@@ -567,4 +567,38 @@ protected boolean canCollect() {
 
         sac.analyzeAll();
     }
+
+    @Test
+    public void testCreateAnalyzeJobForTbl() {
+        StatisticsAutoCollector collector = new StatisticsAutoCollector();
+        OlapTable table = new OlapTable();
+        new MockUp<OlapTable>() {
+            @Mock
+            public long getDataSize(boolean singleReplica) {
+                return 100;
+            }
+
+            @Mock
+            public long getRowCountForIndex(long indexId, boolean strict) {
+                return OlapTable.ROW_COUNT_BEFORE_REPORT;
+            }
+
+            @Mock
+            public boolean isPartitionedTable() {
+                return false;
+            }
+        };
+        List<AnalysisInfo> infos = Lists.newArrayList();
+        // Row count not reported yet: the collector must skip the table and create no job.
+        collector.createAnalyzeJobForTbl(null, infos, table);
+        Assertions.assertEquals(0, infos.size());
+        new MockUp<OlapTable>() {
+            @Mock
+            public long getRowCountForIndex(long indexId, boolean strict) {
+                return 100;
+            }
+        };
+        // Row count reported: the guard no longer returns early, so the call runs on and throws (NPE expected here).
+        Assertions.assertThrows(NullPointerException.class, () -> collector.createAnalyzeJobForTbl(null, infos, table));
+    }
 }