From 727361664f722f866d93871e6b9bb7d005292f3e Mon Sep 17 00:00:00 2001 From: Jibing Li Date: Tue, 28 Nov 2023 11:06:30 +0800 Subject: [PATCH] Need to recalculate health value when table row count become 0. --- .../doris/statistics/AnalysisManager.java | 15 ++++ .../statistics/StatisticsAutoCollector.java | 41 ++++++++-- .../doris/statistics/util/StatisticsUtil.java | 3 +- .../doris/statistics/AnalysisManagerTest.java | 1 - .../StatisticsAutoCollectorTest.java | 74 +++++++++++++++++++ 5 files changed, 126 insertions(+), 8 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 7dc570ba313751..111a711eddf58e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -724,6 +724,21 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException { StatisticsRepository.dropStatistics(tblId, cols); } + public void dropStats(TableIf table) throws DdlException { /* Drop stats of every base-schema column. */ + TableStatsMeta tableStats = findTableStatsStatus(table.getId()); + if (tableStats == null) { + return; /* no stats meta for this table, nothing to drop */ + } + Set cols = table.getBaseSchema().stream().map(Column::getName).collect(Collectors.toSet()); + for (String col : cols) { /* forget the column in the meta and invalidate its cached stats */ + tableStats.removeColumn(col); + Env.getCurrentEnv().getStatisticsCache().invalidate(table.getId(), -1L, col); + } + tableStats.updatedTime = 0; + logCreateTableStats(tableStats); /* NOTE(review): presumably persists the changed meta - confirm */ + StatisticsRepository.dropStatistics(table.getId(), cols); + } + public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) throws DdlException { Map analysisTaskMap = analysisJobIdToTaskMap.remove(killAnalysisJobStmt.jobId); if (analysisTaskMap == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 
3b1107bac08fdf..7f8dd18d50eb28 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -91,15 +91,21 @@ private void analyzeAll() { public void analyzeDb(DatabaseIf databaseIf) throws DdlException { List analysisInfos = constructAnalysisInfo(databaseIf); for (AnalysisInfo analysisInfo : analysisInfos) { - analysisInfo = getReAnalyzeRequiredPart(analysisInfo); - if (analysisInfo == null) { - continue; - } try { + if (needDropStaleStats(analysisInfo)) { /* stale stats: drop them, skip re-analysis */ + Env.getCurrentEnv().getAnalysisManager().dropStats(databaseIf.getTable(analysisInfo.tblId).get()); + continue; + } + analysisInfo = getReAnalyzeRequiredPart(analysisInfo); + if (analysisInfo == null) { + continue; + } createSystemAnalysisJob(analysisInfo); } catch (Throwable t) { analysisInfo.message = t.getMessage(); - throw t; + LOG.warn("Failed to auto analyze table {}.{}, reason {}", + databaseIf.getFullName(), analysisInfo.tblId, analysisInfo.message, t); + continue; /* log and go on with the next table instead of rethrowing */ } } } @@ -191,4 +197,29 @@ protected AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) { return new AnalysisInfoBuilder(jobInfo).setColToPartitions(needRunPartitions).build(); } + /** + * Check whether the stats of the table referenced by jobInfo are stale and should be dropped. + * A user may truncate the table; in that case the previously collected stats need to be dropped. + * @param jobInfo Analysis info whose catalogId, dbId and tblId are used to look up the table. + * @return True if it is an OlapTable whose stats meta has analyze columns while its row count is 0. 
+ */ + protected boolean needDropStaleStats(AnalysisInfo jobInfo) { + TableIf table = StatisticsUtil + .findTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId); + if (!(table instanceof OlapTable)) { /* only OlapTable is considered */ + return false; + } + AnalysisManager analysisManager = Env.getServingEnv().getAnalysisManager(); + TableStatsMeta tblStats = analysisManager.findTableStatsStatus(table.getId()); + if (tblStats == null) { /* no stats meta recorded for the table */ + return false; + } + if (tblStats.analyzeColumns().isEmpty()) { /* analyzeColumns() is empty: nothing to drop */ + return false; + } + if (table.getRowCount() == 0) { /* table reports 0 rows but still has column stats */ + return true; + } + return false; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 06aa9895c2f80c..84ce03ed9d3ddc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -521,8 +521,7 @@ public static String replaceParams(String template, Map params) * * @param updatedRows The number of rows updated by the table * @param totalRows The current number of rows in the table - * the healthier the statistics of the table - * @return Health, the value range is [0, 100], the larger the value, + * @return Health, the value range is [0, 100], the larger the value, the healthier the statistics of the table. 
*/ public static int getTableHealth(long totalRows, long updatedRows) { if (updatedRows >= totalRows) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 35f02b881159f8..9c45908068227e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -409,7 +409,6 @@ public List getColumns() { .setColToPartitions(new HashMap<>()).setColName("col1").build(), olapTable); stats2.updatedRows.addAndGet(20); Assertions.assertFalse(olapTable.needReAnalyzeTable(stats2)); - } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index d94bdd61248734..14c6f41384f100 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -27,6 +27,7 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.Type; import org.apache.doris.catalog.View; +import org.apache.doris.catalog.external.ExternalTable; import org.apache.doris.cluster.ClusterNamespace; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; @@ -418,4 +419,77 @@ public TableIf findTable(long catalogId, long dbId, long tblId) { Assertions.assertNotNull(task.getTableSample()); } } + + @Test + public void testNeedDropStaleStats() { + + TableIf olapTable = new OlapTable(); + TableIf otherTable = new ExternalTable(); + + new MockUp() { + @Mock + public TableIf findTable(long catalogId, long dbId, long tblId) { + if (tblId == 0) { /* id 0 maps to the OlapTable, any other id to the ExternalTable */ + return olapTable; + } else { + return otherTable; + } + } + }; + + new MockUp() { + int count = 0; + + int[] rowCounts = {100, 0}; /* handed out in order by successive getRowCount() calls */ + @Mock + public 
long getRowCount() { + return rowCounts[count++]; + } + + @Mock + public List getBaseSchema() { + return Lists.newArrayList(new Column("col1", Type.INT), new Column("col2", Type.INT)); + } + }; + + AnalysisInfo analysisInfoOlap = new AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL) + .setColToPartitions(new HashMap<>()) + .setAnalysisType(AnalysisType.FUNDAMENTALS) + .setColName("col1") + .setTblId(0) + .setJobType(JobType.SYSTEM).build(); + + new MockUp() { + int count = 0; + + TableStatsMeta[] tableStatsArr = + new TableStatsMeta[] {null, /* handed out in order by findTableStatsStatus() */ + new TableStatsMeta(0, analysisInfoOlap, olapTable), + new TableStatsMeta(0, analysisInfoOlap, olapTable)}; + + { + tableStatsArr[1].updatedRows.addAndGet(100); + tableStatsArr[2].updatedRows.addAndGet(0); + } + + + @Mock + public TableStatsMeta findTableStatsStatus(long tblId) { + return tableStatsArr[count++]; + } + }; + + AnalysisInfo analysisInfoOtherTable = new AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL) + .setColToPartitions(new HashMap<>()) + .setAnalysisType(AnalysisType.FUNDAMENTALS) + .setColName("col1") + .setTblId(1) + .setJobType(JobType.SYSTEM).build(); + + StatisticsAutoCollector statisticsAutoCollector = new StatisticsAutoCollector(); + Assertions.assertFalse(statisticsAutoCollector.needDropStaleStats(analysisInfoOtherTable)); /* not OlapTable */ + Assertions.assertFalse(statisticsAutoCollector.needDropStaleStats(analysisInfoOlap)); /* stats meta is null */ + Assertions.assertFalse(statisticsAutoCollector.needDropStaleStats(analysisInfoOlap)); /* row count 100 */ + Assertions.assertTrue(statisticsAutoCollector.needDropStaleStats(analysisInfoOlap)); /* row count 0 */ + } }