From 95747fee91547bb29a2e0acc512232c6c21f660e Mon Sep 17 00:00:00 2001 From: kikyo Date: Fri, 15 Sep 2023 00:12:45 +0800 Subject: [PATCH] remove corresponding col stats status if the loading at the end of analyze task is failed --- .../doris/analysis/ShowTableStatsStmt.java | 2 +- .../doris/statistics/AnalysisManager.java | 12 ++- .../doris/statistics/BaseAnalysisTask.java | 6 +- .../doris/statistics/ColumnStatistic.java | 5 +- .../doris/statistics/StatisticConstants.java | 8 -- .../doris/statistics/StatisticsCache.java | 10 ++- .../suites/statistics/analyze_stats.groovy | 79 ++++++++++++++++++- 7 files changed, 105 insertions(+), 17 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index 961caba9523faa..29cb22c7848e63 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -48,7 +48,7 @@ public class ShowTableStatsStmt extends ShowStmt { new ImmutableList.Builder() .add("updated_rows") .add("query_times") - .add("row_count(for external_table only)") + .add("row_count") .add("method") .add("type") .add("updated_time") diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index ae8e71063868b5..a83f9795ffe201 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -480,6 +480,8 @@ private Map> validateAndGetPartitions(TableIf table, Set createSimpleQueue(Collection c }, null); } + // Remove the column's stats status from TableStats if loading its stats fails after the column is analyzed, + // so that the column is analyzed again the next time a user or the system submits a job for it. 
+ public void removeColStatsStatus(long tblId, String colName) { + TableStats tableStats = findTableStatsStatus(tblId); + if (tableStats != null) { + tableStats.removeColumn(colName); + } + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index 92fcc870c84997..edadf4c17bf4a5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -185,7 +185,11 @@ protected void afterExecution() { if (killed) { return; } - Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tbl.getId(), -1, col.getName()); + long tblId = tbl.getId(); + String colName = col.getName(); + if (!Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tblId, -1, colName)) { + Env.getCurrentEnv().getAnalysisManager().removeColStatsStatus(tblId, colName); + } } protected void setTaskStateToRunning() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index 7baf39d27a596b..84f8d5bfbfe0fc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -24,7 +24,6 @@ import org.apache.doris.common.AnalysisException; import org.apache.doris.statistics.util.StatisticsUtil; -import com.google.common.base.Preconditions; import com.google.common.collect.Sets; import com.google.gson.annotations.SerializedName; import org.apache.logging.log4j.LogManager; @@ -136,7 +135,9 @@ public static ColumnStatistic fromResultRow(List resultRows) { LOG.debug("Failed to deserialize column stats", t); return ColumnStatistic.UNKNOWN; } - Preconditions.checkState(columnStatistic != null, "Column stats is null"); + if (columnStatistic == null) { + return 
ColumnStatistic.UNKNOWN; + } columnStatistic.partitionIdToColStats.putAll(partitionIdToColStats); return columnStatistic; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index f0f6f60277783a..c77ffa49edb386 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -36,8 +36,6 @@ public class StatisticConstants { public static final int STATISTICS_CACHE_REFRESH_INTERVAL = 24 * 2; - public static final int ROW_COUNT_CACHE_VALID_DURATION_IN_HOURS = 12; - /** * Bucket count fot column_statistics and analysis_job table. */ @@ -59,12 +57,6 @@ public class StatisticConstants { public static final int HISTOGRAM_MAX_BUCKET_NUM = 128; - /** - * The health of the table indicates the health of the table statistics, rang in [0, 100]. - * Below this threshold will automatically re-collect statistics. TODO make it in fe.conf - */ - public static final int TABLE_STATS_HEALTH_THRESHOLD = 80; - public static final int ANALYZE_MANAGER_INTERVAL_IN_SECS = 60; public static List STATISTICS_DB_BLACK_LIST = new ArrayList<>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java index 0c53ac9074c086..6fdcd8ed3d6a42 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java @@ -212,17 +212,20 @@ private void doPreHeat() { } } - public void syncLoadColStats(long tableId, long idxId, String colName) { + /** + * Returns false if the corresponding column stats failed to load (i.e. the result is ColumnStatistic.UNKNOWN). 
+ */ + public boolean syncLoadColStats(long tableId, long idxId, String colName) { List columnResults = StatisticsRepository.loadColStats(tableId, idxId, colName); final StatisticsCacheKey k = new StatisticsCacheKey(tableId, idxId, colName); final ColumnStatistic c = ColumnStatistic.fromResultRow(columnResults); if (c == ColumnStatistic.UNKNOWN) { - return; + return false; } putCache(k, c); if (ColumnStatistic.UNKNOWN == c) { - return; + return false; } TUpdateFollowerStatsCacheRequest updateFollowerStatsCacheRequest = new TUpdateFollowerStatsCacheRequest(); updateFollowerStatsCacheRequest.key = GsonUtils.GSON.toJson(k); @@ -234,6 +237,7 @@ public void syncLoadColStats(long tableId, long idxId, String colName) { } sendStats(frontend, updateFollowerStatsCacheRequest); } + return true; } @VisibleForTesting diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 6cf10848092696..bc9b42a1a5515e 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -117,7 +117,7 @@ suite("test_analyze") { try { sql """ - SELECT COUNT(*) FROM ${tbl}; + SELECT * FROM ${tbl}; """ } catch (Exception e) { exception = e @@ -959,4 +959,81 @@ PARTITION `p599` VALUES IN (599) """ expected_col_stats(col_id_res, 3, 1) + sql """DROP TABLE IF EXISTS `some_complex_type_test`""" + + sql """ + CREATE TABLE `some_complex_type_test` ( + `id` int(11) NULL COMMENT "", + `c_array` ARRAY NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ); + """ + + sql """INSERT INTO `some_complex_type_test` VALUES (1, [1,2,3,4,5]);""" + sql """INSERT INTO `some_complex_type_test` VALUES (2, [6,7,8]), (3, []), (4, null);""" + + sql """ + ANALYZE TABLE `some_complex_type_test` WITH SYNC; + + """ 
+ + sql """ + SELECT COUNT(1) FROM `some_complex_type_test` + """ + + sql """DROP TABLE IF EXISTS `analyze_test_with_schema_update`""" + + sql """ + CREATE TABLE `analyze_test_with_schema_update` ( + col1 varchar(11451) not null, col2 int not null, col3 int not null) + DUPLICATE KEY(col1) + DISTRIBUTED BY HASH(col1) + BUCKETS 3 + PROPERTIES( + "replication_num"="1" + ); + """ + + sql """insert into analyze_test_with_schema_update values(1, 2, 3);""" + sql """insert into analyze_test_with_schema_update values(4, 5, 6);""" + sql """insert into analyze_test_with_schema_update values(7, 1, 9);""" + sql """insert into analyze_test_with_schema_update values(3, 8, 2);""" + sql """insert into analyze_test_with_schema_update values(5, 2, 1);""" + + sql """ + ANALYZE TABLE analyze_test_with_schema_update WITH SYNC + """ + + sql """ + ALTER TABLE analyze_test_with_schema_update ADD COLUMN tbl_name VARCHAR(256) DEFAULT NULL; + """ + + sql """ + ANALYZE TABLE analyze_test_with_schema_update WITH SYNC + """ + + sql """ + SELECT * FROM analyze_test_with_schema_update; + """ + + sql """ + DROP STATS analyze_test_with_schema_update(col3); + """ + + sql """ + ANALYZE TABLE analyze_test_with_schema_update WITH SYNC + """ + + sql """ + SELECT * FROM analyze_test_with_schema_update; + """ + + }