From 1fb8d7abfe401f7ca7d827af4ee4a109466b7d13 Mon Sep 17 00:00:00 2001 From: kikyo Date: Wed, 9 Aug 2023 19:18:00 +0800 Subject: [PATCH 1/2] remove auto analyze grammar --- fe/fe-core/src/main/cup/sql_parser.cup | 6 ------ .../statistics/StatisticsAutoAnalyzer.java | 21 +------------------ 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 284ccd8477ab0f..aca73ab9a5a394 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -5998,12 +5998,6 @@ with_analysis_properties ::= put("incremental", "true"); }}; :} - | KW_AUTO - {: - RESULT = new HashMap() {{ - put("automatic", "true"); - }}; - :} | KW_SAMPLE KW_PERCENT INTEGER_LITERAL:samplePercent {: RESULT = new HashMap() {{ diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java index 23a446d1b44e62..8a81b05bd42b34 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java @@ -77,9 +77,7 @@ protected void runAfterCatalogReady() { } analyzePeriodically(); - if (!Config.enable_full_auto_analyze) { - analyzeAutomatically(); - } else { + if (Config.enable_full_auto_analyze) { analyzeAll(); } } @@ -151,23 +149,6 @@ private void analyzePeriodically() { } } - private void analyzeAutomatically() { - AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); - List jobInfos = analysisManager.findAutomaticAnalysisJobs(); - for (AnalysisInfo jobInfo : jobInfos) { - AnalysisInfo checkedJobInfo = null; - try { - checkedJobInfo = getReAnalyzeRequiredPart(jobInfo); - if (checkedJobInfo != null) { - analysisManager.createSystemAnalysisJob(checkedJobInfo, analysisTaskExecutor); - } - } catch (Throwable t) { - LOG.warn("Failed to create analyze job: {}", checkedJobInfo, t); - } - - } - } - /** * Check if automatic analysis of statistics is required. *

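Note on the removal above: the deleted KW_AUTO alternative was the only production that could emit the "automatic" analysis property (the AnalyzeProperties.PROPERTY_AUTOMATIC constant still exists in patch 2/2 below), so after this patch ANALYZE ... WITH AUTO no longer parses and automatic collection is gated solely by Config.enable_full_auto_analyze in StatisticsAutoAnalyzer. A minimal sketch of the property map the removed cup action used to build; the class name is hypothetical, added only for illustration:

import java.util.HashMap;
import java.util.Map;

public class RemovedAutoPropertySketch {
    // What the deleted KW_AUTO action returned: a one-entry map keyed by
    // AnalyzeProperties.PROPERTY_AUTOMATIC. No SQL input can produce this
    // entry after the grammar change; the auto path is config-driven instead.
    static final Map<String, String> AUTO_PROPS = new HashMap<String, String>() {{
            put("automatic", "true");
        }};
}
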
From 76dbf28aa4f8c7ef715c044455ec50eeb348f8a4 Mon Sep 17 00:00:00 2001 From: kikyo Date: Fri, 11 Aug 2023 15:32:35 +0800 Subject: [PATCH 2/2] refactor ResultRow --- .../java/org/apache/doris/common/Config.java | 5 +- .../doris/analysis/AnalyzeProperties.java | 6 +- .../doris/analysis/ShowTableStatsStmt.java | 30 +- .../catalog/InternalSchemaInitializer.java | 40 +- .../catalog/external/HMSExternalTable.java | 20 +- .../apache/doris/journal/JournalEntity.java | 6 + .../org/apache/doris/persist/EditLog.java | 9 + .../apache/doris/persist/OperationType.java | 1 + .../java/org/apache/doris/qe/DdlExecutor.java | 3 - .../org/apache/doris/qe/ShowExecutor.java | 30 +- .../org/apache/doris/qe/StmtExecutor.java | 16 +- .../doris/service/FrontendServiceImpl.java | 19 +- .../apache/doris/statistics/AnalysisInfo.java | 55 -- .../doris/statistics/AnalysisManager.java | 385 ++++----- .../doris/statistics/BaseAnalysisTask.java | 17 +- .../apache/doris/statistics/ColStatsData.java | 82 ++ .../doris/statistics/ColumnStatistic.java | 31 +- .../ColumnStatisticsCacheLoader.java | 1 - .../doris/statistics/HMSAnalysisTask.java | 48 +- .../org/apache/doris/statistics/HistData.java | 36 + .../apache/doris/statistics/Histogram.java | 18 +- .../statistics/HistogramCacheLoader.java | 1 - .../doris/statistics/OlapAnalysisJob.java | 56 ++ .../doris/statistics/OlapAnalysisTask.java | 143 +++- .../apache/doris/statistics/ResultRow.java | 59 ++ .../doris/statistics/StatisticConstants.java | 4 +- .../statistics/StatisticsAutoAnalyzer.java | 97 ++- .../doris/statistics/StatisticsCache.java | 92 +- .../doris/statistics/StatisticsCleaner.java | 19 +- .../statistics/StatisticsRepository.java | 121 +-- .../org/apache/doris/statistics/StatsId.java | 58 ++ .../doris/statistics/TableStatistic.java | 61 -- .../statistics/TableStatisticBuilder.java | 51 -- .../TableStatisticsCacheLoader.java | 60 -- .../apache/doris/statistics/TableStats.java | 89 ++ .../doris/statistics/util/InternalQuery.java | 21 +- .../statistics/util/InternalQueryResult.java | 242 ------ .../doris/statistics/util/StatisticsUtil.java | 55 +- .../doris/statistics/AnalysisJobTest.java | 22 +- .../doris/statistics/AnalysisManagerTest.java | 180 +++- .../statistics/AnalysisTaskExecutorTest.java | 6 +- .../apache/doris/statistics/CacheTest.java | 180 ++-- .../StatisticsAutoAnalyzerTest.java | 202 +++++ .../doris/statistics/StatsMockUtil.java | 48 ++ .../util/InternalQueryResultTest.java | 119 --- gensrc/thrift/FrontendService.thrift | 2 +- .../suites/statistics/analyze_stats.groovy | 809 ++++++++++++++++-- 47 files changed, 2206 insertions(+), 1449 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/HistData.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisJob.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/ResultRow.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatistic.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticBuilder.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java delete mode 100644 
fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalQueryResult.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoAnalyzerTest.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java delete mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryResultTest.java diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 6131740e920634..af864549856653 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -2087,7 +2087,7 @@ public class Config extends ConfigBase { public static int force_olap_table_replication_num = 0; @ConfField - public static int full_auto_analyze_simultaneously_running_task_num = 5; + public static int full_auto_analyze_simultaneously_running_task_num = 1; @ConfField public static int cpu_resource_limit_per_analyze_task = 1; @@ -2114,4 +2114,7 @@ public class Config extends ConfigBase { @ConfField public static boolean forbid_running_alter_job = false; + + @ConfField + public static int table_stats_health_threshold = 80; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java index 9c9332a804d2e5..eae26c99843847 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java @@ -22,6 +22,7 @@ import org.apache.doris.statistics.AnalysisInfo.AnalysisType; import com.google.common.collect.ImmutableSet; +import com.google.gson.annotations.SerializedName; import org.apache.commons.lang3.StringUtils; import org.quartz.CronExpression; @@ -32,8 +33,6 @@ public class AnalyzeProperties { - private final Map properties; - public static final String PROPERTY_SYNC = "sync"; public static final String PROPERTY_INCREMENTAL = "incremental"; public static final String PROPERTY_AUTOMATIC = "automatic"; @@ -55,6 +54,9 @@ public class AnalyzeProperties { private CronExpression cronExpression; + @SerializedName("analyzeProperties") + private final Map properties; + private static final ImmutableSet PROPERTIES_SET = new ImmutableSet.Builder() .add(PROPERTY_SYNC) .add(PROPERTY_INCREMENTAL) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index da10d5c492b1fe..af14be37b5430c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -32,12 +32,13 @@ import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.ShowResultSet; import org.apache.doris.qe.ShowResultSetMetaData; -import org.apache.doris.statistics.TableStatistic; -import org.apache.doris.statistics.util.StatisticsUtil; +import org.apache.doris.statistics.TableStats; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import java.sql.Date; +import java.util.ArrayList; import java.util.List; public class ShowTableStatsStmt extends ShowStmt { @@ -45,9 +46,14 @@ public class ShowTableStatsStmt extends ShowStmt { // TODO add more columns private static final ImmutableList TITLE_NAMES = new ImmutableList.Builder() - .add("row_count") - .add("update_time") - 
.add("last_analyze_time") + .add("updated_rows") + .add("query_times") + .add("row_count(for external_table only)") + .add("method") + .add("type") + .add("updated_time") + .add("columns") + .add("trigger") .build(); private final TableName tableName; @@ -126,12 +132,20 @@ public long getPartitionId() { return table.getPartition(partitionName).getId(); } - public ShowResultSet constructResultSet(TableStatistic tableStatistic) { + public ShowResultSet constructResultSet(TableStats tableStatistic) { + if (tableStatistic == null) { + return new ShowResultSet(getMetaData(), new ArrayList<>()); + } List> result = Lists.newArrayList(); List row = Lists.newArrayList(); + row.add(String.valueOf(tableStatistic.updatedRows)); + row.add(String.valueOf(tableStatistic.queriedTimes.get())); row.add(String.valueOf(tableStatistic.rowCount)); - row.add(String.valueOf(tableStatistic.updateTime)); - row.add(StatisticsUtil.getReadableTime(tableStatistic.lastAnalyzeTimeInMs)); + row.add(tableStatistic.analysisMethod.toString()); + row.add(tableStatistic.analysisType.toString()); + row.add(new Date(tableStatistic.updatedTime).toString()); + row.add(tableStatistic.columns); + row.add(tableStatistic.jobType.toString()); result.add(row); return new ShowResultSet(getMetaData(), result); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java index 39b2326e309538..52b3005d7963cd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java @@ -84,7 +84,6 @@ public void run() { return; } Database database = op.get(); - modifyTblReplicaCount(database, StatisticConstants.ANALYSIS_TBL_NAME); modifyTblReplicaCount(database, StatisticConstants.STATISTIC_TBL_NAME); modifyTblReplicaCount(database, StatisticConstants.HISTOGRAM_TBL_NAME); } @@ -127,7 +126,6 @@ public void modifyTblReplicaCount(Database database, String tblName) { } private void createTbl() throws UserException { - Env.getCurrentEnv().getInternalCatalog().createTable(buildAnalysisTblStmt()); Env.getCurrentEnv().getInternalCatalog().createTable(buildStatisticsTblStmt()); Env.getCurrentEnv().getInternalCatalog().createTable(buildHistogramTblStmt()); } @@ -146,41 +144,6 @@ public static void createDB() { } } - @VisibleForTesting - public CreateTableStmt buildAnalysisTblStmt() throws UserException { - TableName tableName = new TableName("", - FeConstants.INTERNAL_DB_NAME, StatisticConstants.ANALYSIS_TBL_NAME); - List columnDefs = new ArrayList<>(); - columnDefs.add(new ColumnDef("id", TypeDef.createVarchar(StatisticConstants.ID_LEN))); - columnDefs.add(new ColumnDef("catalog_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN))); - columnDefs.add(new ColumnDef("db_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN))); - columnDefs.add(new ColumnDef("tbl_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN))); - columnDefs.add(new ColumnDef("idx_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN))); - ColumnDef partId = new ColumnDef("part_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN)); - partId.setAllowNull(true); - columnDefs.add(partId); - columnDefs.add(new ColumnDef("count", TypeDef.create(PrimitiveType.BIGINT))); - columnDefs.add(new ColumnDef("last_analyze_time_in_ms", TypeDef.create(PrimitiveType.BIGINT))); - columnDefs.add(new ColumnDef("update_time", 
TypeDef.create(PrimitiveType.DATETIME))); - String engineName = "olap"; - ArrayList uniqueKeys = Lists.newArrayList("id", "catalog_id", - "db_id", "tbl_id", "idx_id", "part_id"); - KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS, uniqueKeys); - DistributionDesc distributionDesc = new HashDistributionDesc( - StatisticConstants.STATISTIC_TABLE_BUCKET_COUNT, uniqueKeys); - Map properties = new HashMap() { - { - put("replication_num", String.valueOf( - Math.max(1, Config.min_replication_num_per_tablet))); - } - }; - CreateTableStmt createTableStmt = new CreateTableStmt(true, false, - tableName, columnDefs, engineName, keysDesc, null, distributionDesc, - properties, null, "Doris internal statistics table, DO NOT MODIFY IT", null); - StatisticsUtil.analyze(createTableStmt); - return createTableStmt; - } - @VisibleForTesting public CreateTableStmt buildStatisticsTblStmt() throws UserException { TableName tableName = new TableName("", @@ -264,8 +227,7 @@ private boolean created() { return false; } Database db = optionalDatabase.get(); - return db.getTable(StatisticConstants.ANALYSIS_TBL_NAME).isPresent() - && db.getTable(StatisticConstants.STATISTIC_TBL_NAME).isPresent() + return db.getTable(StatisticConstants.STATISTIC_TBL_NAME).isPresent() && db.getTable(StatisticConstants.HISTOGRAM_TBL_NAME).isPresent(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java index 02242803666193..e75b9859b84095 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java @@ -32,7 +32,7 @@ import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.HMSAnalysisTask; -import org.apache.doris.statistics.TableStatistic; +import org.apache.doris.statistics.TableStats; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.thrift.THiveTable; import org.apache.doris.thrift.TTableDescriptor; @@ -107,6 +107,9 @@ public class HMSExternalTable extends ExternalTable { private DLAType dlaType = DLAType.UNKNOWN; + // Not as precise as the row count in TableStats, but better than none. + private long estimatedRowCount = -1; + public enum DLAType { UNKNOWN, HIVE, HUDI, ICEBERG } @@ -422,13 +425,20 @@ public List getHudiSchema(List hmsSchema) { @Override public long estimatedRowCount() { try { - Optional tableStatistics = Env.getCurrentEnv().getStatisticsCache().getTableStatistics( - catalog.getId(), catalog.getDbOrAnalysisException(dbName).getId(), id); - if (tableStatistics.isPresent()) { - long rowCount = tableStatistics.get().rowCount; + TableStats tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(id); + if (tableStats != null) { + long rowCount = tableStats.rowCount; LOG.debug("Estimated row count for db {} table {} is {}.", dbName, name, rowCount); return rowCount; } + + if (estimatedRowCount != -1) { + return estimatedRowCount; + } + // Cache the estimated row count in this field even if the table never gets analyzed, + // since the row estimation might be expensive due to RPC. 
+ estimatedRowCount = getRowCount(); + return estimatedRowCount; } catch (Exception e) { LOG.warn("Fail to get row count for table {}", name, e); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java index d8b8c62bd5d717..34a6e2c77cdd3a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java @@ -122,6 +122,7 @@ import org.apache.doris.scheduler.job.Job; import org.apache.doris.scheduler.job.JobTask; import org.apache.doris.statistics.AnalysisInfo; +import org.apache.doris.statistics.TableStats; import org.apache.doris.system.Backend; import org.apache.doris.system.Frontend; import org.apache.doris.transaction.TransactionState; @@ -858,6 +859,11 @@ public void readFields(DataInput in) throws IOException { isRead = true; break; } + case OperationType.OP_UPDATE_TABLE_STATS: { + data = TableStats.read(in); + isRead = true; + break; + } default: { IOException e = new IOException(); LOG.error("UNKNOWN Operation Type {}", opCode, e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java index 9a1841be691bfd..06486fc49cb778 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java @@ -86,6 +86,7 @@ import org.apache.doris.scheduler.job.Job; import org.apache.doris.scheduler.job.JobTask; import org.apache.doris.statistics.AnalysisInfo; +import org.apache.doris.statistics.TableStats; import org.apache.doris.system.Backend; import org.apache.doris.system.Frontend; import org.apache.doris.transaction.TransactionState; @@ -1088,6 +1089,10 @@ public static void loadJournal(Env env, Long logId, JournalEntity journal) { env.replayAutoIncrementIdUpdateLog((AutoIncrementIdUpdateLog) journal.getData()); break; } + case OperationType.OP_UPDATE_TABLE_STATS: { + env.getAnalysisManager().replayUpdateTableStatsStatus((TableStats) journal.getData()); + break; + } default: { IOException e = new IOException(); LOG.error("UNKNOWN Operation Type {}", opCode, e); @@ -1906,4 +1911,8 @@ public long logBarrier(BarrierLog log) { public void logUpdateAutoIncrementId(AutoIncrementIdUpdateLog log) { logEdit(OperationType.OP_UPDATE_AUTO_INCREMENT_ID, log); } + + public void logCreateTableStats(TableStats tableStats) { + logEdit(OperationType.OP_UPDATE_TABLE_STATS, tableStats); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java index 26fec1f296774a..0285272bb5a381 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java @@ -323,6 +323,7 @@ public class OperationType { public static final short OP_CREATE_SCHEDULER_TASK = 453; public static final short OP_DELETE_SCHEDULER_TASK = 454; + public static final short OP_UPDATE_TABLE_STATS = 455; /** * Get opcode name by op code. 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java index 81a69bd9e262e6..1d2d31f4163103 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java @@ -38,7 +38,6 @@ import org.apache.doris.analysis.AlterRoutineLoadStmt; import org.apache.doris.analysis.AlterSqlBlockRuleStmt; import org.apache.doris.analysis.AlterSystemStmt; -import org.apache.doris.analysis.AlterTableStatsStmt; import org.apache.doris.analysis.AlterTableStmt; import org.apache.doris.analysis.AlterUserStmt; import org.apache.doris.analysis.AlterViewStmt; @@ -165,8 +164,6 @@ public static void execute(Env env, DdlStmt ddlStmt) throws Exception { env.createMaterializedView((CreateMaterializedViewStmt) ddlStmt); } else if (ddlStmt instanceof AlterTableStmt) { env.alterTable((AlterTableStmt) ddlStmt); - } else if (ddlStmt instanceof AlterTableStatsStmt) { - StatisticsRepository.alterTableStatistics((AlterTableStatsStmt) ddlStmt); } else if (ddlStmt instanceof AlterColumnStatsStmt) { StatisticsRepository.alterColumnStatistics((AlterColumnStatsStmt) ddlStmt); } else if (ddlStmt instanceof AlterViewStmt) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 7637c3869de90b..b46e715e77f59d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -138,7 +138,6 @@ import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.catalog.TabletMeta; import org.apache.doris.catalog.View; -import org.apache.doris.catalog.external.ExternalTable; import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.clone.DynamicPartitionScheduler; import org.apache.doris.cluster.ClusterNamespace; @@ -198,7 +197,7 @@ import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.Histogram; import org.apache.doris.statistics.StatisticsRepository; -import org.apache.doris.statistics.TableStatistic; +import org.apache.doris.statistics.TableStats; import org.apache.doris.statistics.query.QueryStatsUtil; import org.apache.doris.system.Backend; import org.apache.doris.system.Diagnoser; @@ -241,7 +240,6 @@ import java.util.HashSet; import java.util.List; import java.util.Objects; -import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.function.Predicate; @@ -2413,30 +2411,8 @@ private void handleShowDataSkew() throws AnalysisException { private void handleShowTableStats() { ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt; TableIf tableIf = showTableStatsStmt.getTable(); - long partitionId = showTableStatsStmt.getPartitionId(); - boolean showCache = showTableStatsStmt.isCached(); - try { - if (tableIf instanceof ExternalTable && showCache) { - Optional tableStatistics = Env.getCurrentEnv().getStatisticsCache().getTableStatistics( - tableIf.getDatabase().getCatalog().getId(), - tableIf.getDatabase().getId(), - tableIf.getId()); - if (tableStatistics.isPresent()) { - resultSet = showTableStatsStmt.constructResultSet(tableStatistics.get()); - } else { - resultSet = showTableStatsStmt.constructResultSet(TableStatistic.UNKNOWN); - } - } else if (partitionId > 0) { - TableStatistic partStats = StatisticsRepository.fetchTableLevelOfPartStats(partitionId); - resultSet = 
showTableStatsStmt.constructResultSet(partStats); - } else { - TableStatistic tableStats = StatisticsRepository.fetchTableLevelStats(tableIf.getId()); - resultSet = showTableStatsStmt.constructResultSet(tableStats); - } - } catch (DdlException e) { - LOG.warn("Table statistics do not exist: {}", tableIf.getName()); - resultSet = showTableStatsStmt.constructResultSet(TableStatistic.UNKNOWN); - } + TableStats tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableIf.getId()); + resultSet = showTableStatsStmt.constructResultSet(tableStats); } private void handleShowColumnStats() throws AnalysisException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index a742b56dc094bc..b5c556286b65a2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -141,8 +141,8 @@ import org.apache.doris.rewrite.mvrewrite.MVSelectFailedException; import org.apache.doris.rpc.RpcException; import org.apache.doris.service.FrontendOptions; +import org.apache.doris.statistics.ResultRow; import org.apache.doris.statistics.util.InternalQueryBuffer; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.task.LoadEtlTask; import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileType; @@ -2512,9 +2512,6 @@ public List executeInternalQuery() { private List convertResultBatchToResultRows(TResultBatch batch) { List columns = parsedStmt.getColLabels(); - List types = parsedStmt.getResultExprs().stream() - .map(e -> e.getType().getPrimitiveType()) - .collect(Collectors.toList()); List resultRows = new ArrayList<>(); List rows = batch.getRows(); for (ByteBuffer buffer : rows) { @@ -2525,8 +2522,7 @@ private List convertResultBatchToResultRows(TResultBatch batch) { String value = queryBuffer.readStringWithLength(); values.add(value); } - - ResultRow resultRow = new ResultRow(columns, types, values); + ResultRow resultRow = new ResultRow(values); resultRows.add(resultRow); } return resultRows; @@ -2548,5 +2544,13 @@ public void setProfileType(ProfileType profileType) { public void setProxyResultSet(ShowResultSet proxyResultSet) { this.proxyResultSet = proxyResultSet; } + + public ConnectContext getContext() { + return context; + } + + public OriginStatement getOriginStmt() { + return originStmt; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index e7b82039d0be98..27649ef659445e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -87,6 +87,8 @@ import org.apache.doris.qe.QueryState; import org.apache.doris.qe.StmtExecutor; import org.apache.doris.qe.VariableMgr; +import org.apache.doris.statistics.ColumnStatistic; +import org.apache.doris.statistics.ResultRow; import org.apache.doris.statistics.StatisticsCacheKey; import org.apache.doris.statistics.query.QueryStats; import org.apache.doris.system.Backend; @@ -2941,14 +2943,15 @@ private TGetBinlogLagResult getBinlogLagImpl(TGetBinlogRequest request, String c @Override public TStatus updateStatsCache(TUpdateFollowerStatsCacheRequest request) throws TException { - StatisticsCacheKey key = GsonUtils.GSON.fromJson(request.key, StatisticsCacheKey.class); - /* - TODO: Need to 
handle minExpr and maxExpr, so that we can generate the columnStatistic - here and use putCache to update cached directly. - ColumnStatistic columnStatistic = GsonUtils.GSON.fromJson(request.colStats, ColumnStatistic.class); - Env.getCurrentEnv().getStatisticsCache().putCache(key, columnStatistic); - */ - Env.getCurrentEnv().getStatisticsCache().refreshColStatsSync(key.tableId, key.idxId, key.colName); + StatisticsCacheKey k = GsonUtils.GSON.fromJson(request.key, StatisticsCacheKey.class); + List rows = request.statsRows.stream() + .map(s -> GsonUtils.GSON.fromJson(s, ResultRow.class)) + .collect(Collectors.toList()); + ColumnStatistic c = ColumnStatistic.fromResultRow(rows); + if (c != ColumnStatistic.UNKNOWN) { + Env.getCurrentEnv().getStatisticsCache().updateColStatsCache(k.tableId, k.idxId, k.colName, c); + } + // Return Ok anyway return new TStatus(TStatusCode.OK); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index 874e329c062eb4..73441db2e2f8f7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -22,7 +22,6 @@ import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.persist.gson.GsonUtils; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.gson.Gson; @@ -252,60 +251,6 @@ public void addTaskId(long taskId) { taskIds.add(taskId); } - // TODO: use thrift - public static AnalysisInfo fromResultRow(ResultRow resultRow) { - try { - AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder(); - long jobId = Long.parseLong(resultRow.getColumnValue("job_id")); - analysisInfoBuilder.setJobId(jobId); - long taskId = Long.parseLong(resultRow.getColumnValue("task_id")); - analysisInfoBuilder.setTaskId(taskId); - String catalogName = resultRow.getColumnValue("catalog_name"); - analysisInfoBuilder.setCatalogName(catalogName); - String dbName = resultRow.getColumnValue("db_name"); - analysisInfoBuilder.setDbName(dbName); - String tblName = resultRow.getColumnValue("tbl_name"); - analysisInfoBuilder.setTblName(tblName); - String colName = resultRow.getColumnValue("col_name"); - analysisInfoBuilder.setColName(colName); - long indexId = Long.parseLong(resultRow.getColumnValue("index_id")); - analysisInfoBuilder.setIndexId(indexId); - String partitionNames = resultRow.getColumnValue("col_partitions"); - Map> colToPartitions = getColToPartition(partitionNames); - analysisInfoBuilder.setColToPartitions(colToPartitions); - String jobType = resultRow.getColumnValue("job_type"); - analysisInfoBuilder.setJobType(JobType.valueOf(jobType)); - String analysisType = resultRow.getColumnValue("analysis_type"); - analysisInfoBuilder.setAnalysisType(AnalysisType.valueOf(analysisType)); - String analysisMode = resultRow.getColumnValue("analysis_mode"); - analysisInfoBuilder.setAnalysisMode(AnalysisMode.valueOf(analysisMode)); - String analysisMethod = resultRow.getColumnValue("analysis_method"); - analysisInfoBuilder.setAnalysisMethod(AnalysisMethod.valueOf(analysisMethod)); - String scheduleType = resultRow.getColumnValue("schedule_type"); - analysisInfoBuilder.setScheduleType(ScheduleType.valueOf(scheduleType)); - String state = resultRow.getColumnValue("state"); - analysisInfoBuilder.setState(AnalysisState.valueOf(state)); - String 
samplePercent = resultRow.getColumnValue("sample_percent"); - analysisInfoBuilder.setSamplePercent(StatisticsUtil.convertStrToInt(samplePercent)); - String sampleRows = resultRow.getColumnValue("sample_rows"); - analysisInfoBuilder.setSampleRows(StatisticsUtil.convertStrToInt(sampleRows)); - String maxBucketNum = resultRow.getColumnValue("max_bucket_num"); - analysisInfoBuilder.setMaxBucketNum(StatisticsUtil.convertStrToInt(maxBucketNum)); - String periodTimeInMs = resultRow.getColumnValue("period_time_in_ms"); - analysisInfoBuilder.setPeriodTimeInMs(StatisticsUtil.convertStrToInt(periodTimeInMs)); - String lastExecTimeInMs = resultRow.getColumnValue("last_exec_time_in_ms"); - analysisInfoBuilder.setLastExecTimeInMs(StatisticsUtil.convertStrToLong(lastExecTimeInMs)); - String timeCostInMs = resultRow.getColumnValue("time_cost_in_ms"); - analysisInfoBuilder.setTimeCostInMs(StatisticsUtil.convertStrToLong(timeCostInMs)); - String message = resultRow.getColumnValue("message"); - analysisInfoBuilder.setMessage(message); - return analysisInfoBuilder.build(); - } catch (Exception e) { - LOG.warn("Failed to deserialize analysis task info.", e); - return null; - } - } - public String getColToPartitionStr() { if (colToPartitions == null || colToPartitions.isEmpty()) { return ""; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 82daf0e61493bd..62d27a17c92dcd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -29,11 +29,8 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.MaterializedIndexMeta; -import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.TableIf; -import org.apache.doris.catalog.TableIf.TableType; import org.apache.doris.catalog.View; import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.common.AnalysisException; @@ -44,7 +41,6 @@ import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.Daemon; import org.apache.doris.common.util.Util; -import org.apache.doris.datasource.CatalogIf; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.persist.AnalyzeDeletionLog; import org.apache.doris.qe.ConnectContext; @@ -57,9 +53,9 @@ import org.apache.doris.statistics.AnalysisInfo.ScheduleType; import org.apache.doris.statistics.util.StatisticsUtil; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -115,6 +111,8 @@ public class AnalysisManager extends Daemon implements Writable { // Tracking and control sync analyze tasks, keep in mem only private final ConcurrentMap ctxToSyncTask = new ConcurrentHashMap<>(); + private final Map idToTblStatsStatus = new ConcurrentHashMap<>(); + private final Function userJobStatusUpdater = w -> { AnalysisInfo info = w.info; AnalysisState taskState = w.taskState; @@ -143,7 +141,7 @@ public class AnalysisManager extends Daemon implements Writable { // Set the job state to RUNNING when its first task becomes 
RUNNING. if (info.state.equals(AnalysisState.RUNNING) && job.state.equals(AnalysisState.PENDING)) { job.state = AnalysisState.RUNNING; - replayCreateAnalysisJob(job); + logCreateAnalysisJob(job); } boolean allFinished = true; boolean hasFailure = false; @@ -160,7 +158,6 @@ public class AnalysisManager extends Daemon implements Writable { if (allFinished) { if (hasFailure) { job.state = AnalysisState.FAILED; - logCreateAnalysisJob(job); } else { job.state = AnalysisState.FINISHED; try { @@ -168,8 +165,8 @@ public class AnalysisManager extends Daemon implements Writable { } catch (Throwable e) { LOG.warn("Failed to update Table statistics in job: {}", info.toString(), e); } - logCreateAnalysisJob(job); } + logCreateAnalysisJob(job); analysisJobIdToTaskMap.remove(job.jobId); } } @@ -222,15 +219,15 @@ protected void runOneCycle() { private void clear() { clearMeta(analysisJobInfoMap, (a) -> - a.scheduleType.equals(ScheduleType.ONCE) - && System.currentTimeMillis() - a.lastExecTimeInMs - > TimeUnit.DAYS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS), + a.scheduleType.equals(ScheduleType.ONCE) + && System.currentTimeMillis() - a.lastExecTimeInMs + > TimeUnit.DAYS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS), (id) -> { - Env.getCurrentEnv().getEditLog().logDeleteAnalysisJob(new AnalyzeDeletionLog(id)); - return null; - }); + Env.getCurrentEnv().getEditLog().logDeleteAnalysisJob(new AnalyzeDeletionLog(id)); + return null; + }); clearMeta(analysisTaskInfoMap, (a) -> System.currentTimeMillis() - a.lastExecTimeInMs - > TimeUnit.DAYS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS), + > TimeUnit.DAYS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS), (id) -> { Env.getCurrentEnv().getEditLog().logDeleteAnalysisTask(new AnalyzeDeletionLog(id)); return null; @@ -238,7 +235,7 @@ private void clear() { } private void clearMeta(Map infoMap, Predicate isExpired, - Function writeLog) { + Function writeLog) { synchronized (infoMap) { List expired = new ArrayList<>(); for (Entry entry : infoMap.entrySet()) { @@ -274,9 +271,9 @@ public void createAnalysisJobs(AnalyzeDBStmt analyzeDBStmt, boolean proxy) throw } public List buildAnalysisInfosForDB(DatabaseIf db, AnalyzeProperties analyzeProperties) { + db.readLock(); List tbls = db.getTables(); List analysisInfos = new ArrayList<>(); - db.readLock(); try { List analyzeStmts = new ArrayList<>(); for (TableIf table : tbls) { @@ -288,7 +285,9 @@ public List buildAnalysisInfosForDB(DatabaseIf db, Analyz // columnNames null means to add all visitable columns. 
// Will get all the visible columns in analyzeTblStmt.check() AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(analyzeProperties, tableName, - null, db.getId(), table); + table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())).map( + Column::getName).collect( + Collectors.toList()), db.getId(), table); try { analyzeTblStmt.check(); } catch (AnalysisException analysisException) { @@ -321,7 +320,8 @@ public void createAnalysisJob(AnalyzeTblStmt stmt, boolean proxy) throws DdlExce } @Nullable - private AnalysisInfo buildAndAssignJob(AnalyzeTblStmt stmt) throws DdlException { + @VisibleForTesting + public AnalysisInfo buildAndAssignJob(AnalyzeTblStmt stmt) throws DdlException { if (!StatisticsUtil.statsTblAvailable() && !FeConstants.runningUnitTest) { throw new DdlException("Stats table not available, please make sure your cluster status is normal"); } @@ -334,47 +334,25 @@ private AnalysisInfo buildAndAssignJob(AnalyzeTblStmt stmt) throws DdlException boolean isSync = stmt.isSync(); Map analysisTaskInfos = new HashMap<>(); - createTaskForEachColumns(jobInfo, analysisTaskInfos, isSync); - createTaskForMVIdx(jobInfo, analysisTaskInfos, isSync); - if (stmt.isAllColumns()) { + if (stmt.isAllColumns() + && StatisticsUtil.isExternalTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName)) { createTaskForExternalTable(jobInfo, analysisTaskInfos, isSync); + } else { + createTaskForEachColumns(jobInfo, analysisTaskInfos, isSync); } - if (!isSync) { - persistAnalysisJob(jobInfo); - analysisJobIdToTaskMap.put(jobInfo.jobId, analysisTaskInfos); - try { - updateTableStats(jobInfo); - } catch (Throwable e) { - throw new DdlException("Failed to update Table statistics"); - } - } - if (isSync) { syncExecute(analysisTaskInfos.values()); + updateTableStats(jobInfo); return null; } - + persistAnalysisJob(jobInfo); + analysisJobIdToTaskMap.put(jobInfo.jobId, analysisTaskInfos); + // TODO: maybe we should update table stats only when all tasks succeed. 
+ updateTableStats(jobInfo); analysisTaskInfos.values().forEach(taskExecutor::submitTask); return jobInfo; } - // Analysis job created by the system - public void createSystemAnalysisJob(AnalysisInfo info, AnalysisTaskExecutor analysisTaskExecutor) - throws DdlException { - AnalysisInfo jobInfo = buildAnalysisJobInfo(info); - systemJobInfoMap.put(info.jobId, info); - if (jobInfo.colToPartitions.isEmpty()) { - // No statistics need to be collected or updated - return; - } - - Map analysisTaskInfos = new HashMap<>(); - createTaskForEachColumns(jobInfo, analysisTaskInfos, false); - createTaskForMVIdx(jobInfo, analysisTaskInfos, false); - analysisJobIdToTaskMap.put(jobInfo.jobId, analysisTaskInfos); - analysisTaskInfos.values().forEach(analysisTaskExecutor::submitTask); - } - private void sendJobId(List analysisInfos, boolean proxy) { List columns = new ArrayList<>(); columns.add(new Column("Catalog_Name", ScalarType.createVarchar(1024))); @@ -420,15 +398,15 @@ private void sendJobId(List analysisInfos, boolean proxy) { * TODO Supports incremental collection of statistics from materialized views */ private Map> validateAndGetPartitions(TableIf table, Set columnNames, - Set partitionNames, AnalysisType analysisType, - AnalysisMode analysisMode) throws DdlException { + Set partitionNames, AnalysisType analysisType, + AnalysisMode analysisMode) throws DdlException { long tableId = table.getId(); Map> columnToPartitions = columnNames.stream() - .collect(Collectors.toMap( + .collect(Collectors.toMap( columnName -> columnName, columnName -> new HashSet<>(partitionNames == null ? Collections.emptySet() : partitionNames) - )); + )); if (analysisType == AnalysisType.HISTOGRAM) { // Collecting histograms does not need to support incremental collection, @@ -486,8 +464,9 @@ private Map> validateAndGetPartitions(TableIf table, Set 0 ? 
numBuckets : StatisticConstants.HISTOGRAM_MAX_BUCKET_NUM; - taskInfoBuilder.setMaxBucketNum(maxBucketNum); + info.setMaxBucketNum(maxBucketNum); } long periodTimeInMs = stmt.getPeriodTimeInMs(); - taskInfoBuilder.setPeriodTimeInMs(periodTimeInMs); + info.setPeriodTimeInMs(periodTimeInMs); Map> colToPartitions = validateAndGetPartitions(table, columnNames, partitionNames, analysisType, analysisMode); - taskInfoBuilder.setColToPartitions(colToPartitions); - taskInfoBuilder.setTaskIds(Lists.newArrayList()); + info.setColToPartitions(colToPartitions); + info.setTaskIds(Lists.newArrayList()); - return taskInfoBuilder.build(); + return info.build(); } - private AnalysisInfo buildAnalysisJobInfo(AnalysisInfo jobInfo) { - AnalysisInfoBuilder taskInfoBuilder = new AnalysisInfoBuilder(); - taskInfoBuilder.setJobId(jobInfo.jobId); - taskInfoBuilder.setCatalogName(jobInfo.catalogName); - taskInfoBuilder.setDbName(jobInfo.dbName); - taskInfoBuilder.setTblName(jobInfo.tblName); - taskInfoBuilder.setJobType(JobType.SYSTEM); - taskInfoBuilder.setState(AnalysisState.PENDING); - taskInfoBuilder.setLastExecTimeInMs(System.currentTimeMillis()); - taskInfoBuilder.setAnalysisType(jobInfo.analysisType); - taskInfoBuilder.setAnalysisMode(jobInfo.analysisMode); - taskInfoBuilder.setAnalysisMethod(jobInfo.analysisMethod); - taskInfoBuilder.setScheduleType(jobInfo.scheduleType); - taskInfoBuilder.setSamplePercent(jobInfo.samplePercent); - taskInfoBuilder.setSampleRows(jobInfo.sampleRows); - taskInfoBuilder.setMaxBucketNum(jobInfo.maxBucketNum); - taskInfoBuilder.setPeriodTimeInMs(jobInfo.periodTimeInMs); - taskInfoBuilder.setLastExecTimeInMs(jobInfo.lastExecTimeInMs); - taskInfoBuilder.setColToPartitions(jobInfo.colToPartitions); - taskInfoBuilder.setTaskIds(new ArrayList<>()); - return taskInfoBuilder.build(); - } - - private void persistAnalysisJob(AnalysisInfo jobInfo) throws DdlException { + @VisibleForTesting + public void persistAnalysisJob(AnalysisInfo jobInfo) throws DdlException { if (jobInfo.scheduleType == ScheduleType.PERIOD && jobInfo.lastExecTimeInMs > 0) { return; } @@ -584,49 +541,8 @@ private void persistAnalysisJob(AnalysisInfo jobInfo) throws DdlException { logCreateAnalysisJob(analysisInfo); } - private void createTaskForMVIdx(AnalysisInfo jobInfo, Map analysisTasks, - boolean isSync) throws DdlException { - TableIf table; - try { - table = StatisticsUtil.findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName); - } catch (Throwable e) { - LOG.warn(e.getMessage()); - return; - } - - TableType type = table.getType(); - if (jobInfo.analysisType != AnalysisType.INDEX || !type.equals(TableType.OLAP)) { - // not need to collect statistics for materialized view - return; - } - - OlapTable olapTable = (OlapTable) table; - - try { - olapTable.readLock(); - for (MaterializedIndexMeta meta : olapTable.getIndexIdToMeta().values()) { - if (meta.getDefineStmt() == null) { - continue; - } - long indexId = meta.getIndexId(); - long taskId = Env.getCurrentEnv().getNextId(); - AnalysisInfoBuilder indexTaskInfoBuilder = new AnalysisInfoBuilder(jobInfo); - AnalysisInfo analysisInfo = indexTaskInfoBuilder.setIndexId(indexId) - .setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build(); - jobInfo.addTaskId(taskId); - if (isSync) { - return; - } - analysisTasks.put(taskId, createTask(analysisInfo)); - logCreateAnalysisTask(analysisInfo); - } - } finally { - olapTable.readUnlock(); - } - } - - private void createTaskForEachColumns(AnalysisInfo jobInfo, Map analysisTasks, - boolean isSync) throws 
DdlException { + public void createTaskForEachColumns(AnalysisInfo jobInfo, Map analysisTasks, + boolean isSync) throws DdlException { Map> columnToPartitions = jobInfo.colToPartitions; for (Entry> entry : columnToPartitions.entrySet()) { long indexId = -1; @@ -666,17 +582,12 @@ public void logCreateAnalysisJob(AnalysisInfo analysisJob) { Env.getCurrentEnv().getEditLog().logCreateAnalysisJob(analysisJob); } - private void createTaskForExternalTable(AnalysisInfo jobInfo, - Map analysisTasks, - boolean isSync) throws DdlException { - TableIf table; - try { - table = StatisticsUtil.findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName); - } catch (Throwable e) { - LOG.warn(e.getMessage()); - return; - } - if (jobInfo.analysisType == AnalysisType.HISTOGRAM || table.getType() != TableType.HMS_EXTERNAL_TABLE) { + @VisibleForTesting + public void createTaskForExternalTable(AnalysisInfo jobInfo, + Map analysisTasks, + boolean isSync) throws DdlException { + + if (jobInfo.analysisType == AnalysisType.HISTOGRAM) { return; } AnalysisInfoBuilder colTaskInfoBuilder = new AnalysisInfoBuilder(jobInfo); @@ -701,57 +612,24 @@ public void updateTaskStatus(AnalysisInfo info, AnalysisState taskState, String updaters[info.jobType.ordinal()].apply(taskStatusWrapper); } - private void updateTableStats(AnalysisInfo jobInfo) throws Throwable { - Map params = buildTableStatsParams(jobInfo); + @VisibleForTesting + public void updateTableStats(AnalysisInfo jobInfo) { TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName); - - // update olap table stats - if (tbl.getType() == TableType.OLAP) { - OlapTable table = (OlapTable) tbl; - updateOlapTableStats(table, params); - } - - // External Table doesn't collect table stats here. - // We create task for external table to collect table/partition level statistics. - } - - @SuppressWarnings("rawtypes") - private Map buildTableStatsParams(AnalysisInfo jobInfo) throws Throwable { - CatalogIf catalog = StatisticsUtil.findCatalog(jobInfo.catalogName); - DatabaseIf db = StatisticsUtil.findDatabase(jobInfo.catalogName, jobInfo.dbName); - TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName); - String indexId = String.valueOf(jobInfo.indexId); - String id = StatisticsUtil.constructId(tbl.getId(), indexId); - Map commonParams = new HashMap<>(); - commonParams.put("id", id); - commonParams.put("catalogId", String.valueOf(catalog.getId())); - commonParams.put("dbId", String.valueOf(db.getId())); - commonParams.put("tblId", String.valueOf(tbl.getId())); - commonParams.put("indexId", indexId); - commonParams.put("lastAnalyzeTimeInMs", String.valueOf(System.currentTimeMillis())); - return commonParams; - } - - private void updateOlapTableStats(OlapTable table, Map params) throws Throwable { - - HashMap tblParams = Maps.newHashMap(params); - long rowCount = table.getRowCount(); - tblParams.put("partId", "NULL"); - tblParams.put("rowCount", String.valueOf(rowCount)); - StatisticsRepository.persistTableStats(tblParams); + // TODO: set updatedRows to 0 when loadedRows of transaction info is ready. 
+ updateTableStatsStatus(new TableStats(tbl.getId(), tbl.estimatedRowCount(), jobInfo)); } public List showAnalysisJob(ShowAnalyzeStmt stmt) { String state = stmt.getStateValue(); TableName tblName = stmt.getDbTableName(); return analysisJobInfoMap.values().stream() - .filter(a -> stmt.getJobId() == 0 || a.jobId == stmt.getJobId()) - .filter(a -> state == null || a.state.equals(AnalysisState.valueOf(state))) - .filter(a -> tblName == null || a.catalogName.equals(tblName.getCtl()) - && a.dbName.equals(tblName.getDb()) && a.tblName.equals(tblName.getTbl())) - .sorted(Comparator.comparingLong(a -> a.jobId)) - .collect(Collectors.toList()); + .filter(a -> stmt.getJobId() == 0 || a.jobId == stmt.getJobId()) + .filter(a -> state == null || a.state.equals(AnalysisState.valueOf(state))) + .filter(a -> tblName == null || a.catalogName.equals(tblName.getCtl()) + && a.dbName.equals(tblName.getDb()) && a.tblName.equals(tblName.getTbl())) + .sorted(Comparator.comparingLong(a -> a.jobId)) + .collect(Collectors.toList()); } public String getJobProgress(long jobId) { @@ -779,7 +657,8 @@ public String getJobProgress(long jobId) { return String.format("%d Finished/%d Failed/%d In Progress/%d Total", finished, failed, inProgress, total); } - private void syncExecute(Collection tasks) { + @VisibleForTesting + public void syncExecute(Collection tasks) { SyncTaskCollection syncTaskCollection = new SyncTaskCollection(tasks); ConnectContext ctx = ConnectContext.get(); try { @@ -794,12 +673,12 @@ private void syncExecute(Collection tasks) { private ThreadPoolExecutor createThreadPoolForSyncAnalyze() { String poolName = "SYNC ANALYZE THREAD POOL"; return new ThreadPoolExecutor(0, - ConnectContext.get().getSessionVariable().parallelSyncAnalyzeTaskNum, - 0, TimeUnit.SECONDS, + ConnectContext.get().getSessionVariable().parallelSyncAnalyzeTaskNum, + 0, TimeUnit.SECONDS, new SynchronousQueue(), new ThreadFactoryBuilder().setDaemon(true).setNameFormat("SYNC ANALYZE" + "-%d") - .build(), new BlockedPolicy(poolName, - (int) TimeUnit.HOURS.toSeconds(Config.analyze_task_timeout_in_hours))); + .build(), new BlockedPolicy(poolName, + (int) TimeUnit.HOURS.toSeconds(Config.analyze_task_timeout_in_hours))); } public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException { @@ -813,11 +692,6 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException { for (String col : cols) { Env.getCurrentEnv().getStatisticsCache().invalidate(tblId, -1L, col); } - if (dropStatsStmt.dropTableRowCount()) { - StatisticsRepository.dropExternalTableStatistics(tblId); - // Table cache key doesn't care about catalog id and db id, because the table id is globally unique. 
- Env.getCurrentEnv().getStatisticsCache().invalidateTableStats(-1, -1, tblId); - } } public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) throws DdlException { @@ -852,7 +726,7 @@ private void checkPriv(AnalysisInfo analysisInfo) { if (!Env.getCurrentEnv().getAccessManager() .checkTblPriv(ConnectContext.get(), analysisInfo.dbName, analysisInfo.tblName, PrivPredicate.SELECT)) { throw new RuntimeException("You need at least SELECT PRIV to corresponding table to kill this analyze" - + " job"); + + " job"); } } @@ -919,11 +793,9 @@ public void execute(ThreadPoolExecutor executor) { } try { task.execute(); - updateSyncTaskStatus(task, AnalysisState.FINISHED); } catch (Throwable t) { colNames.add(task.info.colName); errorMessages.add(Util.getRootCauseMessage(t)); - updateSyncTaskStatus(task, AnalysisState.FAILED); LOG.warn("Failed to analyze, info: {}", task, t); } } finally { @@ -938,27 +810,9 @@ public void execute(ThreadPoolExecutor executor) { } if (!colNames.isEmpty()) { throw new RuntimeException("Failed to analyze following columns:[" + String.join(",", colNames) - + "] Reasons: " + String.join(",", errorMessages)); + + "] Reasons: " + String.join(",", errorMessages)); } } - - private void updateSyncTaskStatus(BaseAnalysisTask task, AnalysisState state) { - Env.getCurrentEnv().getAnalysisManager() - .updateTaskStatus(task.info, state, "", System.currentTimeMillis()); - } - } - - public List findAutomaticAnalysisJobs() { - synchronized (analysisJobInfoMap) { - return analysisJobInfoMap.values().stream() - .filter(a -> - a.scheduleType.equals(ScheduleType.AUTOMATIC) - && (!(a.state.equals(AnalysisState.RUNNING) - || a.state.equals(AnalysisState.PENDING))) - && System.currentTimeMillis() - a.lastExecTimeInMs - > TimeUnit.MINUTES.toMillis(Config.auto_check_statistics_in_minutes)) - .collect(Collectors.toList()); - } } public List findPeriodicJobs() { @@ -969,13 +823,13 @@ public List findPeriodicJobs() { } if (a.cronExpression == null) { return a.scheduleType.equals(ScheduleType.PERIOD) - && System.currentTimeMillis() - a.lastExecTimeInMs > a.periodTimeInMs; + && System.currentTimeMillis() - a.lastExecTimeInMs > a.periodTimeInMs; } return a.cronExpression.getTimeAfter(new Date(a.lastExecTimeInMs)).before(new Date()); }; return analysisJobInfoMap.values().stream() - .filter(p) - .collect(Collectors.toList()); + .filter(p) + .collect(Collectors.toList()); } } @@ -1014,12 +868,13 @@ public void dropAnalyzeJob(DropAnalyzeJobStmt analyzeJobStmt) throws DdlExceptio public static AnalysisManager readFields(DataInput in) throws IOException { AnalysisManager analysisManager = new AnalysisManager(); - doRead(in, analysisManager.analysisJobInfoMap, true); - doRead(in, analysisManager.analysisTaskInfoMap, false); + readAnalysisInfo(in, analysisManager.analysisJobInfoMap, true); + readAnalysisInfo(in, analysisManager.analysisTaskInfoMap, false); + readIdToTblStats(in, analysisManager.idToTblStatsStatus); return analysisManager; } - private static void doRead(DataInput in, Map map, boolean job) throws IOException { + private static void readAnalysisInfo(DataInput in, Map map, boolean job) throws IOException { int size = in.readInt(); for (int i = 0; i < size; i++) { AnalysisInfo analysisInfo = AnalysisInfo.read(in); @@ -1027,21 +882,67 @@ private static void doRead(DataInput in, Map map, boolean jo } } + private static void readIdToTblStats(DataInput in, Map map) throws IOException { + int size = in.readInt(); + for (int i = 0; i < size; i++) { + TableStats tableStats = 
TableStats.read(in); + map.put(tableStats.tblId, tableStats); + } + } + @Override public void write(DataOutput out) throws IOException { - doWrite(out, analysisJobInfoMap); - doWrite(out, analysisTaskInfoMap); + writeJobInfo(out, analysisJobInfoMap); + writeJobInfo(out, analysisTaskInfoMap); + writeTableStats(out); } - private void doWrite(DataOutput out, Map infoMap) throws IOException { + private void writeJobInfo(DataOutput out, Map infoMap) throws IOException { out.writeInt(infoMap.size()); for (Entry entry : infoMap.entrySet()) { entry.getValue().write(out); } } + private void writeTableStats(DataOutput out) throws IOException { + out.writeInt(idToTblStatsStatus.size()); + for (Entry entry : idToTblStatsStatus.entrySet()) { + entry.getValue().write(out); + } + } + // For unit test use only. public void addToJobIdTasksMap(long jobId, Map tasks) { analysisJobIdToTaskMap.put(jobId, tasks); } + + public TableStats findTableStatsStatus(long tblId) { + return idToTblStatsStatus.get(tblId); + } + + // Invoke this when a load transaction finishes. + public void updateUpdatedRows(long tblId, long rows) { + TableStats statsStatus = idToTblStatsStatus.get(tblId); + if (statsStatus != null) { + statsStatus.updatedRows.addAndGet(rows); + } + } + + public void updateTableStatsStatus(TableStats tableStats) { + replayUpdateTableStatsStatus(tableStats); + logCreateTableStats(tableStats); + } + + public void replayUpdateTableStatsStatus(TableStats tableStats) { + idToTblStatsStatus.put(tableStats.tblId, tableStats); + } + + public void logCreateTableStats(TableStats tableStats) { + Env.getCurrentEnv().getEditLog().logCreateTableStats(tableStats); + } + + public void registerSysJob(AnalysisInfo jobInfo, Map taskInfos) { + systemJobInfoMap.put(jobInfo.jobId, jobInfo); + analysisJobIdToTaskMap.put(jobInfo.jobId, taskInfos); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index 6639d2a9340be5..385d2e10f85623 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -22,6 +22,8 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.TableIf; import org.apache.doris.datasource.CatalogIf; +import org.apache.doris.qe.QueryState; +import org.apache.doris.qe.QueryState.MysqlStateType; import org.apache.doris.qe.StmtExecutor; import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; @@ -64,7 +66,7 @@ public abstract class BaseAnalysisTask { protected static final String INSERT_COL_STATISTICS = "INSERT INTO " + "${internalDB}.${columnStatTbl}" + " SELECT id, catalog_id, db_id, tbl_id, idx_id, col_id, part_id, row_count, " - + " ndv, null_count, min, max, data_size, update_time\n" + + " ndv, null_count, CAST(min AS string), CAST(max AS string), data_size, update_time\n" + " FROM \n" + " (SELECT CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, " + " ${catalogId} AS catalog_id, " @@ -226,4 +228,17 @@ public String toString() { info.jobId, info.taskId, catalog.getName(), db.getFullName(), tbl.getName(), col == null ? 
"TableRowCount" : col.getName()); } + + protected void executeWithExceptionOnFail(StmtExecutor stmtExecutor) throws Exception { + if (killed) { + return; + } + stmtExecutor.execute(); + QueryState queryState = stmtExecutor.getContext().getState(); + if (queryState.getStateType().equals(MysqlStateType.ERR)) { + throw new RuntimeException(String.format("Failed to analyze %s.%s.%s, error: %s sql: %s", + info.catalogName, info.dbName, info.colName, stmtExecutor.getOriginStmt().toString(), + queryState.getErrorMessage())); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java new file mode 100644 index 00000000000000..a14e32d726ba20 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.statistics.util.StatisticsUtil; + +import java.util.StringJoiner; + +/** + * Used to convert data from ResultRow. 
+ * 0: id + * 1: catalog_id + * 2: db_id + * 3: tbl_id + * 4: idx_id + * 5: col_id + * 6: part_id + * 7: count + * 8: ndv + * 9: null_count + * 10: min + * 11: max + * 12: data_size_in_bytes + * 13: update_time + */ +public class ColStatsData { + public final StatsId statsId; + public final long count; + public final long ndv; + + public final long nullCount; + + public final String minLit; + public final String maxLit; + + public final long dataSizeInBytes; + + public final String updateTime; + + public ColStatsData(ResultRow row) { + this.statsId = new StatsId(row); + this.count = Long.parseLong(row.get(7)); + this.ndv = Long.parseLong(row.getWithDefault(8, "0")); + this.nullCount = Long.parseLong(row.getWithDefault(9, "0")); + this.minLit = row.get(10); + this.maxLit = row.get(11); + this.dataSizeInBytes = Long.parseLong(row.getWithDefault(12, "0")); + this.updateTime = row.get(13); + } + + public String toSQL(boolean roundByParentheses) { + StringJoiner sj = null; + if (roundByParentheses) { + sj = new StringJoiner(",", "(" + statsId.toSQL() + ",", ")"); + } else { + sj = new StringJoiner(",", statsId.toSQL(), ""); + } + sj.add(String.valueOf(count)); + sj.add(String.valueOf(ndv)); + sj.add(String.valueOf(nullCount)); + sj.add(StatisticsUtil.quote(minLit)); + sj.add(StatisticsUtil.quote(maxLit)); + sj.add(String.valueOf(dataSizeInBytes)); + sj.add(StatisticsUtil.quote(updateTime)); + return sj.toString(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index 735b5f640b880c..44b048baa258ce 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -21,7 +21,6 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.Type; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.base.Preconditions; @@ -135,7 +134,7 @@ public static ColumnStatistic fromResultRow(List resultRows) { ColumnStatistic columnStatistic = null; try { for (ResultRow resultRow : resultRows) { - String partId = resultRow.getColumnValue("part_id"); + String partId = resultRow.get(6); if (partId == null) { columnStatistic = fromResultRow(resultRow); } else { @@ -152,34 +151,34 @@ public static ColumnStatistic fromResultRow(List resultRows) { } // TODO: use thrift - public static ColumnStatistic fromResultRow(ResultRow resultRow) { + public static ColumnStatistic fromResultRow(ResultRow row) { try { ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(); - double count = Double.parseDouble(resultRow.getColumnValueWithDefault("count", "0")); + double count = Double.parseDouble(row.get(7)); columnStatisticBuilder.setCount(count); - double ndv = Double.parseDouble(resultRow.getColumnValueWithDefault("ndv", "0")); + double ndv = Double.parseDouble(row.getWithDefault(8, "0")); columnStatisticBuilder.setNdv(ndv); - String nullCount = resultRow.getColumnValueWithDefault("null_count", "0"); + String nullCount = row.getWithDefault(9, "0"); columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount)); columnStatisticBuilder.setDataSize(Double - .parseDouble(resultRow.getColumnValueWithDefault("data_size_in_bytes", "0"))); + .parseDouble(row.getWithDefault(12, "0"))); 
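As an aside, the positional contract that ColStatsData and ColumnStatistic.fromResultRow share can be exercised on its own. A minimal sketch, in the same package as the ResultRow class this patch adds; every value below is invented, and the id follows the CONCAT(tblId, '-', idxId, '-', colId) format used by INSERT_COL_STATISTICS:

    package org.apache.doris.statistics;

    import java.util.Arrays;

    // Builds one fake row of __internal_schema.column_statistics and reads it
    // back positionally, mirroring ColStatsData(ResultRow) above.
    public class ResultRowLayoutDemo {
        public static void main(String[] args) {
            ResultRow row = new ResultRow(Arrays.asList(
                    "10001--1-c1",           // 0: id
                    "0",                     // 1: catalog_id
                    "10002",                 // 2: db_id
                    "10001",                 // 3: tbl_id
                    "-1",                    // 4: idx_id
                    "c1",                    // 5: col_id
                    null,                    // 6: part_id, NULL for table-level stats
                    "1000",                  // 7: count
                    null,                    // 8: ndv, NULL when the table is empty
                    "0",                     // 9: null_count
                    "1",                     // 10: min
                    "999",                   // 11: max
                    "8000",                  // 12: data_size_in_bytes
                    "2023-08-11 15:32:35")); // 13: update_time
            long count = Long.parseLong(row.get(7));
            // getWithDefault shields the NULL ndv, exactly as fromResultRow does.
            long ndv = Long.parseLong(row.getWithDefault(8, "0"));
            System.out.println(count + " rows, ndv=" + ndv);
        }
    }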
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0 ? 0 : columnStatisticBuilder.getDataSize() / columnStatisticBuilder.getCount()); - long catalogId = Long.parseLong(resultRow.getColumnValue("catalog_id")); - long idxId = Long.parseLong(resultRow.getColumnValue("idx_id")); - long dbID = Long.parseLong(resultRow.getColumnValue("db_id")); - long tblId = Long.parseLong(resultRow.getColumnValue("tbl_id")); - String colName = resultRow.getColumnValue("col_id"); + long catalogId = Long.parseLong(row.get(1)); + long idxId = Long.parseLong(row.get(4)); + long dbID = Long.parseLong(row.get(2)); + long tblId = Long.parseLong(row.get(3)); + String colName = row.get(5); Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName); if (col == null) { LOG.warn("Failed to deserialize column statistics, ctlId: {} dbId: {}" - + "tblId: {} column: {} not exists", + + "tblId: {} column: {} not exists", catalogId, dbID, tblId, colName); return ColumnStatistic.UNKNOWN; } - String min = resultRow.getColumnValue("min"); - String max = resultRow.getColumnValue("max"); + String min = row.get(10); + String max = row.get(11); if (min != null && !min.equalsIgnoreCase("NULL")) { columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min)); columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min)); @@ -192,7 +191,7 @@ public static ColumnStatistic fromResultRow(ResultRow resultRow) { } else { columnStatisticBuilder.setMaxValue(Double.MAX_VALUE); } - columnStatisticBuilder.setUpdatedTime(resultRow.getColumnValue("update_time")); + columnStatisticBuilder.setUpdatedTime(row.get(13)); return columnStatisticBuilder.build(); } catch (Exception e) { LOG.warn("Failed to deserialize column statistics, column not exists", e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java index a44ba7867ca722..281a0e8250206f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java @@ -21,7 +21,6 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.common.ThreadPoolManager; import org.apache.doris.qe.InternalQueryExecutionException; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.logging.log4j.LogManager; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java index d569cd79bd4aa0..e741ee1d2caca8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java @@ -24,7 +24,6 @@ import org.apache.doris.qe.AutoCloseConnectContext; import org.apache.doris.qe.QueryState; import org.apache.doris.qe.StmtExecutor; -import org.apache.doris.statistics.util.InternalQueryResult; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.commons.lang3.StringUtils; @@ -117,39 +116,14 @@ public void doExecute() throws Exception { * Get table row count and insert the result to __internal_schema.table_statistics */ private void getTableStats() throws Exception { - // Get table level information. 
An example sql for table stats: - // INSERT INTO __internal_schema.table_statistics VALUES - // ('13055', 13002, 13038, 13055, -1, 'NULL', 5, 1686111064658, NOW()) - Map parameters = table.getRemoteTable().getParameters(); - if (isPartitionOnly) { - for (String partId : partitionNames) { - StringBuilder sb = new StringBuilder(); - sb.append(ANALYZE_SQL_PARTITION_TEMPLATE); - sb.append(" where "); - String[] splits = partId.split("/"); - for (int i = 0; i < splits.length; i++) { - String value = splits[i].split("=")[1]; - splits[i] = splits[i].replace(value, "\'" + value + "\'"); - } - sb.append(StringUtils.join(splits, " and ")); - Map params = buildTableStatsParams(partId); - setParameterData(parameters, params); - List columnResult = - StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) - .replace(sb.toString())); - String rowCount = columnResult.get(0).getColumnValue("rowCount"); - params.put("rowCount", rowCount); - StatisticsRepository.persistTableStats(params); - } - } else { - Map params = buildTableStatsParams(null); - List columnResult = - StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) - .replace(ANALYZE_TABLE_COUNT_TEMPLATE)); - String rowCount = columnResult.get(0).getColumnValue("rowCount"); - params.put("rowCount", rowCount); - StatisticsRepository.persistTableStats(params); - } + Map params = buildTableStatsParams(null); + List columnResult = + StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) + .replace(ANALYZE_TABLE_COUNT_TEMPLATE)); + String rowCount = columnResult.get(0).get(0); + Env.getCurrentEnv().getAnalysisManager() + .updateTableStatsStatus( + new TableStats(table.getId(), Long.parseLong(rowCount), null)); } /** @@ -295,10 +269,6 @@ private void setParameterData(Map parameters, Map public static Histogram fromResultRow(ResultRow resultRow) { try { HistogramBuilder histogramBuilder = new HistogramBuilder(); - - long catalogId = Long.parseLong(resultRow.getColumnValue("catalog_id")); - long idxId = Long.parseLong(resultRow.getColumnValue("idx_id")); - long dbId = Long.parseLong(resultRow.getColumnValue("db_id")); - long tblId = Long.parseLong(resultRow.getColumnValue("tbl_id")); - - String colName = resultRow.getColumnValue("col_id"); + HistData histData = new HistData(resultRow); + long catalogId = histData.statsId.catalogId; + long idxId = histData.statsId.idxId; + long dbId = histData.statsId.dbId; + long tblId = histData.statsId.tblId; + String colName = histData.statsId.colId; Column col = StatisticsUtil.findColumn(catalogId, dbId, tblId, idxId, colName); if (col == null) { LOG.warn("Failed to deserialize histogram statistics, ctlId: {} dbId: {}" @@ -79,10 +77,10 @@ public static Histogram fromResultRow(ResultRow resultRow) { Type dataType = col.getType(); histogramBuilder.setDataType(dataType); - double sampleRate = Double.parseDouble(resultRow.getColumnValue("sample_rate")); + double sampleRate = histData.sampleRate; histogramBuilder.setSampleRate(sampleRate); - String json = resultRow.getColumnValue("buckets"); + String json = histData.buckets; JsonObject jsonObj = JsonParser.parseString(json).getAsJsonObject(); int bucketNum = jsonObj.get("num_buckets").getAsInt(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java index 0e0752409231d6..d9928f2a639261 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java
@@ -18,7 +18,6 @@
 package org.apache.doris.statistics;
 
 import org.apache.doris.common.FeConstants;
-import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
 import org.apache.commons.collections.CollectionUtils;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisJob.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisJob.java
new file mode 100644
index 00000000000000..877a4f5bd09364
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisJob.java
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import java.util.List;
+
+public class OlapAnalysisJob {
+
+    private List columns;
+
+    private static String collectPartitionStatsSQLTemplate =
+            " SELECT "
+            + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}', '-', ${partId}) AS id, "
+            + "${catalogId} AS catalog_id, "
+            + "${dbId} AS db_id, "
+            + "${tblId} AS tbl_id, "
+            + "${idxId} AS idx_id, "
+            + "'${colId}' AS col_id, "
+            + "${partId} AS part_id, "
+            + "COUNT(1) AS row_count, "
+            + "NDV(`${colName}`) AS ndv, "
+            + "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) AS null_count, "
+            + "MIN(`${colName}`) AS min, "
+            + "MAX(`${colName}`) AS max, "
+            + "${dataSizeFunction} AS data_size, "
+            + "NOW() ";
+
+    protected void beforeExecution() {
+    }
+
+    public void execute() {
+    }
+
+    protected void afterExecution() {
+    }
+
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 71b11915650a88..257a104254d081 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -26,6 +26,7 @@
 import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
 import org.apache.commons.text.StringSubstitutor;
 
 import java.util.ArrayList;
@@ -33,22 +34,40 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.StringJoiner;
+import java.util.stream.Collectors;
 
 /**
  * Each task analyze one column.
  */
 public class OlapAnalysisTask extends BaseAnalysisTask {
 
-    private static final String ANALYZE_PARTITION_SQL_TEMPLATE = INSERT_PART_STATISTICS
-            + "FROM `${dbName}`.`${tblName}` "
-            + "PARTITION ${partName} ${sampleExpr}";
-
     // TODO Currently, NDV is computed for the full table; in fact,
     // NDV should only be computed for the relevant partition.
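The TODO above is subtle enough to deserve a demonstration: unlike row_count or null_count, NDV is not additive across partitions, so per-partition results cannot simply be summed into a table-level value, which is why the column template below recomputes NDV over the whole table in its t2 subquery. A self-contained sketch with invented data:

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // A value shared by two partitions is counted once in the true NDV
    // but twice when per-partition NDVs are summed.
    public class NdvNotAdditiveDemo {
        public static void main(String[] args) {
            List<Integer> part1 = Arrays.asList(1, 2, 3);
            List<Integer> part2 = Arrays.asList(3, 4); // value 3 also lives in part1
            int summed = new HashSet<>(part1).size() + new HashSet<>(part2).size();
            Set<Integer> whole = new HashSet<>(part1);
            whole.addAll(part2);
            System.out.println("sum of partition NDVs = " + summed);       // 5, overcounts
            System.out.println("table NDV             = " + whole.size()); // 4
        }
    }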
private static final String ANALYZE_COLUMN_SQL_TEMPLATE = INSERT_COL_STATISTICS + " (SELECT NDV(`${colName}`) AS ndv " + " FROM `${dbName}`.`${tblName}` ${sampleExpr}) t2\n"; + private static final String collectPartitionStatsSQLTemplate = + " SELECT " + + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}', '-', ${partId}) AS id, " + + "${catalogId} AS catalog_id, " + + "${dbId} AS db_id, " + + "${tblId} AS tbl_id, " + + "${idxId} AS idx_id, " + + "'${colId}' AS col_id, " + + "${partId} AS part_id, " + + "COUNT(1) AS row_count, " + + "NDV(`${colName}`) AS ndv, " + + "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) AS null_count, " + + "MIN(`${colName}`) AS min, " + + "MAX(`${colName}`) AS max, " + + "${dataSizeFunction} AS data_size, " + + "NOW() FROM `${dbName}`.`${tblName}` PARTITION ${partitionName}"; + + // cache stats for each partition, it would be inserted into column_statistics in a batch. + private final List> buf = new ArrayList<>(); + public OlapAnalysisTask(AnalysisInfo info) { super(info); } @@ -67,53 +86,121 @@ public void doExecute() throws Exception { params.put("colName", String.valueOf(info.colName)); params.put("tblName", String.valueOf(info.tblName)); params.put("sampleExpr", getSampleExpression()); - List sqls = new ArrayList<>(); + List partitionAnalysisSQLs = new ArrayList<>(); try { tbl.readLock(); - Set partNames = info.colToPartitions.get(info.colName); - for (String partName : partNames) { - Partition part = tbl.getPartition(partName); + Set partitionNames = info.colToPartitions.get(info.colName); + for (String partitionName : partitionNames) { + Partition part = tbl.getPartition(partitionName); if (part == null) { continue; } - params.put("partId", String.valueOf(tbl.getPartition(partName).getId())); + params.put("partId", String.valueOf(tbl.getPartition(partitionName).getId())); // Avoid error when get the default partition - params.put("partName", "`" + partName + "`"); + params.put("partitionName", "`" + partitionName + "`"); StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - sqls.add(stringSubstitutor.replace(ANALYZE_PARTITION_SQL_TEMPLATE)); + partitionAnalysisSQLs.add(stringSubstitutor.replace(collectPartitionStatsSQLTemplate)); } } finally { tbl.readUnlock(); } - params.remove("partId"); - params.put("type", col.getType().toString()); - StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - String sql = stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE); - sqls.add(sql); - execSQLs(sqls); + execSQLs(partitionAnalysisSQLs, params); } @VisibleForTesting - public void execSQLs(List sqls) throws Exception { - long startTime = System.currentTimeMillis(); + public void execSQLs(List partitionAnalysisSQLs, Map params) throws Exception { try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) { - r.connectContext.getSessionVariable().disableNereidsPlannerOnce(); - for (String sql : sqls) { + List> sqlGroups = Lists.partition(partitionAnalysisSQLs, StatisticConstants.UNION_ALL_LIMIT); + for (List group : sqlGroups) { if (killed) { return; } - LOG.info("ANALYZE SQL : " + sql + " start at " + startTime); - stmtExecutor = new StmtExecutor(r.connectContext, sql); - r.connectContext.setExecutor(stmtExecutor); - stmtExecutor.execute(); + StringJoiner partitionCollectSQL = new StringJoiner("UNION ALL"); + group.forEach(partitionCollectSQL::add); + stmtExecutor = new StmtExecutor(r.connectContext, partitionCollectSQL.toString()); + buf.add(stmtExecutor.executeInternalQuery() + 
.stream().map(ColStatsData::new).collect(Collectors.toList()));
                 QueryState queryState = r.connectContext.getState();
                 if (queryState.getStateType().equals(MysqlStateType.ERR)) {
                     throw new RuntimeException(String.format("Failed to analyze %s.%s.%s, error: %s sql: %s",
-                            info.catalogName, info.dbName, info.colName, sql, queryState.getErrorMessage()));
+                            info.catalogName, info.dbName, info.colName, queryState.getErrorMessage(),
+                            partitionCollectSQL));
                 }
             }
-        } finally {
-            LOG.debug("Analyze SQL: " + sqls + " cost time: " + (System.currentTimeMillis() - startTime) + "ms");
+            if (buf.size() > 1) {
+                for (List<ColStatsData> colStatsDataList : buf) {
+                    StringBuilder batchInsertSQL =
+                            new StringBuilder("INSERT INTO __internal_schema.column_statistics VALUES ");
+                    StringJoiner sj = new StringJoiner(",");
+                    colStatsDataList.forEach(c -> sj.add(c.toSQL(true)));
+                    batchInsertSQL.append(sj.toString());
+                    stmtExecutor = new StmtExecutor(r.connectContext, batchInsertSQL.toString());
+                    executeWithExceptionOnFail(stmtExecutor);
+                }
+                params.put("type", col.getType().toString());
+                StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+                String sql = stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE);
+                stmtExecutor = new StmtExecutor(r.connectContext, sql);
+                executeWithExceptionOnFail(stmtExecutor);
+            } else {
+                List<ColStatsData> colStatsDataList = buf.get(0);
+                String batchInsertSQLTemplate = "INSERT INTO __internal_schema.column_statistics "
+                        + "SELECT id, catalog_id, db_id, tbl_id, idx_id, col_id, part_id, row_count,"
+                        + "ndv, null_count, CAST(min AS string), CAST(max AS string), data_size, update_time FROM ("
+                        + "SELECT CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, "
+                        + "         ${catalogId} AS catalog_id, "
+                        + "         ${dbId} AS db_id, "
+                        + "         ${tblId} AS tbl_id, "
+                        + "         ${idxId} AS idx_id, "
+                        + "         '${colId}' AS col_id, "
+                        + "         NULL AS part_id, "
+                        + "         SUM(count) AS row_count,"
+                        + "         SUM(null_count) AS null_count, "
+                        + "         MIN(CAST (min AS ${type})) AS min, "
+                        + "         MAX(CAST (max AS ${type})) AS max, "
+                        + "         SUM(data_size_in_bytes) AS data_size, "
+                        + "         NOW() AS update_time"
+                        + " FROM (${partitionStatsView}) psv) t1, "
+                        + " (SELECT NDV(`${colName}`) AS ndv "
+                        + " FROM `${dbName}`.`${tblName}` ${sampleExpr}) t2 UNION ALL ${partitionStatsView}";
+                StringJoiner sj = new StringJoiner(" UNION ALL ");
+                String selectPartitionTemplate =
+                        "SELECT %s AS id,"
+                        + "%s AS catalog_id,"
+                        + "%s AS db_id,"
+                        + "%s AS tbl_id,"
+                        + "%s AS idx_id,"
+                        + "%s AS col_id,"
+                        + "%s AS part_id,"
+                        + "%s AS count,"
+                        + "%s AS ndv,"
+                        + "%s AS null_count,"
+                        + "%s as min,"
+                        + "%s as max,"
+                        + "%s as data_size_in_bytes,"
+                        + "%s AS update_time";
+                colStatsDataList.forEach(c -> sj.add(String.format(selectPartitionTemplate,
+                        StatisticsUtil.quote(c.statsId.id),
+                        c.statsId.catalogId,
+                        c.statsId.dbId,
+                        c.statsId.tblId,
+                        c.statsId.idxId,
+                        StatisticsUtil.quote(c.statsId.colId),
+                        c.statsId.partId,
+                        c.count,
+                        c.ndv,
+                        c.nullCount,
+                        c.minLit == null ? null : StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.minLit)),
+                        c.maxLit == null ?
null : StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.maxLit)),
+                        c.dataSizeInBytes,
+                        StatisticsUtil.quote(c.updateTime))));
+                params.put("partitionStatsView", sj.toString());
+                params.put("type", col.getType().toString());
+                StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+                String insertSQL = stringSubstitutor.replace(batchInsertSQLTemplate);
+                stmtExecutor = new StmtExecutor(r.connectContext, insertSQL);
+                executeWithExceptionOnFail(stmtExecutor);
+            }
         }
     }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ResultRow.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ResultRow.java
new file mode 100644
index 00000000000000..9945175a228a93
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ResultRow.java
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import com.google.gson.annotations.SerializedName;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.StringJoiner;
+
+public class ResultRow {
+    @SerializedName("values")
+    private final List<String> values;
+
+    public ResultRow(List<String> values) {
+        this.values = values;
+    }
+
+    public List<String> getValues() {
+        return values != null ? values : Collections.emptyList();
+    }
+
+    @Override
+    public String toString() {
+        StringJoiner sj = new StringJoiner(",", "ResultRow:{", "}");
+        for (String val : values) {
+            sj.add(val);
+        }
+        return sj.toString();
+    }
+
+    public String get(int idx) {
+        return values.get(idx);
+    }
+
+    /**
+     * When an empty table is analyzed, some stats can be NULL; return a default
+     * value instead, to avoid an NPE while deserializing.
+     */
+    public String getWithDefault(int idx, String defaultVal) {
+        String val = values.get(idx);
+        return val == null ? defaultVal : val;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index 1612d618cb824e..ce512c1952bf40 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -26,7 +26,6 @@
 import java.util.concurrent.TimeUnit;
 
 public class StatisticConstants {
-    public static final String ANALYSIS_TBL_NAME = "table_statistics";
 
     public static final String STATISTIC_TBL_NAME = "column_statistics";
 
@@ -85,6 +84,9 @@ public class StatisticConstants {
 
     public static final int LOAD_RETRY_TIMES = 3;
 
+    // Unioning more than 512 relations in one statement may cause a StackOverflowError.
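A standalone sketch of the batching this limit drives (assuming Guava's Lists, which OlapAnalysisTask already imports; the SELECT bodies are placeholders):

    import com.google.common.collect.Lists;

    import java.util.ArrayList;
    import java.util.List;
    import java.util.StringJoiner;

    // Mirrors OlapAnalysisTask.execSQLs: per-partition SELECTs are split into
    // groups of at most UNION_ALL_LIMIT and each group becomes one statement.
    public class UnionAllBatchingDemo {
        private static final int UNION_ALL_LIMIT = 512;

        public static void main(String[] args) {
            List<String> selects = new ArrayList<>();
            for (int i = 0; i < 1100; i++) {
                selects.add("SELECT " + i + " AS part_id");
            }
            // 1100 SELECTs -> three groups of 512, 512 and 76 branches.
            for (List<String> group : Lists.partition(selects, UNION_ALL_LIMIT)) {
                StringJoiner sql = new StringJoiner(" UNION ALL ");
                group.forEach(sql::add);
                System.out.println("statement with " + group.size() + " branches");
            }
        }
    }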
+ public static final int UNION_ALL_LIMIT = 512; + static { STATISTICS_DB_BLACK_LIST.add(SystemInfoService.DEFAULT_CLUSTER + ClusterNamespace.CLUSTER_DELIMITER + FeConstants.INTERNAL_DB_NAME); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java index 8a81b05bd42b34..e67fc55e64bc67 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java @@ -33,6 +33,7 @@ import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.Maps; +import org.apache.hudi.common.util.VisibleForTesting; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -41,6 +42,7 @@ import java.time.format.DateTimeParseException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -92,7 +94,6 @@ private void analyzeAll() { if (StatisticConstants.STATISTICS_DB_BLACK_LIST.contains(databaseIf.getFullName())) { continue; } - AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); List analysisInfos = constructAnalysisInfo(databaseIf); for (AnalysisInfo analysisInfo : analysisInfos) { analysisInfo = getReAnalyzeRequiredPart(analysisInfo); @@ -100,7 +101,7 @@ private void analyzeAll() { continue; } try { - analysisManager.createSystemAnalysisJob(analysisInfo, analysisTaskExecutor); + createSystemAnalysisJob(analysisInfo); } catch (Exception e) { LOG.warn("Failed to create analysis job", e); } @@ -109,7 +110,7 @@ private void analyzeAll() { } } - private List constructAnalysisInfo(DatabaseIf db) { + public List constructAnalysisInfo(DatabaseIf db) { List analysisInfos = new ArrayList<>(); for (TableIf table : db.getTables()) { if (table instanceof View) { @@ -123,13 +124,17 @@ private List constructAnalysisInfo(DatabaseIf db) { .setDbName(db.getFullName()) .setTblName(tableName.getTbl()) .setColName( - table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())).map( - Column::getName).collect(Collectors.joining(",")) + table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + .map( + Column::getName).collect(Collectors.joining(",")) ) .setAnalysisType(AnalysisInfo.AnalysisType.FUNDAMENTALS) .setAnalysisMode(AnalysisInfo.AnalysisMode.INCREMENTAL) .setAnalysisMethod(AnalysisInfo.AnalysisMethod.FULL) .setScheduleType(AnalysisInfo.ScheduleType.ONCE) + .setState(AnalysisState.PENDING) + .setTaskIds(new ArrayList<>()) + .setLastExecTimeInMs(System.currentTimeMillis()) .setJobType(JobType.SYSTEM).build(); analysisInfos.add(jobInfo); } @@ -142,55 +147,25 @@ private void analyzePeriodically() { List jobInfos = analysisManager.findPeriodicJobs(); for (AnalysisInfo jobInfo : jobInfos) { jobInfo = new AnalysisInfoBuilder(jobInfo).setJobType(JobType.SYSTEM).build(); - analysisManager.createSystemAnalysisJob(jobInfo, analysisTaskExecutor); + createSystemAnalysisJob(jobInfo); } } catch (DdlException e) { LOG.warn("Failed to periodically analyze the statistics." + e); } } - /** - * Check if automatic analysis of statistics is required. - *
<p>
- * Step1: check the health of the table, if the health is good, - * there is no need to re-analyze, or check partition - *
<p>
- * Step2: check the partition update time, if the partition is not updated - * after the statistics is analyzed, there is no need to re-analyze - *
<p>
- * Step3: if the partition is updated after the statistics is analyzed, - * check the health of the partition, if the health is good, there is no need to re-analyze - * - Step3.1: check the analyzed partition statistics - * - Step3.2: Check for new partitions for which statistics were not analyzed - *
<p>
- * TODO new columns is not currently supported to analyze automatically - * - * @param jobInfo analysis job info - * @return new job info after check - * @throws Throwable failed to check - */ - private AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) { + @VisibleForTesting + public AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) { long lastExecTimeInMs = jobInfo.lastExecTimeInMs; TableIf table = StatisticsUtil .findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName); - TableStatistic tblStats = null; - try { - tblStats = StatisticsRepository.fetchTableLevelStats(table.getId()); - } catch (Throwable t) { - LOG.warn("Failed to fetch table stats", t); - return null; - } + TableStats tblStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(table.getId()); - if (!(needReanalyzeTable(table, tblStats) || tblStats == TableStatistic.UNKNOWN)) { + if (!(tblStats == null || needReanalyzeTable(table, tblStats))) { return null; } - Set needRunPartitions = table.getPartitionNames().stream() - .map(table::getPartition) - .filter(Partition::hasData) - .filter(partition -> - partition.getVisibleVersionTime() >= lastExecTimeInMs).map(Partition::getName) - .collect(Collectors.toSet()); + Set needRunPartitions = findReAnalyzeNeededPartitions(table, lastExecTimeInMs); if (needRunPartitions.isEmpty()) { return null; @@ -199,14 +174,29 @@ private AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) { return getAnalysisJobInfo(jobInfo, table, needRunPartitions); } - private boolean needReanalyzeTable(TableIf table, TableStatistic tblStats) { + @VisibleForTesting + public Set findReAnalyzeNeededPartitions(TableIf table, long lastExecTimeInMs) { + return table.getPartitionNames().stream() + .map(table::getPartition) + .filter(Partition::hasData) + .filter(partition -> + partition.getVisibleVersionTime() >= lastExecTimeInMs).map(Partition::getName) + .collect(Collectors.toSet()); + } + + private boolean needReanalyzeTable(TableIf table, TableStats tblStats) { long rowCount = table.getRowCount(); - long updateRows = Math.abs(rowCount - tblStats.rowCount); + // TODO: Do we need to analyze an empty table? 
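The guard below reads together with the health check that follows it. The formula here is only an assumption for illustration (the real computation lives in StatisticsUtil.getTableHealth, which this patch does not show), but the inputs and the threshold comparison match the code:

    // Hypothetical stand-in for StatisticsUtil.getTableHealth: treat health as
    // the percentage of rows unchanged since the last analyze.
    public class TableHealthDemo {
        static int tableHealth(long rowCount, long updatedRows) {
            if (rowCount == 0 || updatedRows >= rowCount) {
                return 0;
            }
            return (int) ((rowCount - updatedRows) * 100 / rowCount);
        }

        public static void main(String[] args) {
            long rowCount = 10_000;
            long updatedRows = 4_000; // accumulated in TableStats.updatedRows
            int threshold = 80;       // stand-in for Config.table_stats_health_threshold
            int health = tableHealth(rowCount, updatedRows); // 60
            System.out.println("health=" + health + ", reanalyze=" + (health < threshold));
        }
    }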
+ if (rowCount == 0) { + return false; + } + long updateRows = tblStats.updatedRows.get(); int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows); - return tblHealth < StatisticConstants.TABLE_STATS_HEALTH_THRESHOLD; + return tblHealth < Config.table_stats_health_threshold; } - private AnalysisInfo getAnalysisJobInfo(AnalysisInfo jobInfo, TableIf table, + @VisibleForTesting + public AnalysisInfo getAnalysisJobInfo(AnalysisInfo jobInfo, TableIf table, Set needRunPartitions) { Map> newColToPartitions = Maps.newHashMap(); Map> colToPartitions = jobInfo.colToPartitions; @@ -242,4 +232,21 @@ private boolean checkAnalyzeTime(LocalTime now) { return true; } } + + + // Analysis job created by the system + @VisibleForTesting + public void createSystemAnalysisJob(AnalysisInfo jobInfo) + throws DdlException { + if (jobInfo.colToPartitions.isEmpty()) { + // No statistics need to be collected or updated + return; + } + + Map analysisTaskInfos = new HashMap<>(); + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + analysisManager.createTaskForEachColumns(jobInfo, analysisTaskInfos, false); + Env.getCurrentEnv().getAnalysisManager().registerSysJob(jobInfo, analysisTaskInfos); + analysisTaskInfos.values().forEach(analysisTaskExecutor::submitTask); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java index 0b720037cbe9f8..0c53ac9074c086 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java @@ -24,7 +24,6 @@ import org.apache.doris.ha.FrontendNodeType; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.qe.ConnectContext; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.system.Frontend; import org.apache.doris.thrift.FrontendService; @@ -34,6 +33,7 @@ import com.github.benmanes.caffeine.cache.AsyncLoadingCache; import com.github.benmanes.caffeine.cache.Caffeine; import org.apache.commons.collections.CollectionUtils; +import org.apache.hudi.common.util.VisibleForTesting; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -47,6 +47,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; public class StatisticsCache { @@ -62,7 +63,6 @@ public class StatisticsCache { private final ColumnStatisticsCacheLoader columnStatisticsCacheLoader = new ColumnStatisticsCacheLoader(); private final HistogramCacheLoader histogramCacheLoader = new HistogramCacheLoader(); - private final TableStatisticsCacheLoader tableStatisticsCacheLoader = new TableStatisticsCacheLoader(); private final AsyncLoadingCache> columnStatisticsCache = Caffeine.newBuilder() @@ -78,20 +78,12 @@ public class StatisticsCache { .executor(threadPool) .buildAsync(histogramCacheLoader); - private final AsyncLoadingCache> tableStatisticsCache = - Caffeine.newBuilder() - .maximumSize(Config.stats_cache_size) - .refreshAfterWrite(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_REFRESH_INTERVAL)) - .executor(threadPool) - .buildAsync(tableStatisticsCacheLoader); - { threadPool.submit(() -> { while (true) { try { columnStatisticsCacheLoader.removeExpiredInProgressing(); 
histogramCacheLoader.removeExpiredInProgressing(); - tableStatisticsCacheLoader.removeExpiredInProgressing(); } catch (Throwable t) { // IGNORE } @@ -144,23 +136,6 @@ public Optional getHistogram(long tblId, long idxId, String colName) return Optional.empty(); } - public Optional getTableStatistics(long catalogId, long dbId, long tableId) { - ConnectContext ctx = ConnectContext.get(); - if (ctx != null && ctx.getSessionVariable().internalSession) { - return Optional.empty(); - } - StatisticsCacheKey k = new StatisticsCacheKey(catalogId, dbId, tableId); - try { - CompletableFuture> f = tableStatisticsCache.get(k); - if (f.isDone()) { - return f.get(); - } - } catch (Exception e) { - LOG.warn("Unexpected exception while returning Histogram", e); - } - return Optional.empty(); - } - public void invalidate(long tblId, long idxId, String colName) { columnStatisticsCache.synchronous().invalidate(new StatisticsCacheKey(tblId, idxId, colName)); } @@ -177,14 +152,6 @@ public void refreshColStatsSync(long catalogId, long dbId, long tblId, long idxI columnStatisticsCache.synchronous().refresh(new StatisticsCacheKey(catalogId, dbId, tblId, idxId, colName)); } - public void invalidateTableStats(long catalogId, long dbId, long tblId) { - tableStatisticsCache.synchronous().invalidate(new StatisticsCacheKey(catalogId, dbId, tblId)); - } - - public void refreshTableStatsSync(long catalogId, long dbId, long tblId) { - tableStatisticsCache.synchronous().refresh(new StatisticsCacheKey(catalogId, dbId, tblId)); - } - public void refreshHistogramSync(long tblId, long idxId, String colName) { histogramCache.synchronous().refresh(new StatisticsCacheKey(tblId, idxId, colName)); } @@ -225,9 +192,10 @@ private void doPreHeat() { Map keyToColStats = new HashMap<>(); for (ResultRow r : recentStatsUpdatedCols) { try { - long tblId = Long.parseLong(r.getColumnValue("tbl_id")); - long idxId = Long.parseLong(r.getColumnValue("idx_id")); - String colId = r.getColumnValue("col_id"); + StatsId statsId = new StatsId(r); + long tblId = statsId.tblId; + long idxId = statsId.idxId; + String colId = statsId.colId; final StatisticsCacheKey k = new StatisticsCacheKey(tblId, idxId, colId); final ColumnStatistic c = ColumnStatistic.fromResultRow(r); @@ -253,29 +221,36 @@ public void syncLoadColStats(long tableId, long idxId, String colName) { return; } putCache(k, c); + if (ColumnStatistic.UNKNOWN == c) { + return; + } TUpdateFollowerStatsCacheRequest updateFollowerStatsCacheRequest = new TUpdateFollowerStatsCacheRequest(); updateFollowerStatsCacheRequest.key = GsonUtils.GSON.toJson(k); - updateFollowerStatsCacheRequest.colStats = GsonUtils.GSON.toJson(c); + updateFollowerStatsCacheRequest.statsRows = columnResults.stream().map(GsonUtils.GSON::toJson).collect( + Collectors.toList()); for (Frontend frontend : Env.getCurrentEnv().getFrontends(FrontendNodeType.FOLLOWER)) { - if (frontend.getHost().equals(Env.getCurrentEnv().getSelfNode().getHost())) { - // Doesn't need to send request to current node. 
+ if (StatisticsUtil.isMaster(frontend)) { continue; } - TNetworkAddress address = new TNetworkAddress(frontend.getHost(), - frontend.getRpcPort()); - FrontendService.Client client = null; - try { - client = ClientPool.frontendPool.borrowObject(address); - client.updateStatsCache(updateFollowerStatsCacheRequest); - } catch (Throwable t) { - LOG.warn("Failed to sync stats to follower: {}", address, t); - } finally { - if (client != null) { - ClientPool.frontendPool.returnObject(address, client); - } - } + sendStats(frontend, updateFollowerStatsCacheRequest); } + } + @VisibleForTesting + public void sendStats(Frontend frontend, TUpdateFollowerStatsCacheRequest updateFollowerStatsCacheRequest) { + TNetworkAddress address = new TNetworkAddress(frontend.getHost(), + frontend.getRpcPort()); + FrontendService.Client client = null; + try { + client = ClientPool.frontendPool.borrowObject(address); + client.updateStatsCache(updateFollowerStatsCacheRequest); + } catch (Throwable t) { + LOG.warn("Failed to sync stats to follower: {}", address, t); + } finally { + if (client != null) { + ClientPool.frontendPool.returnObject(address, client); + } + } } public void putCache(StatisticsCacheKey k, ColumnStatistic c) { @@ -306,10 +281,11 @@ private void addPartStatsToColStats(Map key List partsStats) { for (ResultRow r : partsStats) { try { - long tblId = Long.parseLong(r.getColumnValue("tbl_id")); - long idxId = Long.parseLong(r.getColumnValue("idx_id")); - long partId = Long.parseLong(r.getColumnValue("part_id")); - String colId = r.getColumnValue("col_id"); + StatsId statsId = new StatsId(r); + long tblId = statsId.tblId; + long idxId = statsId.idxId; + long partId = statsId.partId; + String colId = statsId.colId; ColumnStatistic partStats = ColumnStatistic.fromResultRow(r); keyToColStats.get(new StatisticsCacheKey(tblId, idxId, colId)).putPartStats(partId, partStats); } catch (Throwable t) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java index c023b9a33568ff..93807bb1e0d6ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java @@ -27,7 +27,6 @@ import org.apache.doris.common.util.MasterDaemon; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.datasource.InternalCatalog; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.system.SystemInfoService; @@ -190,31 +189,32 @@ private long findExpiredStats(OlapTable statsTbl, ExpiredStats expiredStats, lon pos += StatisticConstants.FETCH_LIMIT; for (ResultRow r : rows) { try { - String id = r.getColumnValue("id"); - long catalogId = Long.parseLong(r.getColumnValue("catalog_id")); + StatsId statsId = new StatsId(r); + String id = statsId.id; + long catalogId = statsId.catalogId; if (!idToCatalog.containsKey(catalogId)) { expiredStats.expiredCatalog.add(catalogId); continue; } - long dbId = Long.parseLong(r.getColumnValue("db_id")); + long dbId = statsId.dbId; if (!idToDb.containsKey(dbId)) { expiredStats.expiredDatabase.add(dbId); continue; } - long tblId = Long.parseLong(r.getColumnValue("tbl_id")); + long tblId = statsId.tblId; if (!idToTbl.containsKey(tblId)) { expiredStats.expiredTable.add(tblId); continue; } - long idxId = Long.parseLong(r.getColumnValue("idx_id")); + long idxId = statsId.idxId; if (idxId != 
-1 && !idToMVIdx.containsKey(idxId)) { expiredStats.expiredIdxId.add(idxId); continue; } Table t = idToTbl.get(tblId); - String colId = r.getColumnValue("col_id"); + String colId = statsId.colId; if (t.getColumn(colId) == null) { expiredStats.ids.add(id); continue; @@ -223,11 +223,10 @@ private long findExpiredStats(OlapTable statsTbl, ExpiredStats expiredStats, lon continue; } OlapTable olapTable = (OlapTable) t; - String partIdStr = r.getColumnValue("part_id"); - if (partIdStr == null) { + Long partId = statsId.partId; + if (partId == null) { continue; } - long partId = Long.parseLong(partIdStr); if (!olapTable.getPartitionIds().contains(partId)) { expiredStats.ids.add(id); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index b7b717b5b11871..f9b18f41e4531d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -18,7 +18,6 @@ package org.apache.doris.statistics; import org.apache.doris.analysis.AlterColumnStatsStmt; -import org.apache.doris.analysis.AlterTableStatsStmt; import org.apache.doris.analysis.TableName; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; @@ -28,11 +27,9 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; import org.apache.doris.statistics.util.DBObjects; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.system.SystemInfoService; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; import org.apache.commons.text.StringSubstitutor; import org.apache.logging.log4j.LogManager; @@ -96,31 +93,11 @@ public class StatisticsRepository { + " ORDER BY update_time " + "LIMIT ${limit} OFFSET ${offset}"; - private static final String FETCH_STATS_PART_ID = "SELECT col_id, part_id FROM " + private static final String FETCH_STATS_PART_ID = "SELECT * FROM " + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.STATISTIC_TBL_NAME + " WHERE tbl_id = ${tblId}" + " AND part_id IS NOT NULL"; - private static final String PERSIST_TABLE_STATS_TEMPLATE = "INSERT INTO " - + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.ANALYSIS_TBL_NAME - + " VALUES('${id}', ${catalogId}, ${dbId}, ${tblId}, ${indexId}, ${partId}, ${rowCount}," - + " ${lastAnalyzeTimeInMs}, NOW())"; - - private static final String FETCH_TABLE_LEVEL_STATS_TEMPLATE = "SELECT * FROM " - + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.ANALYSIS_TBL_NAME - + " WHERE tbl_id = ${tblId}" - + " AND part_id IS NULL"; - - private static final String FETCH_TABLE_LEVEL_PART_STATS_TEMPLATE = "SELECT * FROM " - + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.ANALYSIS_TBL_NAME - + " WHERE part_id = ${partId}"; - - - private static final String FETCH_PART_TABLE_STATS_TEMPLATE = "SELECT * FROM " - + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.ANALYSIS_TBL_NAME - + " WHERE tbl_id = ${tblId}" - + " AND part_id IS NOT NULL"; - private static final String QUERY_COLUMN_STATISTICS = "SELECT * FROM " + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.STATISTIC_TBL_NAME + " WHERE " + "tbl_id=${tblId} AND idx_id=${idxId} AND col_id='${colId}'"; @@ -203,7 +180,6 @@ private static String constructId(Object... 
params) { } public static void dropStatistics(Set partIds) throws DdlException { - dropStatisticsByPartId(partIds, StatisticConstants.ANALYSIS_TBL_NAME); dropStatisticsByPartId(partIds, StatisticConstants.STATISTIC_TBL_NAME); } @@ -212,18 +188,6 @@ public static void dropStatistics(long tblId, Set colNames) throws DdlEx dropStatisticsByColName(tblId, colNames, StatisticConstants.HISTOGRAM_TBL_NAME); } - public static void dropExternalTableStatistics(long tblId) throws DdlException { - Map params = new HashMap<>(); - String inPredicate = String.format("tbl_id = %s", tblId); - params.put("tblName", StatisticConstants.ANALYSIS_TBL_NAME); - params.put("condition", inPredicate); - try { - StatisticsUtil.execUpdate(new StringSubstitutor(params).replace(DROP_TABLE_STATISTICS_TEMPLATE)); - } catch (Exception e) { - throw new DdlException(e.getMessage(), e); - } - } - public static void dropStatisticsByColName(long tblId, Set colNames, String statsTblName) throws DdlException { Map params = new HashMap<>(); @@ -251,34 +215,6 @@ public static void dropStatisticsByPartId(Set partIds, String statsTblName } } - public static void persistTableStats(Map params) throws Exception { - StatisticsUtil.execUpdate(PERSIST_TABLE_STATS_TEMPLATE, params); - } - - public static void alterTableStatistics(AlterTableStatsStmt alterTableStatsStmt) throws Exception { - TableName tableName = alterTableStatsStmt.getTableName(); - DBObjects objects = StatisticsUtil.convertTableNameToObjects(tableName); - String rowCount = alterTableStatsStmt.getValue(StatsType.ROW_COUNT); - TableStatisticBuilder builder = new TableStatisticBuilder(); - builder.setRowCount(Long.parseLong(rowCount)); - builder.setLastAnalyzeTimeInMs(0); - TableStatistic tableStatistic = builder.build(); - Map params = new HashMap<>(); - String id = StatisticsUtil.constructId(objects.table.getId(), -1); - params.put("id", id); - params.put("catalogId", String.valueOf(objects.catalog.getId())); - params.put("dbId", String.valueOf(objects.db.getId())); - params.put("tblId", String.valueOf(objects.table.getId())); - params.put("indexId", "-1"); - params.put("partId", "NULL"); - params.put("rowCount", String.valueOf(tableStatistic.rowCount)); - params.put("lastAnalyzeTimeInMs", "0"); - StatisticsUtil.execUpdate(PERSIST_TABLE_STATS_TEMPLATE, params); - // TODO update statistics cache - // Env.getCurrentEnv().getStatisticsCache() - // .updateColStatsCache(objects.table.getId(), -1, builder.build()); - } - public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsStmt) throws Exception { TableName tableName = alterColumnStatsStmt.getTableName(); List partitionIds = alterColumnStatsStmt.getPartitionIds(); @@ -371,14 +307,13 @@ public static Map> fetchColAndPartsForStats(long tblId) { resultRows.forEach(row -> { try { - String colId = row.getColumnValue("col_id"); - String partId = row.getColumnValue("part_id"); - if (partId == null) { + StatsId statsId = new StatsId(row); + if (statsId.partId == null) { return; } - columnToPartitions.computeIfAbsent(colId, - k -> new HashSet<>()).add(Long.valueOf(partId)); - } catch (NumberFormatException | DdlException e) { + columnToPartitions.computeIfAbsent(String.valueOf(statsId.colId), + k -> new HashSet<>()).add(statsId.partId); + } catch (NumberFormatException e) { LOG.warn("Failed to obtain the column and partition for statistics.", e); } @@ -387,50 +322,6 @@ public static Map> fetchColAndPartsForStats(long tblId) { return columnToPartitions; } - public static TableStatistic fetchTableLevelStats(long 
tblId) throws DdlException { - ImmutableMap params = ImmutableMap - .of("tblId", String.valueOf(tblId)); - String sql = StatisticsUtil.replaceParams(FETCH_TABLE_LEVEL_STATS_TEMPLATE, params); - List resultRows = StatisticsUtil.execStatisticQuery(sql); - if (resultRows.size() == 1) { - return TableStatistic.fromResultRow(resultRows.get(0)); - } - return TableStatistic.UNKNOWN; - } - - public static TableStatistic fetchTableLevelOfPartStats(long partId) throws DdlException { - ImmutableMap params = ImmutableMap - .of("partId", String.valueOf(partId)); - String sql = StatisticsUtil.replaceParams(FETCH_TABLE_LEVEL_PART_STATS_TEMPLATE, params); - List resultRows = StatisticsUtil.execStatisticQuery(sql); - if (resultRows.size() == 1) { - return TableStatistic.fromResultRow(resultRows.get(0)); - } - throw new DdlException("Query result is not as expected: " + sql); - } - - public static Map fetchTableLevelOfIdPartStats(long tblId) throws DdlException { - ImmutableMap params = ImmutableMap - .of("tblId", String.valueOf(tblId)); - StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - String sql = stringSubstitutor.replace(FETCH_PART_TABLE_STATS_TEMPLATE); - List resultRows = StatisticsUtil.execStatisticQuery(sql); - - if (resultRows.size() == 0) { - return Collections.emptyMap(); - } - - Map idToPartitionTableStats = Maps.newHashMap(); - - for (ResultRow resultRow : resultRows) { - long partId = Long.parseLong(resultRow.getColumnValue("part_id")); - TableStatistic partStats = TableStatistic.fromResultRow(resultRow); - idToPartitionTableStats.put(partId, partStats); - } - - return idToPartitionTableStats; - } - public static List loadColStats(long tableId, long idxId, String colName) { Map params = new HashMap<>(); params.put("tblId", String.valueOf(tableId)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java new file mode 100644 index 00000000000000..c7af03a8d9e2c3 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.statistics; + +import org.apache.doris.statistics.util.StatisticsUtil; + +import java.util.StringJoiner; + +public class StatsId { + + public final String id; + public final long catalogId; + public final long dbId; + public final long tblId; + public final long idxId; + + public final String colId; + + // nullable + public final Long partId; + + public StatsId(ResultRow row) { + this.id = row.get(0); + this.catalogId = Long.parseLong(row.get(1)); + this.dbId = Long.parseLong(row.get(2)); + this.tblId = Long.parseLong(row.get(3)); + this.idxId = Long.parseLong(row.get(4)); + this.colId = row.get(5); + this.partId = row.get(6) == null ? null : Long.parseLong(row.get(6)); + } + + public String toSQL() { + StringJoiner sj = new StringJoiner(","); + sj.add(StatisticsUtil.quote(id)); + sj.add(String.valueOf(catalogId)); + sj.add(String.valueOf(dbId)); + sj.add(String.valueOf(tblId)); + sj.add(String.valueOf(idxId)); + sj.add(StatisticsUtil.quote(String.valueOf(colId))); + sj.add(String.valueOf(partId)); + return sj.toString(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatistic.java deleted file mode 100644 index 28d0c17b561046..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatistic.java +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.statistics; - -import org.apache.doris.common.DdlException; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -public class TableStatistic { - - private static final Logger LOG = LogManager.getLogger(TableStatistic.class); - - public static TableStatistic UNKNOWN = new TableStatisticBuilder() - .setRowCount(0).setUpdateTime("NULL").setLastAnalyzeTimeInMs(0L) - .build(); - - public final long rowCount; - public final long lastAnalyzeTimeInMs; - public final String updateTime; - - public TableStatistic(long rowCount, long lastAnalyzeTimeInMs, String updateTime) { - this.rowCount = rowCount; - this.lastAnalyzeTimeInMs = lastAnalyzeTimeInMs; - this.updateTime = updateTime; - } - - // TODO: use thrift - public static TableStatistic fromResultRow(ResultRow resultRow) { - try { - TableStatisticBuilder tableStatisticBuilder = new TableStatisticBuilder(); - long rowCount = Long.parseLong(resultRow.getColumnValue("count")); - String updateTime = resultRow.getColumnValue("update_time"); - long lastAnalyzeTimeInMs = Long - .parseLong(resultRow.getColumnValue("last_analyze_time_in_ms")); - tableStatisticBuilder.setRowCount(rowCount); - tableStatisticBuilder.setLastAnalyzeTimeInMs(lastAnalyzeTimeInMs); - tableStatisticBuilder.setUpdateTime(updateTime); - return tableStatisticBuilder.build(); - } catch (DdlException e) { - LOG.warn("Failed to deserialize table statistics", e); - return TableStatistic.UNKNOWN; - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticBuilder.java deleted file mode 100644 index ddb45b824cb1f8..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticBuilder.java +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.statistics; - -public class TableStatisticBuilder { - public long rowCount; - public long lastAnalyzeTimeInMs; - public String updateTime; - - public TableStatisticBuilder() { - } - - public TableStatisticBuilder(TableStatistic tableStatistic) { - this.rowCount = tableStatistic.rowCount; - this.updateTime = tableStatistic.updateTime; - } - - public TableStatisticBuilder setRowCount(long rowCount) { - this.rowCount = rowCount; - return this; - } - - public TableStatisticBuilder setLastAnalyzeTimeInMs(long lastAnalyzeTimeInMs) { - this.lastAnalyzeTimeInMs = lastAnalyzeTimeInMs; - return this; - } - - public TableStatisticBuilder setUpdateTime(String updateTime) { - this.updateTime = updateTime; - return this; - } - - public TableStatistic build() { - return new TableStatistic(rowCount, lastAnalyzeTimeInMs, updateTime); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java deleted file mode 100644 index 953bc9a42742b8..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java +++ /dev/null @@ -1,60 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.TableIf; -import org.apache.doris.common.DdlException; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.Optional; - -public class TableStatisticsCacheLoader extends StatisticsCacheLoader> { - - private static final Logger LOG = LogManager.getLogger(TableStatisticsCacheLoader.class); - - @Override - protected Optional doLoad(StatisticsCacheKey key) { - try { - TableStatistic tableStatistic = StatisticsRepository.fetchTableLevelStats(key.tableId); - if (tableStatistic != TableStatistic.UNKNOWN) { - return Optional.of(tableStatistic); - } - } catch (DdlException e) { - LOG.debug("Fail to get table line number from table_statistics table. " - + "Will try to get from data source.", e); - } - // Get row count by call TableIf interface getRowCount - // when statistic table doesn't contain a record for this table. 
-        try {
-            TableIf table = Env.getCurrentEnv().getCatalogMgr().getCatalog(key.catalogId)
-                    .getDbOrDdlException(key.dbId).getTableOrAnalysisException(key.tableId);
-            long rowCount = table.getRowCount();
-            long lastAnalyzeTimeInMs = System.currentTimeMillis();
-            String updateTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date(lastAnalyzeTimeInMs));
-            return Optional.of(new TableStatistic(rowCount, lastAnalyzeTimeInMs, updateTime));
-        } catch (Exception e) {
-            LOG.warn(String.format("Fail to get row count for table %d", key.tableId), e);
-        }
-        return Optional.empty();
-    }
-}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java
new file mode 100644
index 00000000000000..0fffbd9dd71154
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.common.io.Text;
+import org.apache.doris.common.io.Writable;
+import org.apache.doris.persist.gson.GsonUtils;
+import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
+import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
+import org.apache.doris.statistics.AnalysisInfo.JobType;
+
+import com.google.gson.annotations.SerializedName;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLong;
+
+public class TableStats implements Writable {
+
+    @SerializedName("tblId")
+    public final long tblId;
+    @SerializedName("idxId")
+    public final long idxId;
+    @SerializedName("updatedRows")
+    public final AtomicLong updatedRows = new AtomicLong();
+
+    // We would like to analyze tables which are queried frequently with higher priority in the future.
+    @SerializedName("queriedTimes")
+    public final AtomicLong queriedTimes = new AtomicLong();
+
+    // Used for external table.
+    @SerializedName("rowCount")
+    public final long rowCount;
+
+    @SerializedName("method")
+    public final AnalysisMethod analysisMethod;
+
+    @SerializedName("type")
+    public final AnalysisType analysisType;
+
+    @SerializedName("updateTime")
+    public final long updatedTime;
+
+    @SerializedName("columns")
+    public String columns;
+
+    @SerializedName("trigger")
+    public JobType jobType;
+
+    // It's necessary to store these fields separately from AnalysisInfo, since the lifecycles of AnalysisInfo
+    // and TableStats are quite different.
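+    // A hypothetical round-trip through the Writable methods below, for illustration only
+    // (the `out`/`in` streams and surrounding setup are assumed, not part of this class):
+    //   TableStats stats = new TableStats(tblId, rowCount, analyzedJob);
+    //   stats.write(out);                       // serialized as a single GSON JSON string via Text
+    //   TableStats copy = TableStats.read(in);  // rebuilt from that JSON string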
+ public TableStats(long tblId, long rowCount, AnalysisInfo analyzedJob) { + this.tblId = tblId; + this.idxId = -1; + this.rowCount = rowCount; + analysisMethod = analyzedJob.analysisMethod; + analysisType = analyzedJob.analysisType; + updatedTime = System.currentTimeMillis(); + columns = analyzedJob.colName; + jobType = analyzedJob.jobType; + } + + @Override + public void write(DataOutput out) throws IOException { + String json = GsonUtils.GSON.toJson(this); + Text.writeString(out, json); + } + + public static TableStats read(DataInput dataInput) throws IOException { + String json = Text.readString(dataInput); + return GsonUtils.GSON.fromJson(json, TableStats.class); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalQuery.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalQuery.java index 09af38d830a709..40669b6a9396ea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalQuery.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalQuery.java @@ -24,7 +24,6 @@ import org.apache.doris.analysis.StatementBase; import org.apache.doris.analysis.UserIdentity; import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.cluster.ClusterNamespace; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; @@ -38,7 +37,7 @@ import org.apache.doris.qe.OriginStatement; import org.apache.doris.qe.QeProcessorImpl; import org.apache.doris.qe.RowBatch; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; +import org.apache.doris.statistics.ResultRow; import org.apache.doris.system.SystemInfoService; import org.apache.doris.thrift.TQueryOptions; import org.apache.doris.thrift.TResultBatch; @@ -50,9 +49,9 @@ import java.io.StringReader; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.List; import java.util.UUID; -import java.util.stream.Collectors; /** * Execute SQL query statements internally(in FE). 
Internal-query mainly used for statistics module, @@ -87,7 +86,7 @@ public void setTimeout(int timeout) { * @return Result of the query statement * @throws Exception Errors in parsing or execution */ - public InternalQueryResult query() throws Exception { + public List query() throws Exception { // step1: mock connectContext buildContext(); @@ -180,14 +179,9 @@ private void execute() throws Exception { } } - private InternalQueryResult fetchResult() { + private List fetchResult() { List columns = stmt.getColLabels(); - List types = stmt.getResultExprs().stream() - .map(e -> e.getType().getPrimitiveType()) - .collect(Collectors.toList()); - - InternalQueryResult result = new InternalQueryResult(); - List resultRows = result.getResultRows(); + List resultRows = new ArrayList<>(); for (TResultBatch batch : resultBatches) { List rows = batch.getRows(); @@ -200,12 +194,11 @@ private InternalQueryResult fetchResult() { values.add(value); } - ResultRow resultRow = new ResultRow(columns, types, values); + ResultRow resultRow = new ResultRow(values); resultRows.add(resultRow); } } - - return result; + return resultRows; } public void cancel() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalQueryResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalQueryResult.java deleted file mode 100644 index e7919860107531..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalQueryResult.java +++ /dev/null @@ -1,242 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics.util; - -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.common.DdlException; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -/** - * Readable results of internal SQL execution, - * providing some read operations. - */ -public class InternalQueryResult { - private final List resultRows = Lists.newArrayList(); - - public InternalQueryResult() { - } - - public List getResultRows() { - return resultRows; - } - - public static class ResultRow { - private final List columns; - private final List types; - private final List values; - - private final Map columnNameMap = Maps.newHashMap(); - private final Map columnIndexMap = Maps.newHashMap(); - - public ResultRow(List columns, List types, List values) { - this.columns = columns; - this.types = types; - this.values = values; - buildColumnNameMap(); - buildColumnIndexMap(); - } - - public List getColumns() { - return columns != null ? columns : Collections.emptyList(); - } - - public List getTypes() { - return types != null ? 
types : Collections.emptyList(); - } - - public List getValues() { - return values != null ? values : Collections.emptyList(); - } - - private void buildColumnNameMap() { - List columns = getColumns(); - for (int i = 0; i < columns.size(); i++) { - columnNameMap.put(columns.get(i), i); - } - } - - private void buildColumnIndexMap() { - List columns = getColumns(); - for (int i = 0; i < columns.size(); i++) { - columnIndexMap.put(i, columns.get(i)); - } - } - - public int getColumnIndex(String columnName) { - return columnNameMap.getOrDefault(columnName, -1); - } - - public String getColumnName(int index) throws DdlException { - List columns = getColumns(); - if (columnIndexMap.containsKey(index)) { - return columnIndexMap.get(index); - } else { - throw new DdlException("Index should be between 0 and " + columns.size()); - } - } - - public PrimitiveType getColumnType(String columnName) throws DdlException { - List types = getTypes(); - int index = getColumnIndex(columnName); - if (index == -1) { - throw new DdlException(String.format("The column name:[%s] does not exist.", columnName)); - } - return types.get(index); - } - - public PrimitiveType getColumnType(int index) throws DdlException { - List types = getTypes(); - if (index >= 0 && index < types.size()) { - return types.get(index); - } else { - throw new DdlException("Index should be between 0 and " + types.size()); - } - } - - public String getColumnValue(String columnName) throws DdlException { - int index = getColumnIndex(columnName); - if (index == -1) { - throw new DdlException(String.format("The column name:[%s] does not exist.", columnName)); - } - return values.get(index); - } - - public String getColumnValueWithDefault(String columnName, String defaultVal) throws DdlException { - String val = getColumnValue(columnName); - return val == null ? 
defaultVal : val; - } - - public Object getColumnValue(int index) throws DdlException { - List columns = getColumns(); - if (index >= 0 && index < columns.size()) { - return values.get(index); - } else { - throw new DdlException("Index should be between 0 and " + columns.size()); - } - } - - public String getString(int index) throws DdlException { - List columns = getColumns(); - if (index >= 0 && index < columns.size()) { - return values.get(index); - } - throw new DdlException("Index should be between 0 and " + columns.size()); - } - - public int getInt(int index) throws DdlException { - List types = getTypes(); - if (index >= 0 && index < types.size()) { - String value = values.get(index); - PrimitiveType type = types.get(index); - switch (type) { - case BOOLEAN: - case TINYINT: - case SMALLINT: - case INT: - case BIGINT: - return new Integer(value); - default: - throw new DdlException("Unable to convert field to int: " + value); - } - } - throw new DdlException("Index should be between 0 and " + types.size()); - } - - public long getLong(int index) throws DdlException { - List types = getTypes(); - if (index >= 0 && index < types.size()) { - String value = values.get(index); - PrimitiveType type = types.get(index); - switch (type) { - case TINYINT: - case SMALLINT: - case INT: - case BIGINT: - return Long.parseLong(value); - default: - throw new DdlException("Unable to convert field to long: " + value); - } - } - throw new DdlException("Index should be between 0 and " + types.size()); - } - - public float getFloat(int index) throws DdlException { - List types = getTypes(); - if (index >= 0 && index < types.size()) { - String value = values.get(index); - PrimitiveType type = types.get(index); - if (type == PrimitiveType.FLOAT) { - return Float.parseFloat(value); - } - throw new DdlException("Unable to convert field to float: " + value); - } - throw new DdlException("Index should be between 0 and " + types.size()); - } - - public double getDouble(int index) throws DdlException { - List types = getTypes(); - if (index >= 0 && index < types.size()) { - String value = values.get(index); - PrimitiveType type = types.get(index); - if (type == PrimitiveType.DOUBLE) { - return Double.parseDouble(value); - } - throw new DdlException("Unable to convert field to long: " + value); - } - throw new DdlException("Index should be between 0 and " + types.size()); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("ResultRow{ "); - if (values != null && values.size() > 0) { - List columns = getColumns(); - for (int i = 0; i < values.size(); i++) { - sb.append(columns.get(i)); - sb.append(":"); - sb.append(values.get(i)); - sb.append(" "); - } - } - sb.append("}"); - return sb.toString(); - } - } - - @Override - public String toString() { - if (resultRows.size() > 0) { - StringBuilder sb = new StringBuilder(); - sb.append("InternalQueryResult:\n"); - for (ResultRow resultRow : resultRows) { - sb.append(" - "); - sb.append(resultRow.toString()); - sb.append("\n"); - } - return sb.toString(); - } - return "InternalQueryResult{" + "resultRows=" + resultRows + '}'; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 767e76d7908834..8d09475bcb08d5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -42,6 
+42,7 @@
 import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.catalog.StructType;
 import org.apache.doris.catalog.TableIf;
+import org.apache.doris.catalog.TableIf.TableType;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.catalog.VariantType;
 import org.apache.doris.catalog.external.HMSExternalTable;
@@ -61,12 +62,12 @@
 import org.apache.doris.qe.QueryState;
 import org.apache.doris.qe.SessionVariable;
 import org.apache.doris.qe.StmtExecutor;
-import org.apache.doris.statistics.AnalysisInfo;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.ColumnStatisticBuilder;
 import org.apache.doris.statistics.Histogram;
+import org.apache.doris.statistics.ResultRow;
 import org.apache.doris.statistics.StatisticConstants;
-import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
+import org.apache.doris.system.Frontend;
 import org.apache.doris.system.SystemInfoService;
 import org.apache.doris.thrift.TUniqueId;
@@ -83,8 +84,8 @@
 import org.apache.iceberg.types.Types;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
-import org.apache.thrift.TException;
+
+import java.net.InetSocketAddress;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -104,7 +105,6 @@ public class StatisticsUtil {
     private static final Logger LOG = LogManager.getLogger(StatisticsUtil.class);
     private static final String ID_DELIMITER = "-";
-    private static final String VALUES_DELIMITER = ",";
     private static final String TOTAL_SIZE = "totalSize";
     private static final String NUM_ROWS = "numRows";
@@ -144,16 +144,6 @@ public static QueryState execUpdate(String sql) throws Exception {
         }
     }
-    public static List<AnalysisInfo> deserializeToAnalysisJob(List<ResultRow> resultBatches)
-            throws TException {
-        if (CollectionUtils.isEmpty(resultBatches)) {
-            return Collections.emptyList();
-        }
-        return resultBatches.stream()
-                .map(AnalysisInfo::fromResultRow)
-                .collect(Collectors.toList());
-    }
-
     public static ColumnStatistic deserializeToColumnStatistics(List<ResultRow> resultBatches)
             throws Exception {
         if (CollectionUtils.isEmpty(resultBatches)) {
@@ -221,7 +211,7 @@ public static LiteralExpr readableValue(Type type, String columnValue) throws An
             case DOUBLE:
                 return new FloatLiteral(columnValue);
             case DECIMALV2:
-                //no need to check precision and scale, since V2 is fixed point
+                // no need to check precision and scale, since V2 is fixed point
                 return new DecimalLiteral(columnValue);
             case DECIMAL32:
             case DECIMAL64:
@@ -480,9 +470,9 @@ public static String replaceParams(String template, Map<String, String> params)
     * when update_rows < row_count, the health degree is 100 * (1 - update_rows / row_count).
     *
     * @param updatedRows The number of rows updated by the table
-    * @return Health, the value range is [0, 100], the larger the value,
     * @param totalRows The current number of rows in the table
-    * the healthier the statistics of the table
+    * @return Health, the value range is [0, 100], the larger the value,
+    *         the healthier the statistics of the table
     */
    public static int getTableHealth(long totalRows, long updatedRows) {
        if (updatedRows >= totalRows) {
@@ -496,6 +486,7 @@
    /**
     * Estimate hive table row count.
     * First get it from remote table parameters. If not found, estimate it : totalSize/estimatedRowSize
+    *
     * @param table Hive HMSExternalTable to estimate row count.
* @return estimated row count */ @@ -526,6 +517,7 @@ public static long getHiveRowCount(HMSExternalTable table) { /** * Estimate iceberg table row count. * Get the row count by adding all task file recordCount. + * * @param table Iceberg HMSExternalTable to estimate row count. * @return estimated row count */ @@ -549,6 +541,7 @@ public static long getIcebergRowCount(HMSExternalTable table) { /** * Estimate hive table row count : totalFileSize/estimatedRowSize + * * @param table Hive HMSExternalTable to estimate row count. * @return estimated row count */ @@ -623,6 +616,7 @@ public static long getRowCountFromFileList(HMSExternalTable table) { /** * Get Iceberg column statistics. + * * @param colName * @param table Iceberg table. * @return Optional Column statistic for the given column. @@ -683,4 +677,31 @@ public static void sleep(long millis) { // IGNORE } } + + public static String quote(String str) { + return "'" + str + "'"; + } + + public static boolean isMaster(Frontend frontend) { + InetSocketAddress socketAddress = new InetSocketAddress(frontend.getHost(), frontend.getEditLogPort()); + return Env.getCurrentEnv().getHaProtocol().getLeader().equals(socketAddress); + } + + public static String escapeSQL(String str) { + if (str == null) { + return null; + } + return org.apache.commons.lang3.StringUtils.replace(str, "'", "''"); + } + + public static boolean isExternalTable(String catalogName, String dbName, String tblName) { + TableIf table; + try { + table = StatisticsUtil.findTable(catalogName, dbName, tblName); + } catch (Throwable e) { + LOG.warn(e.getMessage()); + return false; + } + return table.getType().equals(TableType.HMS_EXTERNAL_TABLE); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java index 8ba334d0706629..e85bbf2c282066 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java @@ -26,7 +26,6 @@ import org.apache.doris.statistics.AnalysisInfo.AnalysisMode; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; import org.apache.doris.statistics.AnalysisInfo.JobType; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.utframe.TestWithFeService; @@ -38,6 +37,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -114,10 +114,16 @@ public void execUpdate(String sql) throws Exception { public void syncLoadColStats(long tableId, long idxId, String colName) { } }; - new Expectations() { - { - stmtExecutor.execute(); - times = 2; + new MockUp() { + + @Mock + public void execute() throws Exception { + + } + + @Mock + public List executeInternalQuery() { + return new ArrayList<>(); } }; HashMap> colToPartitions = Maps.newHashMap(); @@ -131,6 +137,12 @@ public void syncLoadColStats(long tableId, long idxId, String colName) { .setColToPartitions(colToPartitions) .build(); new OlapAnalysisTask(analysisJobInfo).doExecute(); + new Expectations() { + { + stmtExecutor.execute(); + times = 1; + } + }; } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 8e809f9de7976c..4f7b1a9c5d24f0 100644 --- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
@@ -17,22 +17,35 @@
 package org.apache.doris.statistics;
 
+import org.apache.doris.analysis.AnalyzeProperties;
+import org.apache.doris.analysis.AnalyzeTblStmt;
+import org.apache.doris.analysis.PartitionNames;
+import org.apache.doris.analysis.TableName;
+import org.apache.doris.common.DdlException;
 import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
 import org.apache.doris.statistics.AnalysisInfo.JobType;
+import org.apache.doris.statistics.util.StatisticsUtil;
 
+import com.google.common.annotations.VisibleForTesting;
+import mockit.Expectations;
+import mockit.Injectable;
 import mockit.Mock;
 import mockit.MockUp;
 import mockit.Mocked;
-import org.junit.Test;
 import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
 
+import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
 
+// CHECKSTYLE OFF
 public class AnalysisManagerTest {
 
     @Test
     public void testUpdateTaskStatus(@Mocked BaseAnalysisTask task1,
-            @Mocked BaseAnalysisTask task2) {
+                                     @Mocked BaseAnalysisTask task2) {
 
         new MockUp() {
             @Mock
@@ -83,4 +96,167 @@ public String toString() {
         manager.updateTaskStatus(taskInfo2, AnalysisState.FINISHED, "", 0);
         Assertions.assertEquals(job.state, AnalysisState.FINISHED);
     }
+
+    // test build sync job
+    @Test
+    public void testBuildAndAssignJob1() throws Exception {
+        AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setColToPartitions(new HashMap<>()).build();
+        new MockUp<StatisticsUtil>() {
+
+            @Mock
+            public boolean statsTblAvailable() {
+                return true;
+            }
+        };
+        new MockUp<AnalysisManager>() {
+
+            @Mock
+            public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlException {
+                return analysisInfo;
+            }
+
+            @Mock
+            @VisibleForTesting
+            public void createTaskForExternalTable(AnalysisInfo jobInfo,
+                    Map<Long, BaseAnalysisTask> analysisTasks,
+                    boolean isSync) throws DdlException {
+                // DO NOTHING
+            }
+
+            @Mock
+            public void createTaskForEachColumns(AnalysisInfo jobInfo, Map<Long, BaseAnalysisTask> analysisTasks,
+                    boolean isSync) throws DdlException {
+                // DO NOTHING
+            }
+
+            @Mock
+            public void syncExecute(Collection<BaseAnalysisTask> tasks) {
+                // DO NOTHING
+            }
+
+            @Mock
+            public void updateTableStats(AnalysisInfo jobInfo) {
+                // DO NOTHING
+            }
+        };
+        AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(new TableName("test"),
+                new PartitionNames(false, new ArrayList<String>() {
+                    {
+                        add("p1");
+                        add("p2");
+                    }
+                }), new ArrayList<String>() {
+                    {
+                        add("c1");
+                        add("c2");
+                    }
+                }, new AnalyzeProperties(new HashMap<String, String>() {
+                    {
+                        put(AnalyzeProperties.PROPERTY_SYNC, "true");
+                    }
+                }));
+
+        AnalysisManager analysisManager = new AnalysisManager();
+        Assertions.assertNull(analysisManager.buildAndAssignJob(analyzeTblStmt));
+        analysisInfo.colToPartitions.put("c1", new HashSet<String>() {
+            {
+                add("p1");
+                add("p2");
+            }
+        });
+        analysisManager.buildAndAssignJob(analyzeTblStmt);
+        new Expectations() {
+            {
+                analysisManager.syncExecute((Collection<BaseAnalysisTask>) any);
+                times = 1;
+                analysisManager.updateTableStats((AnalysisInfo) any);
+                times = 1;
+                // JMockit would try to invoke this method with `null` when initiating an instance of Expectations,
+                // causing an NPE; these lines stay commented until we find another way to verify that
+                // something is not invoked.
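+                // (The NPE likely arises because `analysisManager` is a real instance rather than a
+                // @Mocked field, so recording `persistAnalysisJob` inside this block would execute
+                // the real method with a null argument.)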
+ // analysisManager.persistAnalysisJob((AnalysisInfo) any); + // times = 0; + } + }; + } + + // test build async job + @Test + public void testBuildAndAssignJob2(@Injectable OlapAnalysisTask analysisTask) throws Exception { + AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setColToPartitions(new HashMap<>()).build(); + new MockUp() { + + @Mock + public boolean statsTblAvailable() { + return true; + } + }; + new MockUp() { + + @Mock + public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlException { + return analysisInfo; + } + + @Mock + @VisibleForTesting + public void createTaskForExternalTable(AnalysisInfo jobInfo, + Map analysisTasks, + boolean isSync) throws DdlException { + // DO NOTHING + } + + @Mock + public void createTaskForEachColumns(AnalysisInfo jobInfo, Map analysisTasks, + boolean isSync) throws DdlException { + analysisTasks.put(1L, analysisTask); + } + + @Mock + public void syncExecute(Collection tasks) { + // DO NOTHING + } + + @Mock + public void updateTableStats(AnalysisInfo jobInfo) { + // DO NOTHING + } + + @Mock + public void logCreateAnalysisJob(AnalysisInfo analysisJob) { + + } + }; + AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(new TableName("test"), + new PartitionNames(false, new ArrayList() { + { + add("p1"); + add("p2"); + } + }), new ArrayList() { + { + add("c1"); + add("c2"); + } + }, new AnalyzeProperties(new HashMap() { + { + put(AnalyzeProperties.PROPERTY_SYNC, "false"); + } + })); + + AnalysisManager analysisManager = new AnalysisManager(); + analysisInfo.colToPartitions.put("c1", new HashSet() { + { + add("p1"); + add("p2"); + } + }); + analysisManager.buildAndAssignJob(analyzeTblStmt); + new Expectations() { + { + analysisManager.persistAnalysisJob(analysisInfo); + times = 1; + } + }; + } + } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java index 574c96c73d3b4a..6136b8efd442e1 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java @@ -24,7 +24,6 @@ import org.apache.doris.statistics.AnalysisInfo.AnalysisMode; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; import org.apache.doris.statistics.AnalysisInfo.JobType; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.utframe.TestWithFeService; import com.google.common.collect.Maps; @@ -91,6 +90,11 @@ public List executeInternalQuery() { @Mock public void execSQLs(List sqls) throws Exception { } + + @Mock + protected void executeWithExceptionOnFail(StmtExecutor stmtExecutor) throws Exception { + // DO NOTHING + } }; new MockUp() { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java index ccf13711e1bb7b..a7582d04582ebe 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java @@ -25,8 +25,10 @@ import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.datasource.CatalogMgr; import org.apache.doris.datasource.HMSExternalCatalog; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; +import org.apache.doris.ha.FrontendNodeType; import org.apache.doris.statistics.util.StatisticsUtil; +import org.apache.doris.system.Frontend; +import 
org.apache.doris.thrift.TUpdateFollowerStatsCacheRequest; import org.apache.doris.utframe.TestWithFeService; import com.google.common.collect.Lists; @@ -90,60 +92,20 @@ public List execStatisticQuery(String sql) { } catch (InterruptedException e) { // ignore } - List colNames = new ArrayList<>(); - colNames.add("count"); - colNames.add("ndv"); - colNames.add("null_count"); - colNames.add("data_size_in_bytes"); - colNames.add("catalog_id"); - colNames.add("db_id"); - colNames.add("idx_id"); - colNames.add("tbl_id"); - colNames.add("col_id"); - colNames.add("min"); - colNames.add("max"); - colNames.add("part_id"); - colNames.add("update_time"); - List primitiveTypes = new ArrayList<>(); - primitiveTypes.add(PrimitiveType.BIGINT); - primitiveTypes.add(PrimitiveType.BIGINT); - primitiveTypes.add(PrimitiveType.BIGINT); - primitiveTypes.add(PrimitiveType.BIGINT); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - List values = new ArrayList<>(); - values.add("1"); - values.add("2"); - values.add("3"); - values.add("4"); - values.add("5"); - values.add("-1"); - values.add("6"); - values.add("7"); - values.add("8"); - values.add("9"); - values.add("10"); - values.add(null); - values.add(new Date().toString()); - ResultRow resultRow = new ResultRow(colNames, primitiveTypes, values); - return Arrays.asList(resultRow); + return Arrays.asList(StatsMockUtil.mockResultRow(true)); } }; StatisticsCache statisticsCache = new StatisticsCache(); ColumnStatistic columnStatistic = statisticsCache.getColumnStatistics(-1, -1, 0, "col"); + // load not finished yet, should return unknown Assertions.assertTrue(columnStatistic.isUnKnown); + // wait 1 sec to ensure `execStatisticQuery` is finished as much as possible. Thread.sleep(1000); + // load has finished, return corresponding stats. 
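+        // Note: the expected numbers below line up with the positional values produced by
+        // StatsMockUtil.mockResultRow (count at index 7, ndv at index 8, max at index 11).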
columnStatistic = statisticsCache.getColumnStatistics(-1, -1, 0, "col"); - Assertions.assertEquals(1, columnStatistic.count); - Assertions.assertEquals(2, columnStatistic.ndv); - Assertions.assertEquals(10, columnStatistic.maxValue); + Assertions.assertEquals(7, columnStatistic.count); + Assertions.assertEquals(8, columnStatistic.ndv); + Assertions.assertEquals(11, columnStatistic.maxValue); } @Test @@ -159,11 +121,10 @@ public Histogram fromResultRow(ResultRow resultRow) { Type dataType = col.getType(); histogramBuilder.setDataType(dataType); + HistData histData = new HistData(resultRow); + histogramBuilder.setSampleRate(histData.sampleRate); - double sampleRate = Double.parseDouble(resultRow.getColumnValue("sample_rate")); - histogramBuilder.setSampleRate(sampleRate); - - String json = resultRow.getColumnValue("buckets"); + String json = histData.buckets; JsonObject jsonObj = JsonParser.parseString(json).getAsJsonObject(); int bucketNum = jsonObj.get("num_buckets").getAsInt(); @@ -202,28 +163,14 @@ public List execStatisticQuery(String sql) { } catch (InterruptedException e) { // ignore } - List colNames = new ArrayList<>(); - colNames.add("catalog_id"); - colNames.add("db_id"); - colNames.add("idx_id"); - colNames.add("tbl_id"); - colNames.add("col_id"); - colNames.add("sample_rate"); - colNames.add("buckets"); - List primitiveTypes = new ArrayList<>(); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); - primitiveTypes.add(PrimitiveType.VARCHAR); List values = new ArrayList<>(); values.add("1"); values.add("2"); values.add("3"); - values.add("-1"); values.add("4"); + values.add("-1"); + values.add("col"); + values.add(null); values.add("0.2"); String buckets = "{\"num_buckets\":5,\"buckets\":" + "[{\"lower\":\"2022-09-21 17:30:29\",\"upper\":\"2022-09-21 22:30:29\"," @@ -237,7 +184,8 @@ public List execStatisticQuery(String sql) { + "{\"lower\":\"2022-09-25 17:30:29\",\"upper\":\"2022-09-25 22:30:29\"," + "\"count\":9,\"pre_sum\":37,\"ndv\":1}]}"; values.add(buckets); - ResultRow resultRow = new ResultRow(colNames, primitiveTypes, values); + values.add(new Date().toString()); + ResultRow resultRow = new ResultRow(values); return Collections.singletonList(resultRow); } }; @@ -306,4 +254,96 @@ public Env getCurrentEnv() { Assertions.assertEquals(6, columnStatistic.minValue); Assertions.assertEquals(7, columnStatistic.maxValue); } + + @Test + public void testSync1() throws Exception { + new MockUp() { + @Mock + public List loadColStats(long tableId, long idxId, String colName) { + List rows = new ArrayList<>(); + rows.add(StatsMockUtil.mockResultRow(true)); + rows.add(StatsMockUtil.mockResultRow(false)); + return rows; + } + + @Mock + public boolean isMaster(Frontend frontend) { + return frontend.getRole().equals(FrontendNodeType.MASTER); + } + }; + new MockUp() { + @Mock + public List getFrontends(FrontendNodeType nodeType) { + Frontend frontend1 = new Frontend(FrontendNodeType.MASTER, + "fe1", "localhost:1111", "localhost", 2222); + Frontend frontend2 = new Frontend(FrontendNodeType.FOLLOWER, + "fe1", "localhost:1112", "localhost", 2223); + List frontends = new ArrayList<>(); + frontends.add(frontend1); + frontends.add(frontend2); + return frontends; + } + }; + + new MockUp() { + @Mock + private void sendStats(Frontend frontend, + TUpdateFollowerStatsCacheRequest 
updateFollowerStatsCacheRequest) {
+                // DO NOTHING
+            }
+        };
+        StatisticsCache statisticsCache = new StatisticsCache();
+        statisticsCache.syncLoadColStats(1L, 1L, "any");
+        new Expectations() {
+            {
+                statisticsCache.sendStats((Frontend) any, (TUpdateFollowerStatsCacheRequest) any);
+                times = 1;
+            }
+        };
+    }
+
+    @Test
+    public void testSync2() throws Exception {
+        new MockUp<ColumnStatistic>() {
+            @Mock
+            public ColumnStatistic fromResultRow(ResultRow row) {
+                return ColumnStatistic.UNKNOWN;
+            }
+
+            @Mock
+            public ColumnStatistic fromResultRow(List<ResultRow> row) {
+                return ColumnStatistic.UNKNOWN;
+            }
+        };
+        new MockUp<Env>() {
+            @Mock
+            public List<Frontend> getFrontends(FrontendNodeType nodeType) {
+                Frontend frontend1 = new Frontend(FrontendNodeType.MASTER,
+                        "fe1", "localhost:1111", "localhost", 2222);
+                Frontend frontend2 = new Frontend(FrontendNodeType.FOLLOWER,
+                        "fe1", "localhost:1112", "localhost", 2223);
+                List<Frontend> frontends = new ArrayList<>();
+                frontends.add(frontend1);
+                frontends.add(frontend2);
+                return frontends;
+            }
+        };
+
+        new MockUp<StatisticsCache>() {
+            @Mock
+            private void sendStats(Frontend frontend,
+                    TUpdateFollowerStatsCacheRequest updateFollowerStatsCacheRequest) {
+                // DO NOTHING
+            }
+        };
+        StatisticsCache statisticsCache = new StatisticsCache();
+        statisticsCache.syncLoadColStats(1L, 1L, "any");
+        new Expectations() {
+            {
+                statisticsCache.sendStats((Frontend) any, (TUpdateFollowerStatsCacheRequest) any);
+                times = 0;
+            }
+        };
+    }
 }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoAnalyzerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoAnalyzerTest.java
new file mode 100644
index 00000000000000..81337a1580ba6a
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoAnalyzerTest.java
@@ -0,0 +1,202 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +package org.apache.doris.statistics; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DatabaseIf; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.TableIf; +import org.apache.doris.catalog.View; +import org.apache.doris.cluster.ClusterNamespace; +import org.apache.doris.common.DdlException; +import org.apache.doris.common.FeConstants; +import org.apache.doris.datasource.CatalogIf; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; +import org.apache.doris.statistics.AnalysisInfo.AnalysisType; +import org.apache.doris.statistics.AnalysisInfo.JobType; +import org.apache.doris.statistics.util.StatisticsUtil; +import org.apache.doris.system.SystemInfoService; + +import mockit.Expectations; +import mockit.Injectable; +import mockit.Mock; +import mockit.MockUp; +import mockit.Mocked; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class StatisticsAutoAnalyzerTest { + + @Test + public void testAnalyzeAll(@Injectable AnalysisInfo analysisInfo) { + new MockUp() { + @Mock + public Collection getAllDbs() { + Database db1 = new Database(1, SystemInfoService.DEFAULT_CLUSTER + + ClusterNamespace.CLUSTER_DELIMITER + FeConstants.INTERNAL_DB_NAME); + Database db2 = new Database(2, "anyDB"); + List databaseIfs = new ArrayList<>(); + databaseIfs.add(db1); + databaseIfs.add(db2); + return databaseIfs; + } + }; + new MockUp() { + @Mock + public List constructAnalysisInfo(DatabaseIf db) { + return Arrays.asList(analysisInfo, analysisInfo); + } + + int count = 0; + + @Mock + public AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) { + return count++ == 0 ? null : jobInfo; + } + + @Mock + public void createSystemAnalysisJob(AnalysisInfo jobInfo) + throws DdlException { + + } + }; + + StatisticsAutoAnalyzer saa = new StatisticsAutoAnalyzer(); + saa.runAfterCatalogReady(); + new Expectations() { + { + try { + saa.createSystemAnalysisJob((AnalysisInfo) any); + times = 1; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }; + } + + @Test + public void testConstructAnalysisInfo( + @Injectable OlapTable o2, @Injectable View v) { + new MockUp() { + @Mock + public List getTables() { + List
tableIfs = new ArrayList<>(); + tableIfs.add(o2); + tableIfs.add(v); + return tableIfs; + } + + @Mock + public String getFullName() { + return "anyDb"; + } + }; + + new MockUp() { + @Mock + public String getName() { + return "anytable"; + } + + @Mock + public List getBaseSchema() { + List columns = new ArrayList<>(); + columns.add(new Column("c1", PrimitiveType.INT)); + columns.add(new Column("c2", PrimitiveType.HLL)); + return columns; + } + }; + StatisticsAutoAnalyzer saa = new StatisticsAutoAnalyzer(); + List analysisInfos = + saa.constructAnalysisInfo(new Database(1, "anydb")); + Assertions.assertEquals(1, analysisInfos.size()); + Assertions.assertEquals("c1", analysisInfos.get(0).colName.split(",")[0]); + } + + @Test + public void testGetReAnalyzeRequiredPart0(@Mocked TableIf tableIf) { + + new Expectations() { + { + tableIf.getRowCount(); + result = 100; + } + }; + new MockUp() { + @Mock + public TableIf findTable(String catalogName, String dbName, String tblName) { + return tableIf; + } + }; + AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType( + AnalysisType.FUNDAMENTALS).setColName("col1").setJobType(JobType.SYSTEM).build(); + new MockUp() { + + int count = 0; + + TableStats[] tableStatsArr = + new TableStats[] {new TableStats(0, 0, analysisInfo), + new TableStats(0, 0, analysisInfo), null}; + + { + tableStatsArr[0].updatedRows.addAndGet(100); + tableStatsArr[1].updatedRows.addAndGet(0); + } + + @Mock + public TableStats findTableStatsStatus(long tblId) { + return tableStatsArr[count++]; + } + }; + + new MockUp() { + @Mock + public Set findReAnalyzeNeededPartitions(TableIf table, long lastExecTimeInMs) { + Set partitionNames = new HashSet<>(); + partitionNames.add("p1"); + partitionNames.add("p2"); + return partitionNames; + } + + @Mock + public AnalysisInfo getAnalysisJobInfo(AnalysisInfo jobInfo, TableIf table, + Set needRunPartitions) { + return new AnalysisInfoBuilder().build(); + } + }; + StatisticsAutoAnalyzer statisticsAutoAnalyzer = new StatisticsAutoAnalyzer(); + AnalysisInfo analysisInfo2 = new AnalysisInfoBuilder() + .setCatalogName("cname") + .setDbName("db") + .setTblName("tbl").build(); + Assertions.assertNotNull(statisticsAutoAnalyzer.getReAnalyzeRequiredPart(analysisInfo2)); + Assertions.assertNull(statisticsAutoAnalyzer.getReAnalyzeRequiredPart(analysisInfo2)); + Assertions.assertNotNull(statisticsAutoAnalyzer.getReAnalyzeRequiredPart(analysisInfo2)); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java new file mode 100644 index 00000000000000..21035051ff8606 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import java.util.ArrayList; +import java.util.List; + +public class StatsMockUtil { + + public static ResultRow mockResultRow(boolean col) { + List vals = new ArrayList() {{ + add("0"); + add("1"); + add("2"); + add("3"); + add("-1"); + add("5"); + if (col) { + add(null); + } else { + add("6"); + } + add("7"); + add("8"); + add("0"); + add("10"); + add("11"); + add("12"); + add(String.valueOf(System.currentTimeMillis())); + }}; + return new ResultRow(vals); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryResultTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryResultTest.java deleted file mode 100644 index 8d2518ae406dc5..00000000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryResultTest.java +++ /dev/null @@ -1,119 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.statistics.util; - -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.common.DdlException; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; - -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.Arrays; -import java.util.List; - - -public class InternalQueryResultTest { - private InternalQueryResult queryResult; - private InternalQueryResult.ResultRow resultRow; - - @Before - public void setUp() throws Exception { - List columns = Arrays.asList("c1", "c2", "c3", "c4", "c5"); - List types = Arrays.asList(PrimitiveType.STRING, - PrimitiveType.INT, PrimitiveType.FLOAT, - PrimitiveType.DOUBLE, PrimitiveType.BIGINT); - queryResult = new InternalQueryResult(); - List values = Arrays.asList("s1", "1000", "0.1", "0.0001", "1000000"); - resultRow = new ResultRow(columns, types, values); - } - - @Test - public void testGetColumnIndex() { - Assert.assertEquals(0, resultRow.getColumnIndex("c1")); - Assert.assertEquals(1, resultRow.getColumnIndex("c2")); - Assert.assertEquals(2, resultRow.getColumnIndex("c3")); - Assert.assertEquals(3, resultRow.getColumnIndex("c4")); - Assert.assertEquals(4, resultRow.getColumnIndex("c5")); - } - - @Test - public void testGetColumnName() throws Exception { - Assert.assertEquals("c1", resultRow.getColumnName(0)); - Assert.assertEquals("c2", resultRow.getColumnName(1)); - Assert.assertEquals("c3", resultRow.getColumnName(2)); - Assert.assertEquals("c4", resultRow.getColumnName(3)); - Assert.assertEquals("c5", resultRow.getColumnName(4)); - } - - @Test - public void testGetColumnTypeWithIndex() { - try { - Assert.assertEquals(PrimitiveType.STRING, resultRow.getColumnType(0)); - Assert.assertEquals(PrimitiveType.INT, resultRow.getColumnType(1)); - Assert.assertEquals(PrimitiveType.FLOAT, resultRow.getColumnType(2)); - Assert.assertEquals(PrimitiveType.DOUBLE, resultRow.getColumnType(3)); - Assert.assertEquals(PrimitiveType.BIGINT, resultRow.getColumnType(4)); - } catch (DdlException e) { - e.printStackTrace(); - Assert.fail(); - } - } - - @Test - public void testGetColumnTypeWithName() { - try { - Assert.assertEquals(PrimitiveType.STRING, resultRow.getColumnType("c1")); - Assert.assertEquals(PrimitiveType.INT, resultRow.getColumnType("c2")); - Assert.assertEquals(PrimitiveType.FLOAT, resultRow.getColumnType("c3")); - Assert.assertEquals(PrimitiveType.DOUBLE, resultRow.getColumnType("c4")); - Assert.assertEquals(PrimitiveType.BIGINT, resultRow.getColumnType("c5")); - } catch (DdlException e) { - e.printStackTrace(); - Assert.fail(); - } - } - - @Test - public void testGetColumnValueWithIndex() throws Exception { - Assert.assertEquals("s1", resultRow.getColumnValue(0).toString()); - Assert.assertEquals(1000, Integer.parseInt((String) resultRow.getColumnValue(1))); - Assert.assertEquals(0.1f, Float.parseFloat((String) resultRow.getColumnValue(2)), 0.0001); - Assert.assertEquals(0.0001, Double.parseDouble((String) resultRow.getColumnValue(3)), 0.0001); - Assert.assertEquals(1000000, Long.parseLong((String) resultRow.getColumnValue(4))); - } - - @Test - public void testGetColumnValueWithName() throws Exception { - Assert.assertEquals("s1", resultRow.getColumnValue(0).toString()); - Assert.assertEquals(1000, Integer.parseInt((String) resultRow.getColumnValue(1))); - Assert.assertEquals(0.1f, Float.parseFloat((String) resultRow.getColumnValue(2)), 0.0001); - Assert.assertEquals(0.0001, Double.parseDouble((String) resultRow.getColumnValue(3)), 0.0001); - 
Assert.assertEquals(1000000, Long.parseLong((String) resultRow.getColumnValue(4)));
-    }
-
-    @Test
-    public void testGetTypeValue() throws Exception {
-        Assert.assertEquals("s1", resultRow.getString(0));
-        Assert.assertEquals(1000, resultRow.getInt(1));
-        Assert.assertEquals(0.1f, resultRow.getFloat(2), 0.0001);
-        Assert.assertEquals(0.0001, resultRow.getDouble(3), 0.0001);
-        Assert.assertEquals(1000000, resultRow.getLong(4));
-    }
-}
diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift
index f4ac4657000f68..d916cfe9c1c53f 100644
--- a/gensrc/thrift/FrontendService.thrift
+++ b/gensrc/thrift/FrontendService.thrift
@@ -1101,7 +1101,7 @@ struct TGetBinlogLagResult {
 
 struct TUpdateFollowerStatsCacheRequest {
     1: optional string key;
-    2: optional string colStats;
+    2: list<string> statsRows;
 }
 
 struct TAutoIncrementRangeRequest {
diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy
index 8f50a8a70231a8..40f772025692d4 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -86,7 +86,7 @@ suite("test_analyze") {
     sql """
         SET enable_nereids_planner=true;
-        
+
     """
     sql """
         SET enable_fallback_to_original_planner=false;
@@ -98,7 +98,7 @@
     Thread.sleep(1000 * 60)
     sql """
-        SELECT COUNT(*) FROM ${tbl}; 
+        SELECT COUNT(*) FROM ${tbl};
     """
     sql """
@@ -109,7 +109,7 @@
     try {
         sql """
-            SELECT COUNT(*) FROM ${tbl}; 
+            SELECT COUNT(*) FROM ${tbl};
        """
     } catch (Exception e) {
         exception = e
@@ -124,7 +124,7 @@
     """
     sql """
-        SELECT COUNT(*) FROM ${tbl}; 
+        SELECT COUNT(*) FROM ${tbl};
     """
     sql """
@@ -133,7 +133,7 @@
     try {
         sql """
-            SELECT COUNT(*) FROM ${tbl}; 
+            SELECT COUNT(*) FROM ${tbl};
        """
     } catch (Exception e) {
         exception = e
@@ -143,99 +143,738 @@
         ANALYZE DATABASE ${db} WITH SYNC WITH SAMPLE PERCENT 10
     """
-    a_result_2 = sql """
-        ANALYZE DATABASE ${db} WITH SYNC WITH SAMPLE PERCENT 5
-    """
-
-    a_result_3 = sql """
-        ANALYZE DATABASE ${db} WITH SAMPLE PERCENT 5 WITH AUTO
-    """
-
-    show_result = sql """
-        SHOW ANALYZE
-    """
-
-    def contains_expected_table = {r ->
-        for(int i = 0; i < r.size; i++) {
-            if (r[i][3] == "${tbl}" ) {
-                return true
-            }
-        }
-        return false
-    }
-
-    def stats_job_removed = {r, id ->
-        for(int i = 0; i < r.size; i++) {
-            if (r[i][0] == id ) {
-                return false
-            }
-        }
-        return true
-    }
-
-    assert contains_expected_table(show_result)
-
-    sql """
-        DROP ANALYZE JOB ${a_result_3[0][4]}
-    """
-
-    show_result = sql """
-        SHOW ANALYZE
-    """
-
-    assert stats_job_removed(show_result, a_result_3[0][4])
+// a_result_2 = sql """
+//     ANALYZE DATABASE ${db} WITH SYNC WITH SAMPLE PERCENT 5
+// """
+//
+// a_result_3 = sql """
+//     ANALYZE DATABASE ${db} WITH SAMPLE PERCENT 5
+// """
+//
+// show_result = sql """
+//     SHOW ANALYZE
+// """
+//
+// def contains_expected_table = { r ->
+//     for (int i = 0; i < r.size; i++) {
+//         if (r[i][3] == "${tbl}") {
+//             return true
+//         }
+//     }
+//     return false
+// }
+//
+// def stats_job_removed = { r, id ->
+//     for (int i = 0; i < r.size; i++) {
+//         if (r[i][0] == id) {
+//             return false
+//         }
+//     }
+//     return true
+// }
+//
+// assert contains_expected_table(show_result)
+//
+// sql """
+//     DROP ANALYZE JOB ${a_result_3[0][4]}
+// """
+//
+// show_result = sql """
+//     SHOW ANALYZE
+// """
+//
+// assert stats_job_removed(show_result, a_result_3[0][4])
+//
+// sql """
+//     ANALYZE
DATABASE ${db} WITH SAMPLE ROWS 5 WITH PERIOD 100000
+//    """
+
+//    sql """
+//        DROP TABLE IF EXISTS analyze_partitioned_tbl_test
+//    """
+
+//    sql """
+//        CREATE TABLE analyze_partitioned_tbl_test (col1 int, col2 int, col3 int)
+//        PARTITION BY RANGE(`col2`) (
+//            PARTITION `p1` VALUES LESS THAN ('5'),
+//            PARTITION `p2` VALUES LESS THAN ('10'),
+//            PARTITION `P3` VALUES LESS THAN ('15'),
+//            PARTITION `P4` VALUES LESS THAN ('20'),
+//            PARTITION `P5` VALUES LESS THAN ('25'),
+//            PARTITION `P6` VALUES LESS THAN ('30'))
+//        DISTRIBUTED BY HASH(col3)
+//        BUCKETS 3
+//        PROPERTIES(
+//            "replication_num"="1"
+//        )
+//    """
+//
+//    sql """insert into analyze_partitioned_tbl_test values(1,3,1) """
+//    sql """insert into analyze_partitioned_tbl_test values(6,6,6) """
+//    sql """insert into analyze_partitioned_tbl_test values(11,6,6) """
+//    sql """insert into analyze_partitioned_tbl_test values(16,6,6) """
+//    sql """insert into analyze_partitioned_tbl_test values(21,6,6) """
+//    sql """insert into analyze_partitioned_tbl_test values(26,6,6) """
+//
+//    sql """
+//        ANALYZE TABLE analyze_partitioned_tbl_test WITH SYNC
+//    """
+//
+//    part_tbl_analyze_result = sql """
+//        SHOW COLUMN CACHED STATS analyze_partitioned_tbl_test(col1)
+//    """
+//
+//    def expected_result = { r ->
+//        for (int i = 0; i < r.size; i++) {
+//            if ((int) Double.parseDouble(r[i][1]) == 6) {
+//                return true
+//            } else {
+//                return false
+//            }
+//        }
+//        return false
+//    }
+//
+//    assert expected_result(part_tbl_analyze_result)
     sql """
-        ANALYZE DATABASE ${db} WITH SAMPLE ROWS 5 WITH PERIOD 100000
+        DROP TABLE IF EXISTS test_600_partition_table_analyze;
     """
     sql """
-        DROP TABLE IF EXISTS analyze_partitioned_tbl_test
-    """
+
+        CREATE TABLE IF NOT EXISTS test_600_partition_table_analyze (
+            `id` INT NOT NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        PARTITION BY LIST(id)
+        (
+            PARTITION `p0` VALUES IN (0),
+PARTITION `p1` VALUES IN (1),
+PARTITION `p2` VALUES IN (2),
+PARTITION `p3` VALUES IN (3),
+PARTITION `p4` VALUES IN (4),
+PARTITION `p5` VALUES IN (5),
+PARTITION `p6` VALUES IN (6),
+PARTITION `p7` VALUES IN (7),
+PARTITION `p8` VALUES IN (8),
+PARTITION `p9` VALUES IN (9),
+PARTITION `p10` VALUES IN (10),
+PARTITION `p11` VALUES IN (11),
+PARTITION `p12` VALUES IN (12),
+PARTITION `p13` VALUES IN (13),
+PARTITION `p14` VALUES IN (14),
+PARTITION `p15` VALUES IN (15),
+PARTITION `p16` VALUES IN (16),
+PARTITION `p17` VALUES IN (17),
+PARTITION `p18` VALUES IN (18),
+PARTITION `p19` VALUES IN (19),
+PARTITION `p20` VALUES IN (20),
+PARTITION `p21` VALUES IN (21),
+PARTITION `p22` VALUES IN (22),
+PARTITION `p23` VALUES IN (23),
+PARTITION `p24` VALUES IN (24),
+PARTITION `p25` VALUES IN (25),
+PARTITION `p26` VALUES IN (26),
+PARTITION `p27` VALUES IN (27),
+PARTITION `p28` VALUES IN (28),
+PARTITION `p29` VALUES IN (29),
+PARTITION `p30` VALUES IN (30),
+PARTITION `p31` VALUES IN (31),
+PARTITION `p32` VALUES IN (32),
+PARTITION `p33` VALUES IN (33),
+PARTITION `p34` VALUES IN (34),
+PARTITION `p35` VALUES IN (35),
+PARTITION `p36` VALUES IN (36),
+PARTITION `p37` VALUES IN (37),
+PARTITION `p38` VALUES IN (38),
+PARTITION `p39` VALUES IN (39),
+PARTITION `p40` VALUES IN (40),
+PARTITION `p41` VALUES IN (41),
+PARTITION `p42` VALUES IN (42),
+PARTITION `p43` VALUES IN (43),
+PARTITION `p44` VALUES IN (44),
+PARTITION `p45` VALUES IN (45),
+PARTITION `p46` VALUES IN (46),
+PARTITION `p47` VALUES IN (47),
+PARTITION `p48` VALUES IN (48),
+PARTITION `p49` VALUES IN (49),
+PARTITION `p50` VALUES IN (50),
+PARTITION `p51` VALUES IN (51),
+PARTITION `p52` VALUES IN (52),
+PARTITION `p53` VALUES IN (53),
+PARTITION `p54` VALUES IN (54),
+PARTITION `p55` VALUES IN (55),
+PARTITION `p56` VALUES IN (56),
+PARTITION `p57` VALUES IN (57),
+PARTITION `p58` VALUES IN (58),
+PARTITION `p59` VALUES IN (59),
+PARTITION `p60` VALUES IN (60),
+PARTITION `p61` VALUES IN (61),
+PARTITION `p62` VALUES IN (62),
+PARTITION `p63` VALUES IN (63),
+PARTITION `p64` VALUES IN (64),
+PARTITION `p65` VALUES IN (65),
+PARTITION `p66` VALUES IN (66),
+PARTITION `p67` VALUES IN (67),
+PARTITION `p68` VALUES IN (68),
+PARTITION `p69` VALUES IN (69),
+PARTITION `p70` VALUES IN (70),
+PARTITION `p71` VALUES IN (71),
+PARTITION `p72` VALUES IN (72),
+PARTITION `p73` VALUES IN (73),
+PARTITION `p74` VALUES IN (74),
+PARTITION `p75` VALUES IN (75),
+PARTITION `p76` VALUES IN (76),
+PARTITION `p77` VALUES IN (77),
+PARTITION `p78` VALUES IN (78),
+PARTITION `p79` VALUES IN (79),
+PARTITION `p80` VALUES IN (80),
+PARTITION `p81` VALUES IN (81),
+PARTITION `p82` VALUES IN (82),
+PARTITION `p83` VALUES IN (83),
+PARTITION `p84` VALUES IN (84),
+PARTITION `p85` VALUES IN (85),
+PARTITION `p86` VALUES IN (86),
+PARTITION `p87` VALUES IN (87),
+PARTITION `p88` VALUES IN (88),
+PARTITION `p89` VALUES IN (89),
+PARTITION `p90` VALUES IN (90),
+PARTITION `p91` VALUES IN (91),
+PARTITION `p92` VALUES IN (92),
+PARTITION `p93` VALUES IN (93),
+PARTITION `p94` VALUES IN (94),
+PARTITION `p95` VALUES IN (95),
+PARTITION `p96` VALUES IN (96),
+PARTITION `p97` VALUES IN (97),
+PARTITION `p98` VALUES IN (98),
+PARTITION `p99` VALUES IN (99),
+PARTITION `p100` VALUES IN (100),
+PARTITION `p101` VALUES IN (101),
+PARTITION `p102` VALUES IN (102),
+PARTITION `p103` VALUES IN (103),
+PARTITION `p104` VALUES IN (104),
+PARTITION `p105` VALUES IN (105),
+PARTITION `p106` VALUES IN (106),
+PARTITION `p107` VALUES IN (107),
+PARTITION `p108` VALUES IN (108),
+PARTITION `p109` VALUES IN (109),
+PARTITION `p110` VALUES IN (110),
+PARTITION `p111` VALUES IN (111),
+PARTITION `p112` VALUES IN (112),
+PARTITION `p113` VALUES IN (113),
+PARTITION `p114` VALUES IN (114),
+PARTITION `p115` VALUES IN (115),
+PARTITION `p116` VALUES IN (116),
+PARTITION `p117` VALUES IN (117),
+PARTITION `p118` VALUES IN (118),
+PARTITION `p119` VALUES IN (119),
+PARTITION `p120` VALUES IN (120),
+PARTITION `p121` VALUES IN (121),
+PARTITION `p122` VALUES IN (122),
+PARTITION `p123` VALUES IN (123),
+PARTITION `p124` VALUES IN (124),
+PARTITION `p125` VALUES IN (125),
+PARTITION `p126` VALUES IN (126),
+PARTITION `p127` VALUES IN (127),
+PARTITION `p128` VALUES IN (128),
+PARTITION `p129` VALUES IN (129),
+PARTITION `p130` VALUES IN (130),
+PARTITION `p131` VALUES IN (131),
+PARTITION `p132` VALUES IN (132),
+PARTITION `p133` VALUES IN (133),
+PARTITION `p134` VALUES IN (134),
+PARTITION `p135` VALUES IN (135),
+PARTITION `p136` VALUES IN (136),
+PARTITION `p137` VALUES IN (137),
+PARTITION `p138` VALUES IN (138),
+PARTITION `p139` VALUES IN (139),
+PARTITION `p140` VALUES IN (140),
+PARTITION `p141` VALUES IN (141),
+PARTITION `p142` VALUES IN (142),
+PARTITION `p143` VALUES IN (143),
+PARTITION `p144` VALUES IN (144),
+PARTITION `p145` VALUES IN (145),
+PARTITION `p146` VALUES IN (146),
+PARTITION `p147` VALUES IN (147),
+PARTITION `p148` VALUES IN (148),
+PARTITION `p149` VALUES IN (149),
+PARTITION `p150` VALUES IN (150),
+PARTITION `p151` VALUES IN (151),
+PARTITION `p152` VALUES IN (152),
+PARTITION `p153` VALUES IN (153),
+PARTITION `p154` VALUES IN (154),
+PARTITION `p155` VALUES IN (155),
+PARTITION `p156` VALUES IN (156),
+PARTITION `p157` VALUES IN (157),
+PARTITION `p158` VALUES IN (158),
+PARTITION `p159` VALUES IN (159),
+PARTITION `p160` VALUES IN (160),
+PARTITION `p161` VALUES IN (161),
+PARTITION `p162` VALUES IN (162),
+PARTITION `p163` VALUES IN (163),
+PARTITION `p164` VALUES IN (164),
+PARTITION `p165` VALUES IN (165),
+PARTITION `p166` VALUES IN (166),
+PARTITION `p167` VALUES IN (167),
+PARTITION `p168` VALUES IN (168),
+PARTITION `p169` VALUES IN (169),
+PARTITION `p170` VALUES IN (170),
+PARTITION `p171` VALUES IN (171),
+PARTITION `p172` VALUES IN (172),
+PARTITION `p173` VALUES IN (173),
+PARTITION `p174` VALUES IN (174),
+PARTITION `p175` VALUES IN (175),
+PARTITION `p176` VALUES IN (176),
+PARTITION `p177` VALUES IN (177),
+PARTITION `p178` VALUES IN (178),
+PARTITION `p179` VALUES IN (179),
+PARTITION `p180` VALUES IN (180),
+PARTITION `p181` VALUES IN (181),
+PARTITION `p182` VALUES IN (182),
+PARTITION `p183` VALUES IN (183),
+PARTITION `p184` VALUES IN (184),
+PARTITION `p185` VALUES IN (185),
+PARTITION `p186` VALUES IN (186),
+PARTITION `p187` VALUES IN (187),
+PARTITION `p188` VALUES IN (188),
+PARTITION `p189` VALUES IN (189),
+PARTITION `p190` VALUES IN (190),
+PARTITION `p191` VALUES IN (191),
+PARTITION `p192` VALUES IN (192),
+PARTITION `p193` VALUES IN (193),
+PARTITION `p194` VALUES IN (194),
+PARTITION `p195` VALUES IN (195),
+PARTITION `p196` VALUES IN (196),
+PARTITION `p197` VALUES IN (197),
+PARTITION `p198` VALUES IN (198),
+PARTITION `p199` VALUES IN (199),
+PARTITION `p200` VALUES IN (200),
+PARTITION `p201` VALUES IN (201),
+PARTITION `p202` VALUES IN (202),
+PARTITION `p203` VALUES IN (203),
+PARTITION `p204` VALUES IN (204),
+PARTITION `p205` VALUES IN (205),
+PARTITION `p206` VALUES IN (206),
+PARTITION `p207` VALUES IN (207),
+PARTITION `p208` VALUES IN (208),
+PARTITION `p209` VALUES IN (209),
+PARTITION `p210` VALUES IN (210),
+PARTITION `p211` VALUES IN (211),
+PARTITION `p212` VALUES IN (212),
+PARTITION `p213` VALUES IN (213),
+PARTITION `p214` VALUES IN (214),
+PARTITION `p215` VALUES IN (215),
+PARTITION `p216` VALUES IN (216),
+PARTITION `p217` VALUES IN (217),
+PARTITION `p218` VALUES IN (218),
+PARTITION `p219` VALUES IN (219),
+PARTITION `p220` VALUES IN (220),
+PARTITION `p221` VALUES IN (221),
+PARTITION `p222` VALUES IN (222),
+PARTITION `p223` VALUES IN (223),
+PARTITION `p224` VALUES IN (224),
+PARTITION `p225` VALUES IN (225),
+PARTITION `p226` VALUES IN (226),
+PARTITION `p227` VALUES IN (227),
+PARTITION `p228` VALUES IN (228),
+PARTITION `p229` VALUES IN (229),
+PARTITION `p230` VALUES IN (230),
+PARTITION `p231` VALUES IN (231),
+PARTITION `p232` VALUES IN (232),
+PARTITION `p233` VALUES IN (233),
+PARTITION `p234` VALUES IN (234),
+PARTITION `p235` VALUES IN (235),
+PARTITION `p236` VALUES IN (236),
+PARTITION `p237` VALUES IN (237),
+PARTITION `p238` VALUES IN (238),
+PARTITION `p239` VALUES IN (239),
+PARTITION `p240` VALUES IN (240),
+PARTITION `p241` VALUES IN (241),
+PARTITION `p242` VALUES IN (242),
+PARTITION `p243` VALUES IN (243),
+PARTITION `p244` VALUES IN (244),
+PARTITION `p245` VALUES IN (245),
+PARTITION `p246` VALUES IN (246),
+PARTITION `p247` VALUES IN (247),
+PARTITION `p248` VALUES IN (248),
+PARTITION `p249` VALUES IN (249),
+PARTITION `p250` VALUES IN (250),
+PARTITION `p251` VALUES IN (251),
+PARTITION `p252` VALUES IN (252),
+PARTITION `p253` VALUES IN (253),
+PARTITION `p254` VALUES IN (254),
+PARTITION `p255` VALUES IN (255),
+PARTITION `p256` VALUES IN (256),
+PARTITION `p257` VALUES IN (257),
+PARTITION `p258` VALUES IN (258),
+PARTITION `p259` VALUES IN (259),
+PARTITION `p260` VALUES IN (260),
+PARTITION `p261` VALUES IN (261),
+PARTITION `p262` VALUES IN (262),
+PARTITION `p263` VALUES IN (263),
+PARTITION `p264` VALUES IN (264),
+PARTITION `p265` VALUES IN (265),
+PARTITION `p266` VALUES IN (266),
+PARTITION `p267` VALUES IN (267),
+PARTITION `p268` VALUES IN (268),
+PARTITION `p269` VALUES IN (269),
+PARTITION `p270` VALUES IN (270),
+PARTITION `p271` VALUES IN (271),
+PARTITION `p272` VALUES IN (272),
+PARTITION `p273` VALUES IN (273),
+PARTITION `p274` VALUES IN (274),
+PARTITION `p275` VALUES IN (275),
+PARTITION `p276` VALUES IN (276),
+PARTITION `p277` VALUES IN (277),
+PARTITION `p278` VALUES IN (278),
+PARTITION `p279` VALUES IN (279),
+PARTITION `p280` VALUES IN (280),
+PARTITION `p281` VALUES IN (281),
+PARTITION `p282` VALUES IN (282),
+PARTITION `p283` VALUES IN (283),
+PARTITION `p284` VALUES IN (284),
+PARTITION `p285` VALUES IN (285),
+PARTITION `p286` VALUES IN (286),
+PARTITION `p287` VALUES IN (287),
+PARTITION `p288` VALUES IN (288),
+PARTITION `p289` VALUES IN (289),
+PARTITION `p290` VALUES IN (290),
+PARTITION `p291` VALUES IN (291),
+PARTITION `p292` VALUES IN (292),
+PARTITION `p293` VALUES IN (293),
+PARTITION `p294` VALUES IN (294),
+PARTITION `p295` VALUES IN (295),
+PARTITION `p296` VALUES IN (296),
+PARTITION `p297` VALUES IN (297),
+PARTITION `p298` VALUES IN (298),
+PARTITION `p299` VALUES IN (299),
+PARTITION `p300` VALUES IN (300),
+PARTITION `p301` VALUES IN (301),
+PARTITION `p302` VALUES IN (302),
+PARTITION `p303` VALUES IN (303),
+PARTITION `p304` VALUES IN (304),
+PARTITION `p305` VALUES IN (305),
+PARTITION `p306` VALUES IN (306),
+PARTITION `p307` VALUES IN (307),
+PARTITION `p308` VALUES IN (308),
+PARTITION `p309` VALUES IN (309),
+PARTITION `p310` VALUES IN (310),
+PARTITION `p311` VALUES IN (311),
+PARTITION `p312` VALUES IN (312),
+PARTITION `p313` VALUES IN (313),
+PARTITION `p314` VALUES IN (314),
+PARTITION `p315` VALUES IN (315),
+PARTITION `p316` VALUES IN (316),
+PARTITION `p317` VALUES IN (317),
+PARTITION `p318` VALUES IN (318),
+PARTITION `p319` VALUES IN (319),
+PARTITION `p320` VALUES IN (320),
+PARTITION `p321` VALUES IN (321),
+PARTITION `p322` VALUES IN (322),
+PARTITION `p323` VALUES IN (323),
+PARTITION `p324` VALUES IN (324),
+PARTITION `p325` VALUES IN (325),
+PARTITION `p326` VALUES IN (326),
+PARTITION `p327` VALUES IN (327),
+PARTITION `p328` VALUES IN (328),
+PARTITION `p329` VALUES IN (329),
+PARTITION `p330` VALUES IN (330),
+PARTITION `p331` VALUES IN (331),
+PARTITION `p332` VALUES IN (332),
+PARTITION `p333` VALUES IN (333),
+PARTITION `p334` VALUES IN (334),
+PARTITION `p335` VALUES IN (335),
+PARTITION `p336` VALUES IN (336),
+PARTITION `p337` VALUES IN (337),
+PARTITION `p338` VALUES IN (338),
+PARTITION `p339` VALUES IN (339),
+PARTITION `p340` VALUES IN (340),
+PARTITION `p341` VALUES IN (341),
+PARTITION `p342` VALUES IN (342),
+PARTITION `p343` VALUES IN (343),
+PARTITION `p344` VALUES IN (344),
+PARTITION `p345` VALUES IN (345),
+PARTITION `p346` VALUES IN (346),
+PARTITION `p347` VALUES IN (347),
+PARTITION `p348` VALUES IN (348),
+PARTITION `p349` VALUES IN (349),
+PARTITION `p350` VALUES IN (350),
+PARTITION `p351` VALUES IN (351),
+PARTITION `p352` VALUES IN (352),
+PARTITION `p353` VALUES IN (353),
+PARTITION `p354` VALUES IN (354),
+PARTITION `p355` VALUES IN (355),
+PARTITION `p356` VALUES IN (356),
+PARTITION `p357` VALUES IN (357),
+PARTITION `p358` VALUES IN (358),
+PARTITION `p359` VALUES IN (359),
+PARTITION `p360` VALUES IN (360),
+PARTITION `p361` VALUES IN (361),
+PARTITION `p362` VALUES IN (362),
+PARTITION `p363` VALUES IN (363),
+PARTITION `p364` VALUES IN (364),
+PARTITION `p365` VALUES IN (365),
+PARTITION `p366` VALUES IN (366),
+PARTITION `p367` VALUES IN (367),
+PARTITION `p368` VALUES IN (368),
+PARTITION `p369` VALUES IN (369),
+PARTITION `p370` VALUES IN (370),
+PARTITION `p371` VALUES IN (371),
+PARTITION `p372` VALUES IN (372),
+PARTITION `p373` VALUES IN (373),
+PARTITION `p374` VALUES IN (374),
+PARTITION `p375` VALUES IN (375),
+PARTITION `p376` VALUES IN (376),
+PARTITION `p377` VALUES IN (377),
+PARTITION `p378` VALUES IN (378),
+PARTITION `p379` VALUES IN (379),
+PARTITION `p380` VALUES IN (380),
+PARTITION `p381` VALUES IN (381),
+PARTITION `p382` VALUES IN (382),
+PARTITION `p383` VALUES IN (383),
+PARTITION `p384` VALUES IN (384),
+PARTITION `p385` VALUES IN (385),
+PARTITION `p386` VALUES IN (386),
+PARTITION `p387` VALUES IN (387),
+PARTITION `p388` VALUES IN (388),
+PARTITION `p389` VALUES IN (389),
+PARTITION `p390` VALUES IN (390),
+PARTITION `p391` VALUES IN (391),
+PARTITION `p392` VALUES IN (392),
+PARTITION `p393` VALUES IN (393),
+PARTITION `p394` VALUES IN (394),
+PARTITION `p395` VALUES IN (395),
+PARTITION `p396` VALUES IN (396),
+PARTITION `p397` VALUES IN (397),
+PARTITION `p398` VALUES IN (398),
+PARTITION `p399` VALUES IN (399),
+PARTITION `p400` VALUES IN (400),
+PARTITION `p401` VALUES IN (401),
+PARTITION `p402` VALUES IN (402),
+PARTITION `p403` VALUES IN (403),
+PARTITION `p404` VALUES IN (404),
+PARTITION `p405` VALUES IN (405),
+PARTITION `p406` VALUES IN (406),
+PARTITION `p407` VALUES IN (407),
+PARTITION `p408` VALUES IN (408),
+PARTITION `p409` VALUES IN (409),
+PARTITION `p410` VALUES IN (410),
+PARTITION `p411` VALUES IN (411),
+PARTITION `p412` VALUES IN (412),
+PARTITION `p413` VALUES IN (413),
+PARTITION `p414` VALUES IN (414),
+PARTITION `p415` VALUES IN (415),
+PARTITION `p416` VALUES IN (416),
+PARTITION `p417` VALUES IN (417),
+PARTITION `p418` VALUES IN (418),
+PARTITION `p419` VALUES IN (419),
+PARTITION `p420` VALUES IN (420),
+PARTITION `p421` VALUES IN (421),
+PARTITION `p422` VALUES IN (422),
+PARTITION `p423` VALUES IN (423),
+PARTITION `p424` VALUES IN (424),
+PARTITION `p425` VALUES IN (425),
+PARTITION `p426` VALUES IN (426),
+PARTITION `p427` VALUES IN (427),
+PARTITION `p428` VALUES IN (428),
+PARTITION `p429` VALUES IN (429),
+PARTITION `p430` VALUES IN (430),
+PARTITION `p431` VALUES IN (431),
+PARTITION `p432` VALUES IN (432),
+PARTITION `p433` VALUES IN (433),
+PARTITION `p434` VALUES IN (434),
+PARTITION `p435` VALUES IN (435),
+PARTITION `p436` VALUES IN (436),
+PARTITION `p437` VALUES IN (437),
+PARTITION `p438` VALUES IN (438),
+PARTITION `p439` VALUES IN (439),
+PARTITION `p440` VALUES IN (440),
+PARTITION `p441` VALUES IN (441),
+PARTITION `p442` VALUES IN (442),
+PARTITION `p443` VALUES IN (443),
+PARTITION `p444` VALUES IN (444),
+PARTITION `p445` VALUES IN (445),
+PARTITION `p446` VALUES IN (446),
+PARTITION `p447` VALUES IN (447),
+PARTITION `p448` VALUES IN (448),
+PARTITION `p449` VALUES IN (449),
+PARTITION `p450` VALUES IN (450),
+PARTITION `p451` VALUES IN (451),
+PARTITION `p452` VALUES IN (452),
+PARTITION `p453` VALUES IN (453),
+PARTITION `p454` VALUES IN (454),
+PARTITION `p455` VALUES IN (455),
+PARTITION `p456` VALUES IN (456),
+PARTITION `p457` VALUES IN (457),
+PARTITION `p458` VALUES IN (458),
+PARTITION `p459` VALUES IN (459),
+PARTITION `p460` VALUES IN (460),
+PARTITION `p461` VALUES IN (461),
+PARTITION `p462` VALUES IN (462),
+PARTITION `p463` VALUES IN (463),
+PARTITION `p464` VALUES IN (464),
+PARTITION `p465` VALUES IN (465),
+PARTITION `p466` VALUES IN (466),
+PARTITION `p467` VALUES IN (467),
+PARTITION `p468` VALUES IN (468),
+PARTITION `p469` VALUES IN (469),
+PARTITION `p470` VALUES IN (470),
+PARTITION `p471` VALUES IN (471),
+PARTITION `p472` VALUES IN (472),
+PARTITION `p473` VALUES IN (473),
+PARTITION `p474` VALUES IN (474),
+PARTITION `p475` VALUES IN (475),
+PARTITION `p476` VALUES IN (476),
+PARTITION `p477` VALUES IN (477),
+PARTITION `p478` VALUES IN (478),
+PARTITION `p479` VALUES IN (479),
+PARTITION `p480` VALUES IN (480),
+PARTITION `p481` VALUES IN (481),
+PARTITION `p482` VALUES IN (482),
+PARTITION `p483` VALUES IN (483),
+PARTITION `p484` VALUES IN (484),
+PARTITION `p485` VALUES IN (485),
+PARTITION `p486` VALUES IN (486),
+PARTITION `p487` VALUES IN (487),
+PARTITION `p488` VALUES IN (488),
+PARTITION `p489` VALUES IN (489),
+PARTITION `p490` VALUES IN (490),
+PARTITION `p491` VALUES IN (491),
+PARTITION `p492` VALUES IN (492),
+PARTITION `p493` VALUES IN (493),
+PARTITION `p494` VALUES IN (494),
+PARTITION `p495` VALUES IN (495),
+PARTITION `p496` VALUES IN (496),
+PARTITION `p497` VALUES IN (497),
+PARTITION `p498` VALUES IN (498),
+PARTITION `p499` VALUES IN (499),
+PARTITION `p500` VALUES IN (500),
+PARTITION `p501` VALUES IN (501),
+PARTITION `p502` VALUES IN (502),
+PARTITION `p503` VALUES IN (503),
+PARTITION `p504` VALUES IN (504),
+PARTITION `p505` VALUES IN (505),
+PARTITION `p506` VALUES IN (506),
+PARTITION `p507` VALUES IN (507),
+PARTITION `p508` VALUES IN (508),
+PARTITION `p509` VALUES IN (509),
+PARTITION `p510` VALUES IN (510),
+PARTITION `p511` VALUES IN (511),
+PARTITION `p512` VALUES IN (512),
+PARTITION `p513` VALUES IN (513),
+PARTITION `p514` VALUES IN (514),
+PARTITION `p515` VALUES IN (515),
+PARTITION `p516` VALUES IN (516),
+PARTITION `p517` VALUES IN (517),
+PARTITION `p518` VALUES IN (518),
+PARTITION `p519` VALUES IN (519),
+PARTITION `p520` VALUES IN (520),
+PARTITION `p521` VALUES IN (521),
+PARTITION `p522` VALUES IN (522),
+PARTITION `p523` VALUES IN (523),
+PARTITION `p524` VALUES IN (524),
+PARTITION `p525` VALUES IN (525),
+PARTITION `p526` VALUES IN (526),
+PARTITION `p527` VALUES IN (527),
+PARTITION `p528` VALUES IN (528),
+PARTITION `p529` VALUES IN (529),
+PARTITION `p530` VALUES IN (530),
+PARTITION `p531` VALUES IN (531),
+PARTITION `p532` VALUES IN (532),
+PARTITION `p533` VALUES IN (533),
+PARTITION `p534` VALUES IN (534),
+PARTITION `p535` VALUES IN (535),
+PARTITION `p536` VALUES IN (536),
+PARTITION `p537` VALUES IN (537),
+PARTITION `p538` VALUES IN (538),
+PARTITION `p539` VALUES IN (539),
+PARTITION `p540` VALUES IN (540),
+PARTITION `p541` VALUES IN (541),
+PARTITION `p542` VALUES IN (542),
+PARTITION `p543` VALUES IN (543),
+PARTITION `p544` VALUES IN (544),
+PARTITION `p545` VALUES IN (545),
+PARTITION `p546` VALUES IN (546),
+PARTITION `p547` VALUES IN (547),
+PARTITION `p548` VALUES IN (548),
+PARTITION `p549` VALUES IN (549),
+PARTITION `p550` VALUES IN (550),
+PARTITION `p551` VALUES IN (551),
+PARTITION `p552` VALUES IN (552),
+PARTITION `p553` VALUES IN (553),
+PARTITION `p554` VALUES IN (554),
+PARTITION `p555` VALUES IN (555),
+PARTITION `p556` VALUES IN (556),
+PARTITION `p557` VALUES IN (557),
+PARTITION `p558` VALUES IN (558),
+PARTITION `p559` VALUES IN (559),
+PARTITION `p560` VALUES IN (560),
+PARTITION `p561` VALUES IN (561),
+PARTITION `p562` VALUES IN (562),
+PARTITION `p563` VALUES IN (563),
+PARTITION `p564` VALUES IN (564),
+PARTITION `p565` VALUES IN (565),
+PARTITION `p566` VALUES IN (566),
+PARTITION `p567` VALUES IN (567),
+PARTITION `p568` VALUES IN (568),
+PARTITION `p569` VALUES IN (569),
+PARTITION `p570` VALUES IN (570),
+PARTITION `p571` VALUES IN (571),
+PARTITION `p572` VALUES IN (572),
+PARTITION `p573` VALUES IN (573),
+PARTITION `p574` VALUES IN (574),
+PARTITION `p575` VALUES IN (575),
+PARTITION `p576` VALUES IN (576),
+PARTITION `p577` VALUES IN (577),
+PARTITION `p578` VALUES IN (578),
+PARTITION `p579` VALUES IN (579),
+PARTITION `p580` VALUES IN (580),
+PARTITION `p581` VALUES IN (581),
+PARTITION `p582` VALUES IN (582),
+PARTITION `p583` VALUES IN (583),
+PARTITION `p584` VALUES IN (584),
+PARTITION `p585` VALUES IN (585),
+PARTITION `p586` VALUES IN (586),
+PARTITION `p587` VALUES IN (587),
+PARTITION `p588` VALUES IN (588),
+PARTITION `p589` VALUES IN (589),
+PARTITION `p590` VALUES IN (590),
+PARTITION `p591` VALUES IN (591),
+PARTITION `p592` VALUES IN (592),
+PARTITION `p593` VALUES IN (593),
+PARTITION `p594` VALUES IN (594),
+PARTITION `p595` VALUES IN (595),
+PARTITION `p596` VALUES IN (596),
+PARTITION `p597` VALUES IN (597),
+PARTITION `p598` VALUES IN (598),
+PARTITION `p599` VALUES IN (599)
-    sql """
-        CREATE TABLE analyze_partitioned_tbl_test (col1 int, col2 int, col3 int)
-        PARTITION BY RANGE(`col2`) (
-            PARTITION `p1` VALUES LESS THAN ('5'),
-            PARTITION `p2` VALUES LESS THAN ('10'),
-            PARTITION `P3` VALUES LESS THAN ('15'),
-            PARTITION `P4` VALUES LESS THAN ('20'),
-            PARTITION `P5` VALUES LESS THAN ('25'),
-            PARTITION `P6` VALUES LESS THAN ('30'))
-        DISTRIBUTED BY HASH(col3)
-        BUCKETS 3
-        PROPERTIES(
-            "replication_num"="1"
     )
+        DISTRIBUTED BY HASH(`id`) BUCKETS 16
+        PROPERTIES
+        (
+            "replication_num" = "1"
+        );
+    """
-    sql """insert into analyze_partitioned_tbl_test values(1,3,1) """
-    sql """insert into analyze_partitioned_tbl_test values(6,6,6) """
-    sql """insert into analyze_partitioned_tbl_test values(11,6,6) """
-    sql """insert into analyze_partitioned_tbl_test values(16,6,6) """
-    sql """insert into analyze_partitioned_tbl_test values(21,6,6) """
-    sql """insert into analyze_partitioned_tbl_test values(26,6,6) """
-    sql """
-        ANALYZE TABLE analyze_partitioned_tbl_test WITH SYNC
+        INSERT INTO test_600_partition_table_analyze VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),(13),(14),(15),(16),(17),(18),(19),(20),(21),(22),(23),(24),(25),(26),(27),(28),(29),(30),(31),(32),(33),(34),(35),(36),(37),(38),(39),(40),(41),(42),(43),(44),(45),(46),(47),(48),(49),(50),(51),(52),(53),(54),(55),(56),(57),(58),(59),(60),(61),(62),(63),(64),(65),(66),(67),(68),(69),(70),(71),(72),(73),(74),(75),(76),(77),(78),(79),(80),(81),(82),(83),(84),(85),(86),(87),(88),(89),(90),(91),(92),(93),(94),(95),(96),(97),(98),(99),(100),(101),(102),(103),(104),(105),(106),(107),(108),(109),(110),(111),(112),(113),(114),(115),(116),(117),(118),(119),(120),(121),(122),(123),(124),(125),(126),(127),(128),(129),(130),(131),(132),(133),(134),(135),(136),(137),(138),(139),(140),(141),(142),(143),(144),(145),(146),(147),(148),(149),(150),(151),(152),(153),(154),(155),(156),(157),(158),(159),(160),(161),(162),(163),(164),(165),(166),(167),(168),(169),(170),(171),(172),(173),(174),(175),(176),(177),(178),(179),(180),(181),(182),(183),(184),(185),(186),(187),(188),(189),(190),(191),(192),(193),(194),(195),(196),(197),(198),(199),(200),(201),(202),(203),(204),(205),(206),(207),(208),(209),(210),(211),(212),(213),(214),(215),(216),(217),(218),(219),(220),(221),(222),(223),(224),(225),(226),(227),(228),(229),(230),(231),(232),(233),(234),(235),(236),(237),(238),(239),(240),(241),(242),(243),(244),(245),(246),(247),(248),(249),(250),(251),(252),(253),(254),(255),(256),(257),(258),(259),(260),(261),(262),(263),(264),(265),(266),(267),(268),(269),(270),(271),(272),(273),(274),(275),(276),(277),(278),(279),(280),(281),(282),(283),(284),(285),(286),(287),(288),(289),(290),(291),(292),(293),(294),(295),(296),(297),(298),(299),(300),(301),(302),(303),(304),(305),(306),(307),(308),(309),(310),(311),(312),(313),(314),(315),(316),(317),(318),(319),(320),(321),(322),(323),(324),(325),(326),(327),(328),(329),(330),(331),(332),(333),(334),(335),(336),(337),(338),(339),(340),(341),(342),(343),(344),(345),(346),(347),(348),(349),(350),(351),(352),(353),(354),(355),(356),(357),(358),(359),(360),(361),(362),(363),(364),(365),(366),(367),(368),(369),(370),(371),(372),(373),(374),(375),(376),(377),(378),(379),(380),(381),(382),(383),(384),(385),(386),(387),(388),(389),(390),(391),(392),(393),(394),(395),(396),(397),(398),(399),(400),(401),(402),(403),(404),(405),(406),(407),(408),(409),(410),(411),(412),(413),(414),(415),(416),(417),(418),(419),(420),(421),(422),(423),(424),(425),(426),(427),(428),(429),(430),(431),(432),(433),(434),(435),(436),(437),(438),(439),(440),(441),(442),(443),(444),(445),(446),(447),(448),(449),(450),(451),(452),(453),(454),(455),(456),(457),(458),(459),(460),(461),(462),(463),(464),(465),(466),(467),(468),(469),(470),(471),(472),(473),(474),(475),(476),(477),(478),(479),(480),(481),(482),(483),(484),(485),(486),(487),(488),(489),(490),(491),(492),(493),(494),(495),(496),(497),(498),(499),(500),(501),(502),(503),(504),(505),(506),(507),(508),(509),(510),(511),(512),(513),(514),(515),(516),(517),(518),(519),(520),(521),(522),(523),(524),(525),(526),(527),(528),(529),(530),(531),(532),(533),(534),(535),(536),(537),(538),(539),(540),(541),(542),(543),(544),(545),(546),(547),(548),(549),(550),(551),(552),(553),(554),(555),(556),(557),(558),(559),(560),(561),(562),(563),(564),(565),(566),(567),(568),(569),(570),(571),(572),(573),(574),(575),(576),(577),(578),(579),(580),(581),(582),(583),(584),(585),(586),(587),(588),(589),(590),(591),(592),(593),(594),(595),(596),(597),(598),(599)
     """
-    part_tbl_analyze_result = sql """
-        SHOW COLUMN CACHED STATS analyze_partitioned_tbl_test(col1)
+    sql """ANALYZE TABLE test_600_partition_table_analyze WITH SYNC"""
+
+    // column_name | count | ndv | num_null | data_size | avg_size_byte | min | max | updated_time
+    id_col_stats = sql """
+        SHOW COLUMN CACHED STATS test_600_partition_table_analyze(id);
     """
-    def expected_result = { r->
-        for(int i = 0; i < r.size; i++) {
-            if ((int) Double.parseDouble(r[i][1]) == 6) {
-                return true
-            } else {
-                return false
-            }
-        }
-        return false
+    def expected_id_col_stats = { r, expected_value, idx ->
+        return (int) Double.parseDouble(r[0][idx]) == expected_value
     }
-    assert expected_result(part_tbl_analyze_result)
-
-}
-
+    assert expected_id_col_stats(id_col_stats, 600, 1)
+    assert expected_id_col_stats(id_col_stats, 599, 7)
+    assert expected_id_col_stats(id_col_stats, 0, 6)
+}
\ No newline at end of file
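
Editorial note (reviewer sketch, not part of the patch): the 600 PARTITION
clauses and the 600-value INSERT above are written out longhand. In a Groovy
regression suite they could be generated instead; a minimal sketch, assuming
the suite's `sql` helper and the same table name used in this test:

    // Sketch only: builds the same 600-partition DDL and 600-row insert
    // as the hand-written statements in the patch above.
    def partitions = (0..599).collect { "PARTITION `p${it}` VALUES IN (${it})" }.join(",\n")
    sql """
        CREATE TABLE IF NOT EXISTS test_600_partition_table_analyze (
            `id` INT NOT NULL
        ) ENGINE=OLAP
        DUPLICATE KEY(id)
        PARTITION BY LIST(id)
        ( ${partitions} )
        DISTRIBUTED BY HASH(`id`) BUCKETS 16
        PROPERTIES ( "replication_num" = "1" );
    """
    def values = (0..599).collect { "(${it})" }.join(",")
    sql "INSERT INTO test_600_partition_table_analyze VALUES ${values}"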
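Editorial note (reviewer sketch, not part of the patch): the final assertions
index into the SHOW COLUMN CACHED STATS row by position (1 = count, 7 = max,
6 = min), relying on the column order given in the patch comment. Naming those
offsets would make the checks self-documenting; a hypothetical variant,
assuming that row layout holds:

    // Sketch only: offsets follow the commented layout, which is an assumption here:
    // column_name | count | ndv | num_null | data_size | avg_size_byte | min | max | updated_time
    def COL = [count: 1, ndv: 2, num_null: 3, data_size: 4, avg_size_byte: 5, min: 6, max: 7]
    def statEquals = { rows, String field, int expected ->
        (int) Double.parseDouble(rows[0][COL[field]]) == expected
    }
    assert statEquals(id_col_stats, "count", 600)
    assert statEquals(id_col_stats, "max", 599)
    assert statEquals(id_col_stats, "min", 0)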