From 03bfa094f644890388edc96944063959c0056a89 Mon Sep 17 00:00:00 2001 From: kikyo Date: Mon, 29 May 2023 10:38:52 +0800 Subject: [PATCH 1/2] analysis job meta --- .../java/org/apache/doris/common/Config.java | 2 +- fe/fe-core/src/main/cup/sql_parser.cup | 68 ++- .../apache/doris/analysis/AnalyzeDBStmt.java | 61 +++ .../doris/analysis/AnalyzeProperties.java | 246 +++++++++ .../apache/doris/analysis/AnalyzeStmt.java | 419 +-------------- .../apache/doris/analysis/AnalyzeTblStmt.java | 234 +++++++++ .../doris/analysis/DropAnalyzeJobStmt.java | 34 ++ .../doris/analysis/ShowAnalyzeStmt.java | 65 +-- .../doris/analysis/ShowAnalyzeTaskStatus.java | 61 +++ .../java/org/apache/doris/catalog/Env.java | 14 +- .../catalog/InternalSchemaInitializer.java | 78 +-- .../org/apache/doris/catalog/OlapTable.java | 8 +- .../java/org/apache/doris/catalog/Table.java | 4 +- .../org/apache/doris/catalog/TableIf.java | 4 +- .../doris/catalog/external/ExternalTable.java | 4 +- .../catalog/external/HMSExternalTable.java | 4 +- .../apache/doris/journal/JournalEntity.java | 22 + .../doris/persist/AnalyzeDeletionLog.java | 42 ++ .../org/apache/doris/persist/EditLog.java | 33 ++ .../apache/doris/persist/OperationType.java | 9 + .../java/org/apache/doris/qe/DdlExecutor.java | 13 +- .../org/apache/doris/qe/SessionVariable.java | 11 - .../org/apache/doris/qe/ShowExecutor.java | 71 ++- .../org/apache/doris/qe/StmtExecutor.java | 6 +- ...nalysisTaskInfo.java => AnalysisInfo.java} | 134 ++++- ...oBuilder.java => AnalysisInfoBuilder.java} | 70 +-- .../doris/statistics/AnalysisManager.java | 478 ++++++++++++------ .../doris/statistics/BaseAnalysisTask.java | 12 +- .../doris/statistics/HMSAnalysisTask.java | 2 +- .../doris/statistics/HistogramTask.java | 4 +- .../doris/statistics/HiveAnalysisTask.java | 2 +- .../doris/statistics/IcebergAnalysisTask.java | 2 +- .../doris/statistics/MVAnalysisTask.java | 2 +- .../doris/statistics/OlapAnalysisTask.java | 2 +- .../doris/statistics/StatisticConstants.java | 2 
+ .../statistics/StatisticsAutoAnalyzer.java | 30 +- .../doris/statistics/StatisticsCleaner.java | 6 - .../statistics/StatisticsRepository.java | 112 +--- .../doris/statistics/util/StatisticsUtil.java | 21 +- .../doris/statistics/AnalysisJobTest.java | 15 +- .../statistics/AnalysisTaskExecutorTest.java | 23 +- .../doris/statistics/HistogramTaskTest.java | 14 +- tools/tpch-tools/conf/doris-cluster.conf | 4 +- 43 files changed, 1413 insertions(+), 1035 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeDBStmt.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/DropAnalyzeJobStmt.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeTaskStatus.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/persist/AnalyzeDeletionLog.java rename fe/fe-core/src/main/java/org/apache/doris/statistics/{AnalysisTaskInfo.java => AnalysisInfo.java} (57%) rename fe/fe-core/src/main/java/org/apache/doris/statistics/{AnalysisTaskInfoBuilder.java => AnalysisInfoBuilder.java} (68%) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 602341afaa86c4..38c31934a43704 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1445,7 +1445,7 @@ public class Config extends ConfigBase { * the system automatically checks the time interval for statistics */ @ConfField(mutable = true, masterOnly = true) - public static int auto_check_statistics_in_sec = 300; + public static int auto_check_statistics_in_minutes = 5; /** * If this configuration is enabled, you should also specify the trace_export_url. 
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index cc07ea2847fc40..3d4aad05660730 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -929,7 +929,6 @@ nonterminal MVRefreshInfo.RefreshMethod opt_refresh_method; nonterminal MVRefreshTriggerInfo opt_refresh_trigger; nonterminal MVRefreshInfo opt_mv_refersh_info; nonterminal PartitionDesc opt_mv_partition; -nonterminal Boolean opt_sync; precedence nonassoc COMMA; @@ -2779,7 +2778,7 @@ show_create_reporitory_stmt ::= // analyze statment analyze_stmt ::= // statistics - KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl opt_col_list:cols + KW_ANALYZE KW_TABLE table_name:tbl opt_col_list:cols opt_with_analysis_properties:withAnalysisProperties opt_properties:properties {: if (properties == null) { @@ -2788,22 +2787,31 @@ analyze_stmt ::= for (Map property : withAnalysisProperties) { properties.putAll(property); } - if (!properties.containsKey("sync")) { - properties.put("sync", String.valueOf(sync)); + // Rule: If no type is specified, see if there is a specified column + if (!properties.containsKey("analysis.type")) { + properties.put("analysis.type", "FUNDAMENTALS"); + } + AnalyzeProperties analyzeProperties= new AnalyzeProperties(properties); + RESULT = new AnalyzeTblStmt(tbl, cols, analyzeProperties); + :} + | KW_ANALYZE KW_DATABASE ident:ctlName DOT ident:dbName + opt_with_analysis_properties:withAnalysisProperties opt_properties:properties + {: + if (properties == null) { + properties = Maps.newHashMap(); + } + for (Map property : withAnalysisProperties) { + properties.putAll(property); } // Rule: If no type is specified, see if there is a specified column if (!properties.containsKey("analysis.type")) { - if ((cols == null)) { - properties.put("analysis.type", "INDEX"); - } else { - properties.put("analysis.type", "COLUMN"); - } + properties.put("analysis.type", "FUNDAMENTALS"); } - RESULT = new AnalyzeStmt(tbl, cols, 
properties); + AnalyzeProperties analyzeProperties= new AnalyzeProperties(properties); + RESULT = new AnalyzeDBStmt(ctlName, dbName, analyzeProperties); :} - // histogram - | KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl opt_col_list:cols KW_UPDATE KW_HISTOGRAM - opt_with_analysis_properties:withAnalysisProperties opt_properties:properties + | KW_ANALYZE KW_DATABASE ident:dbName + opt_with_analysis_properties:withAnalysisProperties opt_properties:properties {: if (properties == null) { properties = Maps.newHashMap(); @@ -2811,12 +2819,12 @@ analyze_stmt ::= for (Map property : withAnalysisProperties) { properties.putAll(property); } - if (!properties.containsKey("sync")) { - properties.put("sync", String.valueOf(sync)); + // Rule: If no type is specified, see if there is a specified column + if (!properties.containsKey("analysis.type")) { + properties.put("analysis.type", "FUNDAMENTALS"); } - // TODO: Support materialized view - properties.put("analysis.type", "HISTOGRAM"); - RESULT = new AnalyzeStmt(tbl, cols, properties); + AnalyzeProperties analyzeProperties= new AnalyzeProperties(properties); + RESULT = new AnalyzeDBStmt(null, dbName, analyzeProperties); :} ; @@ -3002,6 +3010,10 @@ drop_stmt ::= {: RESULT = new DropStatsStmt(true); :} + | KW_DROP KW_ANALYZE KW_JOB INTEGER_LITERAL:job_id + {: + RESULT = new DropAnalyzeJobStmt(job_id); + :} ; // Recover statement @@ -3997,6 +4009,10 @@ show_param ::= {: RESULT = new ShowAnalyzeStmt(jobId, parser.where, orderByClause, limitClause); :} + | KW_ANALYZE KW_TASK KW_STATUS INTEGER_LITERAL:jobId + {: + RESULT = new ShowAnalyzeTaskStatus(jobId); + :} | KW_CATALOG KW_RECYCLE KW_BIN opt_wild_where {: RESULT = new ShowCatalogRecycleBinStmt(parser.where); @@ -5801,6 +5817,12 @@ with_analysis_properties ::= put("period.seconds", String.valueOf(periodInSec.intValue())); }}; :} + | KW_HISTOGRAM + {: + RESULT = new HashMap() {{ + put("analysis.type", "HISTOGRAM"); + }}; + :} ; opt_with_analysis_properties ::= @@ -7093,16 
+7115,6 @@ type_func_name_keyword ::= {: RESULT = id; :} ; -opt_sync ::= - {: - RESULT = false; - :} - | KW_SYNC - {: - RESULT = true; - :} - ; - // Keyword that we allow for identifiers keyword ::= KW_AFTER:id diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeDBStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeDBStmt.java new file mode 100644 index 00000000000000..db155f07ae9795 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeDBStmt.java @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+package org.apache.doris.analysis;
+
+import org.apache.doris.catalog.DatabaseIf;
+import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.TableIf;
+import org.apache.doris.cluster.ClusterNamespace;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.UserException;
+import org.apache.doris.datasource.CatalogIf;
+import org.apache.doris.qe.ConnectContext;
+
+/**
+ * Statement behind {@code ANALYZE DATABASE [catalog.]db [WITH ...] [PROPERTIES (...)]}.
+ *
+ * <p>The constructor only records names. {@link #analyze(Analyzer)} validates the options and resolves
+ * the catalog and the database; {@link #getCtlIf()} and {@link #getDb()} return {@code null} until then.
+ */
+public class AnalyzeDBStmt extends AnalyzeStmt {
+
+    // Catalog name as written in the statement; null when the statement names only a database.
+    private final String ctlName;
+    // Database name already prefixed with the session's cluster name (see the constructor).
+    private final String dbName;
+
+    // Both fields below are assigned by analyze().
+    private CatalogIf ctlIf;
+
+    private DatabaseIf<TableIf> db;
+
+    /**
+     * @param ctlName catalog name, or {@code null} to use the internal catalog
+     * @param dbName database name without the cluster prefix; the prefix is taken from the current
+     *               {@link ConnectContext}
+     * @param analyzeProperties options collected from the WITH / PROPERTIES clauses
+     */
+    public AnalyzeDBStmt(String ctlName, String dbName, AnalyzeProperties analyzeProperties) {
+        super(analyzeProperties);
+        this.ctlName = ctlName;
+        // NOTE(review): the cluster prefix is applied even when an explicit catalog is given; confirm
+        // that databases of non-internal catalogs are registered under the prefixed name.
+        this.dbName = ConnectContext.get().getClusterName() + ClusterNamespace.CLUSTER_DELIMITER + dbName;
+    }
+
+    /**
+     * Validates the statement options, then resolves the target catalog and database.
+     *
+     * @throws AnalysisException if an option is rejected by {@link AnalyzeProperties#check()} or a
+     *                           lookup helper reports a failure
+     */
+    @Override
+    public void analyze(Analyzer analyzer) throws AnalysisException, UserException {
+        // Reject unknown or malformed options before any catalog lookup. The former
+        // AnalyzeStmt.analyze() ran the same validation for ANALYZE TABLE.
+        // NOTE(review): that method also gated on Config.enable_stats and called
+        // super.analyze(analyzer); confirm whether this statement should do the same.
+        analyzeProperties.check();
+        if (ctlName == null) {
+            ctlIf = Env.getCurrentInternalCatalog();
+        } else {
+            ctlIf = Env.getCurrentEnv().getCatalogMgr().getCatalogOrAnalysisException(ctlName);
+        }
+        db = ctlIf.getDbOrAnalysisException(dbName);
+    }
+
+    /** Returns the catalog resolved by {@link #analyze(Analyzer)}, or {@code null} before it ran. */
+    public CatalogIf getCtlIf() {
+        return ctlIf;
+    }
+
+    /** Returns the database resolved by {@link #analyze(Analyzer)}, or {@code null} before it ran. */
+    public DatabaseIf<TableIf> getDb() {
+        return db;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java
new file mode 100644
index 00000000000000..ccb122bc26986c
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java
@@ -0,0 +1,246 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.analysis;
+
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.util.PrintableMap;
+import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
+
+import com.google.common.collect.ImmutableSet;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Key/value options attached to an ANALYZE statement, together with the rules that validate them.
+ *
+ * <p>The map handed to the constructor is stored by reference and returned as-is by
+ * {@link #getProperties()}. The typed getters parse the raw strings without validating them, so
+ * {@link #check()} should succeed before they are relied on.
+ */
+public class AnalyzeProperties {
+
+    private final Map<String, String> properties;
+
+    public static final String PROPERTY_SYNC = "sync";
+    public static final String PROPERTY_INCREMENTAL = "incremental";
+    public static final String PROPERTY_AUTOMATIC = "automatic";
+    public static final String PROPERTY_SAMPLE_PERCENT = "sample.percent";
+    public static final String PROPERTY_SAMPLE_ROWS = "sample.rows";
+    public static final String PROPERTY_NUM_BUCKETS = "num.buckets";
+    public static final String PROPERTY_ANALYSIS_TYPE = "analysis.type";
+    public static final String PROPERTY_PERIOD_SECONDS = "period.seconds";
+
+    // Every key check() accepts; any other key is rejected.
+    private static final ImmutableSet<String> PROPERTIES_SET = new ImmutableSet.Builder<String>()
+            .add(PROPERTY_SYNC)
+            .add(PROPERTY_INCREMENTAL)
+            .add(PROPERTY_AUTOMATIC)
+            .add(PROPERTY_SAMPLE_PERCENT)
+            .add(PROPERTY_SAMPLE_ROWS)
+            .add(PROPERTY_NUM_BUCKETS)
+            .add(PROPERTY_ANALYSIS_TYPE)
+            .add(PROPERTY_PERIOD_SECONDS)
+            .build();
+
+    /**
+     * @param properties option map; kept by reference, not copied
+     */
+    public AnalyzeProperties(Map<String, String> properties) {
+        this.properties = properties;
+    }
+
+    /**
+     * Validates every option: the key must be known, the value well formed, and the combination of
+     * options allowed.
+     *
+     * @throws AnalysisException describing the first violation found
+     */
+    public void check() throws AnalysisException {
+        String msgTemplate = "%s = %s is invalid property";
+        Optional<String> unknownKey = properties.keySet().stream().filter(
+                entity -> !PROPERTIES_SET.contains(entity)).findFirst();
+
+        if (unknownKey.isPresent()) {
+            String msg = String.format(msgTemplate, unknownKey.get(), properties.get(unknownKey.get()));
+            throw new AnalysisException(msg);
+        }
+        // Validate the analysis type first: the bucket and incremental checks below read it, and a
+        // bad value should be reported as such rather than as a bucket/incremental conflict.
+        checkAnalysisType(msgTemplate);
+        checkSampleValue();
+        checkPeriodSeconds();
+        checkNumBuckets();
+        checkSync(msgTemplate);
+        checkAnalysisMode(msgTemplate);
+        checkScheduleType(msgTemplate);
+    }
+
+    /** Returns true only when {@code sync} is set to "true" (case-insensitive). */
+    public boolean isSync() {
+        return Boolean.parseBoolean(properties.get(PROPERTY_SYNC));
+    }
+
+    /** Returns true only when {@code incremental} is set to "true" (case-insensitive). */
+    public boolean isIncremental() {
+        return Boolean.parseBoolean(properties.get(PROPERTY_INCREMENTAL));
+    }
+
+    /** Returns true only when {@code automatic} is set to "true" (case-insensitive). */
+    public boolean isAutomatic() {
+        return Boolean.parseBoolean(properties.get(PROPERTY_AUTOMATIC));
+    }
+
+    /** Returns {@code sample.percent}, or 0 when the option is absent. */
+    public int getSamplePercent() {
+        if (!properties.containsKey(PROPERTY_SAMPLE_PERCENT)) {
+            return 0;
+        }
+        return Integer.parseInt(properties.get(PROPERTY_SAMPLE_PERCENT));
+    }
+
+    /** Returns {@code sample.rows}, or 0 when the option is absent. */
+    public int getSampleRows() {
+        if (!properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
+            return 0;
+        }
+        return Integer.parseInt(properties.get(PROPERTY_SAMPLE_ROWS));
+    }
+
+    /** Returns {@code num.buckets}, or 0 when the option is absent. */
+    public int getNumBuckets() {
+        if (!properties.containsKey(PROPERTY_NUM_BUCKETS)) {
+            return 0;
+        }
+        return Integer.parseInt(properties.get(PROPERTY_NUM_BUCKETS));
+    }
+
+    /** Returns {@code period.seconds} converted to milliseconds, or 0 when the option is absent. */
+    public long getPeriodTimeInMs() {
+        if (!properties.containsKey(PROPERTY_PERIOD_SECONDS)) {
+            return 0;
+        }
+        int seconds = Integer.parseInt(properties.get(PROPERTY_PERIOD_SECONDS));
+        return TimeUnit.SECONDS.toMillis(seconds);
+    }
+
+    private void checkPeriodSeconds() throws AnalysisException {
+        if (properties.containsKey(PROPERTY_PERIOD_SECONDS)) {
+            checkNumericProperty(PROPERTY_PERIOD_SECONDS, properties.get(PROPERTY_PERIOD_SECONDS),
+                    1, Integer.MAX_VALUE, true, "needs at least 1 seconds");
+        }
+    }
+
+    private void checkSampleValue() throws AnalysisException {
+        if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)
+                && properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
+            throw new AnalysisException("only one sampling parameter can be specified simultaneously");
+        }
+
+        if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)) {
+            checkNumericProperty(PROPERTY_SAMPLE_PERCENT, properties.get(PROPERTY_SAMPLE_PERCENT),
+                    1, 100, true, "should be >= 1 and <= 100");
+        }
+
+        if (properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
+            // Inclusive [1, MAX]: the former exclusive (0, MAX) range rejected Integer.MAX_VALUE
+            // with a "needs at least 1 row" message.
+            checkNumericProperty(PROPERTY_SAMPLE_ROWS, properties.get(PROPERTY_SAMPLE_ROWS),
+                    1, Integer.MAX_VALUE, true, "needs at least 1 row");
+        }
+    }
+
+    private void checkNumBuckets() throws AnalysisException {
+        if (properties.containsKey(PROPERTY_NUM_BUCKETS)) {
+            checkNumericProperty(PROPERTY_NUM_BUCKETS, properties.get(PROPERTY_NUM_BUCKETS),
+                    1, Integer.MAX_VALUE, true, "needs at least 1 buckets");
+        }
+
+        if (properties.containsKey(PROPERTY_NUM_BUCKETS) && !isHistogram()) {
+            throw new AnalysisException(PROPERTY_NUM_BUCKETS + " can only be specified when collecting histograms");
+        }
+    }
+
+    private void checkSync(String msgTemplate) throws AnalysisException {
+        checkBooleanProperty(PROPERTY_SYNC, msgTemplate);
+    }
+
+    private void checkAnalysisMode(String msgTemplate) throws AnalysisException {
+        checkBooleanProperty(PROPERTY_INCREMENTAL, msgTemplate);
+        if (properties.containsKey(PROPERTY_INCREMENTAL) && isHistogram()) {
+            throw new AnalysisException(PROPERTY_INCREMENTAL + " analysis of histograms is not supported");
+        }
+    }
+
+    private void checkAnalysisType(String msgTemplate) throws AnalysisException {
+        if (!properties.containsKey(PROPERTY_ANALYSIS_TYPE)) {
+            return;
+        }
+        String value = properties.get(PROPERTY_ANALYSIS_TYPE);
+        String msg = String.format(msgTemplate, PROPERTY_ANALYSIS_TYPE, value);
+        if (value == null) {
+            throw new AnalysisException(msg);
+        }
+        try {
+            AnalysisType.valueOf(value);
+        } catch (IllegalArgumentException e) {
+            // Enum.valueOf reports an unknown name with IllegalArgumentException, not with its
+            // NumberFormatException subclass.
+            throw new AnalysisException(msg);
+        }
+    }
+
+    private void checkScheduleType(String msgTemplate) throws AnalysisException {
+        checkBooleanProperty(PROPERTY_AUTOMATIC, msgTemplate);
+        if (properties.containsKey(PROPERTY_AUTOMATIC)
+                && properties.containsKey(PROPERTY_INCREMENTAL)) {
+            throw new AnalysisException(PROPERTY_INCREMENTAL + " is invalid when analyze automatically statistics");
+        }
+        if (properties.containsKey(PROPERTY_AUTOMATIC)
+                && properties.containsKey(PROPERTY_PERIOD_SECONDS)) {
+            throw new AnalysisException(PROPERTY_PERIOD_SECONDS + " is invalid when analyze automatically statistics");
+        }
+    }
+
+    // Rejects a present option whose value is neither "true" nor "false" (case-insensitive).
+    // Boolean.valueOf never throws, so wrapping it in try/catch validated nothing.
+    private void checkBooleanProperty(String key, String msgTemplate) throws AnalysisException {
+        if (!properties.containsKey(key)) {
+            return;
+        }
+        String value = properties.get(key);
+        if (!"true".equalsIgnoreCase(value) && !"false".equalsIgnoreCase(value)) {
+            throw new AnalysisException(String.format(msgTemplate, key, value));
+        }
+    }
+
+    // True only when analysis.type is exactly HISTOGRAM; an absent or unknown type yields false
+    // instead of the exception AnalysisType.valueOf would throw.
+    private boolean isHistogram() {
+        return AnalysisType.HISTOGRAM.name().equals(properties.get(PROPERTY_ANALYSIS_TYPE));
+    }
+
+    // Checks that value is a decimal int inside the given range; includeBoundary selects a closed
+    // range, otherwise both bounds are excluded.
+    private void checkNumericProperty(String key, String value, int lowerBound, int upperBound,
+            boolean includeBoundary, String errorMsg) throws AnalysisException {
+        String invalidMsg = String.format("%s = %s is an invalid property.", key, value);
+        if (!StringUtils.isNumeric(value)) {
+            throw new AnalysisException(invalidMsg);
+        }
+        int intValue;
+        try {
+            intValue = Integer.parseInt(value);
+        } catch (NumberFormatException e) {
+            // All digits but outside the int range, e.g. "99999999999".
+            throw new AnalysisException(invalidMsg);
+        }
+        boolean isOutOfBounds = (includeBoundary && (intValue < lowerBound || intValue > upperBound))
+                || (!includeBoundary && (intValue <= lowerBound || intValue >= upperBound));
+        if (isOutOfBounds) {
+            throw new AnalysisException(key + " " + errorMsg);
+        }
+    }
+
+    /** Returns true when either sampling option ({@code sample.percent} or {@code sample.rows}) is present. */
+    public boolean isSample() {
+        return properties.containsKey(PROPERTY_SAMPLE_PERCENT)
+                || properties.containsKey(PROPERTY_SAMPLE_ROWS);
+    }
+
+    /** Renders the options as a {@code PROPERTIES(...)} clause. */
+    public String toSQL() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("PROPERTIES(");
+        sb.append(new PrintableMap<>(properties, " = ",
+                true,
+                false));
+        sb.append(")");
+        return sb.toString();
+    }
+
+    /** Returns the backing option map itself, not a copy. */
+    public Map<String, String> getProperties() {
+        return properties;
+    }
+
+    /**
+     * Returns the analysis type named by {@code analysis.type}.
+     *
+     * @throws IllegalArgumentException if the value names no {@link AnalysisType} constant
+     * @throws NullPointerException if the option is absent
+     */
+    public AnalysisType getAnalysisType() {
+        return AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE));
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
index a8d004f199e9b2..f1688661d05c91 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
@@ -17,431 +17,68 @@
 
 package org.apache.doris.analysis;
 
-import org.apache.doris.catalog.Column;
-import org.apache.doris.catalog.Database;
-import org.apache.doris.catalog.DatabaseIf;
-import org.apache.doris.catalog.Env;
-import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.catalog.TableIf;
-import org.apache.doris.catalog.View;
-import org.apache.doris.common.AnalysisException;
-import org.apache.doris.common.Config;
-import org.apache.doris.common.ErrorCode;
-import org.apache.doris.common.ErrorReport;
-import org.apache.doris.common.FeNameFormat;
-import org.apache.doris.common.UserException;
-import org.apache.doris.common.util.PrintableMap;
-import org.apache.doris.datasource.CatalogIf;
-import org.apache.doris.mysql.privilege.PrivPredicate;
-import org.apache.doris.qe.ConnectContext;
-import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod;
-import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMode;
-import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;
-import org.apache.doris.statistics.AnalysisTaskInfo.ScheduleType;
+import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
+import org.apache.doris.statistics.AnalysisInfo.AnalysisMode;
+import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
+import
org.apache.doris.statistics.AnalysisInfo.ScheduleType; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Sets; -import org.apache.commons.lang3.StringUtils; - -import java.util.List; import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; -/** - * Column Statistics Collection Syntax: - * ANALYZE [ SYNC ] TABLE table_name - * [ (column_name [, ...]) ] - * [ [WITH SYNC] | [WITH INCREMENTAL] | [WITH SAMPLE PERCENT | ROWS ] ] - * [ PROPERTIES ('key' = 'value', ...) ]; - * - * Column histogram collection syntax: - * ANALYZE [ SYNC ] TABLE table_name - * [ (column_name [, ...]) ] - * UPDATE HISTOGRAM - * [ [ WITH SYNC ][ WITH INCREMENTAL ][ WITH SAMPLE PERCENT | ROWS ][ WITH BUCKETS ] ] - * [ PROPERTIES ('key' = 'value', ...) ]; - * - * Illustrate: - * - sync:Collect statistics synchronously. Return after collecting. - * - incremental:Collect statistics incrementally. Incremental collection of histogram statistics is not supported. - * - sample percent | rows:Collect statistics by sampling. Scale and number of rows can be sampled. - * - buckets:Specifies the maximum number of buckets generated when collecting histogram statistics. - * - table_name: The purpose table for collecting statistics. Can be of the form `db_name.table_name`. - * - column_name: The specified destination column must be a column that exists in `table_name`, - * and multiple column names are separated by commas. - * - properties:Properties used to set statistics tasks. 
Currently only the following configurations - * are supported (equivalent to the with statement) - * - 'sync' = 'true' - * - 'incremental' = 'true' - * - 'sample.percent' = '50' - * - 'sample.rows' = '1000' - * - 'num.buckets' = 10 - */ public class AnalyzeStmt extends DdlStmt { - // The properties passed in by the user through "with" or "properties('K', 'V')" - public static final String PROPERTY_SYNC = "sync"; - public static final String PROPERTY_INCREMENTAL = "incremental"; - public static final String PROPERTY_AUTOMATIC = "automatic"; - public static final String PROPERTY_SAMPLE_PERCENT = "sample.percent"; - public static final String PROPERTY_SAMPLE_ROWS = "sample.rows"; - public static final String PROPERTY_NUM_BUCKETS = "num.buckets"; - public static final String PROPERTY_ANALYSIS_TYPE = "analysis.type"; - public static final String PROPERTY_PERIOD_SECONDS = "period.seconds"; - - private static final ImmutableSet PROPERTIES_SET = new ImmutableSet.Builder() - .add(PROPERTY_SYNC) - .add(PROPERTY_INCREMENTAL) - .add(PROPERTY_AUTOMATIC) - .add(PROPERTY_SAMPLE_PERCENT) - .add(PROPERTY_SAMPLE_ROWS) - .add(PROPERTY_NUM_BUCKETS) - .add(PROPERTY_ANALYSIS_TYPE) - .add(PROPERTY_PERIOD_SECONDS) - .build(); - - private final TableName tableName; - private final List columnNames; - private final Map properties; - - // after analyzed - private long dbId; - private TableIf table; - - public AnalyzeStmt(TableName tableName, - List columnNames, - Map properties) { - this.tableName = tableName; - this.columnNames = columnNames; - this.properties = properties; - } - - @Override - @SuppressWarnings({"rawtypes"}) - public void analyze(Analyzer analyzer) throws UserException { - if (!Config.enable_stats) { - throw new UserException("Analyze function is forbidden, you should add `enable_stats=true`" - + "in your FE conf file"); - } - super.analyze(analyzer); - - tableName.analyze(analyzer); - - String catalogName = tableName.getCtl(); - String dbName = tableName.getDb(); - String 
tblName = tableName.getTbl(); - CatalogIf catalog = analyzer.getEnv().getCatalogMgr() - .getCatalogOrAnalysisException(catalogName); - DatabaseIf db = catalog.getDbOrAnalysisException(dbName); - dbId = db.getId(); - table = db.getTableOrAnalysisException(tblName); - if (table instanceof View) { - throw new AnalysisException("Analyze view is not allowed"); - } - checkAnalyzePriv(dbName, tblName); - - if (columnNames != null && !columnNames.isEmpty()) { - table.readLock(); - try { - List baseSchema = table.getBaseSchema(false) - .stream().map(Column::getName).collect(Collectors.toList()); - Optional optional = columnNames.stream() - .filter(entity -> !baseSchema.contains(entity)).findFirst(); - if (optional.isPresent()) { - String columnName = optional.get(); - ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME, - columnName, FeNameFormat.getColumnNameRegex()); - } - } finally { - table.readUnlock(); - } - } - - checkProperties(); - - // TODO support external table - if (properties.containsKey(PROPERTY_SAMPLE_PERCENT) - || properties.containsKey(PROPERTY_SAMPLE_ROWS)) { - if (!(table instanceof OlapTable)) { - throw new AnalysisException("Sampling statistics " - + "collection of external tables is not supported"); - } - } - } - - @Override - public RedirectStatus getRedirectStatus() { - return RedirectStatus.FORWARD_NO_SYNC; - } - - private void checkAnalyzePriv(String dbName, String tblName) throws AnalysisException { - if (!Env.getCurrentEnv().getAccessManager() - .checkTblPriv(ConnectContext.get(), dbName, tblName, PrivPredicate.SELECT)) { - ErrorReport.reportAnalysisException( - ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, - "ANALYZE", - ConnectContext.get().getQualifiedUser(), - ConnectContext.get().getRemoteIP(), - dbName + ": " + tblName); - } - } - - private void checkProperties() throws UserException { - if (properties == null || properties.isEmpty()) { - throw new AnalysisException("analysis properties should not be empty"); - } - - String 
msgTemplate = "%s = %s is invalid property"; - Optional optional = properties.keySet().stream().filter( - entity -> !PROPERTIES_SET.contains(entity)).findFirst(); - - if (optional.isPresent()) { - String msg = String.format(msgTemplate, optional.get(), properties.get(optional.get())); - throw new AnalysisException(msg); - } - checkSampleValue(); - checkPeriodSeconds(); - checkNumBuckets(); - checkSync(msgTemplate); - checkAnalysisMode(msgTemplate); - checkAnalysisType(msgTemplate); - checkScheduleType(msgTemplate); - } + protected AnalyzeProperties analyzeProperties; - private void checkPeriodSeconds() throws AnalysisException { - if (properties.containsKey(PROPERTY_PERIOD_SECONDS)) { - checkNumericProperty(PROPERTY_PERIOD_SECONDS, properties.get(PROPERTY_PERIOD_SECONDS), - 1, Integer.MAX_VALUE, true, "needs at least 1 seconds"); - } + public AnalyzeStmt(AnalyzeProperties analyzeProperties) { + this.analyzeProperties = analyzeProperties; } - private void checkSampleValue() throws AnalysisException { - if (properties.containsKey(PROPERTY_SAMPLE_PERCENT) - && properties.containsKey(PROPERTY_SAMPLE_ROWS)) { - throw new AnalysisException("only one sampling parameter can be specified simultaneously"); - } - - if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)) { - checkNumericProperty(PROPERTY_SAMPLE_PERCENT, properties.get(PROPERTY_SAMPLE_PERCENT), - 1, 100, true, "should be >= 1 and <= 100"); - } - - if (properties.containsKey(PROPERTY_SAMPLE_ROWS)) { - checkNumericProperty(PROPERTY_SAMPLE_ROWS, properties.get(PROPERTY_SAMPLE_ROWS), - 0, Integer.MAX_VALUE, false, "needs at least 1 row"); - } - } - - private void checkNumBuckets() throws AnalysisException { - if (properties.containsKey(PROPERTY_NUM_BUCKETS)) { - checkNumericProperty(PROPERTY_NUM_BUCKETS, properties.get(PROPERTY_NUM_BUCKETS), - 1, Integer.MAX_VALUE, true, "needs at least 1 buckets"); - } - - if (properties.containsKey(PROPERTY_NUM_BUCKETS) - && 
AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)) != AnalysisType.HISTOGRAM) { - throw new AnalysisException(PROPERTY_NUM_BUCKETS + " can only be specified when collecting histograms"); - } - } - private void checkSync(String msgTemplate) throws AnalysisException { - if (properties.containsKey(PROPERTY_SYNC)) { - try { - Boolean.valueOf(properties.get(PROPERTY_SYNC)); - } catch (NumberFormatException e) { - String msg = String.format(msgTemplate, PROPERTY_SYNC, properties.get(PROPERTY_SYNC)); - throw new AnalysisException(msg); - } - } + public Map getProperties() { + return analyzeProperties.getProperties(); } - private void checkAnalysisMode(String msgTemplate) throws AnalysisException { - if (properties.containsKey(PROPERTY_INCREMENTAL)) { - try { - Boolean.valueOf(properties.get(PROPERTY_INCREMENTAL)); - } catch (NumberFormatException e) { - String msg = String.format(msgTemplate, PROPERTY_INCREMENTAL, properties.get(PROPERTY_INCREMENTAL)); - throw new AnalysisException(msg); - } - } - if (properties.containsKey(PROPERTY_INCREMENTAL) - && AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)) == AnalysisType.HISTOGRAM) { - throw new AnalysisException(PROPERTY_INCREMENTAL + " analysis of histograms is not supported"); - } + public AnalysisMode getAnalysisMode() { + return analyzeProperties.isIncremental() ? 
AnalysisMode.INCREMENTAL : AnalysisMode.FULL; } - private void checkAnalysisType(String msgTemplate) throws AnalysisException { - if (properties.containsKey(PROPERTY_ANALYSIS_TYPE)) { - try { - AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)); - } catch (NumberFormatException e) { - String msg = String.format(msgTemplate, PROPERTY_ANALYSIS_TYPE, properties.get(PROPERTY_ANALYSIS_TYPE)); - throw new AnalysisException(msg); - } - } + public AnalysisType getAnalysisType() { + return analyzeProperties.getAnalysisType(); } - private void checkScheduleType(String msgTemplate) throws AnalysisException { - if (properties.containsKey(PROPERTY_AUTOMATIC)) { - try { - Boolean.valueOf(properties.get(PROPERTY_AUTOMATIC)); - } catch (NumberFormatException e) { - String msg = String.format(msgTemplate, PROPERTY_AUTOMATIC, properties.get(PROPERTY_AUTOMATIC)); - throw new AnalysisException(msg); - } - } - if (properties.containsKey(PROPERTY_AUTOMATIC) - && properties.containsKey(PROPERTY_INCREMENTAL)) { - throw new AnalysisException(PROPERTY_INCREMENTAL + " is invalid when analyze automatically statistics"); - } - if (properties.containsKey(PROPERTY_AUTOMATIC) - && properties.containsKey(PROPERTY_PERIOD_SECONDS)) { - throw new AnalysisException(PROPERTY_PERIOD_SECONDS + " is invalid when analyze automatically statistics"); - } + public AnalysisMethod getAnalysisMethod() { + double samplePercent = analyzeProperties.getSamplePercent(); + int sampleRows = analyzeProperties.getSampleRows(); + return (samplePercent > 0 || sampleRows > 0) ? 
AnalysisMethod.SAMPLE : AnalysisMethod.FULL; } - private void checkNumericProperty(String key, String value, int lowerBound, int upperBound, - boolean includeBoundary, String errorMsg) throws AnalysisException { - if (!StringUtils.isNumeric(value)) { - String msg = String.format("%s = %s is an invalid property.", key, value); - throw new AnalysisException(msg); - } - int intValue = Integer.parseInt(value); - boolean isOutOfBounds = (includeBoundary && (intValue < lowerBound || intValue > upperBound)) - || (!includeBoundary && (intValue <= lowerBound || intValue >= upperBound)); - if (isOutOfBounds) { - throw new AnalysisException(key + " " + errorMsg); + public ScheduleType getScheduleType() { + if (analyzeProperties.isAutomatic()) { + return ScheduleType.AUTOMATIC; } - } - - public String getCatalogName() { - return tableName.getCtl(); - } - - public long getDbId() { - return dbId; - } - - public String getDBName() { - return tableName.getDb(); - } - - public Database getDb() throws AnalysisException { - return analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(dbId); - } - - public TableIf getTable() { - return table; - } - - public TableName getTblName() { - return tableName; - } - - public Set getColumnNames() { - return columnNames == null ? table.getBaseSchema(false) - .stream().map(Column::getName).collect(Collectors.toSet()) : Sets.newHashSet(columnNames); - } - - public Map getProperties() { - return properties; + return analyzeProperties.getPeriodTimeInMs() > 0 ? 
ScheduleType.PERIOD : ScheduleType.ONCE; } public boolean isSync() { - return Boolean.parseBoolean(properties.get(PROPERTY_SYNC)); - } - - public boolean isIncremental() { - return Boolean.parseBoolean(properties.get(PROPERTY_INCREMENTAL)); - } - - public boolean isAutomatic() { - return Boolean.parseBoolean(properties.get(PROPERTY_AUTOMATIC)); + return analyzeProperties.isSync(); } public int getSamplePercent() { - if (!properties.containsKey(PROPERTY_SAMPLE_PERCENT)) { - return 0; - } - return Integer.parseInt(properties.get(PROPERTY_SAMPLE_PERCENT)); + return analyzeProperties.getSamplePercent(); } public int getSampleRows() { - if (!properties.containsKey(PROPERTY_SAMPLE_ROWS)) { - return 0; - } - return Integer.parseInt(properties.get(PROPERTY_SAMPLE_ROWS)); + return analyzeProperties.getSampleRows(); } public int getNumBuckets() { - if (!properties.containsKey(PROPERTY_NUM_BUCKETS)) { - return 0; - } - return Integer.parseInt(properties.get(PROPERTY_NUM_BUCKETS)); + return analyzeProperties.getNumBuckets(); } public long getPeriodTimeInMs() { - if (!properties.containsKey(PROPERTY_PERIOD_SECONDS)) { - return 0; - } - int minutes = Integer.parseInt(properties.get(PROPERTY_PERIOD_SECONDS)); - return TimeUnit.SECONDS.toMillis(minutes); - } - - public AnalysisMode getAnalysisMode() { - return isIncremental() ? AnalysisMode.INCREMENTAL : AnalysisMode.FULL; - } - - public AnalysisType getAnalysisType() { - return AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)); - } - - public AnalysisMethod getAnalysisMethod() { - double samplePercent = getSamplePercent(); - int sampleRows = getSampleRows(); - return (samplePercent > 0 || sampleRows > 0) ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL; - } - - public ScheduleType getScheduleType() { - if (isAutomatic()) { - return ScheduleType.AUTOMATIC; - } - return getPeriodTimeInMs() > 0 ? 
ScheduleType.PERIOD : ScheduleType.ONCE; + return analyzeProperties.getPeriodTimeInMs(); } - @Override - public String toSql() { - StringBuilder sb = new StringBuilder(); - sb.append("ANALYZE TABLE "); - - if (tableName != null) { - sb.append(" "); - sb.append(tableName.toSql()); - } - - if (columnNames != null) { - sb.append("("); - sb.append(StringUtils.join(columnNames, ",")); - sb.append(")"); - } - - if (getAnalysisType().equals(AnalysisType.HISTOGRAM)) { - sb.append(" "); - sb.append("UPDATE HISTOGRAM"); - } - - if (properties != null) { - sb.append(" "); - sb.append("PROPERTIES("); - sb.append(new PrintableMap<>(properties, " = ", - true, - false)); - sb.append(")"); - } - - return sb.toString(); + public AnalyzeProperties getAnalyzeProperties() { + return analyzeProperties; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java new file mode 100644 index 00000000000000..0e719f349cc972 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java @@ -0,0 +1,234 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.analysis; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DatabaseIf; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.TableIf; +import org.apache.doris.catalog.View; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; +import org.apache.doris.common.ErrorCode; +import org.apache.doris.common.ErrorReport; +import org.apache.doris.common.FeNameFormat; +import org.apache.doris.common.UserException; +import org.apache.doris.datasource.CatalogIf; +import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.statistics.AnalysisInfo.AnalysisType; + +import com.google.common.collect.Sets; +import org.apache.commons.lang3.StringUtils; + +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Column Statistics Collection Syntax: + * ANALYZE [ SYNC ] TABLE table_name + * [ (column_name [, ...]) ] + * [ [WITH SYNC] | [WITH INCREMENTAL] | [WITH SAMPLE PERCENT | ROWS ] ] + * [ PROPERTIES ('key' = 'value', ...) ]; + *

+ * Column histogram collection syntax: + * ANALYZE [ SYNC ] TABLE table_name + * [ (column_name [, ...]) ] + * UPDATE HISTOGRAM + * [ [ WITH SYNC ][ WITH INCREMENTAL ][ WITH SAMPLE PERCENT | ROWS ][ WITH BUCKETS ] ] + * [ PROPERTIES ('key' = 'value', ...) ]; + *

+ * Explanation: + * - sync: Collect statistics synchronously. Return after collecting. + * - incremental: Collect statistics incrementally. Incremental collection of histogram statistics is not supported. + * - sample percent | rows: Collect statistics by sampling. Either a percentage or a number of rows can be specified. + * - buckets: Specifies the maximum number of buckets generated when collecting histogram statistics. + * - table_name: The target table for collecting statistics. Can be of the form `db_name.table_name`. + * - column_name: The specified target column must be a column that exists in `table_name`, + * and multiple column names are separated by commas. + * - properties: Properties used to configure statistics tasks. Currently only the following configurations + * are supported (equivalent to the WITH clauses) + * - 'sync' = 'true' + * - 'incremental' = 'true' + * - 'sample.percent' = '50' + * - 'sample.rows' = '1000' + * - 'num.buckets' = '10' + */ +public class AnalyzeTblStmt extends AnalyzeStmt { + // The properties passed in by the user through "with" or "properties('K', 'V')" + + private final TableName tableName; + private final List columnNames; + + // after analyzed + private long dbId; + private TableIf table; + + public AnalyzeTblStmt(TableName tableName, + List columnNames, + AnalyzeProperties properties) { + super(properties); + this.tableName = tableName; + this.columnNames = columnNames; + this.analyzeProperties = properties; + } + + public AnalyzeTblStmt(AnalyzeProperties analyzeProperties, TableName tableName, List columnNames, long dbId, + TableIf table) { + super(analyzeProperties); + this.tableName = tableName; + this.columnNames = columnNames; + this.dbId = dbId; + this.table = table; + } + + @Override + @SuppressWarnings({"rawtypes"}) + public void analyze(Analyzer analyzer) throws UserException { + if (!Config.enable_stats) { + throw new UserException("Analyze function is forbidden, you should add `enable_stats=true`" + + "in your FE conf file"); +
} + super.analyze(analyzer); + + tableName.analyze(analyzer); + + String catalogName = tableName.getCtl(); + String dbName = tableName.getDb(); + String tblName = tableName.getTbl(); + CatalogIf catalog = analyzer.getEnv().getCatalogMgr() + .getCatalogOrAnalysisException(catalogName); + DatabaseIf db = catalog.getDbOrAnalysisException(dbName); + dbId = db.getId(); + table = db.getTableOrAnalysisException(tblName); + check(); + } + + public void check() throws AnalysisException { + if (table instanceof View) { + throw new AnalysisException("Analyze view is not allowed"); + } + checkAnalyzePriv(tableName.getDb(), tableName.getTbl()); + + if (columnNames != null && !columnNames.isEmpty()) { + table.readLock(); + try { + List baseSchema = table.getBaseSchema(false) + .stream().map(Column::getName).collect(Collectors.toList()); + Optional optional = columnNames.stream() + .filter(entity -> !baseSchema.contains(entity)).findFirst(); + if (optional.isPresent()) { + String columnName = optional.get(); + ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME, + columnName, FeNameFormat.getColumnNameRegex()); + } + } finally { + table.readUnlock(); + } + } + + analyzeProperties.check(); + + // TODO support external table + if (analyzeProperties.isSample()) { + if (!(table instanceof OlapTable)) { + throw new AnalysisException("Sampling statistics " + + "collection of external tables is not supported"); + } + } + } + + public String getCatalogName() { + return tableName.getCtl(); + } + + public long getDbId() { + return dbId; + } + + public String getDBName() { + return tableName.getDb(); + } + + public TableIf getTable() { + return table; + } + + public TableName getTblName() { + return tableName; + } + + public Set getColumnNames() { + return columnNames == null ? 
table.getBaseSchema(false) + .stream().map(Column::getName).collect(Collectors.toSet()) : Sets.newHashSet(columnNames); + } + + @Override + public RedirectStatus getRedirectStatus() { + return RedirectStatus.FORWARD_NO_SYNC; + } + + private void checkAnalyzePriv(String dbName, String tblName) throws AnalysisException { + if (!Env.getCurrentEnv().getAccessManager() + .checkTblPriv(ConnectContext.get(), dbName, tblName, PrivPredicate.SELECT)) { + ErrorReport.reportAnalysisException( + ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, + "ANALYZE", + ConnectContext.get().getQualifiedUser(), + ConnectContext.get().getRemoteIP(), + dbName + ": " + tblName); + } + } + + @Override + public String toSql() { + StringBuilder sb = new StringBuilder(); + sb.append("ANALYZE TABLE "); + + if (tableName != null) { + sb.append(" "); + sb.append(tableName.toSql()); + } + + if (columnNames != null) { + sb.append("("); + sb.append(StringUtils.join(columnNames, ",")); + sb.append(")"); + } + + if (getAnalysisType().equals(AnalysisType.HISTOGRAM)) { + sb.append(" "); + sb.append("UPDATE HISTOGRAM"); + } + + if (analyzeProperties != null) { + sb.append(" "); + sb.append(analyzeProperties.toSQL()); + } + + return sb.toString(); + } + + public Database getDb() throws AnalysisException { + return analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(dbId); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropAnalyzeJobStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropAnalyzeJobStmt.java new file mode 100644 index 00000000000000..34a7c875edab64 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropAnalyzeJobStmt.java @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +/** + * DROP ANALYZE JOB [JOB_ID] + */ +public class DropAnalyzeJobStmt extends DdlStmt { + + private final long jobId; + + public DropAnalyzeJobStmt(long jobId) { + this.jobId = jobId; + } + + public long getJobId() { + return jobId; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java index b91e171a03b6d3..ee646aa2a9b6de 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java @@ -72,12 +72,6 @@ public class ShowAnalyzeStmt extends ShowStmt { private Expr whereClause; private LimitElement limitElement; private List orderByElements; - - // after analyzed - private String catalogName; - private String dbName; - private String tblName; - private String stateValue; private ArrayList orderByPairs; @@ -105,32 +99,10 @@ public ShowAnalyzeStmt(Long jobId, this.limitElement = limitElement; } - public ImmutableList getTitleNames() { - return TITLE_NAMES; - } - public Long getJobId() { return jobId; } - public String getCatalogName() { - Preconditions.checkArgument(isAnalyzed(), - "The catalogName must be obtained after the parsing is complete"); - return catalogName; - } - - public String getDbName() { - 
Preconditions.checkArgument(isAnalyzed(), - "The dbName must be obtained after the parsing is complete"); - return dbName; - } - - public String getTblName() { - Preconditions.checkArgument(isAnalyzed(), - "The tblName must be obtained after the parsing is complete"); - return tblName; - } - public String getStateValue() { Preconditions.checkArgument(isAnalyzed(), "The stateValue must be obtained after the parsing is complete"); @@ -143,37 +115,11 @@ public ArrayList getOrderByPairs() { return orderByPairs; } - public String getWhereClause() { + public Expr getWhereClause() { Preconditions.checkArgument(isAnalyzed(), "The whereClause must be obtained after the parsing is complete"); - StringBuilder clauseBuilder = new StringBuilder(); - - if (jobId != null) { - clauseBuilder.append("job_Id = ").append(jobId); - } - - if (!Strings.isNullOrEmpty(catalogName)) { - clauseBuilder.append(clauseBuilder.length() > 0 ? " AND " : "") - .append("catalog_name = \"").append(catalogName).append("\""); - } - - if (!Strings.isNullOrEmpty(dbName)) { - clauseBuilder.append(clauseBuilder.length() > 0 ? " AND " : "") - .append("db_name = \"").append(dbName).append("\""); - } - - if (!Strings.isNullOrEmpty(tblName)) { - clauseBuilder.append(clauseBuilder.length() > 0 ? " AND " : "") - .append("tbl_name = \"").append(tblName).append("\""); - } - - if (!Strings.isNullOrEmpty(stateValue)) { - clauseBuilder.append(clauseBuilder.length() > 0 ? 
" AND " : "") - .append("state = \"").append(stateValue).append("\""); - } - - return clauseBuilder.toString(); + return whereClause; } public long getLimit() { @@ -190,15 +136,12 @@ public void analyze(Analyzer analyzer) throws UserException { + "in your FE conf file"); } super.analyze(analyzer); - catalogName = analyzer.getEnv().getInternalCatalog().getName(); if (dbTableName != null) { dbTableName.analyze(analyzer); String dbName = dbTableName.getDb(); String tblName = dbTableName.getTbl(); checkShowAnalyzePriv(dbName, tblName); - this.dbName = dbName; - this.tblName = tblName; } // analyze where clause if not null @@ -361,4 +304,8 @@ public String toSql() { public String toString() { return toSql(); } + + public TableName getDbTableName() { + return dbTableName; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeTaskStatus.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeTaskStatus.java new file mode 100644 index 00000000000000..03d304f3935e13 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeTaskStatus.java @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.analysis; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.ScalarType; +import org.apache.doris.common.Config; +import org.apache.doris.common.UserException; +import org.apache.doris.qe.ShowResultSetMetaData; + +/** + * SHOW ANALYZE TASK STATUS [JOB_ID] + */ +public class ShowAnalyzeTaskStatus extends ShowStmt { + + private static final ShowResultSetMetaData ROW_META_DATA = + ShowResultSetMetaData.builder() + .addColumn(new Column("task_id", ScalarType.createVarchar(100))) + .addColumn(new Column("col_name", ScalarType.createVarchar(1000))) + .addColumn(new Column("message", ScalarType.createVarchar(1000))) + .addColumn(new Column("last_exec_time_in_ms", ScalarType.createVarchar(1000))) + .addColumn(new Column("state", ScalarType.createVarchar(1000))).build(); + + private final long jobId; + + public ShowAnalyzeTaskStatus(long jobId) { + this.jobId = jobId; + } + + @Override + public void analyze(Analyzer analyzer) throws UserException { + if (!Config.enable_stats) { + throw new UserException("Analyze function is forbidden, you should add `enable_stats=true`" + + "in your FE conf file"); + } + } + + @Override + public ShowResultSetMetaData getMetaData() { + return ROW_META_DATA; + } + + public long getJobId() { + return jobId; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 11cd3f76121d20..9c00a5adc2505a 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -38,7 +38,7 @@ import org.apache.doris.analysis.AlterSystemStmt; import org.apache.doris.analysis.AlterTableStmt; import org.apache.doris.analysis.AlterViewStmt; -import org.apache.doris.analysis.AnalyzeStmt; +import org.apache.doris.analysis.AnalyzeTblStmt; import org.apache.doris.analysis.BackupStmt; import org.apache.doris.analysis.CancelAlterSystemStmt; import 
org.apache.doris.analysis.CancelAlterTableStmt; @@ -657,11 +657,9 @@ private Env(boolean isCheckpointCatalog) { this.policyMgr = new PolicyMgr(); this.mtmvJobManager = new MTMVJobManager(); this.extMetaCacheMgr = new ExternalMetaCacheMgr(); - if (Config.enable_stats && !isCheckpointCatalog) { - this.analysisManager = new AnalysisManager(); - this.statisticsCleaner = new StatisticsCleaner(); - this.statisticsAutoAnalyzer = new StatisticsAutoAnalyzer(); - } + this.analysisManager = new AnalysisManager(); + this.statisticsCleaner = new StatisticsCleaner(); + this.statisticsAutoAnalyzer = new StatisticsAutoAnalyzer(); this.globalFunctionMgr = new GlobalFunctionMgr(); this.workloadGroupMgr = new WorkloadGroupMgr(); this.queryStats = new QueryStats(); @@ -5303,8 +5301,8 @@ public AnalysisTaskScheduler getAnalysisJobScheduler() { // 1. handle partition level analysis statement properly // 2. support sample job // 3. support period job - public void createAnalysisJob(AnalyzeStmt analyzeStmt) throws DdlException { - analysisManager.createAnalysisJob(analyzeStmt); + public void createAnalysisJob(AnalyzeTblStmt analyzeTblStmt) throws DdlException { + analysisManager.createAnalysisJob(analyzeTblStmt); } public AnalysisManager getAnalysisManager() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java index 3abaa3b8e6ae30..c5a3197deeac3b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java @@ -21,7 +21,6 @@ import org.apache.doris.analysis.CreateDbStmt; import org.apache.doris.analysis.CreateTableStmt; import org.apache.doris.analysis.DistributionDesc; -import org.apache.doris.analysis.DropTableStmt; import org.apache.doris.analysis.HashDistributionDesc; import org.apache.doris.analysis.KeysDesc; import 
org.apache.doris.analysis.TableName; @@ -51,18 +50,9 @@ public class InternalSchemaInitializer extends Thread { - private static final Logger LOG = LogManager.getLogger(InternalSchemaInitializer.class); - - /** - * If internal table creation failed, will retry after below seconds. - */ public static final int TABLE_CREATION_RETRY_INTERVAL_IN_SECONDS = 5; - /** - * Used when an internal table schema changes. - * TODO remove this code after the table structure is stable - */ - private boolean isSchemaChanged = false; + private static final Logger LOG = LogManager.getLogger(InternalSchemaInitializer.class); public void run() { if (FeConstants.disableInternalSchemaDb) { @@ -91,7 +81,6 @@ private void createTbl() throws UserException { Env.getCurrentEnv().getInternalCatalog().createTable(buildAnalysisTblStmt()); Env.getCurrentEnv().getInternalCatalog().createTable(buildStatisticsTblStmt()); Env.getCurrentEnv().getInternalCatalog().createTable(buildHistogramTblStmt()); - Env.getCurrentEnv().getInternalCatalog().createTable(buildAnalysisJobTblStmt()); } @VisibleForTesting @@ -215,59 +204,6 @@ public CreateTableStmt buildHistogramTblStmt() throws UserException { return createTableStmt; } - @VisibleForTesting - public CreateTableStmt buildAnalysisJobTblStmt() throws UserException { - TableName tableName = new TableName("", - FeConstants.INTERNAL_DB_NAME, StatisticConstants.ANALYSIS_JOB_TABLE); - List columnDefs = new ArrayList<>(); - columnDefs.add(new ColumnDef("job_id", TypeDef.create(PrimitiveType.BIGINT))); - columnDefs.add(new ColumnDef("task_id", TypeDef.create(PrimitiveType.BIGINT))); - columnDefs.add(new ColumnDef("catalog_name", TypeDef.createVarchar(1024))); - columnDefs.add(new ColumnDef("db_name", TypeDef.createVarchar(1024))); - columnDefs.add(new ColumnDef("tbl_name", TypeDef.createVarchar(1024))); - columnDefs.add(new ColumnDef("col_name", TypeDef.createVarchar(1024))); - columnDefs.add(new ColumnDef("index_id", TypeDef.create(PrimitiveType.BIGINT))); - 
columnDefs.add(new ColumnDef("col_partitions", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH))); - columnDefs.add(new ColumnDef("job_type", TypeDef.createVarchar(32))); - columnDefs.add(new ColumnDef("analysis_type", TypeDef.createVarchar(32))); - columnDefs.add(new ColumnDef("analysis_mode", TypeDef.createVarchar(32))); - columnDefs.add(new ColumnDef("analysis_method", TypeDef.createVarchar(32))); - columnDefs.add(new ColumnDef("schedule_type", TypeDef.createVarchar(32))); - columnDefs.add(new ColumnDef("state", TypeDef.createVarchar(32))); - columnDefs.add(new ColumnDef("sample_percent", TypeDef.create(PrimitiveType.BIGINT))); - columnDefs.add(new ColumnDef("sample_rows", TypeDef.create(PrimitiveType.BIGINT))); - columnDefs.add(new ColumnDef("max_bucket_num", TypeDef.create(PrimitiveType.BIGINT))); - columnDefs.add(new ColumnDef("period_time_in_ms", TypeDef.create(PrimitiveType.BIGINT))); - columnDefs.add(new ColumnDef("last_exec_time_in_ms", TypeDef.create(PrimitiveType.BIGINT))); - columnDefs.add(new ColumnDef("message", TypeDef.createVarchar(1024))); - // TODO remove this code after the table structure is stable - if (!isSchemaChanged && isTableChanged(tableName, columnDefs)) { - isSchemaChanged = true; - DropTableStmt dropTableStmt = new DropTableStmt(true, tableName, true); - StatisticsUtil.analyze(dropTableStmt); - Env.getCurrentEnv().getInternalCatalog().dropTable(dropTableStmt); - } - String engineName = "olap"; - ArrayList uniqueKeys = Lists.newArrayList("job_id", "task_id", - "catalog_name", "db_name", "tbl_name", "col_name", "index_id"); - KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS, uniqueKeys); - - DistributionDesc distributionDesc = new HashDistributionDesc( - StatisticConstants.STATISTIC_TABLE_BUCKET_COUNT, - Lists.newArrayList("job_id", "task_id")); - Map properties = new HashMap() { - { - put("replication_num", String.valueOf(Config.statistic_internal_table_replica_num)); - } - }; - CreateTableStmt createTableStmt = new 
CreateTableStmt(true, false, - tableName, columnDefs, engineName, keysDesc, null, distributionDesc, - properties, null, "Doris internal statistics table, don't modify it", null); - // createTableStmt.setClusterName(SystemInfoService.DEFAULT_CLUSTER); - StatisticsUtil.analyze(createTableStmt); - return createTableStmt; - } - private boolean created() { Optional optionalDatabase = Env.getCurrentEnv().getInternalCatalog() @@ -276,17 +212,9 @@ private boolean created() { return false; } Database db = optionalDatabase.get(); - // TODO remove this code after the table structure is stable - try { - buildAnalysisJobTblStmt(); - } catch (UserException ignored) { - // CHECKSTYLE IGNORE THIS LINE - } - return !isSchemaChanged - && db.getTable(StatisticConstants.ANALYSIS_TBL_NAME).isPresent() + return db.getTable(StatisticConstants.ANALYSIS_TBL_NAME).isPresent() && db.getTable(StatisticConstants.STATISTIC_TBL_NAME).isPresent() - && db.getTable(StatisticConstants.HISTOGRAM_TBL_NAME).isPresent() - && db.getTable(StatisticConstants.ANALYSIS_JOB_TABLE).isPresent(); + && db.getTable(StatisticConstants.HISTOGRAM_TBL_NAME).isPresent(); } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index dfeff8047829d0..025795f87d1746 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -48,8 +48,8 @@ import org.apache.doris.common.util.Util; import org.apache.doris.qe.OriginStatement; import org.apache.doris.resource.Tag; -import org.apache.doris.statistics.AnalysisTaskInfo; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType; +import org.apache.doris.statistics.AnalysisInfo; +import org.apache.doris.statistics.AnalysisInfo.AnalysisType; import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.statistics.HistogramTask; import 
org.apache.doris.statistics.MVAnalysisTask; @@ -1071,11 +1071,11 @@ public TTableDescriptor toThrift() { } @Override - public BaseAnalysisTask createAnalysisTask(AnalysisTaskInfo info) { + public BaseAnalysisTask createAnalysisTask(AnalysisInfo info) { if (info.analysisType.equals(AnalysisType.HISTOGRAM)) { return new HistogramTask(info); } - if (info.analysisType.equals(AnalysisType.COLUMN)) { + if (info.analysisType.equals(AnalysisType.FUNDAMENTALS)) { return new OlapAnalysisTask(info); } return new MVAnalysisTask(info); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java index 4595f0fc9f4bc4..0c3c7d1795a2cc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java @@ -27,7 +27,7 @@ import org.apache.doris.common.util.SqlUtils; import org.apache.doris.common.util.Util; import org.apache.doris.external.hudi.HudiTable; -import org.apache.doris.statistics.AnalysisTaskInfo; +import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.thrift.TTableDescriptor; @@ -521,7 +521,7 @@ public Set getPartitionNames() { } @Override - public BaseAnalysisTask createAnalysisTask(AnalysisTaskInfo info) { + public BaseAnalysisTask createAnalysisTask(AnalysisInfo info) { throw new NotImplementedException("createAnalysisTask not implemented"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index 86545fa4d31704..c79acc79dfcb62 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -20,7 +20,7 @@ import org.apache.doris.alter.AlterCancelException; import org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; -import 
org.apache.doris.statistics.AnalysisTaskInfo; +import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.thrift.TTableDescriptor; @@ -124,7 +124,7 @@ default int getBaseColumnIdxByName(String colName) { TTableDescriptor toThrift(); - BaseAnalysisTask createAnalysisTask(AnalysisTaskInfo info); + BaseAnalysisTask createAnalysisTask(AnalysisInfo info); long estimatedRowCount(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java index 7a8ff075e5daf2..45f1425764a7db 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java @@ -31,7 +31,7 @@ import org.apache.doris.datasource.ExternalSchemaCache; import org.apache.doris.persist.gson.GsonPostProcessable; import org.apache.doris.persist.gson.GsonUtils; -import org.apache.doris.statistics.AnalysisTaskInfo; +import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.thrift.TTableDescriptor; @@ -308,7 +308,7 @@ public TTableDescriptor toThrift() { } @Override - public BaseAnalysisTask createAnalysisTask(AnalysisTaskInfo info) { + public BaseAnalysisTask createAnalysisTask(AnalysisInfo info) { throw new NotImplementedException("createAnalysisTask not implemented"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java index 48f0e8a2c720fe..af032d4ae2cd1a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java @@ -24,7 +24,7 @@ import org.apache.doris.common.Config; import 
org.apache.doris.datasource.HMSExternalCatalog; import org.apache.doris.datasource.hive.PooledHiveMetaStoreClient; -import org.apache.doris.statistics.AnalysisTaskInfo; +import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.HiveAnalysisTask; @@ -285,7 +285,7 @@ public TTableDescriptor toThrift() { } @Override - public BaseAnalysisTask createAnalysisTask(AnalysisTaskInfo info) { + public BaseAnalysisTask createAnalysisTask(AnalysisInfo info) { makeSureInitialized(); switch (dlaType) { case HIVE: diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java index 00906c2c8f740f..37a7f1a1a64129 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java @@ -66,6 +66,7 @@ import org.apache.doris.persist.AlterRoutineLoadJobOperationLog; import org.apache.doris.persist.AlterUserOperationLog; import org.apache.doris.persist.AlterViewInfo; +import org.apache.doris.persist.AnalyzeDeletionLog; import org.apache.doris.persist.BackendReplicasInfo; import org.apache.doris.persist.BackendTabletsInfo; import org.apache.doris.persist.BatchDropInfo; @@ -114,6 +115,7 @@ import org.apache.doris.policy.Policy; import org.apache.doris.policy.StoragePolicy; import org.apache.doris.resource.workloadgroup.WorkloadGroup; +import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.system.Backend; import org.apache.doris.system.Frontend; import org.apache.doris.transaction.TransactionState; @@ -795,6 +797,26 @@ public void readFields(DataInput in) throws IOException { isRead = true; break; } + case OperationType.OP_CREATE_ANALYSIS_JOB: { + data = AnalysisInfo.read(in); + isRead = true; + break; + } + case OperationType.OP_CREATE_ANALYSIS_TASK: { + data = 
AnalysisInfo.read(in); + isRead = true; + break; + } + case OperationType.OP_DELETE_ANALYSIS_JOB: { + data = AnalyzeDeletionLog.read(in); + isRead = true; + break; + } + case OperationType.OP_DELETE_ANALYSIS_TASK: { + data = AnalyzeDeletionLog.read(in); + isRead = true; + break; + } default: { IOException e = new IOException(); LOG.error("UNKNOWN Operation Type {}", opCode, e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/AnalyzeDeletionLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/AnalyzeDeletionLog.java new file mode 100644 index 00000000000000..7535aeb29b4068 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/AnalyzeDeletionLog.java @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.persist; + +import org.apache.doris.common.io.Writable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +public class AnalyzeDeletionLog implements Writable { + + public final long id; + + public AnalyzeDeletionLog(long id) { + this.id = id; + } + + @Override + public void write(DataOutput out) throws IOException { + out.writeLong(id); + } + + public static AnalyzeDeletionLog read(DataInput dataInput) throws IOException { + return new AnalyzeDeletionLog(dataInput.readLong()); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java index 5cbb47b93e8e7b..c64882632f8d72 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java @@ -80,6 +80,7 @@ import org.apache.doris.policy.Policy; import org.apache.doris.policy.StoragePolicy; import org.apache.doris.resource.workloadgroup.WorkloadGroup; +import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.system.Backend; import org.apache.doris.system.Frontend; import org.apache.doris.transaction.TransactionState; @@ -1010,6 +1011,22 @@ public static void loadJournal(Env env, Long logId, JournalEntity journal) { // Do nothing. 
break; } + case OperationType.OP_CREATE_ANALYSIS_JOB: { + env.getAnalysisManager().replayCreateAnalysisJob((AnalysisInfo) journal.getData()); + break; + } + case OperationType.OP_CREATE_ANALYSIS_TASK: { + env.getAnalysisManager().replayCreateAnalysisTask((AnalysisInfo) journal.getData()); + break; + } + case OperationType.OP_DELETE_ANALYSIS_JOB: { + env.getAnalysisManager().replayDeleteAnalysisJob((AnalyzeDeletionLog) journal.getData()); + break; + } + case OperationType.OP_DELETE_ANALYSIS_TASK: { + env.getAnalysisManager().replayDeleteAnalysisTask((AnalyzeDeletionLog) journal.getData()); + break; + } default: { IOException e = new IOException(); LOG.error("UNKNOWN Operation Type {}", opCode, e); @@ -1760,4 +1777,20 @@ public void logAlterMTMV(AlterMultiMaterializedView log) { public void logCleanQueryStats(CleanQueryStatsInfo log) { logEdit(OperationType.OP_CLEAN_QUERY_STATS, log); } + + public void logCreateAnalysisTasks(AnalysisInfo log) { + logEdit(OperationType.OP_CREATE_ANALYSIS_TASK, log); + } + + public void logCreateAnalysisJob(AnalysisInfo log) { + logEdit(OperationType.OP_CREATE_ANALYSIS_JOB, log); + } + + public void logDeleteAnalysisJob(AnalyzeDeletionLog log) { + logEdit(OperationType.OP_DELETE_ANALYSIS_JOB, log); + } + + public void logDeleteAnalysisTask(AnalyzeDeletionLog log) { + logEdit(OperationType.OP_DELETE_ANALYSIS_TASK, log); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java index 8261cb94d08ce5..2ed5f47b7072b9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java @@ -295,6 +295,15 @@ public class OperationType { // update binlog config public static final short OP_UPDATE_BINLOG_CONFIG = 425; + public static final short OP_CREATE_ANALYSIS_TASK = 435; + + public static final short OP_DELETE_ANALYSIS_TASK = 436; + + public static final 
short OP_CREATE_ANALYSIS_JOB = 437; + + public static final short OP_DELETE_ANALYSIS_JOB = 438; + + /** * Get opcode name by op code. **/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java index 6aa6a9c616fd9c..ac8842d8fc6140 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java @@ -43,7 +43,8 @@ import org.apache.doris.analysis.AlterUserStmt; import org.apache.doris.analysis.AlterViewStmt; import org.apache.doris.analysis.AlterWorkloadGroupStmt; -import org.apache.doris.analysis.AnalyzeStmt; +import org.apache.doris.analysis.AnalyzeDBStmt; +import org.apache.doris.analysis.AnalyzeTblStmt; import org.apache.doris.analysis.BackupStmt; import org.apache.doris.analysis.CancelAlterSystemStmt; import org.apache.doris.analysis.CancelAlterTableStmt; @@ -75,6 +76,7 @@ import org.apache.doris.analysis.CreateWorkloadGroupStmt; import org.apache.doris.analysis.DdlStmt; import org.apache.doris.analysis.DeleteStmt; +import org.apache.doris.analysis.DropAnalyzeJobStmt; import org.apache.doris.analysis.DropCatalogStmt; import org.apache.doris.analysis.DropDbStmt; import org.apache.doris.analysis.DropEncryptKeyStmt; @@ -294,8 +296,8 @@ public static void execute(Env env, DdlStmt ddlStmt) throws Exception { env.getRefreshManager().handleRefreshTable((RefreshTableStmt) ddlStmt); } else if (ddlStmt instanceof RefreshDbStmt) { env.getRefreshManager().handleRefreshDb((RefreshDbStmt) ddlStmt); - } else if (ddlStmt instanceof AnalyzeStmt) { - env.createAnalysisJob((AnalyzeStmt) ddlStmt); + } else if (ddlStmt instanceof AnalyzeTblStmt) { + env.createAnalysisJob((AnalyzeTblStmt) ddlStmt); } else if (ddlStmt instanceof AlterResourceStmt) { env.getResourceMgr().alterResource((AlterResourceStmt) ddlStmt); } else if (ddlStmt instanceof AlterWorkloadGroupStmt) { @@ -332,6 +334,8 @@ public static void execute(Env env, 
DdlStmt ddlStmt) throws Exception { env.getAnalysisManager().dropStats((DropStatsStmt) ddlStmt); } else if (ddlStmt instanceof KillAnalysisJobStmt) { env.getAnalysisManager().handleKillAnalyzeStmt((KillAnalysisJobStmt) ddlStmt); + } else if (ddlStmt instanceof AnalyzeDBStmt) { + env.getAnalysisManager().createAnalysisJobs((AnalyzeDBStmt) ddlStmt); } else if (ddlStmt instanceof CleanQueryStatsStmt) { CleanQueryStatsStmt stmt = (CleanQueryStatsStmt) ddlStmt; CleanQueryStatsInfo cleanQueryStatsInfo = null; @@ -352,6 +356,9 @@ public static void execute(Env env, DdlStmt ddlStmt) throws Exception { throw new DdlException("Unknown scope: " + stmt.getScope()); } env.cleanQueryStats(cleanQueryStatsInfo); + } else if (ddlStmt instanceof DropAnalyzeJobStmt) { + DropAnalyzeJobStmt analyzeJobStmt = (DropAnalyzeJobStmt) ddlStmt; + Env.getCurrentEnv().getAnalysisManager().dropAnalyzeJob(analyzeJobStmt); } else { LOG.warn("Unkown statement " + ddlStmt.getClass()); throw new DdlException("Unknown statement."); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 00ed7a501da908..58baf1044b3076 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -799,13 +799,6 @@ public int getBeNumberForTest() { @VariableMgr.VarAttr(name = ENABLE_CBO_STATISTICS) public boolean enableCboStatistics = false; - /** - * If true, when synchronously collecting statistics, the information of - * the statistics job will be saved, currently mainly used for p0 test - */ - @VariableMgr.VarAttr(name = ENABLE_SAVE_STATISTICS_SYNC_JOB) - public boolean enableSaveStatisticsSyncJob = false; - @VariableMgr.VarAttr(name = ENABLE_ELIMINATE_SORT_NODE) public boolean enableEliminateSortNode = true; @@ -1573,10 +1566,6 @@ public boolean getEnableCboStatistics() { return enableCboStatistics; } - public boolean 
isEnableSaveStatisticsSyncJob() { - return enableSaveStatisticsSyncJob; - } - public long getFileSplitSize() { return fileSplitSize; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 093722ff6692a5..08af281e093b05 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -28,6 +28,7 @@ import org.apache.doris.analysis.PartitionNames; import org.apache.doris.analysis.ShowAlterStmt; import org.apache.doris.analysis.ShowAnalyzeStmt; +import org.apache.doris.analysis.ShowAnalyzeTaskStatus; import org.apache.doris.analysis.ShowAuthorStmt; import org.apache.doris.analysis.ShowBackendsStmt; import org.apache.doris.analysis.ShowBackupStmt; @@ -187,6 +188,7 @@ import org.apache.doris.mtmv.metadata.MTMVJob; import org.apache.doris.mtmv.metadata.MTMVTask; import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.Histogram; import org.apache.doris.statistics.StatisticsRepository; @@ -221,6 +223,9 @@ import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -416,6 +421,8 @@ public ShowResultSet execute() throws AnalysisException { handleShowTypeCastStmt(); } else if (stmt instanceof ShowBuildIndexStmt) { handleShowBuildIndexStmt(); + } else if (stmt instanceof ShowAnalyzeTaskStatus) { + handleShowAnalyzeTaskStatus(); } else { handleEmtpy(); } @@ -423,6 +430,7 @@ public ShowResultSet execute() throws AnalysisException { return resultSet; } + private void handleShowRollup() { // TODO: not implemented yet ShowRollupStmt showRollupStmt = (ShowRollupStmt) stmt; @@ 
-2505,36 +2513,26 @@ private void handleShowCreateCatalog() throws AnalysisException { private void handleShowAnalyze() { ShowAnalyzeStmt showStmt = (ShowAnalyzeStmt) stmt; - - List> results; + List results = Env.getCurrentEnv().getAnalysisManager() + .showAnalysisJob(showStmt); List> resultRows = Lists.newArrayList(); - - try { - results = Env.getCurrentEnv().getAnalysisManager() - .showAnalysisJob(showStmt); - } catch (DdlException e) { - resultSet = new ShowResultSet(showStmt.getMetaData(), resultRows); - return; - } - - // order the result - ListComparator> comparator; - List orderByPairs = showStmt.getOrderByPairs(); - if (orderByPairs == null) { - // sort by id asc - comparator = new ListComparator<>(0); - } else { - OrderByPair[] orderByPairArr = new OrderByPair[orderByPairs.size()]; - comparator = new ListComparator<>(orderByPairs.toArray(orderByPairArr)); - } - results.sort(comparator); - - // convert to result and return it - for (List result : results) { - List row = result.stream().map(Object::toString).collect(Collectors.toList()); + for (AnalysisInfo analysisInfo : results) { + List row = new ArrayList<>(); + row.add(String.valueOf(analysisInfo.jobId)); + row.add(analysisInfo.catalogName); + row.add(analysisInfo.dbName); + row.add(analysisInfo.tblName); + row.add(analysisInfo.colName); + row.add(analysisInfo.jobType.toString()); + row.add(analysisInfo.analysisType.toString()); + row.add(analysisInfo.message); + row.add(TimeUtils.DATETIME_FORMAT.format( + LocalDateTime.ofInstant(Instant.ofEpochMilli(analysisInfo.lastExecTimeInMs), + ZoneId.systemDefault()))); + row.add(analysisInfo.state.toString()); + row.add(analysisInfo.scheduleType.toString()); resultRows.add(row); } - resultSet = new ShowResultSet(showStmt.getMetaData(), resultRows); } @@ -2719,5 +2717,24 @@ private void handleShowBuildIndexStmt() throws AnalysisException { showStmt.getOrderPairs(), showStmt.getLimitElement()).getRows(); resultSet = new ShowResultSet(showStmt.getMetaData(), rows); 
} + + private void handleShowAnalyzeTaskStatus() { + ShowAnalyzeTaskStatus showStmt = (ShowAnalyzeTaskStatus) stmt; + List analysisInfos = Env.getCurrentEnv().getAnalysisManager().findTasks(showStmt.getJobId()); + List> rows = new ArrayList<>(); + for (AnalysisInfo analysisInfo : analysisInfos) { + List row = new ArrayList<>(); + row.add(String.valueOf(analysisInfo.taskId)); + row.add(analysisInfo.colName); + row.add(analysisInfo.message); + row.add(TimeUtils.DATETIME_FORMAT.format( + LocalDateTime.ofInstant(Instant.ofEpochMilli(analysisInfo.lastExecTimeInMs), + ZoneId.systemDefault()))); + row.add(analysisInfo.state.toString()); + rows.add(row); + } + resultSet = new ShowResultSet(showStmt.getMetaData(), rows); + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index a759d4de0fbb4d..8055e1caa1fdc9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -20,7 +20,7 @@ import org.apache.doris.analysis.AddPartitionLikeClause; import org.apache.doris.analysis.AlterClause; import org.apache.doris.analysis.AlterTableStmt; -import org.apache.doris.analysis.AnalyzeStmt; +import org.apache.doris.analysis.AnalyzeTblStmt; import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.ArrayLiteral; import org.apache.doris.analysis.CreateTableAsSelectStmt; @@ -1135,7 +1135,7 @@ public void cancel() { if (mysqlLoadId != null) { Env.getCurrentEnv().getLoadManager().getMysqlLoadManager().cancelMySqlLoad(mysqlLoadId); } - if (parsedStmt instanceof AnalyzeStmt) { + if (parsedStmt instanceof AnalyzeTblStmt) { Env.getCurrentEnv().getAnalysisManager().cancelSyncTask(context); } } @@ -2125,7 +2125,7 @@ private void handleDeleteStmt() { private void handleDdlStmt() { try { DdlExecutor.execute(context.getEnv(), (DdlStmt) parsedStmt); - if (!(parsedStmt instanceof AnalyzeStmt)) { + if 
(!(parsedStmt instanceof AnalyzeTblStmt)) { context.getState().setOk(); } } catch (QueryStateException e) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java similarity index 57% rename from fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java rename to fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index 2860a472c6ffb9..dea2155199a2e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -17,6 +17,8 @@ package org.apache.doris.statistics; +import org.apache.doris.common.io.Text; +import org.apache.doris.common.io.Writable; import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.StatisticsUtil; @@ -25,14 +27,20 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; import java.lang.reflect.Type; +import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.StringJoiner; -public class AnalysisTaskInfo { +public class AnalysisInfo implements Writable { - private static final Logger LOG = LogManager.getLogger(AnalysisTaskInfo.class); + private static final Logger LOG = LogManager.getLogger(AnalysisInfo.class); public enum AnalysisMode { INCREMENTAL, @@ -45,7 +53,7 @@ public enum AnalysisMethod { } public enum AnalysisType { - COLUMN, + FUNDAMENTALS, INDEX, HISTOGRAM } @@ -77,7 +85,7 @@ public enum ScheduleType { public final String colName; - public final Long indexId; + public final long indexId; public final JobType jobType; @@ -108,7 +116,7 @@ public enum ScheduleType { // This kind of task is mainly to collect the number of rows of a table. 
public boolean externalTableLevelTask; - public AnalysisTaskInfo(long jobId, long taskId, String catalogName, String dbName, String tblName, + public AnalysisInfo(long jobId, long taskId, String catalogName, String dbName, String tblName, Map> colToPartitions, String colName, Long indexId, JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType, int samplePercent, int sampleRows, int maxBucketNum, long periodTimeInMs, String message, @@ -179,51 +187,51 @@ public boolean isJob() { } // TODO: use thrift - public static AnalysisTaskInfo fromResultRow(ResultRow resultRow) { + public static AnalysisInfo fromResultRow(ResultRow resultRow) { try { - AnalysisTaskInfoBuilder analysisTaskInfoBuilder = new AnalysisTaskInfoBuilder(); + AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder(); long jobId = Long.parseLong(resultRow.getColumnValue("job_id")); - analysisTaskInfoBuilder.setJobId(jobId); + analysisInfoBuilder.setJobId(jobId); long taskId = Long.parseLong(resultRow.getColumnValue("task_id")); - analysisTaskInfoBuilder.setTaskId(taskId); + analysisInfoBuilder.setTaskId(taskId); String catalogName = resultRow.getColumnValue("catalog_name"); - analysisTaskInfoBuilder.setCatalogName(catalogName); + analysisInfoBuilder.setCatalogName(catalogName); String dbName = resultRow.getColumnValue("db_name"); - analysisTaskInfoBuilder.setDbName(dbName); + analysisInfoBuilder.setDbName(dbName); String tblName = resultRow.getColumnValue("tbl_name"); - analysisTaskInfoBuilder.setTblName(tblName); + analysisInfoBuilder.setTblName(tblName); String colName = resultRow.getColumnValue("col_name"); - analysisTaskInfoBuilder.setColName(colName); + analysisInfoBuilder.setColName(colName); long indexId = Long.parseLong(resultRow.getColumnValue("index_id")); - analysisTaskInfoBuilder.setIndexId(indexId); + analysisInfoBuilder.setIndexId(indexId); String partitionNames = resultRow.getColumnValue("col_partitions"); Map> colToPartitions = 
getColToPartition(partitionNames); - analysisTaskInfoBuilder.setColToPartitions(colToPartitions); + analysisInfoBuilder.setColToPartitions(colToPartitions); String jobType = resultRow.getColumnValue("job_type"); - analysisTaskInfoBuilder.setJobType(JobType.valueOf(jobType)); + analysisInfoBuilder.setJobType(JobType.valueOf(jobType)); String analysisType = resultRow.getColumnValue("analysis_type"); - analysisTaskInfoBuilder.setAnalysisType(AnalysisType.valueOf(analysisType)); + analysisInfoBuilder.setAnalysisType(AnalysisType.valueOf(analysisType)); String analysisMode = resultRow.getColumnValue("analysis_mode"); - analysisTaskInfoBuilder.setAnalysisMode(AnalysisMode.valueOf(analysisMode)); + analysisInfoBuilder.setAnalysisMode(AnalysisMode.valueOf(analysisMode)); String analysisMethod = resultRow.getColumnValue("analysis_method"); - analysisTaskInfoBuilder.setAnalysisMethod(AnalysisMethod.valueOf(analysisMethod)); + analysisInfoBuilder.setAnalysisMethod(AnalysisMethod.valueOf(analysisMethod)); String scheduleType = resultRow.getColumnValue("schedule_type"); - analysisTaskInfoBuilder.setScheduleType(ScheduleType.valueOf(scheduleType)); + analysisInfoBuilder.setScheduleType(ScheduleType.valueOf(scheduleType)); String state = resultRow.getColumnValue("state"); - analysisTaskInfoBuilder.setState(AnalysisState.valueOf(state)); + analysisInfoBuilder.setState(AnalysisState.valueOf(state)); String samplePercent = resultRow.getColumnValue("sample_percent"); - analysisTaskInfoBuilder.setSamplePercent(StatisticsUtil.convertStrToInt(samplePercent)); + analysisInfoBuilder.setSamplePercent(StatisticsUtil.convertStrToInt(samplePercent)); String sampleRows = resultRow.getColumnValue("sample_rows"); - analysisTaskInfoBuilder.setSampleRows(StatisticsUtil.convertStrToInt(sampleRows)); + analysisInfoBuilder.setSampleRows(StatisticsUtil.convertStrToInt(sampleRows)); String maxBucketNum = resultRow.getColumnValue("max_bucket_num"); - 
analysisTaskInfoBuilder.setMaxBucketNum(StatisticsUtil.convertStrToInt(maxBucketNum)); + analysisInfoBuilder.setMaxBucketNum(StatisticsUtil.convertStrToInt(maxBucketNum)); String periodTimeInMs = resultRow.getColumnValue("period_time_in_ms"); - analysisTaskInfoBuilder.setPeriodTimeInMs(StatisticsUtil.convertStrToInt(periodTimeInMs)); + analysisInfoBuilder.setPeriodTimeInMs(StatisticsUtil.convertStrToInt(periodTimeInMs)); String lastExecTimeInMs = resultRow.getColumnValue("last_exec_time_in_ms"); - analysisTaskInfoBuilder.setLastExecTimeInMs(StatisticsUtil.convertStrToLong(lastExecTimeInMs)); + analysisInfoBuilder.setLastExecTimeInMs(StatisticsUtil.convertStrToLong(lastExecTimeInMs)); String message = resultRow.getColumnValue("message"); - analysisTaskInfoBuilder.setMessage(message); - return analysisTaskInfoBuilder.build(); + analysisInfoBuilder.setMessage(message); + return analysisInfoBuilder.build(); } catch (Exception e) { LOG.warn("Failed to deserialize analysis task info.", e); return null; @@ -246,4 +254,74 @@ private static Map> getColToPartition(String colToPartitionS Type type = new TypeToken>>() {}.getType(); return gson.fromJson(colToPartitionStr, type); } + + @Override + public void write(DataOutput out) throws IOException { + out.writeLong(jobId); + out.writeLong(taskId); + Text.writeString(out, catalogName); + Text.writeString(out, dbName); + Text.writeString(out, tblName); + out.writeInt(colToPartitions.size()); + for (Entry> entry : colToPartitions.entrySet()) { + Text.writeString(out, entry.getKey()); + out.writeInt(entry.getValue().size()); + for (String part : entry.getValue()) { + Text.writeString(out, part); + } + } + Text.writeString(out, colName); + out.writeLong(indexId); + Text.writeString(out, jobType.toString()); + Text.writeString(out, analysisMode.toString()); + Text.writeString(out, analysisMethod.toString()); + Text.writeString(out, analysisType.toString()); + out.writeInt(samplePercent); + out.writeInt(sampleRows); + 
out.writeInt(maxBucketNum); + out.writeLong(periodTimeInMs); + out.writeLong(lastExecTimeInMs); + Text.writeString(out, state.toString()); + Text.writeString(out, scheduleType.toString()); + Text.writeString(out, message); + out.writeBoolean(externalTableLevelTask); + } + + public static AnalysisInfo read(DataInput dataInput) throws IOException { + AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder(); + analysisInfoBuilder.setJobId(dataInput.readLong()); + long taskId = dataInput.readLong(); + analysisInfoBuilder.setTaskId(taskId); + analysisInfoBuilder.setCatalogName(Text.readString(dataInput)); + analysisInfoBuilder.setDbName(Text.readString(dataInput)); + analysisInfoBuilder.setTblName(Text.readString(dataInput)); + int size = dataInput.readInt(); + Map> colToPartitions = new HashMap<>(); + for (int i = 0; i < size; i++) { + String k = Text.readString(dataInput); + int partSize = dataInput.readInt(); + Set parts = new HashSet<>(); + for (int j = 0; j < partSize; j++) { + parts.add(Text.readString(dataInput)); + } + colToPartitions.put(k, parts); + } + analysisInfoBuilder.setColToPartitions(colToPartitions); + analysisInfoBuilder.setColName(Text.readString(dataInput)); + analysisInfoBuilder.setIndexId(dataInput.readLong()); + analysisInfoBuilder.setJobType(JobType.valueOf(Text.readString(dataInput))); + analysisInfoBuilder.setAnalysisMode(AnalysisMode.valueOf(Text.readString(dataInput))); + analysisInfoBuilder.setAnalysisMethod(AnalysisMethod.valueOf(Text.readString(dataInput))); + analysisInfoBuilder.setAnalysisType(AnalysisType.valueOf(Text.readString(dataInput))); + analysisInfoBuilder.setSamplePercent(dataInput.readInt()); + analysisInfoBuilder.setSampleRows(dataInput.readInt()); + analysisInfoBuilder.setMaxBucketNum(dataInput.readInt()); + analysisInfoBuilder.setPeriodTimeInMs(dataInput.readLong()); + analysisInfoBuilder.setLastExecTimeInMs(dataInput.readLong()); + 
analysisInfoBuilder.setState(AnalysisState.valueOf(Text.readString(dataInput))); + analysisInfoBuilder.setScheduleType(ScheduleType.valueOf(Text.readString(dataInput))); + analysisInfoBuilder.setMessage(Text.readString(dataInput)); + analysisInfoBuilder.setExternalTableLevelTask(dataInput.readBoolean()); + return analysisInfoBuilder.build(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java similarity index 68% rename from fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java rename to fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index acaae0baab63eb..e0ab8220c86ed8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -17,16 +17,16 @@ package org.apache.doris.statistics; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMode; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType; -import org.apache.doris.statistics.AnalysisTaskInfo.JobType; -import org.apache.doris.statistics.AnalysisTaskInfo.ScheduleType; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMode; +import org.apache.doris.statistics.AnalysisInfo.AnalysisType; +import org.apache.doris.statistics.AnalysisInfo.JobType; +import org.apache.doris.statistics.AnalysisInfo.ScheduleType; import java.util.Map; import java.util.Set; -public class AnalysisTaskInfoBuilder { +public class AnalysisInfoBuilder { private long jobId; private long taskId; private String catalogName; @@ -34,7 +34,7 @@ public class AnalysisTaskInfoBuilder { private String tblName; private Map> colToPartitions; private String colName; - private Long indexId; + 
private Long indexId = -1L; private JobType jobType; private AnalysisMode analysisMode; private AnalysisMethod analysisMethod; @@ -46,13 +46,13 @@ public class AnalysisTaskInfoBuilder { private long lastExecTimeInMs; private AnalysisState state; private ScheduleType scheduleType; - private String message; + private String message = ""; private boolean externalTableLevelTask; - public AnalysisTaskInfoBuilder() { + public AnalysisInfoBuilder() { } - public AnalysisTaskInfoBuilder(AnalysisTaskInfo info) { + public AnalysisInfoBuilder(AnalysisInfo info) { jobId = info.jobId; taskId = info.taskId; catalogName = info.catalogName; @@ -75,120 +75,120 @@ public AnalysisTaskInfoBuilder(AnalysisTaskInfo info) { scheduleType = info.scheduleType; } - public AnalysisTaskInfoBuilder setJobId(long jobId) { + public AnalysisInfoBuilder setJobId(long jobId) { this.jobId = jobId; return this; } - public AnalysisTaskInfoBuilder setTaskId(long taskId) { + public AnalysisInfoBuilder setTaskId(long taskId) { this.taskId = taskId; return this; } - public AnalysisTaskInfoBuilder setCatalogName(String catalogName) { + public AnalysisInfoBuilder setCatalogName(String catalogName) { this.catalogName = catalogName; return this; } - public AnalysisTaskInfoBuilder setDbName(String dbName) { + public AnalysisInfoBuilder setDbName(String dbName) { this.dbName = dbName; return this; } - public AnalysisTaskInfoBuilder setTblName(String tblName) { + public AnalysisInfoBuilder setTblName(String tblName) { this.tblName = tblName; return this; } - public AnalysisTaskInfoBuilder setColToPartitions(Map> colToPartitions) { + public AnalysisInfoBuilder setColToPartitions(Map> colToPartitions) { this.colToPartitions = colToPartitions; return this; } - public AnalysisTaskInfoBuilder setColName(String colName) { + public AnalysisInfoBuilder setColName(String colName) { this.colName = colName; return this; } - public AnalysisTaskInfoBuilder setIndexId(Long indexId) { + public AnalysisInfoBuilder setIndexId(Long 
indexId) { this.indexId = indexId; return this; } - public AnalysisTaskInfoBuilder setJobType(JobType jobType) { + public AnalysisInfoBuilder setJobType(JobType jobType) { this.jobType = jobType; return this; } - public AnalysisTaskInfoBuilder setAnalysisMode(AnalysisMode analysisMode) { + public AnalysisInfoBuilder setAnalysisMode(AnalysisMode analysisMode) { this.analysisMode = analysisMode; return this; } - public AnalysisTaskInfoBuilder setAnalysisMethod(AnalysisMethod analysisMethod) { + public AnalysisInfoBuilder setAnalysisMethod(AnalysisMethod analysisMethod) { this.analysisMethod = analysisMethod; return this; } - public AnalysisTaskInfoBuilder setAnalysisType(AnalysisType analysisType) { + public AnalysisInfoBuilder setAnalysisType(AnalysisType analysisType) { this.analysisType = analysisType; return this; } - public AnalysisTaskInfoBuilder setMaxBucketNum(int maxBucketNum) { + public AnalysisInfoBuilder setMaxBucketNum(int maxBucketNum) { this.maxBucketNum = maxBucketNum; return this; } - public AnalysisTaskInfoBuilder setSamplePercent(int samplePercent) { + public AnalysisInfoBuilder setSamplePercent(int samplePercent) { this.samplePercent = samplePercent; return this; } - public AnalysisTaskInfoBuilder setSampleRows(int sampleRows) { + public AnalysisInfoBuilder setSampleRows(int sampleRows) { this.sampleRows = sampleRows; return this; } - public AnalysisTaskInfoBuilder setPeriodTimeInMs(long periodTimeInMs) { + public AnalysisInfoBuilder setPeriodTimeInMs(long periodTimeInMs) { this.periodTimeInMs = periodTimeInMs; return this; } - public AnalysisTaskInfoBuilder setMessage(String message) { + public AnalysisInfoBuilder setMessage(String message) { this.message = message; return this; } - public AnalysisTaskInfoBuilder setLastExecTimeInMs(long lastExecTimeInMs) { + public AnalysisInfoBuilder setLastExecTimeInMs(long lastExecTimeInMs) { this.lastExecTimeInMs = lastExecTimeInMs; return this; } - public AnalysisTaskInfoBuilder setState(AnalysisState 
state) { + public AnalysisInfoBuilder setState(AnalysisState state) { this.state = state; return this; } - public AnalysisTaskInfoBuilder setScheduleType(ScheduleType scheduleType) { + public AnalysisInfoBuilder setScheduleType(ScheduleType scheduleType) { this.scheduleType = scheduleType; return this; } - public AnalysisTaskInfoBuilder setExternalTableLevelTask(boolean isTableLevel) { + public AnalysisInfoBuilder setExternalTableLevelTask(boolean isTableLevel) { this.externalTableLevelTask = isTableLevel; return this; } - public AnalysisTaskInfo build() { - return new AnalysisTaskInfo(jobId, taskId, catalogName, dbName, tblName, colToPartitions, + public AnalysisInfo build() { + return new AnalysisInfo(jobId, taskId, catalogName, dbName, tblName, colToPartitions, colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent, sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, state, scheduleType, externalTableLevelTask); } - public AnalysisTaskInfoBuilder copy() { - return new AnalysisTaskInfoBuilder() + public AnalysisInfoBuilder copy() { + return new AnalysisInfoBuilder() .setJobId(jobId) .setTaskId(taskId) .setCatalogName(catalogName) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index f62a6e6c2206ed..71cf3dae138845 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -17,7 +17,9 @@ package org.apache.doris.statistics; -import org.apache.doris.analysis.AnalyzeStmt; +import org.apache.doris.analysis.AnalyzeDBStmt; +import org.apache.doris.analysis.AnalyzeTblStmt; +import org.apache.doris.analysis.DropAnalyzeJobStmt; import org.apache.doris.analysis.DropStatsStmt; import org.apache.doris.analysis.KillAnalysisJobStmt; import org.apache.doris.analysis.ShowAnalyzeStmt; @@ -31,95 +33,168 @@ import 
org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.TableIf.TableType; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; -import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.common.util.Daemon; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.persist.AnalyzeDeletionLog; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.ShowResultSet; import org.apache.doris.qe.ShowResultSetMetaData; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMode; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType; -import org.apache.doris.statistics.AnalysisTaskInfo.JobType; -import org.apache.doris.statistics.AnalysisTaskInfo.ScheduleType; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMode; +import org.apache.doris.statistics.AnalysisInfo.AnalysisType; +import org.apache.doris.statistics.AnalysisInfo.JobType; +import org.apache.doris.statistics.AnalysisInfo.ScheduleType; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.text.StringSubstitutor; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.Nullable; -import java.time.Instant; -import java.time.LocalDateTime; -import java.time.ZoneId; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import 
java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.StringJoiner; +import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.function.Predicate; import java.util.stream.Collectors; -public class AnalysisManager { +public class AnalysisManager extends Daemon { - public final AnalysisTaskScheduler taskScheduler; + public AnalysisTaskScheduler taskScheduler; private static final Logger LOG = LogManager.getLogger(AnalysisManager.class); - private static final String UPDATE_JOB_STATE_SQL_TEMPLATE = "UPDATE " - + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.ANALYSIS_JOB_TABLE + " " - + "SET state = '${jobState}' ${message} ${updateExecTime} " - + "WHERE job_id = ${jobId} and (task_id=${taskId} || ${isAllTask})"; + private ConcurrentMap> analysisJobIdToTaskMap = new ConcurrentHashMap<>(); - private static final String SHOW_JOB_STATE_SQL_TEMPLATE = "SELECT " - + "job_id, catalog_name, db_name, tbl_name, col_name, job_type, " - + "analysis_type, message, last_exec_time_in_ms, state, schedule_type " - + "FROM " + FeConstants.INTERNAL_DB_NAME + "." 
+ StatisticConstants.ANALYSIS_JOB_TABLE; + private StatisticsCache statisticsCache; - // The time field that needs to be displayed - private static final String LAST_EXEC_TIME_IN_MS = "last_exec_time_in_ms"; + private AnalysisTaskExecutor taskExecutor; - private final ConcurrentMap> analysisJobIdToTaskMap; + private final Map analysisTaskInfoMap = Collections.synchronizedMap(new TreeMap<>()); + private final Map analysisJobInfoMap = Collections.synchronizedMap(new TreeMap<>()); - private StatisticsCache statisticsCache; + private final ConcurrentMap ctxToSyncTask = new ConcurrentHashMap<>(); - private final AnalysisTaskExecutor taskExecutor; + public AnalysisManager() { + super(TimeUnit.SECONDS.toMillis(StatisticConstants.ANALYZE_MANAGER_INTERVAL_IN_SECS)); + if (!Env.isCheckpointThread()) { + this.taskScheduler = new AnalysisTaskScheduler(); + this.taskExecutor = new AnalysisTaskExecutor(taskScheduler); + this.statisticsCache = new StatisticsCache(); + taskExecutor.start(); + } + } - private ConcurrentMap ctxToSyncTask = new ConcurrentHashMap<>(); + @Override + protected void runOneCycle() { + clear(); + } - public AnalysisManager() { - analysisJobIdToTaskMap = new ConcurrentHashMap<>(); - this.taskScheduler = new AnalysisTaskScheduler(); - taskExecutor = new AnalysisTaskExecutor(taskScheduler); - this.statisticsCache = new StatisticsCache(); - taskExecutor.start(); + private void clear() { + clearMeta(analysisJobInfoMap, (a) -> + a.scheduleType.equals(ScheduleType.ONCE) + && System.currentTimeMillis() - a.lastExecTimeInMs + > TimeUnit.DAYS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS), + (id) -> { + Env.getCurrentEnv().getEditLog().logDeleteAnalysisJob(new AnalyzeDeletionLog(id)); + return null; + }); + clearMeta(analysisTaskInfoMap, (a) -> System.currentTimeMillis() - a.lastExecTimeInMs + > TimeUnit.DAYS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS), + (id) -> { + 
Env.getCurrentEnv().getEditLog().logDeleteAnalysisTask(new AnalyzeDeletionLog(id)); + return null; + }); + } + + private void clearMeta(Map infoMap, Predicate isExpired, + Function writeLog) { + synchronized (infoMap) { + List expired = new ArrayList<>(); + for (Entry entry : infoMap.entrySet()) { + if (isExpired.test(entry.getValue())) { + expired.add(entry.getKey()); + } + } + for (Long k : expired) { + infoMap.remove(k); + writeLog.apply(k); + } + } } public StatisticsCache getStatisticsCache() { return statisticsCache; } + public void createAnalysisJobs(AnalyzeDBStmt analyzeDBStmt) throws DdlException { + DatabaseIf db = analyzeDBStmt.getDb(); + List tbls = db.getTables(); + List analysisInfos = new ArrayList<>(); + db.readLock(); + try { + List analyzeStmts = new ArrayList<>(); + for (TableIf table : tbls) { + TableName tableName = new TableName(analyzeDBStmt.getCtlIf().getName(), db.getFullName(), + table.getName()); + AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(analyzeDBStmt.getAnalyzeProperties(), tableName, + table.getBaseSchema().stream().map( + Column::getName).collect( + Collectors.toList()), db.getId(), table); + try { + analyzeTblStmt.check(); + } catch (AnalysisException analysisException) { + throw new DdlException(analysisException.getMessage(), analysisException); + } + analyzeStmts.add(analyzeTblStmt); + } + for (AnalyzeTblStmt analyzeTblStmt : analyzeStmts) { + buildAndAssignJob(analyzeTblStmt); + } + sendJobId(analysisInfos); + } finally { + db.readUnlock(); + } + + } + // Each analyze stmt corresponding to an analysis job. 
- public void createAnalysisJob(AnalyzeStmt stmt) throws DdlException { + public void createAnalysisJob(AnalyzeTblStmt stmt) throws DdlException { + AnalysisInfo jobInfo = buildAndAssignJob(stmt); + if (jobInfo == null) { + return; + } + sendJobId(ImmutableList.of(jobInfo)); + } + + @Nullable + private AnalysisInfo buildAndAssignJob(AnalyzeTblStmt stmt) throws DdlException { if (!StatisticsUtil.statsTblAvailable() && !FeConstants.runningUnitTest) { throw new DdlException("Stats table not available, please make sure your cluster status is normal"); } - AnalysisTaskInfo jobInfo = buildAnalysisJobInfo(stmt); + AnalysisInfo jobInfo = buildAnalysisJobInfo(stmt); if (jobInfo.colToPartitions.isEmpty()) { // No statistics need to be collected or updated - return; + return null; } boolean isSync = stmt.isSync(); @@ -128,8 +203,7 @@ public void createAnalysisJob(AnalyzeStmt stmt) throws DdlException { createTaskForMVIdx(jobInfo, analysisTaskInfos, isSync); createTaskForExternalTable(jobInfo, analysisTaskInfos, isSync); - ConnectContext ctx = ConnectContext.get(); - if (!isSync || ctx.getSessionVariable().enableSaveStatisticsSyncJob) { + if (!isSync) { persistAnalysisJob(jobInfo); analysisJobIdToTaskMap.put(jobInfo.jobId, analysisTaskInfos); } @@ -142,16 +216,16 @@ public void createAnalysisJob(AnalyzeStmt stmt) throws DdlException { if (isSync) { syncExecute(analysisTaskInfos.values()); - return; + return null; } analysisTaskInfos.values().forEach(taskScheduler::schedule); - sendJobId(jobInfo.jobId); + return jobInfo; } // Analysis job created by the system - public void createAnalysisJob(AnalysisTaskInfo info) throws DdlException { - AnalysisTaskInfo jobInfo = buildAnalysisJobInfo(info); + public void createAnalysisJob(AnalysisInfo info) throws DdlException { + AnalysisInfo jobInfo = buildAnalysisJobInfo(info); if (jobInfo.colToPartitions.isEmpty()) { // No statistics need to be collected or updated return; @@ -173,14 +247,24 @@ public void 
createAnalysisJob(AnalysisTaskInfo info) throws DdlException { analysisTaskInfos.values().forEach(taskScheduler::schedule); } - private void sendJobId(long jobId) { + private void sendJobId(List analysisInfos) { List columns = new ArrayList<>(); + columns.add(new Column("Catalog_Name", ScalarType.createVarchar(1024))); + columns.add(new Column("DB_Name", ScalarType.createVarchar(1024))); + columns.add(new Column("Table_Name", ScalarType.createVarchar(1024))); + columns.add(new Column("Columns", ScalarType.createVarchar(1024))); columns.add(new Column("Job_Id", ScalarType.createVarchar(19))); ShowResultSetMetaData commonResultSetMetaData = new ShowResultSetMetaData(columns); List> resultRows = new ArrayList<>(); - List row = new ArrayList<>(); - row.add(String.valueOf(jobId)); - resultRows.add(row); + for (AnalysisInfo analysisInfo : analysisInfos) { + List row = new ArrayList<>(); + row.add(analysisInfo.catalogName); + row.add(analysisInfo.dbName); + row.add(analysisInfo.tblName); + row.add(analysisInfo.colName); + row.add(String.valueOf(analysisInfo.jobId)); + resultRows.add(row); + } ShowResultSet commonResultSet = new ShowResultSet(commonResultSetMetaData, resultRows); try { ConnectContext.get().getExecutor().sendResultSet(commonResultSet); @@ -204,7 +288,7 @@ private void sendJobId(long jobId) { * TODO Supports incremental collection of statistics from materialized views */ private Map> validateAndGetPartitions(TableIf table, Set columnNames, - AnalysisType analysisType, AnalysisMode analysisMode) throws DdlException { + AnalysisType analysisType, AnalysisMode analysisMode) throws DdlException { long tableId = table.getId(); Set partitionNames = table.getPartitionNames(); @@ -241,7 +325,7 @@ private Map> validateAndGetPartitions(TableIf table, Set partIds.removeAll(invalidPartIds)); // In incremental collection mode, just collect the uncollected partition statistics existColAndPartsForStats.forEach((columnName, partitionIds) -> { @@ -263,8 +347,8 @@ private 
Map> validateAndGetPartitions(TableIf table, Set 0) { return; } - try { - AnalysisTaskInfoBuilder jobInfoBuilder = new AnalysisTaskInfoBuilder(jobInfo); - AnalysisTaskInfo analysisTaskInfo = jobInfoBuilder.setTaskId(-1).build(); - StatisticsRepository.persistAnalysisTask(analysisTaskInfo); - } catch (Throwable t) { - throw new DdlException(t.getMessage(), t); - } + AnalysisInfoBuilder jobInfoBuilder = new AnalysisInfoBuilder(jobInfo); + AnalysisInfo analysisInfo = jobInfoBuilder.setTaskId(-1).build(); + logCreateAnalysisJob(analysisInfo); } - private void createTaskForMVIdx(AnalysisTaskInfo jobInfo, Map analysisTasks, + private void createTaskForMVIdx(AnalysisInfo jobInfo, Map analysisTasks, boolean isSync) throws DdlException { TableIf table; try { @@ -384,53 +469,59 @@ private void createTaskForMVIdx(AnalysisTaskInfo jobInfo, Map analysisTasks, + private void createTaskForEachColumns(AnalysisInfo jobInfo, Map analysisTasks, boolean isSync) throws DdlException { Map> columnToPartitions = jobInfo.colToPartitions; for (Entry> entry : columnToPartitions.entrySet()) { long indexId = -1; long taskId = Env.getCurrentEnv().getNextId(); String colName = entry.getKey(); - AnalysisTaskInfoBuilder colTaskInfoBuilder = new AnalysisTaskInfoBuilder(jobInfo); + AnalysisInfoBuilder colTaskInfoBuilder = new AnalysisInfoBuilder(jobInfo); if (jobInfo.analysisType != AnalysisType.HISTOGRAM) { - colTaskInfoBuilder.setAnalysisType(AnalysisType.COLUMN); + colTaskInfoBuilder.setAnalysisType(AnalysisType.FUNDAMENTALS); colTaskInfoBuilder.setColToPartitions(Collections.singletonMap(colName, entry.getValue())); } - AnalysisTaskInfo analysisTaskInfo = colTaskInfoBuilder.setColName(colName).setIndexId(indexId) + AnalysisInfo analysisInfo = colTaskInfoBuilder.setColName(colName).setIndexId(indexId) .setTaskId(taskId).build(); - analysisTasks.put(taskId, createTask(analysisTaskInfo)); - if (isSync && !ConnectContext.get().getSessionVariable().enableSaveStatisticsSyncJob) { + 
analysisTasks.put(taskId, createTask(analysisInfo)); + if (isSync) { continue; } try { - StatisticsRepository.persistAnalysisTask(analysisTaskInfo); + logCreateAnalysisTask(analysisInfo); } catch (Exception e) { throw new DdlException("Failed to create analysis task", e); } } } - private void createTaskForExternalTable(AnalysisTaskInfo jobInfo, - Map analysisTasks, - boolean isSync) throws DdlException { + private void logCreateAnalysisTask(AnalysisInfo analysisInfo) { + Env.getCurrentEnv().getEditLog().logCreateAnalysisTasks(analysisInfo); + analysisTaskInfoMap.put(analysisInfo.taskId, analysisInfo); + } + + private void logCreateAnalysisJob(AnalysisInfo analysisJob) { + Env.getCurrentEnv().getEditLog().logCreateAnalysisJob(analysisJob); + analysisJobInfoMap.put(analysisJob.jobId, analysisJob); + } + + private void createTaskForExternalTable(AnalysisInfo jobInfo, + Map analysisTasks, + boolean isSync) throws DdlException { TableIf table; try { table = StatisticsUtil.findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName); @@ -441,50 +532,58 @@ private void createTaskForExternalTable(AnalysisTaskInfo jobInfo, if (jobInfo.analysisType == AnalysisType.HISTOGRAM || table.getType() != TableType.HMS_EXTERNAL_TABLE) { return; } - AnalysisTaskInfoBuilder colTaskInfoBuilder = new AnalysisTaskInfoBuilder(jobInfo); + AnalysisInfoBuilder colTaskInfoBuilder = new AnalysisInfoBuilder(jobInfo); long taskId = Env.getCurrentEnv().getNextId(); - AnalysisTaskInfo analysisTaskInfo = colTaskInfoBuilder.setIndexId(-1L) + AnalysisInfo analysisInfo = colTaskInfoBuilder.setIndexId(-1L) .setTaskId(taskId).setExternalTableLevelTask(true).build(); - analysisTasks.put(taskId, createTask(analysisTaskInfo)); + analysisTasks.put(taskId, createTask(analysisInfo)); try { - StatisticsRepository.persistAnalysisTask(analysisTaskInfo); + logCreateAnalysisTask(analysisInfo); } catch (Exception e) { throw new DdlException("Failed to create analysis task", e); } } - public void 
updateTaskStatus(AnalysisTaskInfo info, AnalysisState jobState, String message, long time) { + public void updateTaskStatus(AnalysisInfo info, AnalysisState jobState, String message, long time) { if (analysisJobIdToTaskMap.get(info.jobId) == null) { return; } - Map params = new HashMap<>(); - params.put("jobState", jobState.toString()); - params.put("message", StringUtils.isNotEmpty(message) ? String.format(", message = '%s'", message) : ""); - params.put("updateExecTime", time == -1 ? "" : ", last_exec_time_in_ms=" + time); - params.put("jobId", String.valueOf(info.jobId)); - params.put("taskId", String.valueOf(info.taskId)); - params.put("isAllTask", "false"); - try { - StatisticsUtil.execUpdate(new StringSubstitutor(params).replace(UPDATE_JOB_STATE_SQL_TEMPLATE)); - } catch (Exception e) { - LOG.warn(String.format("Failed to update state for task: %d, %d", info.jobId, info.taskId), e); - } finally { - info.state = jobState; - if (analysisJobIdToTaskMap.get(info.jobId).values() - .stream().allMatch(t -> t.info.state != null - && t.info.state != AnalysisState.PENDING && t.info.state != AnalysisState.RUNNING)) { - analysisJobIdToTaskMap.remove(info.jobId); - params.put("taskId", String.valueOf(-1)); - try { - StatisticsUtil.execUpdate(new StringSubstitutor(params).replace(UPDATE_JOB_STATE_SQL_TEMPLATE)); - } catch (Exception e) { - LOG.warn(String.format("Failed to update state for job: %s", info.jobId), e); - } + info.state = jobState; + info.message = message; + info.lastExecTimeInMs = time; + logCreateAnalysisTask(info); + + AnalysisInfo job = analysisJobInfoMap.get(info.jobId); + job.lastExecTimeInMs = time; + if (info.state.equals(AnalysisState.RUNNING) && job.state.equals(AnalysisState.PENDING)) { + job.state = AnalysisState.RUNNING; + logCreateAnalysisJob(job); + } + boolean allFinished = true; + boolean hasFailure = false; + for (BaseAnalysisTask task : analysisJobIdToTaskMap.get(info.jobId).values()) { + AnalysisInfo 
taskInfo = task.info; + if (taskInfo.state.equals(AnalysisState.RUNNING) || taskInfo.state.equals(AnalysisState.PENDING)) { + allFinished = false; + break; + } + if (taskInfo.state.equals(AnalysisState.FAILED)) { + hasFailure = true; } } + if (allFinished) { + if (hasFailure) { + job.state = AnalysisState.FAILED; + logCreateAnalysisJob(job); + } else { + job.state = AnalysisState.FINISHED; + logCreateAnalysisJob(job); + } + analysisJobIdToTaskMap.remove(job.jobId); + } } - private void updateTableStats(AnalysisTaskInfo jobInfo) throws Throwable { + private void updateTableStats(AnalysisInfo jobInfo) throws Throwable { Map params = buildTableStatsParams(jobInfo); TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName); @@ -499,11 +598,11 @@ private void updateTableStats(AnalysisTaskInfo jobInfo) throws Throwable { } @SuppressWarnings("rawtypes") - private Map buildTableStatsParams(AnalysisTaskInfo jobInfo) throws Throwable { + private Map buildTableStatsParams(AnalysisInfo jobInfo) throws Throwable { CatalogIf catalog = StatisticsUtil.findCatalog(jobInfo.catalogName); DatabaseIf db = StatisticsUtil.findDatabase(jobInfo.catalogName, jobInfo.dbName); TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName); - String indexId = jobInfo.indexId == null ? "-1" : String.valueOf(jobInfo.indexId); + String indexId = String.valueOf(jobInfo.indexId); String id = StatisticsUtil.constructId(tbl.getId(), indexId); Map commonParams = new HashMap<>(); commonParams.put("id", id); @@ -533,32 +632,15 @@ private void updateOlapTableStats(OlapTable table, Map params) t StatisticsRepository.persistTableStats(tblParams); } - public List> showAnalysisJob(ShowAnalyzeStmt stmt) throws DdlException { - String whereClause = stmt.getWhereClause(); - long limit = stmt.getLimit(); - String executeSql = SHOW_JOB_STATE_SQL_TEMPLATE - + (whereClause.isEmpty() ? "" : " WHERE " + whereClause) - + (limit == -1L ? 
"" : " LIMIT " + limit); - - List> results = Lists.newArrayList(); - ImmutableList titleNames = stmt.getTitleNames(); - List resultRows = StatisticsUtil.execStatisticQuery(executeSql); - - for (ResultRow resultRow : resultRows) { - List result = Lists.newArrayList(); - for (String column : titleNames) { - String value = resultRow.getColumnValue(column); - if (LAST_EXEC_TIME_IN_MS.equals(column)) { - long timeMillis = Long.parseLong(value); - value = TimeUtils.DATETIME_FORMAT.format( - LocalDateTime.ofInstant(Instant.ofEpochMilli(timeMillis), ZoneId.systemDefault())); - } - result.add(value); - } - results.add(result); - } - - return results; + public List showAnalysisJob(ShowAnalyzeStmt stmt) { + String state = stmt.getStateValue(); + TableName tblName = stmt.getDbTableName(); + return analysisJobInfoMap.values().stream() + .filter(a -> state == null || a.state.equals(AnalysisState.valueOf(state))) + .filter(a -> tblName == null || a.catalogName.equals(tblName.getCtl()) + && a.dbName.equals(tblName.getDb()) && a.tblName.equals(tblName.getTbl())) + .sorted(Comparator.comparingLong(a -> a.jobId)) + .collect(Collectors.toList()); } private void syncExecute(Collection tasks) { @@ -586,37 +668,36 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException { } public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) throws DdlException { - Map analysisTaskInfoMap = analysisJobIdToTaskMap.remove(killAnalysisJobStmt.jobId); - if (analysisTaskInfoMap == null) { + Map analysisTaskMap = analysisJobIdToTaskMap.remove(killAnalysisJobStmt.jobId); + if (analysisTaskMap == null) { throw new DdlException("Job not exists or already finished"); } - BaseAnalysisTask anyTask = analysisTaskInfoMap.values().stream().findFirst().orElse(null); + BaseAnalysisTask anyTask = analysisTaskMap.values().stream().findFirst().orElse(null); if (anyTask == null) { return; } checkPriv(anyTask); - for (BaseAnalysisTask taskInfo : analysisTaskInfoMap.values()) { + 
logKilled(analysisJobInfoMap.get(anyTask.getJobId())); + for (BaseAnalysisTask taskInfo : analysisTaskMap.values()) { taskInfo.markAsKilled(); + logKilled(taskInfo.info); } - Map params = new HashMap<>(); - params.put("jobState", AnalysisState.FAILED.toString()); - params.put("message", ", message = 'Killed by user : " + ConnectContext.get().getQualifiedUser() + "'"); - params.put("updateExecTime", ", last_exec_time_in_ms=" + System.currentTimeMillis()); - params.put("jobId", String.valueOf(killAnalysisJobStmt.jobId)); - params.put("taskId", "'-1'"); - params.put("isAllTask", "true"); - try { - StatisticsUtil.execUpdate(new StringSubstitutor(params).replace(UPDATE_JOB_STATE_SQL_TEMPLATE)); - } catch (Exception e) { - LOG.warn("Failed to update status", e); - } + } + + private void logKilled(AnalysisInfo info) { + info.state = AnalysisState.FAILED; + info.message = "Killed by user: " + ConnectContext.get().getQualifiedUser(); + info.lastExecTimeInMs = System.currentTimeMillis(); + Env.getCurrentEnv().getEditLog().logCreateAnalysisTasks(info); } private void checkPriv(BaseAnalysisTask analysisTask) { - String dbName = analysisTask.db.getFullName(); - String tblName = analysisTask.tbl.getName(); + checkPriv(analysisTask.info); + } + + private void checkPriv(AnalysisInfo analysisInfo) { if (!Env.getCurrentEnv().getAccessManager() - .checkTblPriv(ConnectContext.get(), dbName, tblName, PrivPredicate.SELECT)) { + .checkTblPriv(ConnectContext.get(), analysisInfo.dbName, analysisInfo.tblName, PrivPredicate.SELECT)) { throw new RuntimeException("You need at least SELECT PRIV to corresponding table to kill this analyze" + " job"); } @@ -629,17 +710,33 @@ public void cancelSyncTask(ConnectContext connectContext) { } } - private BaseAnalysisTask createTask(AnalysisTaskInfo analysisTaskInfo) throws DdlException { + private BaseAnalysisTask createTask(AnalysisInfo analysisInfo) throws DdlException { try { - TableIf table = StatisticsUtil.findTable(analysisTaskInfo.catalogName, - 
analysisTaskInfo.dbName, analysisTaskInfo.tblName); - return table.createAnalysisTask(analysisTaskInfo); + TableIf table = StatisticsUtil.findTable(analysisInfo.catalogName, + analysisInfo.dbName, analysisInfo.tblName); + return table.createAnalysisTask(analysisInfo); } catch (Throwable t) { LOG.warn("Failed to find table", t); throw new DdlException("Error when trying to find table", t); } } + public void replayCreateAnalysisJob(AnalysisInfo taskInfo) { + this.analysisJobInfoMap.put(taskInfo.jobId, taskInfo); + } + + public void replayCreateAnalysisTask(AnalysisInfo jobInfo) { + this.analysisTaskInfoMap.put(jobInfo.taskId, jobInfo); + } + + public void replayDeleteAnalysisJob(AnalyzeDeletionLog log) { + this.analysisJobInfoMap.remove(log.id); + } + + public void replayDeleteAnalysisTask(AnalyzeDeletionLog log) { + this.analysisTaskInfoMap.remove(log.id); + } + private static class SyncTaskCollection { public volatile boolean cancelled; @@ -675,10 +772,57 @@ public void execute() { } private void updateSyncTaskStatus(BaseAnalysisTask task, AnalysisState state) { - if (ConnectContext.get().getSessionVariable().enableSaveStatisticsSyncJob) { - Env.getCurrentEnv().getAnalysisManager() - .updateTaskStatus(task.info, state, "", System.currentTimeMillis()); - } + Env.getCurrentEnv().getAnalysisManager() + .updateTaskStatus(task.info, state, "", System.currentTimeMillis()); } } + + public List findAutomaticAnalysisJobs() { + synchronized (analysisJobInfoMap) { + return analysisJobInfoMap.values().stream() + .filter(a -> + a.scheduleType.equals(ScheduleType.AUTOMATIC) + && (!(a.state.equals(AnalysisState.RUNNING) + || a.state.equals(AnalysisState.PENDING))) + && System.currentTimeMillis() - a.lastExecTimeInMs + > TimeUnit.MINUTES.toMillis(Config.auto_check_statistics_in_minutes)) + .collect(Collectors.toList()); + } + } + + public List findPeriodicJobs() { + synchronized (analysisJobInfoMap) { + return analysisJobInfoMap.values().stream() + .filter(a -> 
a.scheduleType.equals(ScheduleType.PERIOD) + && (a.state.equals(AnalysisState.FINISHED)) + && System.currentTimeMillis() - a.lastExecTimeInMs > a.periodTimeInMs) + .collect(Collectors.toList()); + } + } + + public List findTasks(long jobId) { + synchronized (analysisTaskInfoMap) { + return analysisTaskInfoMap.values().stream().filter(i -> i.jobId == jobId).collect(Collectors.toList()); + } + } + + public void removeAll(List analysisInfos) { + for (AnalysisInfo analysisInfo : analysisInfos) { + analysisTaskInfoMap.remove(analysisInfo.taskId); + } + } + + public void dropAnalyzeJob(DropAnalyzeJobStmt analyzeJobStmt) throws DdlException { + AnalysisInfo jobInfo = analysisJobInfoMap.get(analyzeJobStmt.getJobId()); + if (jobInfo == null) { + throw new DdlException(String.format("Analyze job [%d] not exists", analyzeJobStmt.getJobId())); + } + checkPriv(jobInfo); + long jobId = analyzeJobStmt.getJobId(); + AnalyzeDeletionLog analyzeDeletionLog = new AnalyzeDeletionLog(jobId); + Env.getCurrentEnv().getEditLog().logDeleteAnalysisJob(analyzeDeletionLog); + replayDeleteAnalysisJob(analyzeDeletionLog); + removeAll(findTasks(jobId)); + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index b1ccbf68b510be..1fc97c10dc1395 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -24,8 +24,8 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.qe.StmtExecutor; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; +import org.apache.doris.statistics.AnalysisInfo.AnalysisType; import com.google.common.annotations.VisibleForTesting; import 
org.apache.logging.log4j.LogManager; @@ -87,7 +87,7 @@ public abstract class BaseAnalysisTask { + " ${internalDB}.${columnStatTbl}.part_id IS NOT NULL" + " ) t1, \n"; - protected AnalysisTaskInfo info; + protected AnalysisInfo info; protected CatalogIf catalog; @@ -108,7 +108,7 @@ public BaseAnalysisTask() { } - public BaseAnalysisTask(AnalysisTaskInfo info) { + public BaseAnalysisTask(AnalysisInfo info) { this.info = info; init(info); } @@ -122,7 +122,7 @@ protected void initUnsupportedType() { unsupportedType.add(PrimitiveType.STRUCT); } - private void init(AnalysisTaskInfo info) { + private void init(AnalysisInfo info) { initUnsupportedType(); catalog = Env.getCurrentEnv().getCatalogMgr().getCatalog(info.catalogName); if (catalog == null) { @@ -146,7 +146,7 @@ private void init(AnalysisTaskInfo info) { if (info.externalTableLevelTask) { return; } - if (info.analysisType != null && (info.analysisType.equals(AnalysisType.COLUMN) + if (info.analysisType != null && (info.analysisType.equals(AnalysisType.FUNDAMENTALS) || info.analysisType.equals(AnalysisType.HISTOGRAM))) { col = tbl.getColumn(info.colName); if (col == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java index bb3c07300258d7..b5bee03f0b00ab 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java @@ -26,7 +26,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask { protected HMSExternalTable table; - public HMSAnalysisTask(AnalysisTaskInfo info) { + public HMSAnalysisTask(AnalysisInfo info) { super(info); table = (HMSExternalTable) tbl; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java index e8186fff25eec7..fdadff6c053b6a 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java @@ -19,7 +19,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.common.FeConstants; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.annotations.VisibleForTesting; @@ -53,7 +53,7 @@ public HistogramTask() { super(); } - public HistogramTask(AnalysisTaskInfo info) { + public HistogramTask(AnalysisInfo info) { super(info); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java index d9469c0024f31c..0e358857ca15d3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java @@ -84,7 +84,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask { private final boolean isTableLevelTask; - public HiveAnalysisTask(AnalysisTaskInfo info) { + public HiveAnalysisTask(AnalysisInfo info) { super(info); isTableLevelTask = info.externalTableLevelTask; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java index 7246c3e71735a9..105ef758f0f076 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java @@ -45,7 +45,7 @@ public class IcebergAnalysisTask extends HMSAnalysisTask { private long dataSize = 0; private long numNulls = 0; - public IcebergAnalysisTask(AnalysisTaskInfo info) { + public IcebergAnalysisTask(AnalysisInfo info) { super(info); } diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java index a3bac1bbc8bb3e..701f3109b8a631 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java @@ -62,7 +62,7 @@ public class MVAnalysisTask extends BaseAnalysisTask { private OlapTable olapTable; - public MVAnalysisTask(AnalysisTaskInfo info) { + public MVAnalysisTask(AnalysisInfo info) { super(info); init(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index ec46300dd610dd..a65553a8384980 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -53,7 +53,7 @@ public OlapAnalysisTask() { super(); } - public OlapAnalysisTask(AnalysisTaskInfo info) { + public OlapAnalysisTask(AnalysisInfo info) { super(info); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index 8b358bf74d5f7c..93072c750f33d4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -72,4 +72,6 @@ public class StatisticConstants { */ public static final int TABLE_STATS_HEALTH_THRESHOLD = 80; + public static final int ANALYZE_MANAGER_INTERVAL_IN_SECS = 60; + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java index 5cf291de4b5f2b..8128d3a7d3eb0c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java @@ -25,13 +25,11 @@ import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.util.MasterDaemon; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.Maps; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.thrift.TException; import java.util.Collection; import java.util.HashSet; @@ -47,7 +45,7 @@ public class StatisticsAutoAnalyzer extends MasterDaemon { private static final Logger LOG = LogManager.getLogger(StatisticsAutoAnalyzer.class); public StatisticsAutoAnalyzer() { - super("Automatic Analyzer", TimeUnit.SECONDS.toMillis(Config.auto_check_statistics_in_sec)); + super("Automatic Analyzer", TimeUnit.MINUTES.toMillis(Config.auto_check_statistics_in_minutes)); } @Override @@ -69,31 +67,23 @@ public void autoAnalyzeStats(DdlStmt ddlStmt) { } private void analyzePeriodically() { - List resultRows = StatisticsRepository.fetchPeriodicAnalysisJobs(); - if (resultRows.isEmpty()) { - return; - } try { AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); - List jobInfos = StatisticsUtil.deserializeToAnalysisJob(resultRows); - for (AnalysisTaskInfo jobInfo : jobInfos) { + List jobInfos = analysisManager.findPeriodicJobs(); + for (AnalysisInfo jobInfo : jobInfos) { analysisManager.createAnalysisJob(jobInfo); } - } catch (TException | DdlException e) { + } catch (DdlException e) { LOG.warn("Failed to periodically analyze the statistics." 
+ e); } } private void analyzeAutomatically() { - List resultRows = StatisticsRepository.fetchAutomaticAnalysisJobs(); - if (resultRows.isEmpty()) { - return; - } try { AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); - List jobInfos = StatisticsUtil.deserializeToAnalysisJob(resultRows); - for (AnalysisTaskInfo jobInfo : jobInfos) { - AnalysisTaskInfo checkedJobInfo = checkAutomaticJobInfo(jobInfo); + List jobInfos = analysisManager.findAutomaticAnalysisJobs(); + for (AnalysisInfo jobInfo : jobInfos) { + AnalysisInfo checkedJobInfo = checkAutomaticJobInfo(jobInfo); if (checkedJobInfo != null) { analysisManager.createAnalysisJob(checkedJobInfo); } @@ -123,7 +113,7 @@ private void analyzeAutomatically() { * @return new job info after check * @throws Throwable failed to check */ - private AnalysisTaskInfo checkAutomaticJobInfo(AnalysisTaskInfo jobInfo) throws Throwable { + private AnalysisInfo checkAutomaticJobInfo(AnalysisInfo jobInfo) throws Throwable { long lastExecTimeInMs = jobInfo.lastExecTimeInMs; TableIf table = StatisticsUtil .findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName); @@ -206,7 +196,7 @@ private void checkNewPartitions(TableIf table, Set needRunPartitions, lo ); } - private AnalysisTaskInfo getAnalysisJobInfo(AnalysisTaskInfo jobInfo, TableIf table, + private AnalysisInfo getAnalysisJobInfo(AnalysisInfo jobInfo, TableIf table, Set needRunPartitions) { Map> newColToPartitions = Maps.newHashMap(); Map> colToPartitions = jobInfo.colToPartitions; @@ -216,7 +206,7 @@ private AnalysisTaskInfo getAnalysisJobInfo(AnalysisTaskInfo jobInfo, TableIf ta newColToPartitions.put(colName, needRunPartitions); } }); - return new AnalysisTaskInfoBuilder(jobInfo) + return new AnalysisInfoBuilder(jobInfo) .setColToPartitions(newColToPartitions).build(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java index 
b23310b240febc..d68eb39dad45c6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java @@ -88,7 +88,6 @@ public synchronized void clear() { } clearStats(colStatsTbl); clearStats(histStatsTbl); - clearJobTbl(); } private void clearStats(OlapTable statsTbl) { @@ -101,11 +100,6 @@ private void clearStats(OlapTable statsTbl) { } while (!expiredStats.isEmpty()); } - private void clearJobTbl() { - clearJobTbl(StatisticsRepository::fetchExpiredAutoJob, true); - clearJobTbl(StatisticsRepository::fetchExpiredOnceJobs, false); - } - private void clearJobTbl(BiFunction> fetchFunc, boolean taskOnly) { List jobIds = null; long offset = 0; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index ae16445942f2c6..63e5d1b63446c4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -45,7 +45,6 @@ import java.util.Map; import java.util.Set; import java.util.StringJoiner; -import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; /** @@ -64,9 +63,6 @@ public class StatisticsRepository { private static final String FULL_QUALIFIED_COLUMN_HISTOGRAM_NAME = FULL_QUALIFIED_DB_NAME + "." + "`" + StatisticConstants.HISTOGRAM_TBL_NAME + "`"; - private static final String FULL_QUALIFIED_ANALYSIS_JOB_TABLE_NAME = FULL_QUALIFIED_DB_NAME + "." 
- + "`" + StatisticConstants.ANALYSIS_JOB_TABLE + "`"; - private static final String FETCH_COLUMN_STATISTIC_TEMPLATE = "SELECT * FROM " + FULL_QUALIFIED_COLUMN_STATISTICS_NAME + " WHERE `id` = '${id}'"; @@ -79,13 +75,6 @@ public class StatisticsRepository { + FULL_QUALIFIED_COLUMN_HISTOGRAM_NAME + " WHERE `id` = '${id}'"; - private static final String PERSIST_ANALYSIS_TASK_SQL_TEMPLATE = - "INSERT INTO " + FULL_QUALIFIED_ANALYSIS_JOB_TABLE_NAME - + " VALUES(${jobId}, ${taskId}, '${catalogName}', '${dbName}', '${tblName}', " - + "'${colName}', '${indexId}', '${colPartitions}', '${jobType}', '${analysisType}', " - + "'${analysisMode}', '${analysisMethod}', '${scheduleType}', '${state}', ${samplePercent}, " - + "${sampleRows}, ${maxBucketNum}, ${periodTimeInMs}, ${lastExecTimeInMs}, '${message}')"; - private static final String INSERT_INTO_COLUMN_STATISTICS = "INSERT INTO " + FULL_QUALIFIED_COLUMN_STATISTICS_NAME + " VALUES('${id}', ${catalogId}, ${dbId}, ${tblId}, '${idxId}'," + "'${colId}', ${partId}, ${count}, ${ndv}, ${nullCount}, '${min}', '${max}', ${dataSize}, NOW())"; @@ -93,22 +82,6 @@ public class StatisticsRepository { private static final String DROP_TABLE_STATISTICS_TEMPLATE = "DELETE FROM " + FeConstants.INTERNAL_DB_NAME + "." 
+ "${tblName}" + " WHERE ${condition}"; - private static final String FIND_EXPIRED_ONCE_JOBS = "SELECT job_id FROM " - + FULL_QUALIFIED_ANALYSIS_JOB_TABLE_NAME - + " WHERE task_id = -1 AND ${now} - last_exec_time_in_ms > " - + TimeUnit.HOURS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS) - + " AND schedule_type = 'ONCE'" - + " ORDER BY last_exec_time_in_ms" - + " LIMIT ${limit} OFFSET ${offset}"; - - private static final String FIND_EXPIRED_AUTO_JOBS = "SELECT DISTINCT(job_id) FROM (SELECT job_id FROM " - + FULL_QUALIFIED_ANALYSIS_JOB_TABLE_NAME - + "WHERE task_id != -1 AND ${now} - last_exec_time_in_ms > " - + TimeUnit.HOURS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS) - + " AND schedule_type = 'PERIOD' OR schedule_type = 'AUTOMATIC'" - + " ORDER BY last_exec_time_in_ms" - + " LIMIT ${limit} OFFSET ${offset}) t"; - private static final String FETCH_RECENT_STATS_UPDATED_COL = "SELECT * FROM " + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.STATISTIC_TBL_NAME @@ -127,20 +100,6 @@ public class StatisticsRepository { + " WHERE tbl_id = ${tblId}" + " AND part_id IS NOT NULL"; - private static final String FETCH_PERIODIC_ANALYSIS_JOB_TEMPLATE = "SELECT * FROM " - + FULL_QUALIFIED_ANALYSIS_JOB_TABLE_NAME - + " WHERE task_id = -1 " - + " AND schedule_type = 'PERIOD' " - + " AND state = 'FINISHED' " - + " AND (${currentTimeStamp} - last_exec_time_in_ms >= period_time_in_ms)"; - - private static final String FETCH_AUTOMATIC_ANALYSIS_JOB_SQL = "SELECT * FROM " - + FULL_QUALIFIED_ANALYSIS_JOB_TABLE_NAME - + " WHERE task_id = -1 " - + " AND schedule_type = 'AUTOMATIC' " - + " AND state = 'FINISHED' " - + " AND last_exec_time_in_ms > 0"; - private static final String PERSIST_TABLE_STATS_TEMPLATE = "INSERT INTO " + FeConstants.INTERNAL_DB_NAME + "." 
+ StatisticConstants.ANALYSIS_TBL_NAME + " VALUES('${id}', ${catalogId}, ${dbId}, ${tblId}, ${indexId}, ${partId}, ${rowCount}," @@ -270,32 +229,6 @@ public static void dropStatisticsByPartId(Set partIds, String statsTblName } } - public static void persistAnalysisTask(AnalysisTaskInfo analysisTaskInfo) throws Exception { - Map params = new HashMap<>(); - params.put("jobId", String.valueOf(analysisTaskInfo.jobId)); - params.put("taskId", String.valueOf(analysisTaskInfo.taskId)); - params.put("catalogName", analysisTaskInfo.catalogName); - params.put("dbName", analysisTaskInfo.dbName); - params.put("tblName", analysisTaskInfo.tblName); - params.put("colName", analysisTaskInfo.colName == null ? "" : analysisTaskInfo.colName); - params.put("indexId", analysisTaskInfo.indexId == null ? "-1" : String.valueOf(analysisTaskInfo.indexId)); - params.put("colPartitions", analysisTaskInfo.getColToPartitionStr()); - params.put("jobType", analysisTaskInfo.jobType.toString()); - params.put("analysisType", analysisTaskInfo.analysisType.toString()); - params.put("analysisMode", analysisTaskInfo.analysisMode.toString()); - params.put("analysisMethod", analysisTaskInfo.analysisMethod.toString()); - params.put("scheduleType", analysisTaskInfo.scheduleType.toString()); - params.put("state", analysisTaskInfo.state.toString()); - params.put("samplePercent", String.valueOf(analysisTaskInfo.samplePercent)); - params.put("sampleRows", String.valueOf(analysisTaskInfo.sampleRows)); - params.put("maxBucketNum", String.valueOf(analysisTaskInfo.maxBucketNum)); - params.put("periodTimeInMs", String.valueOf(analysisTaskInfo.periodTimeInMs)); - params.put("lastExecTimeInMs", String.valueOf(analysisTaskInfo.lastExecTimeInMs)); - params.put("message", ""); - StatisticsUtil.execUpdate( - new StringSubstitutor(params).replace(PERSIST_ANALYSIS_TASK_SQL_TEMPLATE)); - } - public static void persistTableStats(Map params) throws Exception { StatisticsUtil.execUpdate(PERSIST_TABLE_STATS_TEMPLATE, params); } 
@@ -390,22 +323,6 @@ public static List fetchStatsFullName(long limit, long offset) { return StatisticsUtil.execStatisticQuery(new StringSubstitutor(params).replace(FETCH_STATS_FULL_NAME)); } - public static List fetchExpiredOnceJobs(long limit, long offset) { - Map params = new HashMap<>(); - params.put("limit", String.valueOf(limit)); - params.put("offset", String.valueOf(offset)); - params.put("now", String.valueOf(System.currentTimeMillis())); - return StatisticsUtil.execStatisticQuery(new StringSubstitutor(params).replace(FIND_EXPIRED_ONCE_JOBS)); - } - - public static List fetchExpiredAutoJob(long limit, long offset) { - Map params = new HashMap<>(); - params.put("limit", String.valueOf(limit)); - params.put("offset", String.valueOf(offset)); - params.put("now", String.valueOf(System.currentTimeMillis())); - return StatisticsUtil.execStatisticQuery(new StringSubstitutor(params).replace(FIND_EXPIRED_AUTO_JOBS)); - } - public static Map> fetchColAndPartsForStats(long tblId) { Map params = Maps.newHashMap(); params.put("tblId", String.valueOf(tblId)); @@ -419,39 +336,20 @@ public static Map> fetchColAndPartsForStats(long tblId) { try { String colId = row.getColumnValue("col_id"); String partId = row.getColumnValue("part_id"); + if (partId == null) { + return; + } columnToPartitions.computeIfAbsent(colId, k -> new HashSet<>()).add(Long.valueOf(partId)); } catch (NumberFormatException | DdlException e) { - LOG.warn("Failed to obtain the column and partition for statistics.{}", - e.getMessage()); + LOG.warn("Failed to obtain the column and partition for statistics.", + e); } }); return columnToPartitions; } - public static List fetchPeriodicAnalysisJobs() { - ImmutableMap params = ImmutableMap - .of("currentTimeStamp", String.valueOf(System.currentTimeMillis())); - try { - StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - String sql = stringSubstitutor.replace(FETCH_PERIODIC_ANALYSIS_JOB_TEMPLATE); - return 
StatisticsUtil.execStatisticQuery(sql); - } catch (Exception e) { - LOG.warn("Failed to update status", e); - return Collections.emptyList(); - } - } - - public static List fetchAutomaticAnalysisJobs() { - try { - return StatisticsUtil.execStatisticQuery(FETCH_AUTOMATIC_ANALYSIS_JOB_SQL); - } catch (Exception e) { - LOG.warn("Failed to update status", e); - return Collections.emptyList(); - } - } - public static TableStatistic fetchTableLevelStats(long tblId) throws DdlException { ImmutableMap params = ImmutableMap .of("tblId", String.valueOf(tblId)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index f5a7485f2fbd04..802c6dffee31ad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -49,7 +49,7 @@ import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; import org.apache.doris.qe.StmtExecutor; -import org.apache.doris.statistics.AnalysisTaskInfo; +import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.Histogram; import org.apache.doris.statistics.StatisticConstants; @@ -114,13 +114,13 @@ public static void execUpdate(String sql) throws Exception { } } - public static List deserializeToAnalysisJob(List resultBatches) + public static List deserializeToAnalysisJob(List resultBatches) throws TException { if (CollectionUtils.isEmpty(resultBatches)) { return Collections.emptyList(); } return resultBatches.stream() - .map(AnalysisTaskInfo::fromResultRow) + .map(AnalysisInfo::fromResultRow) .collect(Collectors.toList()); } @@ -316,10 +316,14 @@ public static Column findColumn(String catalogName, String dbName, String tblNam * Throw RuntimeException if table not exists. 
*/ @SuppressWarnings({"unchecked", "rawtypes"}) - public static TableIf findTable(String catalogName, String dbName, String tblName) throws Throwable { - DatabaseIf db = findDatabase(catalogName, dbName); - return db.getTableOrException(tblName, - t -> new RuntimeException("Table: " + t + " not exists")); + public static TableIf findTable(String catalogName, String dbName, String tblName) { + try { + DatabaseIf db = findDatabase(catalogName, dbName); + return db.getTableOrException(tblName, + t -> new RuntimeException("Table: " + t + " not exists")); + } catch (Throwable t) { + throw new RuntimeException("Table: `" + catalogName + "." + dbName + "." + tblName + "` not exists"); + } } /** @@ -363,9 +367,6 @@ public static boolean statsTblAvailable() { .findTable(InternalCatalog.INTERNAL_CATALOG_NAME, dbName, StatisticConstants.HISTOGRAM_TBL_NAME)); - statsTbls.add((OlapTable) StatisticsUtil.findTable(InternalCatalog.INTERNAL_CATALOG_NAME, - dbName, - StatisticConstants.ANALYSIS_JOB_TABLE)); } catch (Throwable t) { return false; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java index 5e21efb47ce032..e8984cd20935df 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java @@ -17,16 +17,15 @@ package org.apache.doris.statistics; -import org.apache.doris.catalog.Env; import org.apache.doris.catalog.InternalSchemaInitializer; import org.apache.doris.common.FeConstants; import org.apache.doris.qe.AutoCloseConnectContext; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.StmtExecutor; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMode; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType; -import 
org.apache.doris.statistics.AnalysisTaskInfo.JobType; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMode; +import org.apache.doris.statistics.AnalysisInfo.AnalysisType; +import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.utframe.TestWithFeService; @@ -54,8 +53,6 @@ protected void runBeforeAll() throws Exception { + "DISTRIBUTED BY HASH(col3)\n" + "BUCKETS 1\n" + "PROPERTIES(\n" + " \"replication_num\"=\"1\"\n" + ");"); - InternalSchemaInitializer storageInitializer = new InternalSchemaInitializer(); - Env.getCurrentEnv().createTable(storageInitializer.buildAnalysisJobTblStmt()); } catch (Exception e) { throw new RuntimeException(e); } @@ -116,12 +113,12 @@ public void execUpdate(String sql) throws Exception { }; HashMap> colToPartitions = Maps.newHashMap(); colToPartitions.put("col1", Collections.singleton("t1")); - AnalysisTaskInfo analysisJobInfo = new AnalysisTaskInfoBuilder().setJobId(0).setTaskId(0) + AnalysisInfo analysisJobInfo = new AnalysisInfoBuilder().setJobId(0).setTaskId(0) .setCatalogName("internal").setDbName("default_cluster:analysis_job_test").setTblName("t1") .setColName("col1").setJobType(JobType.MANUAL) .setAnalysisMode(AnalysisMode.FULL) .setAnalysisMethod(AnalysisMethod.FULL) - .setAnalysisType(AnalysisType.COLUMN) + .setAnalysisType(AnalysisType.FUNDAMENTALS) .setColToPartitions(colToPartitions) .build(); new OlapAnalysisTask(analysisJobInfo).execute(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java index 0974b438fe1e01..c2b355067d14da 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java @@ -17,13 +17,12 @@ package 
org.apache.doris.statistics; -import org.apache.doris.catalog.Env; import org.apache.doris.catalog.InternalSchemaInitializer; import org.apache.doris.common.jmockit.Deencapsulation; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMode; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType; -import org.apache.doris.statistics.AnalysisTaskInfo.JobType; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMode; +import org.apache.doris.statistics.AnalysisInfo.AnalysisType; +import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.util.BlockingCounter; import org.apache.doris.utframe.TestWithFeService; @@ -56,8 +55,6 @@ protected void runBeforeAll() throws Exception { + "DISTRIBUTED BY HASH(col3)\n" + "BUCKETS 1\n" + "PROPERTIES(\n" + " \"replication_num\"=\"1\"\n" + ");"); - InternalSchemaInitializer storageInitializer = new InternalSchemaInitializer(); - Env.getCurrentEnv().createTable(storageInitializer.buildAnalysisJobTblStmt()); } catch (Exception e) { throw new RuntimeException(e); } @@ -65,12 +62,12 @@ protected void runBeforeAll() throws Exception { @Test public void testExpiredJobCancellation() throws Exception { - AnalysisTaskInfo analysisJobInfo = new AnalysisTaskInfoBuilder().setJobId(0).setTaskId(0) + AnalysisInfo analysisJobInfo = new AnalysisInfoBuilder().setJobId(0).setTaskId(0) .setCatalogName("internal").setDbName("default_cluster:analysis_job_test").setTblName("t1") .setColName("col1").setJobType(JobType.MANUAL) .setAnalysisMode(AnalysisMode.FULL) .setAnalysisMethod(AnalysisMethod.FULL) - .setAnalysisType(AnalysisType.COLUMN) + .setAnalysisType(AnalysisType.FUNDAMENTALS) .build(); OlapAnalysisTask analysisJob = new OlapAnalysisTask(analysisJobInfo); @@ -100,15 +97,15 @@ public void testTaskExecution() throws Exception { AnalysisTaskExecutor 
analysisTaskExecutor = new AnalysisTaskExecutor(analysisTaskScheduler); HashMap> colToPartitions = Maps.newHashMap(); colToPartitions.put("col1", Collections.singleton("t1")); - AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(0).setTaskId(0) + AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setJobId(0).setTaskId(0) .setCatalogName("internal").setDbName("default_cluster:analysis_job_test").setTblName("t1") .setColName("col1").setJobType(JobType.MANUAL) .setAnalysisMode(AnalysisMode.FULL) .setAnalysisMethod(AnalysisMethod.FULL) - .setAnalysisType(AnalysisType.COLUMN) + .setAnalysisType(AnalysisType.FUNDAMENTALS) .setColToPartitions(colToPartitions) .build(); - OlapAnalysisTask task = new OlapAnalysisTask(analysisTaskInfo); + OlapAnalysisTask task = new OlapAnalysisTask(analysisInfo); new MockUp() { @Mock public synchronized BaseAnalysisTask getPendingTasks() { @@ -117,7 +114,7 @@ public synchronized BaseAnalysisTask getPendingTasks() { }; new MockUp() { @Mock - public void updateTaskStatus(AnalysisTaskInfo info, AnalysisState jobState, String message, long time) {} + public void updateTaskStatus(AnalysisInfo info, AnalysisState jobState, String message, long time) {} }; new Expectations() { { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java index c81c16ad60da3e..8ba2f3eb9a28dc 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java @@ -21,10 +21,10 @@ import org.apache.doris.common.FeConstants; import org.apache.doris.common.jmockit.Deencapsulation; import org.apache.doris.qe.StmtExecutor; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMode; -import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType; -import 
org.apache.doris.statistics.AnalysisTaskInfo.JobType; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMode; +import org.apache.doris.statistics.AnalysisInfo.AnalysisType; +import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.system.SystemInfoService; import org.apache.doris.utframe.TestWithFeService; @@ -101,7 +101,7 @@ public void test1TaskCreation() throws Exception { @Test public void test2TaskExecution() throws Exception { AnalysisTaskExecutor analysisTaskExecutor = new AnalysisTaskExecutor(analysisTaskScheduler); - AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder() + AnalysisInfo analysisInfo = new AnalysisInfoBuilder() .setJobId(0).setTaskId(0).setCatalogName("internal") .setDbName(SystemInfoService.DEFAULT_CLUSTER + ":" + "histogram_task_test").setTblName("t1") .setColName("col1").setJobType(JobType.MANUAL) @@ -109,7 +109,7 @@ public void test2TaskExecution() throws Exception { .setAnalysisMethod(AnalysisMethod.FULL) .setAnalysisType(AnalysisType.HISTOGRAM) .build(); - HistogramTask task = new HistogramTask(analysisTaskInfo); + HistogramTask task = new HistogramTask(analysisInfo); new MockUp() { @Mock @@ -119,7 +119,7 @@ public synchronized BaseAnalysisTask getPendingTasks() { }; new MockUp() { @Mock - public void updateTaskStatus(AnalysisTaskInfo info, AnalysisState jobState, String message, long time) {} + public void updateTaskStatus(AnalysisInfo info, AnalysisState jobState, String message, long time) {} }; new Expectations() { { diff --git a/tools/tpch-tools/conf/doris-cluster.conf b/tools/tpch-tools/conf/doris-cluster.conf index 9417bcb9e0ae87..21fe5c09ff3954 100644 --- a/tools/tpch-tools/conf/doris-cluster.conf +++ b/tools/tpch-tools/conf/doris-cluster.conf @@ -16,7 +16,7 @@ # under the License. 
# Any of FE host -export FE_HOST='127.0.0.1' +export FE_HOST='172.16.1.0' # http_port in fe.conf export FE_HTTP_PORT=8030 # query_port in fe.conf @@ -26,4 +26,4 @@ export USER='root' # Doris password export PASSWORD='' # The database where TPC-H tables located -export DB='tpch' +export DB='tpch1G' From 4f90a20b968cd7b2b6429f9278ab28beee2cbc15 Mon Sep 17 00:00:00 2001 From: kikyo Date: Thu, 1 Jun 2023 11:45:33 +0800 Subject: [PATCH 2/2] check type --- docs/en/docs/query-acceleration/statistics.md | 74 ++++++------------- .../docs/query-acceleration/statistics.md | 58 +++++++-------- .../apache/doris/analysis/AnalyzeStmt.java | 2 +- .../apache/doris/analysis/AnalyzeTblStmt.java | 56 +++++++++----- .../doris/analysis/ShowAnalyzeStmt.java | 10 +-- .../nereids/stats/ExpressionEstimation.java | 2 +- .../apache/doris/persist/OperationType.java | 8 +- .../org/apache/doris/qe/ShowExecutor.java | 1 - .../org/apache/doris/qe/StmtExecutor.java | 3 +- .../doris/statistics/AnalysisInfoBuilder.java | 2 +- .../doris/statistics/AnalysisManager.java | 7 +- .../doris/statistics/ColumnStatistic.java | 19 +++-- .../doris/statistics/util/StatisticsUtil.java | 1 + .../doris/statistics/HistogramTaskTest.java | 5 +- 14 files changed, 120 insertions(+), 128 deletions(-) diff --git a/docs/en/docs/query-acceleration/statistics.md b/docs/en/docs/query-acceleration/statistics.md index ad0160630696a0..c9106d9e7561f0 100644 --- a/docs/en/docs/query-acceleration/statistics.md +++ b/docs/en/docs/query-acceleration/statistics.md @@ -79,15 +79,8 @@ The user triggers a manual collection job through a statement `ANALYZE` to colle Column statistics collection syntax: ```SQL -ANALYZE [ SYNC ] TABLE table_name - [ (column_name [, ...]) ] [ [ WITH SYNC ] [ WITH INCREMENTAL ] [ WITH SAMPLE PERCENT | ROWS ] [ WITH PERIOD ] ] [ PROPERTIES ("key" = "value", ...) 
]; -``` - -Column histogram collection syntax: - -```SQL -ANALYZE [ SYNC ] TABLE table_name - [ (column_name [, ...]) ] UPDATE HISTOGRAM [ [ WITH SYNC] [ WITH SAMPLE PERCENT | ROWS ][ WITH BUCKETS ] [ WITH PERIOD ] ] [ PROPERTIES ("key" = "value", ...) ]; +ANALYZE TABLE | DATABASE table_name | db_name + [ (column_name [, ...]) ] [ [ WITH SYNC ] [ WITH INCREMENTAL ] [ WITH SAMPLE PERCENT | ROWS ] [ WITH PERIOD ] [WITH HISTOGRAM]] [ PROPERTIES ("key" = "value", ...) ]; ``` Explanation: @@ -422,7 +415,7 @@ The syntax is as follows: ```SQL SHOW ANALYZE [ table_name | job_id ] - [ WHERE [ STATE = [ "PENDING" | "RUNNING" | "FINISHED" | "FAILED" ] ] ] [ ORDER BY ... ] [ LIMIT OFFSET ]; + [ WHERE [ STATE = [ "PENDING" | "RUNNING" | "FINISHED" | "FAILED" ] ] ]; ``` Explanation: @@ -450,54 +443,29 @@ Currently `SHOW ANALYZE`, 11 columns are output, as follows: Example: -- View statistics job information with ID `68603`, using the following syntax: +- View statistics job information with ID `20038`, using the following syntax: ```SQL -mysql> SHOW ANALYZE 68603; -+--------+--------------+----------------------------+-------------+-----------------+----------+---------------+---------+----------------------+----------+---------------+ -| job_id | catalog_name | db_name | tbl_name | col_name | job_type | analysis_type | message | last_exec_time_in_ms | state | schedule_type | -+--------+--------------+----------------------------+-------------+-----------------+----------+---------------+---------+----------------------+----------+---------------+ -| 68603 | internal | default_cluster:stats_test | example_tbl | | MANUAL | INDEX | | 2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | last_visit_date | MANUAL | COLUMN | | 2023-05-05 17:53:26 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | age | MANUAL | COLUMN | | 2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | 
default_cluster:stats_test | example_tbl | sex | MANUAL | COLUMN | | 2023-05-05 17:53:26 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | date | MANUAL | COLUMN | | 2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | user_id | MANUAL | COLUMN | | 2023-05-05 17:53:25 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | max_dwell_time | MANUAL | COLUMN | | 2023-05-05 17:53:26 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | cost | MANUAL | COLUMN | | 2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | min_dwell_time | MANUAL | COLUMN | | 2023-05-05 17:53:24 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | city | MANUAL | COLUMN | | 2023-05-05 17:53:25 | FINISHED | ONCE | -+--------+--------------+----------------------------+-------------+-----------------+----------+---------------+---------+----------------------+----------+---------------+ +mysql> SHOW ANALYZE 20038 ++--------+--------------+----------------------+----------+-----------------------+----------+---------------+---------+----------------------+----------+---------------+ +| job_id | catalog_name | db_name | tbl_name | col_name | job_type | analysis_type | message | last_exec_time_in_ms | state | schedule_type | ++--------+--------------+----------------------+----------+-----------------------+----------+---------------+---------+----------------------+----------+---------------+ +| 20038 | internal | default_cluster:test | t3 | [col4,col2,col3,col1] | MANUAL | FUNDAMENTALS | | 2023-06-01 17:22:15 | FINISHED | ONCE | ++--------+--------------+----------------------+----------+-----------------------+----------+---------------+---------+----------------------+----------+---------------+ + ``` -- To view `example_tbl` statistics job information for a table, use 
the following syntax: +``` +mysql> show analyze task status 20038 ; ++---------+----------+---------+----------------------+----------+ +| task_id | col_name | message | last_exec_time_in_ms | state | ++---------+----------+---------+----------------------+----------+ +| 20039 | col4 | | 2023-06-01 17:22:15 | FINISHED | +| 20040 | col2 | | 2023-06-01 17:22:15 | FINISHED | +| 20041 | col3 | | 2023-06-01 17:22:15 | FINISHED | +| 20042 | col1 | | 2023-06-01 17:22:15 | FINISHED | ++---------+----------+---------+----------------------+----------+ -```SQL -mysql> SHOW ANALYZE stats_test.example_tbl; -+--------+--------------+----------------------------+-------------+-----------------+----------+---------------+---------+----------------------+----------+---------------+ -| job_id | catalog_name | db_name | tbl_name | col_name | job_type | analysis_type | message | last_exec_time_in_ms | state | schedule_type | -+--------+--------------+----------------------------+-------------+-----------------+----------+---------------+---------+----------------------+----------+---------------+ -| 68603 | internal | default_cluster:stats_test | example_tbl | | MANUAL | INDEX | | 2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | last_visit_date | MANUAL | COLUMN | | 2023-05-05 17:53:26 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | age | MANUAL | COLUMN | | 2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | city | MANUAL | COLUMN | | 2023-05-05 17:53:25 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | cost | MANUAL | COLUMN | | 2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | min_dwell_time | MANUAL | COLUMN | | 2023-05-05 17:53:24 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | date | MANUAL | COLUMN | | 
2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | user_id | MANUAL | COLUMN | | 2023-05-05 17:53:25 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | max_dwell_time | MANUAL | COLUMN | | 2023-05-05 17:53:26 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | sex | MANUAL | COLUMN | | 2023-05-05 17:53:26 | FINISHED | ONCE | -| 68678 | internal | default_cluster:stats_test | example_tbl | user_id | MANUAL | HISTOGRAM | | 2023-05-05 18:00:11 | FINISHED | ONCE | -| 68678 | internal | default_cluster:stats_test | example_tbl | sex | MANUAL | HISTOGRAM | | 2023-05-05 18:00:09 | FINISHED | ONCE | -| 68678 | internal | default_cluster:stats_test | example_tbl | last_visit_date | MANUAL | HISTOGRAM | | 2023-05-05 18:00:10 | FINISHED | ONCE | -| 68678 | internal | default_cluster:stats_test | example_tbl | date | MANUAL | HISTOGRAM | | 2023-05-05 18:00:10 | FINISHED | ONCE | -| 68678 | internal | default_cluster:stats_test | example_tbl | cost | MANUAL | HISTOGRAM | | 2023-05-05 18:00:10 | FINISHED | ONCE | -| 68678 | internal | default_cluster:stats_test | example_tbl | age | MANUAL | HISTOGRAM | | 2023-05-05 18:00:10 | FINISHED | ONCE | -| 68678 | internal | default_cluster:stats_test | example_tbl | min_dwell_time | MANUAL | HISTOGRAM | | 2023-05-05 18:00:10 | FINISHED | ONCE | -| 68678 | internal | default_cluster:stats_test | example_tbl | max_dwell_time | MANUAL | HISTOGRAM | | 2023-05-05 18:00:09 | FINISHED | ONCE | -| 68678 | internal | default_cluster:stats_test | example_tbl | | MANUAL | HISTOGRAM | | 2023-05-05 18:00:11 | FINISHED | ONCE | -| 68678 | internal | default_cluster:stats_test | example_tbl | city | MANUAL | HISTOGRAM | | 2023-05-05 18:00:11 | FINISHED | ONCE | -+--------+--------------+----------------------------+-------------+-----------------+----------+---------------+---------+----------------------+----------+---------------+ 
``` - View all statistics job information, and return the first 3 pieces of information in descending order of the last completion time, using the following syntax: diff --git a/docs/zh-CN/docs/query-acceleration/statistics.md b/docs/zh-CN/docs/query-acceleration/statistics.md index 0a8744c114196c..c0091507b7461e 100644 --- a/docs/zh-CN/docs/query-acceleration/statistics.md +++ b/docs/zh-CN/docs/query-acceleration/statistics.md @@ -79,19 +79,9 @@ Doris 查询优化器使用统计信息来确定查询最有效的执行计划 列统计信息收集语法: ```SQL -ANALYZE [ SYNC ] TABLE table_name +ANALYZE TABLE | DATABASE table_name | db_name [ (column_name [, ...]) ] - [ [ WITH SYNC ] [ WITH INCREMENTAL ] [ WITH SAMPLE PERCENT | ROWS ] [ WITH PERIOD ] ] - [ PROPERTIES ("key" = "value", ...) ]; -``` - -列直方图收集语法: - -```SQL -ANALYZE [ SYNC ] TABLE table_name - [ (column_name [, ...]) ] - UPDATE HISTOGRAM - [ [ WITH SYNC] [ WITH SAMPLE PERCENT | ROWS ][ WITH BUCKETS ] [ WITH PERIOD ] ] + [ [ WITH SYNC ] [ WITH INCREMENTAL ] [ WITH SAMPLE PERCENT | ROWS ] [ WITH PERIOD ] [WITH HISTOGRAM]] [ PROPERTIES ("key" = "value", ...) ]; ``` @@ -456,9 +446,7 @@ mysql> ANALYZE TABLE stats_test.example_tbl UPDATE HISTOGRAM WITH PERIOD 86400; ```SQL SHOW ANALYZE [ table_name | job_id ] - [ WHERE [ STATE = [ "PENDING" | "RUNNING" | "FINISHED" | "FAILED" ] ] ] - [ ORDER BY ... 
] - [ LIMIT OFFSET ]; + [ WHERE [ STATE = [ "PENDING" | "RUNNING" | "FINISHED" | "FAILED" ] ] ]; ``` 其中: @@ -486,24 +474,32 @@ SHOW ANALYZE [ table_name | job_id ] 示例: -- 查看 ID 为 `68603` 的统计任务信息,使用以下语法: +- 查看 ID 为 `20038` 的统计任务信息,使用以下语法: ```SQL -mysql> SHOW ANALYZE 68603; -+--------+--------------+----------------------------+-------------+-----------------+----------+---------------+---------+----------------------+----------+---------------+ -| job_id | catalog_name | db_name | tbl_name | col_name | job_type | analysis_type | message | last_exec_time_in_ms | state | schedule_type | -+--------+--------------+----------------------------+-------------+-----------------+----------+---------------+---------+----------------------+----------+---------------+ -| 68603 | internal | default_cluster:stats_test | example_tbl | | MANUAL | INDEX | | 2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | last_visit_date | MANUAL | COLUMN | | 2023-05-05 17:53:26 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | age | MANUAL | COLUMN | | 2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | sex | MANUAL | COLUMN | | 2023-05-05 17:53:26 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | date | MANUAL | COLUMN | | 2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | user_id | MANUAL | COLUMN | | 2023-05-05 17:53:25 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | max_dwell_time | MANUAL | COLUMN | | 2023-05-05 17:53:26 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | cost | MANUAL | COLUMN | | 2023-05-05 17:53:27 | FINISHED | ONCE | -| 68603 | internal | default_cluster:stats_test | example_tbl | min_dwell_time | MANUAL | COLUMN | | 2023-05-05 17:53:24 | FINISHED | ONCE | -| 68603 | internal 
| default_cluster:stats_test | example_tbl | city | MANUAL | COLUMN | | 2023-05-05 17:53:25 | FINISHED | ONCE | -+--------+--------------+----------------------------+-------------+-----------------+----------+---------------+---------+----------------------+----------+---------------+ +mysql> SHOW ANALYZE 20038 ++--------+--------------+----------------------+----------+-----------------------+----------+---------------+---------+----------------------+----------+---------------+ +| job_id | catalog_name | db_name | tbl_name | col_name | job_type | analysis_type | message | last_exec_time_in_ms | state | schedule_type | ++--------+--------------+----------------------+----------+-----------------------+----------+---------------+---------+----------------------+----------+---------------+ +| 20038 | internal | default_cluster:test | t3 | [col4,col2,col3,col1] | MANUAL | FUNDAMENTALS | | 2023-06-01 17:22:15 | FINISHED | ONCE | ++--------+--------------+----------------------+----------+-----------------------+----------+---------------+---------+----------------------+----------+---------------+ + +``` + +可通过`SHOW ANALYZE TASK STATUS [job_id]`,查看具体每个列统计信息的收集完成情况。 + +``` +mysql> show analyze task status 20038 ; ++---------+----------+---------+----------------------+----------+ +| task_id | col_name | message | last_exec_time_in_ms | state | ++---------+----------+---------+----------------------+----------+ +| 20039 | col4 | | 2023-06-01 17:22:15 | FINISHED | +| 20040 | col2 | | 2023-06-01 17:22:15 | FINISHED | +| 20041 | col3 | | 2023-06-01 17:22:15 | FINISHED | +| 20042 | col1 | | 2023-06-01 17:22:15 | FINISHED | ++---------+----------+---------+----------------------+----------+ + + ``` - 查看 `example_tbl` 表的的统计任务信息,使用以下语法: diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java index f1688661d05c91..202a870f124642 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java @@ -17,6 +17,7 @@ package org.apache.doris.analysis; + import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisInfo.AnalysisMode; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; @@ -32,7 +33,6 @@ public AnalyzeStmt(AnalyzeProperties analyzeProperties) { this.analyzeProperties = analyzeProperties; } - public Map getProperties() { return analyzeProperties.getProperties(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java index 0e719f349cc972..c200d1974e6a80 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java @@ -34,6 +34,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; +import org.apache.doris.statistics.ColumnStatistic; import com.google.common.collect.Sets; import org.apache.commons.lang3.StringUtils; @@ -77,7 +78,7 @@ public class AnalyzeTblStmt extends AnalyzeStmt { // The properties passed in by the user through "with" or "properties('K', 'V')" private final TableName tableName; - private final List columnNames; + private List columnNames; // after analyzed private long dbId; @@ -128,24 +129,25 @@ public void check() throws AnalysisException { throw new AnalysisException("Analyze view is not allowed"); } checkAnalyzePriv(tableName.getDb(), tableName.getTbl()); - - if (columnNames != null && !columnNames.isEmpty()) { - table.readLock(); - try { - List baseSchema = table.getBaseSchema(false) - .stream().map(Column::getName).collect(Collectors.toList()); - Optional optional = columnNames.stream() - .filter(entity -> 
!baseSchema.contains(entity)).findFirst(); - if (optional.isPresent()) { - String columnName = optional.get(); - ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME, - columnName, FeNameFormat.getColumnNameRegex()); - } - } finally { - table.readUnlock(); + if (columnNames == null) { + columnNames = table.getBaseSchema(false) + .stream().map(Column::getName).collect(Collectors.toList()); + } + table.readLock(); + try { + List baseSchema = table.getBaseSchema(false) + .stream().map(Column::getName).collect(Collectors.toList()); + Optional optional = columnNames.stream() + .filter(entity -> !baseSchema.contains(entity)).findFirst(); + if (optional.isPresent()) { + String columnName = optional.get(); + ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME, + columnName, FeNameFormat.getColumnNameRegex()); } + } finally { + table.readUnlock(); } - + checkColumn(); analyzeProperties.check(); // TODO support external table @@ -157,6 +159,26 @@ public void check() throws AnalysisException { } } + private void checkColumn() throws AnalysisException { + table.readLock(); + try { + for (String colName : columnNames) { + Column column = table.getColumn(colName); + if (column == null) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME, + colName, FeNameFormat.getColumnNameRegex()); + } + if (ColumnStatistic.UNSUPPORTED_TYPE.contains(column.getType())) { + throw new AnalysisException(String.format("Column[%s] with type[%s] is not supported to analyze", + colName, column.getType().toString())); + } + } + } finally { + table.readUnlock(); + } + + } + public String getCatalogName() { return tableName.getCtl(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java index ee646aa2a9b6de..73bf77b23c3327 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java @@ -67,7 +67,7 @@ public class ShowAnalyzeStmt extends ShowStmt { .add("schedule_type") .build(); - private Long jobId; + private long jobId; private TableName dbTableName; private Expr whereClause; private LimitElement limitElement; @@ -88,10 +88,11 @@ public ShowAnalyzeStmt(TableName dbTableName, this.limitElement = limitElement; } - public ShowAnalyzeStmt(Long jobId, + public ShowAnalyzeStmt(long jobId, Expr whereClause, List orderByElements, LimitElement limitElement) { + Preconditions.checkArgument(jobId > 0, "JobId must greater than 0."); this.jobId = jobId; this.dbTableName = null; this.whereClause = whereClause; @@ -99,7 +100,7 @@ public ShowAnalyzeStmt(Long jobId, this.limitElement = limitElement; } - public Long getJobId() { + public long getJobId() { return jobId; } @@ -136,7 +137,6 @@ public void analyze(Analyzer analyzer) throws UserException { + "in your FE conf file"); } super.analyze(analyzer); - if (dbTableName != null) { dbTableName.analyze(analyzer); String dbName = dbTableName.getDb(); @@ -261,7 +261,7 @@ public String toSql() { StringBuilder sb = new StringBuilder(); sb.append("SHOW ANALYZE"); - if (jobId != null) { + if (jobId != 0) { sb.append(" "); sb.append(jobId); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java index 782609afd57dc0..d5449987cf9a34 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java @@ -150,7 +150,7 @@ public ColumnStatistic visitCast(Cast cast, Statistics context) { @Override public ColumnStatistic visitLiteral(Literal literal, Statistics context) { - if (ColumnStatistic.MAX_MIN_UNSUPPORTED_TYPE.contains(literal.getDataType().toCatalogDataType())) { + if 
(ColumnStatistic.UNSUPPORTED_TYPE.contains(literal.getDataType().toCatalogDataType())) { return ColumnStatistic.UNKNOWN; } double literalVal = literal.getDouble(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java index 2ed5f47b7072b9..32f7d958e87718 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java @@ -295,13 +295,13 @@ public class OperationType { // update binlog config public static final short OP_UPDATE_BINLOG_CONFIG = 425; - public static final short OP_CREATE_ANALYSIS_TASK = 435; + public static final short OP_CREATE_ANALYSIS_TASK = 430; - public static final short OP_DELETE_ANALYSIS_TASK = 436; + public static final short OP_DELETE_ANALYSIS_TASK = 431; - public static final short OP_CREATE_ANALYSIS_JOB = 437; + public static final short OP_CREATE_ANALYSIS_JOB = 432; - public static final short OP_DELETE_ANALYSIS_JOB = 438; + public static final short OP_DELETE_ANALYSIS_JOB = 433; /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 08af281e093b05..8b508b2cd7874d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -430,7 +430,6 @@ public ShowResultSet execute() throws AnalysisException { return resultSet; } - private void handleShowRollup() { // TODO: not implemented yet ShowRollupStmt showRollupStmt = (ShowRollupStmt) stmt; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index 8055e1caa1fdc9..e3f76c649e5d32 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -20,6 +20,7 @@ 
import org.apache.doris.analysis.AddPartitionLikeClause; import org.apache.doris.analysis.AlterClause; import org.apache.doris.analysis.AlterTableStmt; +import org.apache.doris.analysis.AnalyzeStmt; import org.apache.doris.analysis.AnalyzeTblStmt; import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.ArrayLiteral; @@ -2125,7 +2126,7 @@ private void handleDeleteStmt() { private void handleDdlStmt() { try { DdlExecutor.execute(context.getEnv(), (DdlStmt) parsedStmt); - if (!(parsedStmt instanceof AnalyzeTblStmt)) { + if (!(parsedStmt instanceof AnalyzeStmt)) { context.getState().setOk(); } } catch (QueryStateException e) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index e0ab8220c86ed8..c47b1dc7abd6d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -34,7 +34,7 @@ public class AnalysisInfoBuilder { private String tblName; private Map> colToPartitions; private String colName; - private Long indexId = -1L; + private long indexId = -1L; private JobType jobType; private AnalysisMode analysisMode; private AnalysisMethod analysisMethod; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 71cf3dae138845..d48eed96181256 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -167,7 +167,7 @@ public void createAnalysisJobs(AnalyzeDBStmt analyzeDBStmt) throws DdlException analyzeStmts.add(analyzeTblStmt); } for (AnalyzeTblStmt analyzeTblStmt : analyzeStmts) { - buildAndAssignJob(analyzeTblStmt); + analysisInfos.add(buildAndAssignJob(analyzeTblStmt)); } 
sendJobId(analysisInfos); } finally { @@ -472,10 +472,10 @@ private void createTaskForMVIdx(AnalysisInfo jobInfo, Map showAnalysisJob(ShowAnalyzeStmt stmt) { String state = stmt.getStateValue(); TableName tblName = stmt.getDbTableName(); return analysisJobInfoMap.values().stream() + .filter(a -> stmt.getJobId() == 0 || a.jobId == stmt.getJobId()) .filter(a -> state == null || a.state.equals(AnalysisState.valueOf(state))) .filter(a -> tblName == null || a.catalogName.equals(tblName.getCtl()) && a.dbName.equals(tblName.getDb()) && a.tblName.equals(tblName.getTbl())) @@ -717,7 +718,7 @@ private BaseAnalysisTask createTask(AnalysisInfo analysisInfo) throws DdlExcepti return table.createAnalysisTask(analysisInfo); } catch (Throwable t) { LOG.warn("Failed to find table", t); - throw new DdlException("Error when trying to find table", t); + throw new DdlException("Failed to create task", t); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index ff218d6f30b621..46dd8fa4a59131 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -52,14 +52,21 @@ public class ColumnStatistic { .setSelectivity(0) .build(); - public static final Set MAX_MIN_UNSUPPORTED_TYPE = new HashSet<>(); + public static final Set UNSUPPORTED_TYPE = new HashSet<>(); static { - MAX_MIN_UNSUPPORTED_TYPE.add(Type.HLL); - MAX_MIN_UNSUPPORTED_TYPE.add(Type.BITMAP); - MAX_MIN_UNSUPPORTED_TYPE.add(Type.ARRAY); - MAX_MIN_UNSUPPORTED_TYPE.add(Type.STRUCT); - MAX_MIN_UNSUPPORTED_TYPE.add(Type.MAP); + UNSUPPORTED_TYPE.add(Type.HLL); + UNSUPPORTED_TYPE.add(Type.BITMAP); + UNSUPPORTED_TYPE.add(Type.ARRAY); + UNSUPPORTED_TYPE.add(Type.STRUCT); + UNSUPPORTED_TYPE.add(Type.MAP); + UNSUPPORTED_TYPE.add(Type.QUANTILE_STATE); + UNSUPPORTED_TYPE.add(Type.AGG_STATE); + 
UNSUPPORTED_TYPE.add(Type.JSONB); + UNSUPPORTED_TYPE.add(Type.VARIANT); + UNSUPPORTED_TYPE.add(Type.TIME); + UNSUPPORTED_TYPE.add(Type.TIMEV2); + UNSUPPORTED_TYPE.add(Type.LAMBDA_FUNCTION); } public final double count; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 802c6dffee31ad..fed9d26165c5d0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -202,6 +202,7 @@ public static LiteralExpr readableValue(Type type, String columnValue) throws An return new DateLiteral(columnValue, type); case CHAR: case VARCHAR: + case STRING: return new StringLiteral(columnValue); case HLL: case BITMAP: diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java index 8ba2f3eb9a28dc..0c6b507b136851 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java @@ -32,7 +32,6 @@ import mockit.Mock; import mockit.MockUp; import mockit.Mocked; -import mockit.Tested; import org.junit.FixMethodOrder; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -71,14 +70,12 @@ protected void runBeforeAll() throws Exception { FeConstants.runningUnitTest = true; } - @Tested - @Test public void test1TaskCreation() throws Exception { AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); StmtExecutor executor = getSqlStmtExecutor( - "ANALYZE TABLE t1(col1) UPDATE HISTOGRAM"); + "ANALYZE TABLE t1(col1) WITH HISTOGRAM"); Assertions.assertNotNull(executor); ConcurrentMap> taskMap =