diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java index efcfc517024683..734073901fe7f6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java @@ -62,6 +62,7 @@ public class ShowAnalyzeStmt extends ShowStmt { .add("schedule_type") .add("start_time") .add("end_time") + .add("priority") .build(); private long jobId; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 13c3e8baf0cbc5..8c1c0326fafd5d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -1503,7 +1503,7 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { @VariableMgr.VarAttr(name = ENABLE_AUTO_ANALYZE_INTERNAL_CATALOG, description = {"临时参数,收否自动收集所有内表", "Temp variable, enable to auto collect all OlapTable."}, flag = VariableMgr.GLOBAL) - public boolean enableAutoAnalyzeInternalCatalog = false; + public boolean enableAutoAnalyzeInternalCatalog = true; @VariableMgr.VarAttr(name = AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD, description = {"参与自动收集的最大表宽度,列数多于这个参数的表不参与自动收集", diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index dda7045eabb6a7..fa2046e54679ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -2859,6 +2859,7 @@ private void handleShowAnalyze() { java.time.ZoneId.systemDefault()); row.add(startTime.format(formatter)); row.add(endTime.format(formatter)); + row.add(analysisInfo.priority.name()); resultRows.add(row); } catch (Exception e) { LOG.warn("Failed to get analyze info for table {}.{}.{}, reason: {}", @@ -2876,8 +2877,7 @@ private void handleShowAutoAnalyzePendingJobs() { for (AutoAnalysisPendingJob job : jobs) { try { List row = new ArrayList<>(); - CatalogIf> c - = StatisticsUtil.findCatalog(job.catalogName); + CatalogIf> c = StatisticsUtil.findCatalog(job.catalogName); row.add(c.getName()); Optional> databaseIf = c.getDb(job.dbName); row.add(databaseIf.isPresent() ? databaseIf.get().getFullName() : "DB may get deleted"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index 0c5047a53c5d06..24cf6f38d68962 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -200,8 +200,12 @@ public enum ScheduleType { */ public final long tblUpdateTime; + @SerializedName("userInject") public final boolean userInject; + @SerializedName("priority") + public final JobPriority priority; + public AnalysisInfo(long jobId, long taskId, List taskIds, long catalogId, long dbId, long tblId, Map> colToPartitions, Set partitionNames, String colName, Long indexId, JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType, @@ -210,7 +214,7 @@ public AnalysisInfo(long jobId, long taskId, List taskIds, long catalogId, boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition, boolean isAllPartition, long partitionCount, CronExpression cronExpression, boolean forceFull, boolean usingSqlForPartitionColumn, long tblUpdateTime, long rowCount, boolean userInject, - long updateRows) { + long updateRows, JobPriority priority) { this.jobId = jobId; this.taskId = taskId; this.taskIds = taskIds; @@ -249,6 +253,7 @@ public AnalysisInfo(long jobId, long taskId, List taskIds, long catalogId, this.rowCount = rowCount; this.userInject = userInject; this.updateRows = updateRows; + this.priority = priority; } @Override @@ -293,6 +298,7 @@ public String toString() { sj.add("rowCount: " + rowCount); sj.add("userInject: " + userInject); sj.add("updateRows: " + updateRows); + sj.add("priority: " + priority.name()); return sj.toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index 527d503fd522a5..2f60b258598670 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -65,6 +65,7 @@ public class AnalysisInfoBuilder { private long rowCount; private boolean userInject; private long updateRows; + private JobPriority priority; public AnalysisInfoBuilder() { } @@ -105,6 +106,7 @@ public AnalysisInfoBuilder(AnalysisInfo info) { rowCount = info.rowCount; userInject = info.userInject; updateRows = info.updateRows; + priority = info.priority; } public AnalysisInfoBuilder setJobId(long jobId) { @@ -282,12 +284,18 @@ public AnalysisInfoBuilder setUpdateRows(long updateRows) { return this; } + public AnalysisInfoBuilder setPriority(JobPriority priority) { + this.priority = priority; + return this; + } + public AnalysisInfo build() { return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, tblId, colToPartitions, partitionNames, colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent, sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType, externalTableLevelTask, partitionOnly, samplingPartition, isAllPartition, partitionCount, - cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, rowCount, userInject, updateRows); + cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, rowCount, userInject, updateRows, + priority); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index c6713b3a7c11a8..ddcdf459e6e096 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -219,7 +219,8 @@ public List buildAnalysisInfosForDB(DatabaseIf db, Analyz public void createAnalysisJob(AnalyzeTblStmt stmt, boolean proxy) throws DdlException { // Using auto analyzer if user specifies. if (stmt.getAnalyzeProperties().getProperties().containsKey("use.auto.analyzer")) { - Env.getCurrentEnv().getStatisticsAutoCollector().processOneJob(stmt.getTable(), stmt.getColumnNames()); + Env.getCurrentEnv().getStatisticsAutoCollector() + .processOneJob(stmt.getTable(), stmt.getColumnNames(), JobPriority.HIGH); return; } AnalysisInfo jobInfo = buildAndAssignJob(stmt); @@ -422,6 +423,7 @@ public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlExceptio infoBuilder.setRowCount(rowCount); TableStatsMeta tableStatsStatus = findTableStatsStatus(table.getId()); infoBuilder.setUpdateRows(tableStatsStatus == null ? 0 : tableStatsStatus.updatedRows.get()); + infoBuilder.setPriority(JobPriority.MANUAL); return infoBuilder.build(); } @@ -1230,12 +1232,14 @@ protected void updateColumn(Collection slotReferences, Queue highColumns, Collection midColumns) { for (TQueryColumn c : highColumns) { - if (!highPriorityColumns.offer(new HighPriorityColumn(c.catalogId, c.dbId, c.tblId, c.colName))) { + if (!highPriorityColumns.offer(new HighPriorityColumn(Long.parseLong(c.catalogId), Long.parseLong(c.dbId), + Long.parseLong(c.tblId), c.colName))) { break; } } for (TQueryColumn c : midColumns) { - if (!midPriorityColumns.offer(new HighPriorityColumn(c.catalogId, c.dbId, c.tblId, c.colName))) { + if (!midPriorityColumns.offer(new HighPriorityColumn(Long.parseLong(c.catalogId), Long.parseLong(c.dbId), + Long.parseLong(c.tblId), c.colName))) { break; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java index 181000c1ef23e4..0a804152694486 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java @@ -32,14 +32,16 @@ import org.apache.logging.log4j.Logger; import java.net.InetSocketAddress; +import java.util.ArrayList; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; public class FollowerColumnSender extends MasterDaemon { private static final Logger LOG = LogManager.getLogger(FollowerColumnSender.class); - public static final long INTERVAL = 5000; + public static final long INTERVAL = 60000; public FollowerColumnSender() { super("Follower Column Sender", INTERVAL); @@ -68,21 +70,28 @@ protected void send() { if (analysisManager.highPriorityColumns.isEmpty() && analysisManager.midPriorityColumns.isEmpty()) { return; } - List highPriorityColumns + Set highPriorityColumns = analysisManager.highPriorityColumns .stream() + .filter(c -> StatisticsUtil.needAnalyzeColumn(c)) .map(HighPriorityColumn::toThrift) - .collect(Collectors.toList()); - List midPriorityColumns + .collect(Collectors.toSet()); + Set midPriorityColumns = analysisManager.midPriorityColumns .stream() + .filter(c -> StatisticsUtil.needAnalyzeColumn(c)) + .filter(c -> !highPriorityColumns.contains(c)) .map(HighPriorityColumn::toThrift) - .collect(Collectors.toList()); + .collect(Collectors.toSet()); analysisManager.highPriorityColumns.clear(); analysisManager.midPriorityColumns.clear(); TSyncQueryColumns queryColumns = new TSyncQueryColumns(); - queryColumns.highPriorityColumns = highPriorityColumns; - queryColumns.midPriorityColumns = midPriorityColumns; + List highs = new ArrayList<>(); + highs.addAll(highPriorityColumns); + queryColumns.highPriorityColumns = highs; + List mids = new ArrayList<>(); + mids.addAll(midPriorityColumns); + queryColumns.midPriorityColumns = mids; Frontend master = null; try { InetSocketAddress masterAddress = currentEnv.getHaProtocol().getLeader(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HighPriorityColumn.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HighPriorityColumn.java index b2292ef725d35e..d619ef82c080ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HighPriorityColumn.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HighPriorityColumn.java @@ -57,9 +57,9 @@ public boolean equals(Object other) { public TQueryColumn toThrift() { TQueryColumn tQueryColumn = new TQueryColumn(); - tQueryColumn.catalogId = catalogId; - tQueryColumn.dbId = dbId; - tQueryColumn.tblId = tblId; + tQueryColumn.catalogId = String.valueOf(catalogId); + tQueryColumn.dbId = String.valueOf(dbId); + tQueryColumn.tblId = String.valueOf(tblId); tQueryColumn.colName = colName; return tQueryColumn; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java index 2786b063563bd3..c3656b929279e6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java @@ -20,5 +20,6 @@ public enum JobPriority { HIGH, MID, - LOW; + LOW, + MANUAL; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 227074dbb5c8b4..c26e7b05efd8c4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -23,9 +23,9 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; +import org.apache.doris.common.Pair; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.hive.HMSExternalTable; -import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.AnalysisInfo.ScheduleType; @@ -39,6 +39,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -58,29 +59,22 @@ public StatisticsAutoCollector() { @Override protected void collect() { while (canCollect()) { - Map.Entry> job = getJob(); + Pair>, JobPriority> job = getJob(); if (job == null) { // No more job to process, break and sleep. break; } try { - TableName tblName = job.getKey(); + TableName tblName = job.first.getKey(); TableIf table = StatisticsUtil.findTable(tblName.getCtl(), tblName.getDb(), tblName.getTbl()); if (!supportAutoAnalyze(table)) { continue; } - Set columns = job.getValue() - .stream() - .filter(c -> { - boolean needAnalyzeColumn = needAnalyzeColumn(table, c); - LOG.info("Need analyze column " + c + " ? " + needAnalyzeColumn); - return needAnalyzeColumn; - }) - .collect(Collectors.toSet()); - processOneJob(table, columns); + Set columns = job.first.getValue().stream().collect(Collectors.toSet()); + processOneJob(table, columns, job.second); } catch (Exception e) { - LOG.warn("Failed to analyze table {} with columns [{}]", - job.getKey().getTbl(), job.getValue().stream().collect(Collectors.joining(",")), e); + LOG.warn("Failed to analyze table {} with columns [{}]", job.first.getKey().getTbl(), + job.first.getValue().stream().collect(Collectors.joining(",")), e); } } } @@ -90,18 +84,18 @@ protected boolean canCollect() { && StatisticsUtil.inAnalyzeTime(LocalTime.now(TimeUtils.getTimeZone().toZoneId())); } - protected Map.Entry> getJob() { + protected Pair>, JobPriority> getJob() { AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); - Optional>> job = fetchJobFromMap(manager.highPriorityJobs); + Optional>> job = fetchJobFromMap(manager.highPriorityJobs); if (job.isPresent()) { - return job.get(); + return Pair.of(job.get(), JobPriority.HIGH); } job = fetchJobFromMap(manager.midPriorityJobs); if (job.isPresent()) { - return job.get(); + return Pair.of(job.get(), JobPriority.MID); } job = fetchJobFromMap(manager.lowPriorityJobs); - return job.isPresent() ? job.get() : null; + return job.isPresent() ? Pair.of(job.get(), JobPriority.LOW) : null; } protected Optional>> fetchJobFromMap(Map> jobMap) { @@ -112,12 +106,12 @@ protected Optional>> fetchJobFromMap(Map columns) throws DdlException { + protected void processOneJob(TableIf table, Set columns, JobPriority priority) throws DdlException { appendPartitionColumns(table, columns); if (columns.isEmpty()) { return; } - AnalysisInfo analyzeJob = createAnalyzeJobForTbl(table, columns); + AnalysisInfo analyzeJob = createAnalyzeJobForTbl(table, columns, priority); LOG.info("Analyze job : {}", analyzeJob.toString()); createSystemAnalysisJob(analyzeJob); } @@ -134,69 +128,6 @@ protected void appendPartitionColumns(TableIf table, Set columns) { } } - // TODO: Need refactor, hard to understand now. - protected boolean needAnalyzeColumn(TableIf table, String column) { - AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); - TableStatsMeta tableStatsStatus = manager.findTableStatsStatus(table.getId()); - if (tableStatsStatus == null) { - return true; - } - if (tableStatsStatus.userInjected) { - return false; - } - ColStatsMeta columnStatsMeta = tableStatsStatus.findColumnStatsMeta(column); - if (columnStatsMeta == null) { - return true; - } - if (table instanceof OlapTable) { - long currentUpdatedRows = tableStatsStatus.updatedRows.get(); - long lastAnalyzeUpdateRows = columnStatsMeta.updatedRows; - if (lastAnalyzeUpdateRows == 0 && currentUpdatedRows > 0) { - return true; - } - if (lastAnalyzeUpdateRows > currentUpdatedRows) { - // Shouldn't happen. Just in case. - return true; - } - OlapTable olapTable = (OlapTable) table; - long currentRowCount = olapTable.getRowCount(); - long lastAnalyzeRowCount = columnStatsMeta.rowCount; - if (tableStatsStatus.newPartitionLoaded.get() && olapTable.isPartitionColumn(column)) { - return true; - } - if (lastAnalyzeRowCount == 0 && currentRowCount > 0) { - return true; - } - if (currentUpdatedRows == lastAnalyzeUpdateRows) { - return false; - } - double healthValue = ((double) (currentUpdatedRows - lastAnalyzeUpdateRows) - / (double) currentUpdatedRows) * 100.0; - LOG.info("Column " + column + " update rows health value is " + healthValue); - if (healthValue < StatisticsUtil.getTableStatsHealthThreshold()) { - return true; - } - if (currentRowCount == 0 && lastAnalyzeRowCount != 0) { - return true; - } - if (currentRowCount == 0 && lastAnalyzeRowCount == 0) { - return false; - } - healthValue = ((double) (currentRowCount - lastAnalyzeRowCount) / (double) currentRowCount) * 100.0; - return healthValue < StatisticsUtil.getTableStatsHealthThreshold(); - } else { - if (!(table instanceof HMSExternalTable)) { - return false; - } - HMSExternalTable hmsTable = (HMSExternalTable) table; - if (!hmsTable.getDlaType().equals(DLAType.HIVE)) { - return false; - } - return System.currentTimeMillis() - - tableStatsStatus.updatedTime > StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis(); - } - } - protected boolean supportAutoAnalyze(TableIf tableIf) { if (tableIf == null) { return false; @@ -206,7 +137,7 @@ protected boolean supportAutoAnalyze(TableIf tableIf) { && ((HMSExternalTable) tableIf).getDlaType().equals(HMSExternalTable.DLAType.HIVE); } - protected AnalysisInfo createAnalyzeJobForTbl(TableIf table, Set columns) { + protected AnalysisInfo createAnalyzeJobForTbl(TableIf table, Set columns, JobPriority priority) { AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes() ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL; AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); @@ -236,6 +167,7 @@ protected AnalysisInfo createAnalyzeJobForTbl(TableIf table, Set columns .setRowCount(rowCount) .setUpdateRows(tableStatsStatus == null ? 0 : tableStatsStatus.updatedRows.get()) .setColToPartitions(colToPartitions) + .setPriority(priority) .build(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java index 93d03a3fdb86ac..9e07c65e2feb3d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java @@ -86,6 +86,9 @@ protected void appendColumnsToJobs(Queue columnQueue, Map> jobsMap) { if (!jobsMap.containsKey(tableName) && jobsMap.size() >= JOB_MAP_SIZE) { return; } - Set columns - = t.getColumns().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + Set columns = t.getColumns().stream() + .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + .filter(c -> StatisticsUtil.needAnalyzeColumn(t, c.getName())) .map(c -> c.getName()).collect(Collectors.toSet()); if (jobsMap.containsKey(tableName)) { jobsMap.get(tableName).addAll(columns); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 65c2ee9e6da013..27aa83ba716a88 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -58,6 +58,7 @@ import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.datasource.hive.HMSExternalCatalog; import org.apache.doris.datasource.hive.HMSExternalTable; +import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; import org.apache.doris.datasource.hive.HiveMetaStoreCache; import org.apache.doris.datasource.hive.HivePartition; import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral; @@ -68,11 +69,15 @@ import org.apache.doris.qe.SessionVariable; import org.apache.doris.qe.StmtExecutor; import org.apache.doris.qe.VariableMgr; +import org.apache.doris.statistics.AnalysisManager; +import org.apache.doris.statistics.ColStatsMeta; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; +import org.apache.doris.statistics.HighPriorityColumn; import org.apache.doris.statistics.Histogram; import org.apache.doris.statistics.ResultRow; import org.apache.doris.statistics.StatisticConstants; +import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.system.Frontend; import com.google.common.base.Preconditions; @@ -874,7 +879,7 @@ public static boolean enableAutoAnalyzeInternalCatalog() { } catch (Exception e) { LOG.warn("Fail to get value of enable auto analyze internal catalog, return false by default", e); } - return false; + return true; } public static int getInsertMergeCount() { @@ -984,4 +989,85 @@ public static boolean isMvColumn(TableIf table, String columnName) { || columnName.startsWith(CreateMaterializedViewStmt.MATERIALIZED_VIEW_AGGREGATE_NAME_PREFIX); } + // TODO: Need refactor, hard to understand now. + public static boolean needAnalyzeColumn(TableIf table, String column) { + AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); + TableStatsMeta tableStatsStatus = manager.findTableStatsStatus(table.getId()); + if (tableStatsStatus == null) { + return true; + } + if (tableStatsStatus.userInjected) { + return false; + } + ColStatsMeta columnStatsMeta = tableStatsStatus.findColumnStatsMeta(column); + if (columnStatsMeta == null) { + return true; + } + if (table instanceof OlapTable) { + long currentUpdatedRows = tableStatsStatus.updatedRows.get(); + long lastAnalyzeUpdateRows = columnStatsMeta.updatedRows; + if (lastAnalyzeUpdateRows == 0 && currentUpdatedRows > 0) { + return true; + } + if (lastAnalyzeUpdateRows > currentUpdatedRows) { + // Shouldn't happen. Just in case. + return true; + } + OlapTable olapTable = (OlapTable) table; + long currentRowCount = olapTable.getRowCount(); + long lastAnalyzeRowCount = columnStatsMeta.rowCount; + if (tableStatsStatus.newPartitionLoaded.get() && olapTable.isPartitionColumn(column)) { + return true; + } + if (lastAnalyzeRowCount == 0 && currentRowCount > 0) { + return true; + } + if (currentUpdatedRows == lastAnalyzeUpdateRows) { + return false; + } + double healthValue = ((double) (currentUpdatedRows - lastAnalyzeUpdateRows) + / (double) currentUpdatedRows) * 100.0; + LOG.info("Column " + column + " update rows health value is " + healthValue); + if (healthValue < StatisticsUtil.getTableStatsHealthThreshold()) { + return true; + } + if (currentRowCount == 0 && lastAnalyzeRowCount != 0) { + return true; + } + if (currentRowCount == 0 && lastAnalyzeRowCount == 0) { + return false; + } + healthValue = ((double) (currentRowCount - lastAnalyzeRowCount) / (double) currentRowCount) * 100.0; + return healthValue < StatisticsUtil.getTableStatsHealthThreshold(); + } else { + if (!(table instanceof HMSExternalTable)) { + return false; + } + HMSExternalTable hmsTable = (HMSExternalTable) table; + if (!hmsTable.getDlaType().equals(DLAType.HIVE)) { + return false; + } + return System.currentTimeMillis() + - tableStatsStatus.updatedTime > StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis(); + } + } + + public static boolean needAnalyzeColumn(HighPriorityColumn column) { + if (column == null) { + return false; + } + TableIf table; + Column col; + try { + table = StatisticsUtil.findTable(column.catalogId, column.dbId, column.tblId); + col = table.getColumn(column.colName); + } catch (Exception e) { + LOG.warn("Failed to find table for column {}", column.colName, e); + return false; + } + return col != null + && !StatisticsUtil.isUnsupportedType(col.getType()) + && StatisticsUtil.needAnalyzeColumn(table, column.colName); + } + } diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift index d2cdecbbe569e4..6be02613d5e622 100644 --- a/gensrc/thrift/FrontendService.thrift +++ b/gensrc/thrift/FrontendService.thrift @@ -1411,9 +1411,9 @@ struct TReportCommitTxnResultRequest { } struct TQueryColumn { - 1: optional i64 catalogId - 2: optional i64 dbId - 3: optional i64 tblId + 1: optional string catalogId + 2: optional string dbId + 3: optional string tblId 4: optional string colName }