Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions fe/fe-common/src/main/java/org/apache/doris/common/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.doris.common;

import java.util.concurrent.TimeUnit;

public class Config extends ConfigBase {

@ConfField(description = {"用户自定义配置文件的路径,用于存放 fe_custom.conf。该文件中的配置会覆盖 fe.conf 中的配置",
Expand Down Expand Up @@ -1516,8 +1518,12 @@ public class Config extends ConfigBase {
/*
* the system automatically checks the time interval for statistics
*/
@ConfField(mutable = true, masterOnly = true)
public static int auto_check_statistics_in_minutes = 1;
// Interval, in minutes, between checks of table statistics health that may trigger auto collection.
// Fix: the two English literals were concatenated without a space ("tablestatistics").
@ConfField(mutable = true, masterOnly = true, description = {
        "该参数控制自动收集作业检查库表统计信息健康度并触发自动收集的时间间隔",
        "This parameter controls the time interval for automatic collection jobs to check the health of table "
                + "statistics and trigger automatic collection"})
public static int auto_check_statistics_in_minutes = 10;

/**
* If this configuration is enabled, you should also specify the trace_export_url.
Expand Down Expand Up @@ -2172,4 +2178,26 @@ public class Config extends ConfigBase {
+ "The larger the value, the more uniform the distribution of the hash algorithm, "
+ "but it will increase the memory overhead."})
public static int virtual_node_number = 2048;

// Minimum interval between automatic ANALYZE runs on a huge table; default 12 hours.
// Fix: the two English literals were concatenated without a space ("interval,tables").
@ConfField(description = {"控制对大表的自动ANALYZE的最小时间间隔,"
        + "在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes的表仅ANALYZE一次",
        "This controls the minimum time interval for automatic ANALYZE on large tables. Within this interval, "
                + "tables larger than huge_table_lower_bound_size_in_bytes are analyzed only once."})
public static long huge_table_auto_analyze_interval_in_millis = TimeUnit.HOURS.toMillis(12);

// Size threshold, in bytes, above which a table counts as "huge"; default is 5 GiB (5L * 1024^3).
// Tables above this bound are auto-sampled when enable_auto_sample is on, and are rate-limited by
// huge_table_auto_analyze_interval_in_millis.
@ConfField(description = {"定义大表的大小下界,在开启enable_auto_sample的情况下,"
+ "大小超过该值的表将会自动通过采样收集统计信息", "This defines the lower size bound for large tables. "
+ "When enable_auto_sample is enabled, tables larger than this value will automatically collect "
+ "statistics through sampling"})
public static long huge_table_lower_bound_size_in_bytes = 5L * 1024 * 1024 * 1024;

// Number of rows sampled when auto-sampling a huge table; 20_0000 == 200,000 rows.
// Fixes: duplicated "开启开启" in the Chinese description; missing space between the
// concatenated English literals ("forlarge").
@ConfField(description = {"定义开启大表自动sample后,对大表的采样行数",
        "This defines the number of sample rows for large tables when automatic sampling for "
                + "large tables is enabled"})
public static int huge_table_default_sample_rows = 20_0000;

// Master switch for sampling-based automatic statistics collection on huge tables; off by default.
// Fixes: missing space between the concatenated English literals ("automaticallycollects");
// Chinese sentence was missing "的表" (the subject "tables that exceed ...").
@ConfField(description = {"是否开启大表自动sample,开启后对于大小超过huge_table_lower_bound_size_in_bytes的表会自动通过采样收集"
        + "统计信息", "Whether to enable automatic sampling for large tables, which, when enabled, automatically "
        + "collects statistics through sampling for tables larger than 'huge_table_lower_bound_size_in_bytes'"})
public static boolean enable_auto_sample = false;
}
7 changes: 7 additions & 0 deletions fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -6074,6 +6074,13 @@ with_analysis_properties ::=
put("period.cron", cron_expr);
}};
:}
| KW_FULL
{:
RESULT = new HashMap<String, String>() {{
put(AnalyzeProperties.PROPERTY_FORCE_FULL, "true");
}};
:}

;

opt_with_analysis_properties ::=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.Optional;
import java.util.concurrent.TimeUnit;

// TODO: Remove map
public class AnalyzeProperties {

public static final String PROPERTY_SYNC = "sync";
Expand All @@ -42,6 +43,8 @@ public class AnalyzeProperties {
public static final String PROPERTY_ANALYSIS_TYPE = "analysis.type";
public static final String PROPERTY_PERIOD_SECONDS = "period.seconds";

public static final String PROPERTY_FORCE_FULL = "force.full";

public static final AnalyzeProperties DEFAULT_PROP = new AnalyzeProperties(new HashMap<String, String>() {
{
put(AnalyzeProperties.PROPERTY_SYNC, "false");
Expand All @@ -67,6 +70,7 @@ public class AnalyzeProperties {
.add(PROPERTY_ANALYSIS_TYPE)
.add(PROPERTY_PERIOD_SECONDS)
.add(PROPERTY_PERIOD_CRON)
.add(PROPERTY_FORCE_FULL)
.build();

public AnalyzeProperties(Map<String, String> properties) {
Expand Down Expand Up @@ -264,6 +268,10 @@ public boolean isSample() {
|| properties.containsKey(PROPERTY_SAMPLE_ROWS);
}

/**
 * Returns true when the "force.full" property was supplied with the ANALYZE statement,
 * i.e. the user explicitly requested a full (non-sampled) collection.
 */
public boolean forceFull() {
    // Presence of the key alone is the flag; the mapped value is not inspected.
    final boolean hasForceFullKey = properties.containsKey(PROPERTY_FORCE_FULL);
    return hasForceFullKey;
}

public String toSQL() {
StringBuilder sb = new StringBuilder();
sb.append("PROPERTIES(");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,4 +93,8 @@ public RedirectStatus getRedirectStatus() {
public CronExpression getCron() {
return analyzeProperties.getCron();
}

/** Whether a full (non-sampled) analyze was explicitly requested in the statement properties. */
public boolean forceFull() {
    // Pure delegation to the parsed analyze properties.
    return this.analyzeProperties.forceFull();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ public void check() throws AnalysisException {
throw new AnalysisException("Automatic collection "
+ "and period statistics collection cannot be set at same time");
}
if (analyzeProperties.isSample() && analyzeProperties.forceFull()) {
throw new AnalysisException("Impossible to analyze with sample and full simultaneously");
}
}

private void checkColumn() throws AnalysisException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public ShowResultSet constructResultSet(TableStats tableStatistic) {
row.add(tableStatistic.analysisMethod.toString());
row.add(tableStatistic.analysisType.toString());
row.add(new Date(tableStatistic.updatedTime).toString());
row.add(tableStatistic.columns);
row.add(tableStatistic.analyzeColumns().toString());
row.add(tableStatistic.jobType.toString());
result.add(row);
return new ShowResultSet(getMetaData(), result);
Expand Down
39 changes: 27 additions & 12 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -1132,24 +1132,39 @@ public boolean needReAnalyzeTable(TableStats tblStats) {
if (rowCount == 0) {
return false;
}
long updateRows = tblStats.updatedRows.get();
if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
.stream()
.map(Column::getName)
.collect(Collectors.toSet()))) {
return true;
}
// long updateRows = tblStats.updatedRows.get();
long updateRows = Math.abs(tblStats.rowCount - rowCount);
int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
return tblHealth < Config.table_stats_health_threshold;
}

@Override
public Set<String> findReAnalyzeNeededPartitions() {
TableStats tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(getId());
if (tableStats == null) {
return getPartitionNames().stream().map(this::getPartition)
public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
TableIf table = this;
TableStats tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(table.getId());
Set<String> allPartitions = table.getPartitionNames().stream().map(table::getPartition)
.filter(Partition::hasData).map(Partition::getName).collect(Collectors.toSet());
}
return getPartitionNames().stream()
.map(this::getPartition)
.filter(Partition::hasData)
.filter(partition ->
partition.getVisibleVersionTime() >= tableStats.updatedTime).map(Partition::getName)
.collect(Collectors.toSet());
if (tableStats == null) {
return table.getBaseSchema().stream().collect(Collectors.toMap(Column::getName, v -> allPartitions));
}
Map<String, Set<String>> colToPart = new HashMap<>();
for (Column col : table.getBaseSchema()) {
long lastUpdateTime = tableStats.findColumnLastUpdateTime(col.getName());
Set<String> partitions = table.getPartitionNames().stream()
.map(table::getPartition)
.filter(Partition::hasData)
.filter(partition ->
partition.getVisibleVersionTime() >= lastUpdateTime).map(Partition::getName)
.collect(Collectors.toSet());
colToPart.put(col.getName(), partitions);
}
return colToPart;
}

@Override
Expand Down
4 changes: 2 additions & 2 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ public boolean needReAnalyzeTable(TableStats tblStats) {
}

@Override
public Set<String> findReAnalyzeNeededPartitions() {
return Collections.emptySet();
public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
return Collections.emptyMap();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -139,7 +140,7 @@ default int getBaseColumnIdxByName(String colName) {

boolean needReAnalyzeTable(TableStats tblStats);

Set<String> findReAnalyzeNeededPartitions();
Map<String, Set<String>> findReAnalyzeNeededPartitions();

void write(DataOutput out) throws IOException;

Expand Down Expand Up @@ -244,5 +245,10 @@ default boolean isManagedTable() {
default long getLastUpdateTime() {
return -1L;
}

default long getDataSize() {
// TODO: Each tableIf should impl it by itself.
return 0;
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,12 @@
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;

/**
* External table represent tables that are not self-managed by Doris.
Expand Down Expand Up @@ -388,10 +390,10 @@ public boolean needReAnalyzeTable(TableStats tblStats) {
}

@Override
public Set<String> findReAnalyzeNeededPartitions() {
public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
HashSet<String> partitions = Sets.newHashSet();
// TODO: Find a way to collect external table partitions that need to be analyzed.
partitions.add("Dummy Partition");
return partitions;
return getBaseSchema().stream().collect(Collectors.toMap(Column::getName, k -> partitions));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import com.google.gson.Gson;
import com.google.gson.annotations.SerializedName;
import com.google.gson.reflect.TypeToken;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.quartz.CronExpression;
Expand Down Expand Up @@ -167,13 +168,16 @@ public enum ScheduleType {

public CronExpression cronExpression;

@SerializedName("forceFull")
public final boolean forceFull;

public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, String catalogName, String dbName, String tblName,
Map<String, Set<String>> colToPartitions, Set<String> partitionNames, String colName, Long indexId,
JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType,
int samplePercent, int sampleRows, int maxBucketNum, long periodTimeInMs, String message,
long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType,
boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition,
CronExpression cronExpression) {
CronExpression cronExpression, boolean forceFull) {
this.jobId = jobId;
this.taskId = taskId;
this.taskIds = taskIds;
Expand Down Expand Up @@ -204,6 +208,7 @@ public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, String catalogN
if (cronExpression != null) {
this.cronExprStr = cronExpression.getCronExpression();
}
this.forceFull = forceFull;
}

@Override
Expand All @@ -214,11 +219,11 @@ public String toString() {
sj.add("DBName: " + dbName);
sj.add("TableName: " + tblName);
sj.add("ColumnName: " + colName);
sj.add("TaskType: " + analysisType.toString());
sj.add("TaskMode: " + analysisMode.toString());
sj.add("TaskMethod: " + analysisMethod.toString());
sj.add("TaskType: " + analysisType);
sj.add("TaskMode: " + analysisMode);
sj.add("TaskMethod: " + analysisMethod);
sj.add("Message: " + message);
sj.add("CurrentState: " + state.toString());
sj.add("CurrentState: " + state);
if (samplePercent > 0) {
sj.add("SamplePercent: " + samplePercent);
}
Expand All @@ -240,6 +245,10 @@ public String toString() {
if (periodTimeInMs > 0) {
sj.add("periodTimeInMs: " + StatisticsUtil.getReadableTime(periodTimeInMs));
}
if (StringUtils.isNotEmpty(cronExprStr)) {
sj.add("cronExpr: " + cronExprStr);
}
sj.add("forceFull: " + forceFull);
return sj.toString();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ public class AnalysisInfoBuilder {

private CronExpression cronExpression;

private boolean forceFull;

public AnalysisInfoBuilder() {
}

Expand Down Expand Up @@ -90,6 +92,7 @@ public AnalysisInfoBuilder(AnalysisInfo info) {
partitionOnly = info.partitionOnly;
samplingPartition = info.samplingPartition;
cronExpression = info.cronExpression;
forceFull = info.forceFull;
}

public AnalysisInfoBuilder setJobId(long jobId) {
Expand Down Expand Up @@ -226,37 +229,14 @@ public void setCronExpression(CronExpression cronExpression) {
this.cronExpression = cronExpression;
}

/**
 * Sets whether the analyze job must run a full (non-sampled) collection.
 *
 * @param forceFull true to force full collection, bypassing sampling
 * @return this builder, for chaining — consistent with the other fluent setters
 *         (e.g. setJobId); returning a value is source-compatible with existing
 *         callers that ignored the previous void result
 */
public AnalysisInfoBuilder setForceFull(boolean forceFull) {
    this.forceFull = forceFull;
    return this;
}

public AnalysisInfo build() {
return new AnalysisInfo(jobId, taskId, taskIds, catalogName, dbName, tblName, colToPartitions, partitionNames,
colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent,
sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType,
externalTableLevelTask, partitionOnly, samplingPartition, cronExpression);
}

public AnalysisInfoBuilder copy() {
return new AnalysisInfoBuilder()
.setJobId(jobId)
.setTaskId(taskId)
.setTaskIds(taskIds)
.setCatalogName(catalogName)
.setDbName(dbName)
.setTblName(tblName)
.setColToPartitions(colToPartitions)
.setColName(colName)
.setIndexId(indexId)
.setJobType(jobType)
.setAnalysisMode(analysisMode)
.setAnalysisMethod(analysisMethod)
.setAnalysisType(analysisType)
.setSamplePercent(samplePercent)
.setSampleRows(sampleRows)
.setPeriodTimeInMs(periodTimeInMs)
.setMaxBucketNum(maxBucketNum)
.setMessage(message)
.setLastExecTimeInMs(lastExecTimeInMs)
.setTimeCostInMs(timeCostInMs)
.setState(state)
.setScheduleType(scheduleType)
.setExternalTableLevelTask(externalTableLevelTask);
externalTableLevelTask, partitionOnly, samplingPartition, cronExpression, forceFull);
}
}
Loading