Skip to content

Commit 4b6ff32

Browse files
authored
[refactor][backport](stats) Migrate stats framework from master to branch 2.0 (#25119)
This PR is composed of belowing commits which has been merged to Doirs master: * #24769 * #24672 * #24599 * #24521 * #24405 * #24237 * #24135 * #24074 * #24026 * #23992 * #23978 * #23622 * #23507 * #23354 * #23103 * #22963 * #22896 * #22775 * #22773
1 parent b9fba6a commit 4b6ff32

File tree

102 files changed

+3860
-2724
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

102 files changed

+3860
-2724
lines changed

fe/fe-common/src/main/java/org/apache/doris/common/Config.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2178,6 +2178,13 @@ public class Config extends ConfigBase {
21782178
})
21792179
public static int autobucket_min_buckets = 1;
21802180

2181+
@ConfField
2182+
public static int full_auto_analyze_simultaneously_running_task_num = 1;
2183+
2184+
@ConfField
2185+
public static final int period_analyze_simultaneously_running_task_num = 1;
2186+
2187+
21812188
@ConfField(mutable = true, description = {
21822189
"Doris 为了兼用 mysql 周边工具生态,会内置一个名为 mysql 的数据库,如果该数据库与用户自建数据库冲突,"
21832190
+ "请修改这个字段,为 doris 内置的 mysql database 更换一个名字",

fe/fe-core/src/main/cup/sql_parser.cup

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4134,13 +4134,17 @@ show_param ::=
41344134
RESULT = new ShowCreateMaterializedViewStmt(mvName, tableName);
41354135
:}
41364136
/* show analyze job */
4137-
| KW_ANALYZE opt_table_name:tbl opt_wild_where order_by_clause:orderByClause limit_clause:limitClause
4137+
| KW_ANALYZE opt_table_name:tbl opt_wild_where
41384138
{:
4139-
RESULT = new ShowAnalyzeStmt(tbl, parser.where, orderByClause, limitClause);
4139+
RESULT = new ShowAnalyzeStmt(tbl, parser.where, false);
41404140
:}
4141-
| KW_ANALYZE INTEGER_LITERAL:jobId opt_wild_where order_by_clause:orderByClause limit_clause:limitClause
4141+
| KW_ANALYZE INTEGER_LITERAL:jobId opt_wild_where
41424142
{:
4143-
RESULT = new ShowAnalyzeStmt(jobId, parser.where, orderByClause, limitClause);
4143+
RESULT = new ShowAnalyzeStmt(jobId, parser.where);
4144+
:}
4145+
| KW_AUTO KW_ANALYZE opt_table_name:tbl opt_wild_where
4146+
{:
4147+
RESULT = new ShowAnalyzeStmt(tbl, parser.where, true);
41444148
:}
41454149
| KW_ANALYZE KW_TASK KW_STATUS INTEGER_LITERAL:jobId
41464150
{:

fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,18 @@
2222
import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
2323

2424
import com.google.common.collect.ImmutableSet;
25+
import com.google.gson.annotations.SerializedName;
2526
import org.apache.commons.lang3.StringUtils;
27+
import org.apache.logging.log4j.core.util.CronExpression;
2628

29+
import java.util.HashMap;
2730
import java.util.Map;
2831
import java.util.Optional;
2932
import java.util.concurrent.TimeUnit;
3033

34+
// TODO: Remove map
3135
public class AnalyzeProperties {
3236

33-
private final Map<String, String> properties;
34-
3537
public static final String PROPERTY_SYNC = "sync";
3638
public static final String PROPERTY_INCREMENTAL = "incremental";
3739
public static final String PROPERTY_AUTOMATIC = "automatic";
@@ -41,6 +43,23 @@ public class AnalyzeProperties {
4143
public static final String PROPERTY_ANALYSIS_TYPE = "analysis.type";
4244
public static final String PROPERTY_PERIOD_SECONDS = "period.seconds";
4345

46+
public static final String PROPERTY_FORCE_FULL = "force.full";
47+
48+
public static final AnalyzeProperties DEFAULT_PROP = new AnalyzeProperties(new HashMap<String, String>() {
49+
{
50+
put(AnalyzeProperties.PROPERTY_SYNC, "false");
51+
put(AnalyzeProperties.PROPERTY_AUTOMATIC, "false");
52+
put(AnalyzeProperties.PROPERTY_ANALYSIS_TYPE, AnalysisType.FUNDAMENTALS.toString());
53+
}
54+
});
55+
56+
public static final String PROPERTY_PERIOD_CRON = "period.cron";
57+
58+
private CronExpression cronExpression;
59+
60+
@SerializedName("analyzeProperties")
61+
private final Map<String, String> properties;
62+
4463
private static final ImmutableSet<String> PROPERTIES_SET = new ImmutableSet.Builder<String>()
4564
.add(PROPERTY_SYNC)
4665
.add(PROPERTY_INCREMENTAL)
@@ -50,6 +69,8 @@ public class AnalyzeProperties {
5069
.add(PROPERTY_NUM_BUCKETS)
5170
.add(PROPERTY_ANALYSIS_TYPE)
5271
.add(PROPERTY_PERIOD_SECONDS)
72+
.add(PROPERTY_PERIOD_CRON)
73+
.add(PROPERTY_FORCE_FULL)
5374
.build();
5475

5576
public AnalyzeProperties(Map<String, String> properties) {
@@ -72,6 +93,7 @@ public void check() throws AnalysisException {
7293
checkAnalysisMode(msgTemplate);
7394
checkAnalysisType(msgTemplate);
7495
checkScheduleType(msgTemplate);
96+
checkPeriod();
7597
}
7698

7799
public boolean isSync() {
@@ -115,6 +137,10 @@ public long getPeriodTimeInMs() {
115137
return TimeUnit.SECONDS.toMillis(minutes);
116138
}
117139

140+
public CronExpression getCron() {
141+
return cronExpression;
142+
}
143+
118144
private void checkPeriodSeconds() throws AnalysisException {
119145
if (properties.containsKey(PROPERTY_PERIOD_SECONDS)) {
120146
checkNumericProperty(PROPERTY_PERIOD_SECONDS, properties.get(PROPERTY_PERIOD_SECONDS),
@@ -207,6 +233,22 @@ private void checkScheduleType(String msgTemplate) throws AnalysisException {
207233
}
208234
}
209235

236+
private void checkPeriod() throws AnalysisException {
237+
if (properties.containsKey(PROPERTY_PERIOD_SECONDS)
238+
&& properties.containsKey(PROPERTY_PERIOD_CRON)) {
239+
throw new AnalysisException(PROPERTY_PERIOD_SECONDS + " and " + PROPERTY_PERIOD_CRON
240+
+ " couldn't be set simultaneously");
241+
}
242+
String cronExprStr = properties.get(PROPERTY_PERIOD_CRON);
243+
if (cronExprStr != null) {
244+
try {
245+
cronExpression = new CronExpression(cronExprStr);
246+
} catch (java.text.ParseException e) {
247+
throw new AnalysisException("Invalid cron expression: " + cronExprStr);
248+
}
249+
}
250+
}
251+
210252
private void checkNumericProperty(String key, String value, int lowerBound, int upperBound,
211253
boolean includeBoundary, String errorMsg) throws AnalysisException {
212254
if (!StringUtils.isNumeric(value)) {
@@ -226,6 +268,14 @@ public boolean isSample() {
226268
|| properties.containsKey(PROPERTY_SAMPLE_ROWS);
227269
}
228270

271+
public boolean forceFull() {
272+
return properties.containsKey(PROPERTY_FORCE_FULL);
273+
}
274+
275+
public boolean isSampleRows() {
276+
return properties.containsKey(PROPERTY_SAMPLE_ROWS);
277+
}
278+
229279
public String toSQL() {
230280
StringBuilder sb = new StringBuilder();
231281
sb.append("PROPERTIES(");

fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
2424
import org.apache.doris.statistics.AnalysisInfo.ScheduleType;
2525

26+
import org.apache.logging.log4j.core.util.CronExpression;
27+
2628
import java.util.Map;
2729

2830
public class AnalyzeStmt extends StatementBase {
@@ -55,7 +57,8 @@ public ScheduleType getScheduleType() {
5557
if (analyzeProperties.isAutomatic()) {
5658
return ScheduleType.AUTOMATIC;
5759
}
58-
return analyzeProperties.getPeriodTimeInMs() > 0 ? ScheduleType.PERIOD : ScheduleType.ONCE;
60+
return analyzeProperties.getPeriodTimeInMs() > 0 || analyzeProperties.getCron() != null
61+
? ScheduleType.PERIOD : ScheduleType.ONCE;
5962
}
6063

6164
public boolean isSync() {
@@ -86,4 +89,12 @@ public AnalyzeProperties getAnalyzeProperties() {
8689
public RedirectStatus getRedirectStatus() {
8790
return RedirectStatus.FORWARD_WITH_SYNC;
8891
}
92+
93+
public CronExpression getCron() {
94+
return analyzeProperties.getCron();
95+
}
96+
97+
public boolean forceFull() {
98+
return analyzeProperties.forceFull();
99+
}
89100
}

fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.doris.catalog.OlapTable;
2525
import org.apache.doris.catalog.TableIf;
2626
import org.apache.doris.catalog.View;
27+
import org.apache.doris.catalog.external.ExternalTable;
2728
import org.apache.doris.catalog.external.HMSExternalTable;
2829
import org.apache.doris.common.AnalysisException;
2930
import org.apache.doris.common.Config;
@@ -41,6 +42,7 @@
4142
import com.google.common.collect.Sets;
4243
import org.apache.commons.lang3.StringUtils;
4344

45+
import java.util.Collections;
4446
import java.util.List;
4547
import java.util.Optional;
4648
import java.util.Set;
@@ -84,7 +86,7 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
8486

8587
private final TableName tableName;
8688
private List<String> columnNames;
87-
private List<String> partitionNames;
89+
private PartitionNames partitionNames;
8890
private boolean isAllColumns;
8991

9092
// after analyzed
@@ -97,7 +99,7 @@ public AnalyzeTblStmt(TableName tableName,
9799
AnalyzeProperties properties) {
98100
super(properties);
99101
this.tableName = tableName;
100-
this.partitionNames = partitionNames == null ? null : partitionNames.getPartitionNames();
102+
this.partitionNames = partitionNames;
101103
this.columnNames = columnNames;
102104
this.analyzeProperties = properties;
103105
this.isAllColumns = columnNames == null;
@@ -166,11 +168,9 @@ public void check() throws AnalysisException {
166168
analyzeProperties.check();
167169

168170
// TODO support external table
169-
if (analyzeProperties.isSample()) {
170-
if (!(table instanceof OlapTable)) {
171-
throw new AnalysisException("Sampling statistics "
172-
+ "collection of external tables is not supported");
173-
}
171+
if (analyzeProperties.isSampleRows() && !(table instanceof OlapTable)) {
172+
throw new AnalysisException("Sampling statistics "
173+
+ "collection of external tables is not supported with rows, use percent instead.");
174174
}
175175
if (analyzeProperties.isSync()
176176
&& (analyzeProperties.isAutomatic() || analyzeProperties.getPeriodTimeInMs() != 0)) {
@@ -181,6 +181,9 @@ public void check() throws AnalysisException {
181181
throw new AnalysisException("Automatic collection "
182182
+ "and period statistics collection cannot be set at same time");
183183
}
184+
if (analyzeProperties.isSample() && analyzeProperties.forceFull()) {
185+
throw new AnalysisException("Impossible to analyze with sample and full simultaneously");
186+
}
184187
}
185188

186189
private void checkColumn() throws AnalysisException {
@@ -196,7 +199,8 @@ private void checkColumn() throws AnalysisException {
196199
}
197200
}
198201
if (containsUnsupportedTytpe) {
199-
if (!ConnectContext.get().getSessionVariable().enableAnalyzeComplexTypeColumn) {
202+
if (ConnectContext.get() == null
203+
|| !ConnectContext.get().getSessionVariable().enableAnalyzeComplexTypeColumn) {
200204
columnNames = columnNames.stream()
201205
.filter(c -> !StatisticsUtil.isUnsupportedType(table.getColumn(c).getType()))
202206
.collect(Collectors.toList());
@@ -236,14 +240,33 @@ public Set<String> getColumnNames() {
236240
}
237241

238242
public Set<String> getPartitionNames() {
239-
Set<String> partitions = partitionNames == null ? table.getPartitionNames() : Sets.newHashSet(partitionNames);
240-
if (isSamplingPartition()) {
241-
int partNum = ConnectContext.get().getSessionVariable().getExternalTableAnalyzePartNum();
242-
partitions = partitions.stream().limit(partNum).collect(Collectors.toSet());
243+
if (partitionNames == null || partitionNames.getPartitionNames() == null) {
244+
if (table instanceof ExternalTable) {
245+
// External table couldn't return all partitions when partitionNames is not set.
246+
// Because Analyze Table command for external table could specify partition names.
247+
return Collections.emptySet();
248+
}
249+
return table.getPartitionNames();
243250
}
251+
Set<String> partitions = Sets.newHashSet();
252+
partitions.addAll(partitionNames.getPartitionNames());
244253
return partitions;
245254
}
246255

256+
public boolean isAllPartitions() {
257+
if (partitionNames == null) {
258+
return false;
259+
}
260+
return partitionNames.isAllPartitions();
261+
}
262+
263+
public long getPartitionCount() {
264+
if (partitionNames == null) {
265+
return 0;
266+
}
267+
return partitionNames.getCount();
268+
}
269+
247270
public boolean isPartitionOnly() {
248271
return partitionNames != null;
249272
}
@@ -260,8 +283,13 @@ public boolean isSamplingPartition() {
260283
}
261284

262285
private void checkAnalyzePriv(String dbName, String tblName) throws AnalysisException {
286+
ConnectContext ctx = ConnectContext.get();
287+
// means it a system analyze
288+
if (ctx == null) {
289+
return;
290+
}
263291
if (!Env.getCurrentEnv().getAccessManager()
264-
.checkTblPriv(ConnectContext.get(), dbName, tblName, PrivPredicate.SELECT)) {
292+
.checkTblPriv(ctx, dbName, tblName, PrivPredicate.SELECT)) {
265293
ErrorReport.reportAnalysisException(
266294
ErrorCode.ERR_TABLEACCESS_DENIED_ERROR,
267295
"ANALYZE",

fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionNames.java

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,37 @@ public class PartitionNames implements ParseNode, Writable {
4848
// true if these partitions are temp partitions
4949
@SerializedName(value = "isTemp")
5050
private final boolean isTemp;
51+
private final boolean allPartitions;
52+
private final long count;
53+
// Default partition count to collect statistic for external table.
54+
private static final long DEFAULT_PARTITION_COUNT = 100;
5155

5256
public PartitionNames(boolean isTemp, List<String> partitionNames) {
5357
this.partitionNames = partitionNames;
5458
this.isTemp = isTemp;
59+
this.allPartitions = false;
60+
this.count = 0;
5561
}
5662

5763
public PartitionNames(PartitionNames other) {
5864
this.partitionNames = Lists.newArrayList(other.partitionNames);
5965
this.isTemp = other.isTemp;
66+
this.allPartitions = other.allPartitions;
67+
this.count = 0;
68+
}
69+
70+
public PartitionNames(boolean allPartitions) {
71+
this.partitionNames = null;
72+
this.isTemp = false;
73+
this.allPartitions = allPartitions;
74+
this.count = 0;
75+
}
76+
77+
public PartitionNames(long partitionCount) {
78+
this.partitionNames = null;
79+
this.isTemp = false;
80+
this.allPartitions = false;
81+
this.count = partitionCount;
6082
}
6183

6284
public List<String> getPartitionNames() {
@@ -67,9 +89,23 @@ public boolean isTemp() {
6789
return isTemp;
6890
}
6991

92+
public boolean isAllPartitions() {
93+
return allPartitions;
94+
}
95+
96+
public long getCount() {
97+
return count;
98+
}
99+
70100
@Override
71101
public void analyze(Analyzer analyzer) throws AnalysisException {
72-
if (partitionNames.isEmpty()) {
102+
if (allPartitions && count > 0) {
103+
throw new AnalysisException("All partition and partition count couldn't be set at the same time.");
104+
}
105+
if (allPartitions || count > 0) {
106+
return;
107+
}
108+
if (partitionNames == null || partitionNames.isEmpty()) {
73109
throw new AnalysisException("No partition specified in partition lists");
74110
}
75111
// check if partition name is not empty string

0 commit comments

Comments
 (0)