Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,7 @@ terminal String
KW_QUOTA,
KW_RANDOM,
KW_RANGE,
KW_RECENT,
KW_READ,
KW_REBALANCE,
KW_RECOVER,
Expand Down Expand Up @@ -5789,6 +5790,14 @@ partition_names ::=
{:
RESULT = new PartitionNames(true, Lists.newArrayList(partName));
:}
| KW_PARTITIONS LPAREN STAR RPAREN
{:
RESULT = new PartitionNames(true);
:}
| KW_PARTITIONS KW_WITH KW_RECENT INTEGER_LITERAL:count
{:
RESULT = new PartitionNames(count);
:}
;

opt_table_sample ::=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.View;
import org.apache.doris.catalog.external.ExternalTable;
Expand Down Expand Up @@ -167,11 +166,6 @@ public void check() throws AnalysisException {
}
analyzeProperties.check();

// TODO support external table
if (analyzeProperties.isSampleRows() && !(table instanceof OlapTable)) {
throw new AnalysisException("Sampling statistics "
+ "collection of external tables is not supported with rows, use percent instead.");
}
if (analyzeProperties.isSync()
&& (analyzeProperties.isAutomatic() || analyzeProperties.getPeriodTimeInMs() != 0)) {
throw new AnalysisException("Automatic/Period statistics collection "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,9 @@ public ShowResultSetMetaData getMetaData() {
public long getJobId() {
return jobId;
}

@Override
public RedirectStatus getRedirectStatus() {
return RedirectStatus.FORWARD_NO_SYNC;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -470,9 +470,11 @@ protected void analyzeSortHints() throws AnalysisException {
}

protected void analyzeSample() throws AnalysisException {
if ((sampleTabletIds != null || tableSample != null) && desc.getTable().getType() != TableIf.TableType.OLAP) {
if ((sampleTabletIds != null || tableSample != null)
&& desc.getTable().getType() != TableIf.TableType.OLAP
&& desc.getTable().getType() != TableIf.TableType.HMS_EXTERNAL_TABLE) {
throw new AnalysisException("Sample table " + desc.getTable().getName()
+ " type " + desc.getTable().getType() + " is not OLAP");
+ " type " + desc.getTable().getType() + " is not supported");
}
}

Expand Down
8 changes: 4 additions & 4 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
Original file line number Diff line number Diff line change
Expand Up @@ -5420,10 +5420,6 @@ public LoadManagerAdapter getLoadManagerAdapter() {
return loadManagerAdapter;
}

public StatisticsAutoCollector getStatisticsAutoCollector() {
return statisticsAutoCollector;
}

public QueryStats getQueryStats() {
return queryStats;
}
Expand All @@ -5436,4 +5432,8 @@ public void cleanQueryStats(CleanQueryStatsInfo info) throws DdlException {
public ColumnIdFlushDaemon getColumnIdFlusher() {
return columnIdFlusher;
}

public StatisticsAutoCollector getStatisticsAutoCollector() {
return statisticsAutoCollector;
}
}
40 changes: 20 additions & 20 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,26 @@ public BaseAnalysisTask createAnalysisTask(AnalysisInfo info) {
return new MVAnalysisTask(info);
}

public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
if (tblStats == null) {
return true;
}
long rowCount = getRowCount();
// TODO: Do we need to analyze an empty table?
if (rowCount == 0) {
return false;
}
if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
.stream()
.map(Column::getName)
.collect(Collectors.toSet()))) {
return true;
}
long updateRows = tblStats.updatedRows.get();
int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
return tblHealth < Config.table_stats_health_threshold;
}

@Override
public long getRowCount() {
long rowCount = 0;
Expand Down Expand Up @@ -2282,24 +2302,4 @@ public long getDataSize(boolean singleReplica) {
}
return dataSize;
}

public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
if (tblStats == null) {
return true;
}
long rowCount = getRowCount();
// TODO: Do we need to analyze an empty table?
if (rowCount == 0) {
return false;
}
if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
.stream()
.map(Column::getName)
.collect(Collectors.toSet()))) {
return true;
}
long updateRows = tblStats.updatedRows.get();
int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
return tblHealth < Config.table_stats_health_threshold;
}
}
10 changes: 7 additions & 3 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,11 @@ public Optional<ColumnStatistic> getColumnStatistic(String colName) {
return Optional.empty();
}

public void analyze(String dbName) {
public void analyze(String dbName) {}

@Override
public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
return true;
}

@Override
Expand All @@ -565,7 +569,7 @@ public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
}

@Override
public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
return true;
public List<Long> getChunkSizes() {
throw new NotImplementedException("getChunkSized not implemented");
}
}
13 changes: 8 additions & 5 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,14 @@ default int getBaseColumnIdxByName(String colName) {

Optional<ColumnStatistic> getColumnStatistic(String colName);

boolean needReAnalyzeTable(TableStatsMeta tblStats);

Map<String, Set<String>> findReAnalyzeNeededPartitions();

// Get all the chunk sizes of this table. Now, only HMS external table implemented this interface.
// For HMS external table, the return result is a list of all the files' size.
List<Long> getChunkSizes();

void write(DataOutput out) throws IOException;

/**
Expand Down Expand Up @@ -239,15 +247,10 @@ default long getLastUpdateTime() {
return -1L;
}

Map<String, Set<String>> findReAnalyzeNeededPartitions();

default long getDataSize(boolean singleReplica) {
// TODO: Each tableIf should impl it by itself.
return 0;
}

boolean needReAnalyzeTable(TableStatsMeta tblStats);


}

Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ public class ExternalTable implements TableIf, Writable, GsonPostProcessable {
// this field will be refreshed after reloading schema
protected volatile long schemaUpdateTime;

protected long dbId;
protected boolean objectCreated;
protected ExternalCatalog catalog;
protected ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(true);
Expand Down Expand Up @@ -119,6 +120,7 @@ protected void makeSureInitialized() {
try {
// getDbOrAnalysisException will call makeSureInitialized in ExternalCatalog.
ExternalDatabase db = catalog.getDbOrAnalysisException(dbName);
dbId = db.getId();
db.makeSureInitialized();
} catch (AnalysisException e) {
Util.logAndThrowRuntimeException(LOG, String.format("Exception to get db %s", dbName), e);
Expand Down Expand Up @@ -397,4 +399,9 @@ public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
partitions.add("Dummy Partition");
return getBaseSchema().stream().collect(Collectors.toMap(Column::getName, k -> partitions));
}

@Override
public List<Long> getChunkSizes() {
throw new NotImplementedException("getChunkSized not implemented");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.datasource.HMSExternalCatalog;
import org.apache.doris.datasource.hive.HiveMetaStoreCache;
import org.apache.doris.datasource.hive.PooledHiveMetaStoreClient;
import org.apache.doris.statistics.AnalysisInfo;
import org.apache.doris.statistics.BaseAnalysisTask;
Expand Down Expand Up @@ -644,6 +645,36 @@ public void gsonPostProcess() throws IOException {
super.gsonPostProcess();
estimatedRowCount = -1;
}

@Override
public List<Long> getChunkSizes() {
HiveMetaStoreCache.HivePartitionValues partitionValues = StatisticsUtil.getPartitionValuesForTable(this);
List<HiveMetaStoreCache.FileCacheValue> filesByPartitions
= StatisticsUtil.getFilesForPartitions(this, partitionValues, 0);
List<Long> result = Lists.newArrayList();
for (HiveMetaStoreCache.FileCacheValue files : filesByPartitions) {
for (HiveMetaStoreCache.HiveFileStatus file : files.getFiles()) {
result.add(file.getLength());
}
}
return result;
}

@Override
public long getDataSize(boolean singleReplica) {
long totalSize = StatisticsUtil.getTotalSizeFromHMS(this);
// Usually, we can get total size from HMS parameter.
if (totalSize > 0) {
return totalSize;
}
// If not found the size in HMS, calculate it by sum all files' size in table.
List<Long> chunkSizes = getChunkSizes();
long total = 0;
for (long size : chunkSizes) {
total += size;
}
return total;
}
}


Original file line number Diff line number Diff line change
Expand Up @@ -177,5 +177,4 @@ default CatalogLog constructEditLog() {
public ConcurrentHashMap<Long, DatabaseIf> getIdToDb();

public boolean enableAutoAnalyze();

}
Original file line number Diff line number Diff line change
Expand Up @@ -612,5 +612,4 @@ public boolean enableAutoAnalyze() {
}
return ret;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,9 @@ public static class HiveFileStatus {
long length;
long blockSize;
long modificationTime;
boolean splittable;
List<String> partitionValues;
AcidInfo acidInfo;
}

@Data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,12 @@ public PlanFragment visitPhysicalFileScan(PhysicalFileScan fileScan, PlanTransla
break;
case HIVE:
scanNode = new HiveScanNode(fileScan.translatePlanNodeId(), tupleDescriptor, false);
((HiveScanNode) scanNode).setSelectedPartitions(fileScan.getSelectedPartitions());
HiveScanNode hiveScanNode = (HiveScanNode) scanNode;
hiveScanNode.setSelectedPartitions(fileScan.getSelectedPartitions());
if (fileScan.getTableSample().isPresent()) {
hiveScanNode.setTableSample(new TableSample(fileScan.getTableSample().get().isPercent,
fileScan.getTableSample().get().sampleValue, fileScan.getTableSample().get().seek));
}
break;
default:
throw new RuntimeException("do not support DLA type " + ((HMSExternalTable) table).getDlaType());
Expand All @@ -491,7 +496,9 @@ public PlanFragment visitPhysicalFileScan(PhysicalFileScan fileScan, PlanTransla
TableRef ref = new TableRef(tableName, null, null);
BaseTableRef tableRef = new BaseTableRef(ref, table, tableName);
tupleDescriptor.setRef(tableRef);

if (fileScan.getStats() != null) {
scanNode.setCardinality((long) fileScan.getStats().getRowCount());
}
Utils.execWithUncheckedException(scanNode::init);
context.addScanNode(scanNode);
ScanNode finalScanNode = scanNode;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator;
import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PreAggStatus;
Expand Down Expand Up @@ -214,13 +213,13 @@ private LogicalPlan getLogicalPlan(TableIf table, UnboundRelation unboundRelatio
Plan hiveViewPlan = parseAndAnalyzeHiveView(hiveCatalog, ddlSql, cascadesContext);
return new LogicalSubQueryAlias<>(tableQualifier, hiveViewPlan);
}
return new LogicalFileScan(StatementScopeIdGenerator.newRelationId(),
(HMSExternalTable) table, tableQualifier);
return new LogicalFileScan(unboundRelation.getRelationId(), (HMSExternalTable) table, tableQualifier,
unboundRelation.getTableSample());
case ICEBERG_EXTERNAL_TABLE:
case PAIMON_EXTERNAL_TABLE:
case MAX_COMPUTE_EXTERNAL_TABLE:
return new LogicalFileScan(StatementScopeIdGenerator.newRelationId(),
(ExternalTable) table, tableQualifier);
return new LogicalFileScan(unboundRelation.getRelationId(), (ExternalTable) table, tableQualifier,
unboundRelation.getTableSample());
case SCHEMA:
return new LogicalSchemaScan(unboundRelation.getRelationId(), table, tableQualifier);
case JDBC_EXTERNAL_TABLE:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ public Rule build() {
Optional.empty(),
fileScan.getLogicalProperties(),
fileScan.getConjuncts(),
fileScan.getSelectedPartitions())
fileScan.getSelectedPartitions(),
fileScan.getTableSample())
).toRule(RuleType.LOGICAL_FILE_SCAN_TO_PHYSICAL_FILE_SCAN_RULE);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ public Plan visitLogicalFileScan(LogicalFileScan fileScan, DeepCopierContext con
return context.getRelationReplaceMap().get(fileScan.getRelationId());
}
LogicalFileScan newFileScan = new LogicalFileScan(StatementScopeIdGenerator.newRelationId(),
fileScan.getTable(), fileScan.getQualifier());
fileScan.getTable(), fileScan.getQualifier(), fileScan.getTableSample());
updateReplaceMapWithOutput(fileScan, newFileScan, context.exprIdReplaceMap);
context.putRelation(fileScan.getRelationId(), newFileScan);
Set<Expression> conjuncts = fileScan.getConjuncts().stream()
Expand Down
Loading