From 84a4ca8dedcb9cccb318db35decc76b93b21ff8c Mon Sep 17 00:00:00 2001 From: zhangdong Date: Mon, 25 Nov 2024 17:15:08 +0800 Subject: [PATCH] 1 --- .../doris/datasource/ExternalTable.java | 53 +++++++++++++++++++ .../datasource/hive/HMSExternalTable.java | 26 ++++++--- .../nereids/rules/analysis/BindRelation.java | 1 - .../rules/rewrite/PruneFileScanPartition.java | 18 +++---- .../trees/plans/logical/LogicalFileScan.java | 11 ++-- 5 files changed, 84 insertions(+), 25 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java index 1eadb46fe82eed..3aee5550acf646 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.PartitionItem; import org.apache.doris.catalog.TableAttributes; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.TableIndexes; @@ -30,6 +31,7 @@ import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.PropertyAnalyzer; import org.apache.doris.common.util.Util; +import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions; import org.apache.doris.persist.gson.GsonPostProcessable; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.statistics.AnalysisInfo; @@ -41,6 +43,7 @@ import com.google.common.collect.Sets; import com.google.gson.annotations.SerializedName; import lombok.Getter; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.NotImplementedException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -48,9 +51,11 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalLong; import java.util.Set; /** @@ -364,4 +369,52 @@ protected Optional getSchemaCacheValue() { public TableIndexes getTableIndexes() { return new TableIndexes(); } + + /** + * Retrieve all partitions and initialize SelectedPartitions + * + * @param snapshotId if not support mvcc, ignore this + * @return + */ + public SelectedPartitions initSelectedPartitions(OptionalLong snapshotId) { + if (!supportPartitionPruned()) { + return SelectedPartitions.NOT_PRUNED; + } + if (CollectionUtils.isEmpty(this.getPartitionColumns(snapshotId))) { + return SelectedPartitions.NOT_PRUNED; + } + Map nameToPartitionItems = getNameToPartitionItems(snapshotId); + return new SelectedPartitions(nameToPartitionItems.size(), nameToPartitionItems, false); + } + + /** + * get partition map + * If partition related operations are supported, this method needs to be implemented in the subclass + * + * @param snapshotId if not support mvcc, ignore this + * @return partitionName ==> PartitionItem + */ + public Map getNameToPartitionItems(OptionalLong snapshotId) { + return Collections.emptyMap(); + } + + /** + * get partition column list + * If partition related operations are supported, this method needs to be implemented in the subclass + * + * @param snapshotId if not support mvcc, ignore this + * @return + */ + public List getPartitionColumns(OptionalLong snapshotId) { + return Collections.emptyList(); + } + + /** + * Does it support partition cpruned, If so, this method needs to be overridden in subclasses + * + * @return + */ + public boolean supportPartitionPruned() { + return false; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 98984467d75b5c..544e3ac4ca8d78 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -40,7 +40,6 @@ import org.apache.doris.mtmv.MTMVSnapshotIf; import org.apache.doris.mtmv.MTMVTimestampSnapshot; import org.apache.doris.nereids.exceptions.NotSupportedException; -import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions; import org.apache.doris.qe.GlobalVariable; import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; @@ -297,19 +296,34 @@ public List getPartitionColumns() { .orElse(Collections.emptyList()); } - public SelectedPartitions getAllPartitions() { + @Override + public List getPartitionColumns(OptionalLong snapshotId) { + return getPartitionColumns(); + } + + @Override + public boolean supportPartitionPruned() { + return getDlaType() == DLAType.HIVE; + } + + @Override + public Map getNameToPartitionItems(OptionalLong snapshotId) { if (CollectionUtils.isEmpty(this.getPartitionColumns())) { - return SelectedPartitions.NOT_PRUNED; + return Collections.emptyMap(); } - HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr() .getMetaStoreCache((HMSExternalCatalog) this.getCatalog()); List partitionColumnTypes = this.getPartitionColumnTypes(); HiveMetaStoreCache.HivePartitionValues hivePartitionValues = cache.getPartitionValues( this.getDbName(), this.getName(), partitionColumnTypes); Map idToPartitionItem = hivePartitionValues.getIdToPartitionItem(); - - return new SelectedPartitions(idToPartitionItem.size(), idToPartitionItem, false); + // transfer id to name + BiMap idToName = hivePartitionValues.getPartitionNameToIdMap().inverse(); + Map nameToPartitionItem = Maps.newHashMapWithExpectedSize(idToPartitionItem.size()); + for (Entry entry : idToPartitionItem.entrySet()) { + nameToPartitionItem.put(idToName.get(entry.getKey()), entry.getValue()); + } + return nameToPartitionItem; } public boolean isHiveTransactionalTable() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index 9699ca243cf989..cba3afca6f08d3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -435,7 +435,6 @@ private LogicalPlan getLogicalPlan(TableIf table, UnboundRelation unboundRelatio } else { return new LogicalFileScan(unboundRelation.getRelationId(), (HMSExternalTable) table, qualifierWithoutTableName, - ((HMSExternalTable) table).getAllPartitions(), unboundRelation.getTableSample(), unboundRelation.getTableSnapshot()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java index 2de4efab2ff6ed..3282629ba8e3f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java @@ -19,8 +19,6 @@ import org.apache.doris.catalog.PartitionItem; import org.apache.doris.datasource.ExternalTable; -import org.apache.doris.datasource.hive.HMSExternalTable; -import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; @@ -38,6 +36,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.OptionalLong; import java.util.function.Function; import java.util.stream.Collectors; @@ -60,10 +59,8 @@ public Rule build() { ExternalTable tbl = scan.getTable(); SelectedPartitions selectedPartitions; - // TODO(cmy): support other external table - if (tbl instanceof HMSExternalTable && ((HMSExternalTable) tbl).getDlaType() == DLAType.HIVE) { - HMSExternalTable hiveTbl = (HMSExternalTable) tbl; - selectedPartitions = pruneHivePartitions(hiveTbl, filter, scan, ctx.cascadesContext); + if (tbl.supportPartitionPruned()) { + selectedPartitions = pruneExternalPartitions(tbl, filter, scan, ctx.cascadesContext); } else { // set isPruned so that it won't go pass the partition prune again selectedPartitions = new SelectedPartitions(0, ImmutableMap.of(), true); @@ -74,10 +71,11 @@ public Rule build() { }).toRule(RuleType.FILE_SCAN_PARTITION_PRUNE); } - private SelectedPartitions pruneHivePartitions(HMSExternalTable hiveTbl, + private SelectedPartitions pruneExternalPartitions(ExternalTable externalTable, LogicalFilter filter, LogicalFileScan scan, CascadesContext ctx) { Map selectedPartitionItems = Maps.newHashMap(); - if (CollectionUtils.isEmpty(hiveTbl.getPartitionColumns())) { + // todo: real snapshotId + if (CollectionUtils.isEmpty(externalTable.getPartitionColumns(OptionalLong.empty()))) { // non partitioned table, return NOT_PRUNED. // non partition table will be handled in HiveScanNode. return SelectedPartitions.NOT_PRUNED; @@ -85,8 +83,8 @@ private SelectedPartitions pruneHivePartitions(HMSExternalTable hiveTbl, Map scanOutput = scan.getOutput() .stream() .collect(Collectors.toMap(slot -> slot.getName().toLowerCase(), Function.identity())); - - List partitionSlots = hiveTbl.getPartitionColumns() + // todo: real snapshotId + List partitionSlots = externalTable.getPartitionColumns(OptionalLong.empty()) .stream() .map(column -> scanOutput.get(column.getName().toLowerCase())) .collect(Collectors.toList()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java index 0a2c69b68c1d33..d301f04ece0d95 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java @@ -36,6 +36,7 @@ import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.OptionalLong; /** * Logical file scan for external catalog. @@ -59,17 +60,11 @@ protected LogicalFileScan(RelationId id, ExternalTable table, List quali this.tableSnapshot = tableSnapshot; } - public LogicalFileScan(RelationId id, ExternalTable table, List qualifier, - SelectedPartitions selectedPartitions, - Optional tableSample, Optional tableSnapshot) { - this(id, table, qualifier, Optional.empty(), Optional.empty(), - selectedPartitions, tableSample, tableSnapshot); - } - public LogicalFileScan(RelationId id, ExternalTable table, List qualifier, Optional tableSample, Optional tableSnapshot) { + // todo: real snapshotId this(id, table, qualifier, Optional.empty(), Optional.empty(), - SelectedPartitions.NOT_PRUNED, tableSample, tableSnapshot); + table.initSelectedPartitions(OptionalLong.empty()), tableSample, tableSnapshot); } public SelectedPartitions getSelectedPartitions() {