From 12ffc5b49ea14e20b133faa56e28c24da46e233b Mon Sep 17 00:00:00 2001 From: daidai Date: Wed, 27 Nov 2024 00:43:36 +0800 Subject: [PATCH 1/5] mc && hudi --- .../doris/datasource/ExternalTable.java | 7 +- .../datasource/hive/HMSExternalTable.java | 30 +- .../datasource/hive/source/HiveScanNode.java | 2 +- .../doris/datasource/hudi/HudiUtils.java | 51 ++++ .../datasource/hudi/source/HudiScanNode.java | 71 ++--- .../maxcompute/MaxComputeExternalTable.java | 30 ++ .../maxcompute/source/MaxComputeScanNode.java | 44 ++- .../translator/PhysicalPlanTranslator.java | 5 +- .../expression/rules/PartitionPruner.java | 2 +- .../rules/rewrite/PruneFileScanPartition.java | 4 +- .../trees/plans/logical/LogicalFileScan.java | 2 +- .../trees/plans/logical/LogicalHudiScan.java | 2 +- .../doris/planner/SingleNodePlanner.java | 4 +- .../hudi/test_hudi_partition_prune.out | 151 ++++++++++ .../hudi/test_hudi_snapshot.out | Bin 348526 -> 350438 bytes .../test_max_compute_partition_prune.out | 125 ++++++++ .../hudi/test_hudi_partition_prune.groovy | 275 +++++++++++++++++ .../hudi/test_hudi_snapshot.groovy | 10 +- .../test_max_compute_partition_prune.groovy | 282 ++++++++++++++++++ 19 files changed, 1025 insertions(+), 72 deletions(-) create mode 100644 regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out create mode 100644 regression-test/data/external_table_p2/maxcompute/test_max_compute_partition_prune.out create mode 100644 regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy create mode 100644 regression-test/suites/external_table_p2/maxcompute/test_max_compute_partition_prune.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java index d82959954f2607..aebe980afb9f37 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java @@ -377,7 +377,7 @@ public TableIndexes getTableIndexes() { * @return */ public SelectedPartitions initSelectedPartitions(Optional snapshot) { - if (!supportPartitionPruned()) { + if (!supportInternalPartitionPruned()) { return SelectedPartitions.NOT_PRUNED; } if (CollectionUtils.isEmpty(this.getPartitionColumns(snapshot))) { @@ -410,11 +410,12 @@ public List getPartitionColumns(Optional snapshot) { } /** - * Does it support partition cpruned, If so, this method needs to be overridden in subclasses + * Does it support Internal partition pruned, If so, this method needs to be overridden in subclasses + * Internal partition pruned : Implement partition pruning logic without relying on external APIs. * * @return */ - public boolean supportPartitionPruned() { + public boolean supportInternalPartitionPruned() { return false; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 6d65f8bcdbccb7..34abb11665f02e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.hive; +import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.ListPartitionItem; @@ -31,6 +32,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.SchemaCacheValue; +import org.apache.doris.datasource.TablePartitionValues; import org.apache.doris.datasource.hudi.HudiUtils; import org.apache.doris.datasource.iceberg.IcebergUtils; import org.apache.doris.datasource.mvcc.MvccSnapshot; @@ -41,6 +43,7 @@ import org.apache.doris.mtmv.MTMVSnapshotIf; import org.apache.doris.mtmv.MTMVTimestampSnapshot; import org.apache.doris.nereids.exceptions.NotSupportedException; +import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions; import org.apache.doris.qe.GlobalVariable; import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; @@ -301,10 +304,33 @@ public List getPartitionColumns(Optional snapshot) { } @Override - public boolean supportPartitionPruned() { - return getDlaType() == DLAType.HIVE; + public boolean supportInternalPartitionPruned() { + return getDlaType() == DLAType.HIVE || getDlaType() == DLAType.HUDI; } + public SelectedPartitions initHudiSelectedPartitions(Optional tableSnapshot) { + if (getDlaType() != DLAType.HUDI) { + return SelectedPartitions.NOT_PRUNED; + } + + if (getPartitionColumns().isEmpty()) { + return SelectedPartitions.NOT_PRUNED; + } + TablePartitionValues tablePartitionValues = HudiUtils.getPartitionValues(tableSnapshot, this); + + + Map idToPartitionItem = tablePartitionValues.getIdToPartitionItem(); + Map idToNameMap = tablePartitionValues.getPartitionIdToNameMap(); + + Map nameToPartitionItems = Maps.newHashMapWithExpectedSize(idToPartitionItem.size()); + for (Entry entry : idToPartitionItem.entrySet()) { + nameToPartitionItems.put(idToNameMap.get(entry.getKey()), entry.getValue()); + } + + return new SelectedPartitions(nameToPartitionItems.size(), nameToPartitionItems, false); + } + + @Override public Map getNameToPartitionItems(Optional snapshot) { return getNameToPartitionItems(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java index 3a2a4d3eb5c6ae..99d3cd1cd21622 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java @@ -82,7 +82,7 @@ public class HiveScanNode extends FileQueryScanNode { // will only be set in Nereids, for lagency planner, it should be null @Setter - private SelectedPartitions selectedPartitions = null; + protected SelectedPartitions selectedPartitions = null; private boolean partitionInit = false; private final AtomicReference batchException = new AtomicReference<>(null); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java index d7803b1a516f9e..7db2fc13098d9b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java @@ -17,24 +17,35 @@ package org.apache.doris.datasource.hudi; +import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.StructField; import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.Type; +import org.apache.doris.datasource.TablePartitionValues; +import org.apache.doris.datasource.hive.HMSExternalTable; +import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper; +import org.apache.doris.datasource.hudi.source.HudiCachedPartitionProcessor; import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; +import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.Option; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; public class HudiUtils { @@ -231,4 +242,44 @@ private static Type handleUnionType(Schema avroSchema) { } return Type.UNSUPPORTED; } + + public static TablePartitionValues getPartitionValues(Optional tableSnapshot, + HMSExternalTable hmsTable) { + TablePartitionValues partitionValues = new TablePartitionValues(); + if (hmsTable.getPartitionColumns().isEmpty()) { + //isn't partition table. + return partitionValues; + } + + HoodieTableMetaClient hudiClient = HiveMetaStoreClientHelper.getHudiClient(hmsTable); + HudiCachedPartitionProcessor processor = (HudiCachedPartitionProcessor) Env.getCurrentEnv() + .getExtMetaCacheMgr().getHudiPartitionProcess(hmsTable.getCatalog()); + boolean useHiveSyncPartition = hmsTable.useHiveSyncPartition(); + + if (tableSnapshot.isPresent()) { + if (tableSnapshot.get().getType() == TableSnapshot.VersionType.VERSION) { + // Hudi does not support `FOR VERSION AS OF`, please use `FOR TIME AS OF`"; + return partitionValues; + } + String queryInstant = tableSnapshot.get().getTime().replaceAll("[-: ]", ""); + + partitionValues = + HiveMetaStoreClientHelper.ugiDoAs( + HiveMetaStoreClientHelper.getConfiguration(hmsTable), + () -> processor.getSnapshotPartitionValues( + hmsTable, hudiClient, queryInstant, useHiveSyncPartition)); + } else { + HoodieTimeline timeline = hudiClient.getCommitsAndCompactionTimeline().filterCompletedInstants(); + Option snapshotInstant = timeline.lastInstant(); + if (!snapshotInstant.isPresent()) { + return partitionValues; + } + partitionValues = + HiveMetaStoreClientHelper.ugiDoAs( + HiveMetaStoreClientHelper.getConfiguration(hmsTable), + () -> processor.getPartitionValues(hmsTable, hudiClient, useHiveSyncPartition)); + } + return partitionValues; + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java index a8f2a362bfde8d..14f3656fb69d42 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java @@ -21,6 +21,7 @@ import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.ListPartitionItem; import org.apache.doris.catalog.PartitionItem; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; @@ -29,12 +30,10 @@ import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.FileSplit; import org.apache.doris.datasource.TableFormatType; -import org.apache.doris.datasource.TablePartitionValues; import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper; import org.apache.doris.datasource.hive.HivePartition; import org.apache.doris.datasource.hive.source.HiveScanNode; import org.apache.doris.datasource.hudi.HudiUtils; -import org.apache.doris.planner.ListPartitionPrunerV2; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.qe.ConnectContext; import org.apache.doris.spi.Split; @@ -68,7 +67,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Locale; @@ -259,50 +257,29 @@ private void setHudiParams(TFileRangeDesc rangeDesc, HudiSplit hudiSplit) { rangeDesc.setTableFormatParams(tableFormatFileDesc); } - private List getPrunedPartitions( - HoodieTableMetaClient metaClient, Option snapshotTimestamp) throws AnalysisException { + private List getPrunedPartitions(HoodieTableMetaClient metaClient) { List partitionColumnTypes = hmsTable.getPartitionColumnTypes(); if (!partitionColumnTypes.isEmpty()) { - HudiCachedPartitionProcessor processor = (HudiCachedPartitionProcessor) Env.getCurrentEnv() - .getExtMetaCacheMgr().getHudiPartitionProcess(hmsTable.getCatalog()); - TablePartitionValues partitionValues; - if (snapshotTimestamp.isPresent()) { - partitionValues = processor.getSnapshotPartitionValues( - hmsTable, metaClient, snapshotTimestamp.get(), useHiveSyncPartition); - } else { - partitionValues = processor.getPartitionValues(hmsTable, metaClient, useHiveSyncPartition); - } - if (partitionValues != null) { - // 2. prune partitions by expr - partitionValues.readLock().lock(); - try { - Map idToPartitionItem = partitionValues.getIdToPartitionItem(); - this.totalPartitionNum = idToPartitionItem.size(); - ListPartitionPrunerV2 pruner = new ListPartitionPrunerV2(idToPartitionItem, - hmsTable.getPartitionColumns(), columnNameToRange, - partitionValues.getUidToPartitionRange(), - partitionValues.getRangeToId(), - partitionValues.getSingleColumnRangeMap(), - true); - Collection filteredPartitionIds = pruner.prune(); - this.selectedPartitionNum = filteredPartitionIds.size(); - // 3. get partitions from cache - String dbName = hmsTable.getDbName(); - String tblName = hmsTable.getName(); - String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat(); - String basePath = metaClient.getBasePathV2().toString(); - Map partitionIdToNameMap = partitionValues.getPartitionIdToNameMap(); - Map> partitionValuesMap = partitionValues.getPartitionValuesMap(); - return filteredPartitionIds.stream().map(id -> { - String path = basePath + "/" + partitionIdToNameMap.get(id); - return new HivePartition( - dbName, tblName, false, inputFormat, path, partitionValuesMap.get(id), - Maps.newHashMap()); - }).collect(Collectors.toList()); - } finally { - partitionValues.readLock().unlock(); - } - } + this.totalPartitionNum = selectedPartitions.totalPartitionNum; + Map prunedPartitions = selectedPartitions.selectedPartitions; + this.selectedPartitionNum = prunedPartitions.size(); + + String dbName = hmsTable.getDbName(); + String tblName = hmsTable.getName(); + String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat(); + String basePath = metaClient.getBasePathV2().toString(); + + List hivePartitions = Lists.newArrayList(); + prunedPartitions.forEach( + (key, value) -> { + String path = basePath + "/" + key; + hivePartitions.add(new HivePartition( + dbName, tblName, false, inputFormat, path, + ((ListPartitionItem) value).getItems().get(0).getPartitionValuesAsStringList(), + Maps.newHashMap())); + } + ); + return hivePartitions; } // unpartitioned table, create a dummy partition to save location and inputformat, // so that we can unify the interface. @@ -392,7 +369,7 @@ public List getSplits() throws UserException { if (!partitionInit) { prunedPartitions = HiveMetaStoreClientHelper.ugiDoAs( HiveMetaStoreClientHelper.getConfiguration(hmsTable), - () -> getPrunedPartitions(hudiClient, snapshotTimestamp)); + () -> getPrunedPartitions(hudiClient)); partitionInit = true; } List splits = Collections.synchronizedList(new ArrayList<>()); @@ -454,7 +431,7 @@ public boolean isBatchMode() { // Non partition table will get one dummy partition prunedPartitions = HiveMetaStoreClientHelper.ugiDoAs( HiveMetaStoreClientHelper.getConfiguration(hmsTable), - () -> getPrunedPartitions(hudiClient, snapshotTimestamp)); + () -> getPrunedPartitions(hudiClient)); partitionInit = true; } int numPartitions = ConnectContext.get().getSessionVariable().getNumPartitionsInBatchMode(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java index dc3232f79f5f71..fdd9f977c9119c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java @@ -21,6 +21,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MapType; +import org.apache.doris.catalog.PartitionItem; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.StructField; import org.apache.doris.catalog.StructType; @@ -50,7 +51,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; +import java.util.OptionalLong; import java.util.stream.Collectors; /** @@ -71,6 +74,14 @@ protected synchronized void makeSureInitialized() { } } + public boolean supportInternalPartitionPruned() { + return true; + } + + + public List getPartitionColumns(OptionalLong snapshotId) { + return getPartitionColumns(); + } public List getPartitionColumns() { makeSureInitialized(); @@ -79,6 +90,23 @@ public List getPartitionColumns() { .orElse(Collections.emptyList()); } + public Map getNameToPartitionItems(OptionalLong snapshotId) { + if (getPartitionColumns().isEmpty()) { + return Collections.emptyMap(); + } + + TablePartitionValues tablePartitionValues = getPartitionValues(); + + Map idToPartitionItem = tablePartitionValues.getIdToPartitionItem(); + Map idToNameMap = tablePartitionValues.getPartitionIdToNameMap(); + + Map nameToPartitionItem = Maps.newHashMapWithExpectedSize(idToPartitionItem.size()); + for (Entry entry : idToPartitionItem.entrySet()) { + nameToPartitionItem.put(idToNameMap.get(entry.getKey()), entry.getValue()); + } + return nameToPartitionItem; + } + public TablePartitionValues getPartitionValues() { makeSureInitialized(); Optional schemaCacheValue = getSchemaCacheValue(); @@ -110,6 +138,8 @@ private TablePartitionValues loadPartitionValues(MaxComputeSchemaCacheValue sche /** * parse all values from partitionPath to a single list. + * In MaxCompute : Support special characters : _$#.!@ + * Ref : MaxCompute Error Code: ODPS-0130071 Invalid partition value. * * @param partitionColumns partitionColumns can contain the part1,part2,part3... * @param partitionPath partitionPath format is like the 'part1=123/part2=abc/part3=1bc' diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java index e0b84b0860e551..d3f02e4207b80a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java @@ -40,6 +40,7 @@ import org.apache.doris.datasource.maxcompute.MaxComputeExternalTable; import org.apache.doris.datasource.maxcompute.source.MaxComputeSplit.SplitType; import org.apache.doris.datasource.property.constants.MCProperties; +import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions; import org.apache.doris.nereids.util.DateUtils; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.spi.Split; @@ -50,6 +51,7 @@ import org.apache.doris.thrift.TTableFormatFileDesc; import com.aliyun.odps.OdpsType; +import com.aliyun.odps.PartitionSpec; import com.aliyun.odps.table.TableIdentifier; import com.aliyun.odps.table.configuration.ArrowOptions; import com.aliyun.odps.table.configuration.ArrowOptions.TimestampUnit; @@ -60,6 +62,7 @@ import com.aliyun.odps.table.read.split.impl.IndexedInputSplit; import com.google.common.collect.Maps; import jline.internal.Log; +import lombok.Setter; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -86,14 +89,26 @@ public class MaxComputeScanNode extends FileQueryScanNode { private static final LocationPath ROW_OFFSET_PATH = new LocationPath("/row_offset", Maps.newHashMap()); private static final LocationPath BYTE_SIZE_PATH = new LocationPath("/byte_size", Maps.newHashMap()); + @Setter + private SelectedPartitions selectedPartitions = null; + + public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, + SelectedPartitions selectedPartitions, boolean needCheckColumnPriv) { + this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE, + selectedPartitions, needCheckColumnPriv); + } + public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, boolean needCheckColumnPriv) { - this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE, needCheckColumnPriv); + this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE, + SelectedPartitions.NOT_PRUNED, needCheckColumnPriv); } public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, - StatisticalType statisticalType, boolean needCheckColumnPriv) { + StatisticalType statisticalType, SelectedPartitions selectedPartitions, + boolean needCheckColumnPriv) { super(id, desc, planNodeName, statisticalType, needCheckColumnPriv); table = (MaxComputeExternalTable) desc.getTable(); + this.selectedPartitions = selectedPartitions; } @Override @@ -117,10 +132,25 @@ private void setScanParams(TFileRangeDesc rangeDesc, MaxComputeSplit maxComputeS rangeDesc.setSize(maxComputeSplit.getLength()); } - void createTableBatchReadSession() throws UserException { + boolean createTableBatchReadSession() throws UserException { List requiredPartitionColumns = new ArrayList<>(); List orderedRequiredDataColumns = new ArrayList<>(); + List requiredPartitionSpecs = new ArrayList<>(); + //if requiredPartitionSpecs is empty, get all partition data. + if (!table.getPartitionColumns().isEmpty() && selectedPartitions != SelectedPartitions.NOT_PRUNED) { + this.totalPartitionNum = selectedPartitions.totalPartitionNum; + this.selectedPartitionNum = selectedPartitions.selectedPartitions.size(); + + if (selectedPartitions.selectedPartitions.isEmpty()) { + //no need read any partition data. + return false; + } + selectedPartitions.selectedPartitions.forEach( + (key, value) -> requiredPartitionSpecs.add(new PartitionSpec(key)) + ); + } + Set requiredSlots = desc.getSlots().stream().map(e -> e.getColumn().getName()).collect(Collectors.toSet()); @@ -150,6 +180,7 @@ void createTableBatchReadSession() throws UserException { .withSettings(mcCatalog.getSettings()) .withSplitOptions(mcCatalog.getSplitOption()) .requiredPartitionColumns(requiredPartitionColumns) + .requiredPartitions(requiredPartitionSpecs) .requiredDataColumns(orderedRequiredDataColumns) .withArrowOptions( ArrowOptions.newBuilder() @@ -162,7 +193,7 @@ void createTableBatchReadSession() throws UserException { } catch (java.io.IOException e) { throw new RuntimeException(e); } - + return true; } @Override @@ -430,7 +461,10 @@ public List getSplits() throws UserException { if (desc.getSlots().isEmpty() || odpsTable.getFileNum() <= 0) { return result; } - createTableBatchReadSession(); + + if (!createTableBatchReadSession()) { + return result; + } try { String scanSessionSerialize = serializeSession(tableBatchReadSession); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 56ae65ec722941..5a9370246fd461 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -579,7 +579,8 @@ public PlanFragment visitPhysicalFileScan(PhysicalFileScan fileScan, PlanTransla } else if (table instanceof TrinoConnectorExternalTable) { scanNode = new TrinoConnectorScanNode(context.nextPlanNodeId(), tupleDescriptor, false); } else if (table instanceof MaxComputeExternalTable) { - scanNode = new MaxComputeScanNode(context.nextPlanNodeId(), tupleDescriptor, false); + scanNode = new MaxComputeScanNode(context.nextPlanNodeId(), tupleDescriptor, + fileScan.getSelectedPartitions(), false); } else if (table instanceof LakeSoulExternalTable) { scanNode = new LakeSoulScanNode(context.nextPlanNodeId(), tupleDescriptor, false); } else { @@ -652,6 +653,8 @@ public PlanFragment visitPhysicalHudiScan(PhysicalHudiScan fileScan, PlanTransla if (fileScan.getTableSnapshot().isPresent()) { ((FileQueryScanNode) scanNode).setQueryTableSnapshot(fileScan.getTableSnapshot().get()); } + HudiScanNode hudiScanNode = (HudiScanNode) scanNode; + hudiScanNode.setSelectedPartitions(fileScan.getSelectedPartitions()); return getPlanFragmentForPhysicalFileScan(fileScan, context, scanNode, table, tupleDescriptor); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java index fac1a7f82d2cfb..ed783aa3d5a9b6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java @@ -55,7 +55,7 @@ public class PartitionPruner extends DefaultExpressionRewriter { /** Different type of table may have different partition prune behavior. */ public enum PartitionTableType { OLAP, - HIVE + EXTERNAL } private PartitionPruner(List partitions, Expression partitionPredicate) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java index 4bbb0a8aa76270..ba8b270d1f397d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java @@ -59,7 +59,7 @@ public Rule build() { ExternalTable tbl = scan.getTable(); SelectedPartitions selectedPartitions; - if (tbl.supportPartitionPruned()) { + if (tbl.supportInternalPartitionPruned()) { selectedPartitions = pruneExternalPartitions(tbl, filter, scan, ctx.cascadesContext); } else { // set isPruned so that it won't go pass the partition prune again @@ -91,7 +91,7 @@ private SelectedPartitions pruneExternalPartitions(ExternalTable externalTable, Map nameToPartitionItem = scan.getSelectedPartitions().selectedPartitions; List prunedPartitions = new ArrayList<>(PartitionPruner.prune( - partitionSlots, filter.getPredicate(), nameToPartitionItem, ctx, PartitionTableType.HIVE)); + partitionSlots, filter.getPredicate(), nameToPartitionItem, ctx, PartitionTableType.EXTERNAL)); for (String name : prunedPartitions) { selectedPartitionItems.put(name, nameToPartitionItem.get(name)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java index 96b8e032d11274..0822bfbc5fa830 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java @@ -42,7 +42,7 @@ */ public class LogicalFileScan extends LogicalCatalogRelation { - protected final SelectedPartitions selectedPartitions; + protected SelectedPartitions selectedPartitions; protected final Optional tableSample; protected final Optional tableSnapshot; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java index 629690889432b3..51e68eb07631ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java @@ -84,7 +84,7 @@ protected LogicalHudiScan(RelationId id, ExternalTable table, List quali public LogicalHudiScan(RelationId id, ExternalTable table, List qualifier, Optional tableSample, Optional tableSnapshot) { this(id, table, qualifier, Optional.empty(), Optional.empty(), - SelectedPartitions.NOT_PRUNED, tableSample, tableSnapshot, + ((HMSExternalTable) table).initHudiSelectedPartitions(tableSnapshot), tableSample, tableSnapshot, Optional.empty(), Optional.empty()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index d94ad0a2552240..a82ab4248ec600 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -76,7 +76,6 @@ import org.apache.doris.datasource.trinoconnector.source.TrinoConnectorScanNode; import org.apache.doris.qe.ConnectContext; import org.apache.doris.rewrite.mvrewrite.MVSelectFailedException; -import org.apache.doris.statistics.StatisticalType; import org.apache.doris.thrift.TPushAggOp; import com.google.common.base.Preconditions; @@ -1994,8 +1993,7 @@ private PlanNode createScanNode(Analyzer analyzer, TableRef tblRef, SelectStmt s break; case MAX_COMPUTE_EXTERNAL_TABLE: // TODO: support max compute scan node - scanNode = new MaxComputeScanNode(ctx.getNextNodeId(), tblRef.getDesc(), "MCScanNode", - StatisticalType.MAX_COMPUTE_SCAN_NODE, true); + scanNode = new MaxComputeScanNode(ctx.getNextNodeId(), tblRef.getDesc(), true); break; case ES_EXTERNAL_TABLE: scanNode = new EsScanNode(ctx.getNextNodeId(), tblRef.getDesc(), true); diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out new file mode 100644 index 00000000000000..2e827d3fc5b9cf --- /dev/null +++ b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out @@ -0,0 +1,151 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !one_partition_1_1 -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 + +-- !one_partition_2_1 -- +4 David 2025 +5 Eva 2025 + +-- !one_partition_3_all -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 +4 David 2025 +5 Eva 2025 + +-- !one_partition_4_all -- +5 Eva 2025 + +-- !one_partition_5_1 -- +3 Charlie 2024 + +-- !two_partition_1_1 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 + +-- !two_partition_2_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_3_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !two_partition_4_all -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 1 +7 Grace EU 1 +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_5_1 -- + +-- !two_partition_6_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !three_partition_1_1 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 + +-- !three_partition_2_1 -- +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 + +-- !three_partition_3_3 -- +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_4_2 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +6 Frank US 2025 Q1 + +-- !three_partition_5_all -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +4 David US 2024 Q2 +5 Eva US 2024 Q2 +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_6_1 -- +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 + +-- !three_partition_7_7 -- +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_8_2 -- +7 Grace US 2025 Q2 + +-- !one_partition_6_0 -- + +-- !two_partition_7_0 -- + +-- !two_partition_8_0 -- + +-- !three_partition_9_0 -- + +-- !three_partition_10_0 -- + +-- !three_partition_11_0 -- + +-- !time_travel_two_partition_1_3 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 1 + +-- !time_travel_two_partition_2_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !time_travel_two_partition_3_1 -- +4 David US 2 +5 Eva US 2 + +-- !time_travel_two_partition_4_0 -- + +-- !time_travel_two_partition_5_0 -- + +-- !time_travel_two_partition_6_1 -- +1 Alice US 1 + diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_snapshot.out b/regression-test/data/external_table_p2/hudi/test_hudi_snapshot.out index efad67ffbfa8c407fa42d0dd28f569f492bd1d12..2ce13dc272b739a7c028e2f8dbbe303148e312e5 100644 GIT binary patch delta 5036 zcmeHLU5H#o7WUrTZRo)rGmeh)8z-HaXf)N<@4Z!{pi$90WYxHnuwvp^x2k&5rl)(- zJ=rCOWdt7r{_A}xBBHqLgLw!We?o)oZbD>TaFqyZ@ImkgRCe)i9)-y6>Fy*%@gXd` zZXO11)j9Xv+g0aObH4MP`}OZH`~0QL9(*;}+?$=ecu)3N)(ck zIwY8)LNUXYqS=O(zg#?*7Yj;Kj7fpQg(*XXAj+mGnWAb6vl5qt;J8rM)ut=(l2V1W z8@FwHV9S=t3(9t~^39Py_rq9;V1jGvlx&+f-o2QB%*Nj1ItF43i3K-XQjp8nln_#qk8 zT1`bsNWqg>z^c`HZML$o+q&WuAtj<1Wh^7l56+pyIxMt7B5@3HmSCi{P$<<{#}qTJ zQqys3!OhLo^c*ZuOe+g*lhF%qw%!0|?kU#i7b^8y+xlBwt+wSJr*{;$gEHT!82A+v zC6cXvYdC_ivt759iqmy#$2aOmT5oh#J3FjRdUvDZGX0OC0HXO*ZX)}5`^Xx@xw8pJ zmT-eOXDO1x8sy5>>ezunr2d7&x&A9_v+lFV6L*wR}}s62cnS)I3uex;fSU&oL0;dq^yz%r!FlkCR8eL zc6@x&KX)*u{0M;{FM{AhT2j;c8J zeTRZ8J^pn#FoC&{iNIx)2uBewON^wE5-CoSL^-RiHvV5PM-yYVZWg-d>2#yI=j?zf z_2kK5%pd$LAjRn&^||?Gy*9qJQmyJ!$JPA@w(6Q|Hns1&I~WYG-}QPh5s+;0y7jHA zCWCdk?8wc-5fQ@wZ9ZI^-L!I3>sPx%na_IPA8R!Sg2#LPu@lktUOp7!*1)yFI}`q? z!{KB|di)_9v}JbR;N%^o$1-EE69R(J5EDrO2OMZ+XOdL1lyRmCq(@=toJ|wN2!I6F zR-lwyf^^IiKny1}h1dMilhNS5qtij_=ydpLpMUH(;o#MTk{NnVfa72ID(Ii;CL};p z92-s%iD?2GLP99(G_tWQ$IduN4ubFufrl}dh$M=~e(4Xv$TDCp1J*KNoiBjpzyE7+ zO{8XG|LkP2*7uL(`=$s+m=?-br8u=1Q4F<&t1>|<7BSK!HI^_cG-sUzmuC`Ob`rdG z`0eQB$m9LNQ0v%y_-5YUc_JA0tNQaBcv(<}6Rc63v}=KN5A=pUV3~rD&w}@L!@QdRuM!H~Djb%|0M~t)nM%-}U+JALfSsL%t`ZQ&w}} zp$cjOPoRZNEnuxmlqf5j@E90im+wg_plg*P_Nz;|WxluEpe{G4=S_p^mt<~To{*nn ze5r^PAxzP%-Y=^#`_(?3?{9kUMdwTv29+zkh delta 11237 zcmeHNdypJQea_78$zH2)ES(}FTgH8*zO5>iy9 z$k($gdvzv-{*P7j+SS}nPxtTf{l4$_gVP&-_Qx9^`ZMd`m7SktTXs+?O3L>*Pol_S zJdPbs1a-oIDu-~zeL|TEU43TQx^4LqZd9d^2Ap{%Y%X!RL#adDBZSS+aE6kK7lw>@ zrFL^sm3So#OQSD6^EBP-uldI>;{w|V_G1G$6uO@`(!YNKS9R>m+l!6&wc5Da*Y5r} z;f@=W?wpj3rS^QaF*o_<@?<=pWi4f{ZZ?O-?zODdvsQcZ?$Y}v{Wc%4}9w37iw8!uG(Hq)Z~oA{EF}~Et-DL8lGZ2@_gnd4v!@c zX%stB`iT=$u7V(-e&Xu-CyTo(GO1S^)mFPH+tsXLhSjRp7rFzC7x7kRpv`KGM+8`_ zS3YZvS)pF{fKBw*hX#sUWUIU%FJ%6(H~0ftvt2$Qo5xgB?-?j;vN`WeY@gKkuC>Qg z8A(Q^>%;*wV^e_>@icL$Br1xTpZX*)KUHn3`q750Axehw(@+aviGt*bSmMOq92J%Gy~&pPjWf6jRx1>oYeM1`8e``nmh< zu})(z-MYUzxA^O4P;5wIkCp_X85ZeO-nFPt({DL@6hV z1Z*=Qc*{hewTpvJ*W%;8r8=24kC`DxwJb~er-pj^>^JQ3KUiNN8Qd?`1Ue9k=@mb{=B8vH!&kQvIc zRVeX@=k~=NL=hYNG3*n=Wd|p0d!PTnbPMO-$|h#J-&EiI(lfU|eeha*>iw>8C_%_+ zO1PV`Y*<7!t}e)~X)R=}YP)&@#)U|cHH=G6o}91NRDbB$FPB<~D~WcTf$fD*aDAp; z+&bk1ZiO-GhGbRTpdPIdAvoh5`l+FYAB9{+JaPn!jo}l_o5$g$K^g>#66LdKwY8}e z2y7T`|B^2>vt;pAcZ*c5io5!kJ|v_4WJ(dZ0ZEjj5)n9@at{V6LdO?w9P=<#O6b#f6bG%kK7LPe za424^wHKQgjlrXS1twmzhdbmmMg8+hy8_o-Y|7W0;XZjxm2b(Cs+JEIH|Pm>U|N6u zbH%Y`UkMf>x+Y2}pyEvef(SYfac#RyzR+K#oD&)c}#d*BH=~Cceo4_N2G)~(vPK#65m7SD0x@F z^Ldc?fdf;3PkEu|$S_Lr!mdbT*H?r@xJ`)2Be5@?gu&r>fHjGN1o;Qc6RR*Ggmc^$ z5rx&`)JbOjlsMcEr6Xd7&%#4s;Y!3QHTNY79&5k@%nygeN6bNT&cl)Cr7Dj7lJ~)v z4_hlQAGUwFPT%n-_Ruay!lPlyWz=UPj2%QLtd1*U1b~=0f+qq`@dZ=*Q9iHO)eg=Sxhxcu-1ZtpS9XDckW2d%M(?TX%T!P86QLNnMb64aPw=hC3lh=V8@L-0oAR^qK{eCr#@2{ zpY#CGD1o8DaQkr!B|*h!A#<5Nv#~flhZqy2EO2l{05S_7r4e^L#o=hm1#%Iq9jd8n zIeKZ$R~J@0mW--3lS%sg#|wiB>70K5t!r**szxI9%OAIf^wwV$M)G(`zgU>kU-%p3 z_VIChWJh=uC^m`1KnB7ANP!I|1cUOJ>$t8@SRBNGpX$?71A{=J4kIO(rjZw>2%<#7 zp9I1KTmt??ViCI$myt+eN_ZCsSk8fs`b0l9ZjUYh=I6+ghp0?>iMzeU^BmVZLc$py z7-I@tfV<%0WOV1~8mxA5Y$uN8zx_iT2k$pVg>B(2IIQ{Z7w>AAU)v8%oxj1I0COj* zwJxU3QR>NTv6e@zM5U?$;5#!}Z6r!1{mBO?<^=vTYv6BdJ>)8wOLOG9m6HmmArrwW z6QEvD0p!u2imcIT54lt%+;_Yn@(?{R=@Ii0LR2DE$l>~aq9?XkgL@>#w<_y2)$zqD zVm!WX1827{7;%=SNKt(zKm)%*^}52qh<;~Oq@#Cai;cPR4U5gWF3#*?k9~`=Y;{g; z-c)pnALh{G@oTJ+<&Yxer9jgmq6y-qqKI4oXJHy6+7KcWFCRlciiH`zZnyy2O}LsF zZcBF0b7t86BZUK1a;%SF4{X^|M-!brXq7sTT((A+_uG}NH#biLI2qKPPe*U)CST2B zg_iX7mc6OS;dxsA8^GLEhemvf_H_ zgOhVvKE3ijbE;(k=6&-jJip?0PTfD&`T2KtkGuzkL3w|*)#_Ciy@a$sThfoVfiM|> zv_2aS?9d+_wKvDimBgpQNmD;Udcpk4I0_t>5HEDySR$RS9u(4DK7IYrBU@O&Tr4(L zc|CGL#n-QF7#P#h!`ALK2Y{Ta3;LA<_NLN>z}40dH42*x0rxtmyvdLMwY^?{e%c;h zk;m<42lR)TJ-RDA>IwkCP$kp_7?uH2oR2Jq)J2?Bd4WucO9K|^C;qdz39v8@fR7{O z6-kldcx*su%$dXRLTOBVWCoL0T$l9n3Ss)d2as1T;TebpL?D@7z@FxL7 zczomiINpQZ6w)^ufV+&nyO|(^&HT)?2{KEjsK$0j_tZU}=$^iH>eg4k1sHyM=lgmw z4N!!-$VhNp0AN(p7i=v*=k>vo>XzJ*ahRFOx{~_*f;Bj>ryi+hXX1O?i%9)}mXWo! zOlgICE(40s|NhmYnYk5!kQau%%Ml2Q20Sns1(8psDf)_o z)C{NJ*c$0W=JY6(*Y>k5Sftb@=Vx?Z3H-S}j2=bJ7h=lQIyYMO2y*f1b` zQ4#RK5zE&PtrSS-k*kZFR+s39GR#q1AN-%7NLLVlT$|UeG!CyIU+bAI#`p*<32$gizQY zW@ocTqpI}jm+Z|21Rs62U7XMl+17?x>IxBbK@+8vQx8^QzBwAwqac@PNH`0agd1{T z1p_isr?`&iO3(Mhgn0=yZnzOqrIe;5ijz3PtRklX&iRg@L^_z$gacim7$ERNQ$ZMb z7lb6x^l{|mOL$005EMz?YmUgk9UXD$3PtNctV|@*Vp|k2PWO7d#h8rkB8cxZda+YD)bo&XJRok9QskLx9%uzE1+D}KfB5vDj;0z^FIOw!#c;`M%LaSYl+EGs7v&@zpyu0$Mo5+ z6^4rfY@RW_9i2z^Z<X*!r(;|#GWfU@~7MMBfE;l^%o^c zPxy%K>vVBUp9>1Rr{1ojtSRTqhk*iAyWiFhWl7VJANT8ncf;I35$H#sx2HPy%8_Z; z4LG4doqj@*fYJp1@B6M3fTaw8S^z$%KK1YRptc^i2B$FljjUdc&Afv?JD~uVaM>fv zXdr+v_B>#XY3pu#_vN5`RQVlQQ=k32!eHn8w}%U$c{=AmF=Z36vezBB+}7uwvqo0F z`YHR-ft7EbFN6hs?s|J1}biT3zY z_R8Z=6@Kw1{kG=|qdPB=9#;C)^M(Cy@WcJC{02Y#20#1;Kb%V?{}(^3-P88Yl_$Sh z_^2U>uh7?j+upMBi8F;=MNP&EmrfFQqt6>G@f$4h|4Wv5#jwQNmH`t|XiW)NR|fbI zhk!;Q3Um}tBnI*YBZUlq1rv!c0^n%md~m3qQF|$HL7IpNN*d~yNZ~dgJQ!gl07}Vm zJAPNDGIUaiHWFYKTyl!r+`vx)l29j}4dYyM!VeRIn_ZVh%rJt1xi4ap5I12?oInoX zF^G66W3Y3957k-vUTO$u4TgB=1N(|!-ZyZ5oqi@Lj_h(dIASiKY=PQ#M38{hP9yL* zE(w8~e8`g+3`+uS{m>c=jp|bhXzWRkafi795SB|K0I?*2EGAtgK@=*~PN0ch7rZ#Q z6_6KHpB^fXE?>%nN*rR652pAvat}-~MLADrK&^NXqG&(4*Qi~MBI32udg5+;9K&u1 z;a)AB5Btu^t06Zpb%h#83iZ5#Q1?M|G1aqdf&k0uz4vC-hJNMy_NMW!IGW3HMo=;^ z7nMlcFn>2gUwYz?ikpfaWnND2mR(wL!Qy(W$vtqbzDpskni6{|l|E&W1p666zvm2B z62%v)eHFMqh?J0Z77hdAf7%`x#{*CHq~-2|``|U`!mA*^bbj?5E#A5WuJORR!iKf^ zkPoQ{tESI9UEB=%8RBaW@QsEkl!X2T@)is#Dwz~zt>DlefbJm+G8!mT;D5*3U@ht= zzF`k;X)Uz?G|ZcyG#Xsr^QjB5_(#QIP2Op((O=Tw;r2E`hLvZ*oA>0fE)#c~I$3^4 zQ#Lex(H<-^<_Uf3$>NkH^ns5Qcda};HSnd&^oO?$4DS+0LF)#Rr<6*FSrNJ>xD1%7 zzcE^prk)UzfD$+6xCvrH6r;*@q1MpI@Q@*RNU)Cr;uA|FU-%M59Bh%om|==63@(`B zr97mB3L@knvsoV}bA*Z+vxOjR84qcHO$&>gNO8dSU2VO0&pKRq*sr$^_e_~Hj~aTE5tmd!8@XTA4Apr9zd6BQZb##+aw`y#)X zH=L`v0+9+nJX!2A9uh2VS0Kri*yfyURPXQhPZ(F~8cfyncI-h7UyVVa_cwP`3x<|G z9To>KH^y<3tS{u`$3Q?q9O`xNwzf`$6`#YMUZ<9Xv=U$)8S3UrVR$N$zVuPY`3j68 zSDsT)jKlY0?ou8hm zOz&yU*VGbJ(PQ1I>DhEH%g)O7JYKJ%<$v2FMTqmkt6dArv76!_w7MXK`GNHe5aOXI zgO!0m>m^`UUe~n%D*SaivVxS*%o>Ks@6LTItE+akuDV4%qE;L2AbpOBz6}W7diK0M ztRLD@q?g?YO*Ct_^`2+#!L?mIc@H`?YWnP1^n5^gdF34tL=RabQy|0zsBeTM7Ngqr zO&*J4&+)y4C<^CGMCa^{gI9O;4`V2OJr3O#Abi=Y|3e<4{-0QDZPt(cN0D#62}+Y` zWN4N5)>N0{>CWXL8P8)h$B)@V9WuUM{}(B)(;wP0fc_~`V#q4ogXR>aV32vPwj%qY z0|d1>4j|-wefIO#M*aOQ1BaF`<@S2LsTJ-F{888HC8AHrqw{-^1XF-SC^ z_c)K>8L^L5^|6+aqnVTe#G8P2p4+rz>s+;&v&(bM>|}es-;$7zDy|`wwrG3Qu8ed^ z)AAv>YaSfB%V%^K{e@rJLq+fzU~!MwQ;%IYl1d6bBr%Ml4|Kx?WDRK;7Q-+3GK!&t zz0UqmR8_b0rO&KP3rH^Jw=#5R_1n20z`o8_kpm&Mln+%Q4mA@ix!7{!^>gvln=xWKuW+a+Xxi&j(uZKI+p*u(*m*5ZCLdw*yC)p~Meo6J zyFpP#!Z5c#a0CaVj7;>NK^?pm6w#XOcA>Qvdi^tXF5-_ofOSTB7#6P#7O~-HI=~)$u=sV1XV-rl0s3 zqQ8&kuQWw(8i+km+F${lP(=`MdCa&_AQ)NR0`r#Fw`3h_WG8DX={B4!8TIou$Lq5) zj%0A#7Jct_i>^Jq*piL%ktTFkePy$--43Wee!DfTe|69r4ba0tsDw%Zl@4ykB$n6k z#cl$5J91U*Dc3xrUs6Qa(DEyV%~xIA6h;w~e^?mPFHPE1ANYZ_W@r}qHt!(o@7voW zn;rV(h9V8Uuw#8_5`BB4N6|K73IIUxKpK-nwE{?lejE=1kcTF=NU+tz$hyLIe>6uA zWVMFIhrX%sLQw&oT^Z?o>tl3$HcOVuH#D=Xe^|MUco=?&ac#6c-Z_14qC-YYEBoAm zf8Mq%5*C7k^&I5~@FInZ1+5Lhpi!8}FmM&R{7}xsPy}Uw!YE{^aW*6*e8wB`7>-B5 z@DkHM1AItgCBVZoydMW8i>4jZz7xk1%}*TdH34ozLuJfB4?FZmkiIBb(6%4~=A&WB oOT$Qz6lQ|^7#QZ1;pZU3;x= 3 ORDER BY id;""" + + def two_partition_1_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 1 ORDER BY id;""" + def two_partition_2_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" + def two_partition_3_2 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' ORDER BY id;""" + def two_partition_4_all = """SELECT id,name,part1,part2 FROM two_partition_tb ORDER BY id;""" + def two_partition_5_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 2 AND id > 5 ORDER BY id;""" + def two_partition_6_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" + + def three_partition_1_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" + def three_partition_2_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2025 AND part3 = 'Q2' ORDER BY id;""" + def three_partition_3_3 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 ORDER BY id;""" + def three_partition_4_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q1' ORDER BY id;""" + def three_partition_5_all = """SELECT id,name,part1,part2,part3 FROM three_partition_tb ORDER BY id;""" + def three_partition_6_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" + def three_partition_7_7 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part2 = 2025 ORDER BY id;""" + def three_partition_8_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q2' AND id BETWEEN 6 AND 10 ORDER BY id;""" + + + + qt_one_partition_1_1 one_partition_1_1 + explain { + sql("${one_partition_1_1}") + contains "partition=1/2" + } + + qt_one_partition_2_1 one_partition_2_1 + explain { + sql("${one_partition_2_1}") + contains "partition=1/2" + } + + qt_one_partition_3_all one_partition_3_all + explain { + sql("${one_partition_3_all}") + contains "partition=2/2" + } + + qt_one_partition_4_all one_partition_4_all + explain { + sql("${one_partition_4_all}") + contains "partition=2/2" + } + + qt_one_partition_5_1 one_partition_5_1 + explain { + sql("${one_partition_5_1}") + contains "partition=1/2" + } + + + qt_two_partition_1_1 two_partition_1_1 + explain { + sql("${two_partition_1_1}") + contains "partition=1/4" + } + + qt_two_partition_2_1 two_partition_2_1 + explain { + sql("${two_partition_2_1}") + contains "partition=1/4" + } + + qt_two_partition_3_2 two_partition_3_2 + explain { + sql("${two_partition_3_2}") + contains "partition=2/4" + } + + qt_two_partition_4_all two_partition_4_all + explain { + sql("${two_partition_4_all}") + contains "partition=4/4" + } + + qt_two_partition_5_1 two_partition_5_1 + explain { + sql("${two_partition_5_1}") + contains "partition=1/4" + } + + qt_two_partition_6_1 two_partition_6_1 + explain { + sql("${two_partition_6_1}") + contains "partition=1/4" + } + + + + qt_three_partition_1_1 three_partition_1_1 + explain { + sql("${three_partition_1_1}") + contains "partition=1/10" + } + + qt_three_partition_2_1 three_partition_2_1 + explain { + sql("${three_partition_2_1}") + contains "partition=1/10" + } + + qt_three_partition_3_3 three_partition_3_3 + explain { + sql("${three_partition_3_3}") + contains "partition=3/10" + } + + qt_three_partition_4_2 three_partition_4_2 + explain { + sql("${three_partition_4_2}") + contains "partition=2/10" + } + + qt_three_partition_5_all three_partition_5_all + explain { + sql("${three_partition_5_all}") + contains "partition=10/10" + } + + qt_three_partition_6_1 three_partition_6_1 + explain { + sql("${three_partition_6_1}") + contains "partition=1/10" + } + + qt_three_partition_7_7 three_partition_7_7 + explain { + sql("${three_partition_7_7}") + contains "partition=7/10" + } + + qt_three_partition_8_2 three_partition_8_2 + explain { + sql("${three_partition_8_2}") + contains "partition=2/10" + } + + + // 0 partitions + def one_partition_6_0 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2023 ORDER BY id;""" + qt_one_partition_6_0 one_partition_6_0 + explain { + sql("${one_partition_6_0}") + contains "partition=0/2" + } + + def two_partition_7_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'CN' AND part2 = 1 ORDER BY id;""" + qt_two_partition_7_0 two_partition_7_0 + explain { + sql("${two_partition_7_0}") + contains "partition=0/4" + } + + def two_partition_8_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 3 ORDER BY id;""" + qt_two_partition_8_0 two_partition_8_0 + explain { + sql("${two_partition_8_0}") + contains "partition=0/4" + } + + def three_partition_9_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2023 AND part3 = 'Q1' ORDER BY id;""" + qt_three_partition_9_0 three_partition_9_0 + explain { + sql("${three_partition_9_0}") + contains "partition=0/10" + } + + def three_partition_10_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q4' ORDER BY id;""" + qt_three_partition_10_0 three_partition_10_0 + explain { + sql("${three_partition_10_0}") + contains "partition=0/10" + } + + def three_partition_11_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 AND part3 = 'Q4' ORDER BY id;""" + qt_three_partition_11_0 three_partition_11_0 + explain { + sql("${three_partition_11_0}") + contains "partition=0/10" + } + + + //time travel + def time_travel_two_partition_1_3 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' order by id;" + def time_travel_two_partition_2_2 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' where part1='US' order by id;" + def time_travel_two_partition_3_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' where part2=2 order by id;" + def time_travel_two_partition_4_0 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' where part2=10 order by id;" + + qt_time_travel_two_partition_1_3 time_travel_two_partition_1_3 + explain { + sql("${time_travel_two_partition_1_3}") + contains "partition=3/3" + } + + + qt_time_travel_two_partition_2_2 time_travel_two_partition_2_2 + explain { + sql("${time_travel_two_partition_2_2}") + contains "partition=2/3" + } + + qt_time_travel_two_partition_3_1 time_travel_two_partition_3_1 + explain { + sql("${time_travel_two_partition_3_1}") + contains "partition=1/3" + } + + qt_time_travel_two_partition_4_0 time_travel_two_partition_4_0 + explain { + sql("${time_travel_two_partition_4_0}") + contains "partition=0/3" + } + + + + + def time_travel_two_partition_5_0 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20231126012025218' order by id;" + qt_time_travel_two_partition_5_0 time_travel_two_partition_5_0 + explain { + sql("${time_travel_two_partition_5_0}") + contains "partition=0/0" + } + + def time_travel_two_partition_6_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012011733' order by id;" + qt_time_travel_two_partition_6_1 time_travel_two_partition_6_1 + explain { + sql("${time_travel_two_partition_6_1}") + contains "partition=1/1" + } + + sql """drop catalog if exists ${catalog_name};""" +} \ No newline at end of file diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy index 53c09e6d5a9031..274b7efdbbb5ab 100644 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy @@ -52,7 +52,7 @@ suite("test_hudi_snapshot", "p2,external,hudi,external_remote,external_remote_hu qt_q05 """SELECT age, COUNT(*) AS user_count FROM ${table_name} GROUP BY age ORDER BY user_count DESC LIMIT 5;""" // Query users with purchase records and limit output - qt_q06 """SELECT user_id, purchases FROM ${table_name} WHERE array_size(purchases) > 0 ORDER BY user_id LIMIT 5;""" + qt_q06 """SELECT user_id, purchases FROM ${table_name} WHERE array_size(purchases) > 0 ORDER BY user_id,event_time LIMIT 5;""" // Query users with a specific tag and limit output qt_q07 """SELECT * FROM ${table_name} WHERE array_contains(tags, 'others') ORDER BY event_time LIMIT 5;""" @@ -64,7 +64,7 @@ suite("test_hudi_snapshot", "p2,external,hudi,external_remote,external_remote_hu qt_q09 """SELECT * FROM ${table_name} WHERE struct_element(struct_element(address, 'coordinates'), 'latitude') BETWEEN 0 AND 100 AND struct_element(struct_element(address, 'coordinates'), 'longitude') BETWEEN 0 AND 100 ORDER BY event_time LIMIT 5;""" // Query records with ratings above a specific value and limit output - qt_q10 """SELECT * FROM ${table_name} WHERE rating > 4.5 ORDER BY rating DESC LIMIT 5;""" + qt_q10 """SELECT * FROM ${table_name} WHERE rating > 4.5 ORDER BY rating DESC,event_time LIMIT 5;""" // Query all users' signup dates and limit output qt_q11 """SELECT user_id, signup_date FROM ${table_name} ORDER BY signup_date DESC LIMIT 10;""" @@ -73,13 +73,13 @@ suite("test_hudi_snapshot", "p2,external,hudi,external_remote,external_remote_hu qt_q12 """SELECT * FROM ${table_name} WHERE struct_element(address, 'postal_code') = '80312' ORDER BY event_time LIMIT 5;""" // Query users with profile pictures and limit output - qt_q13 """SELECT user_id, profile_picture FROM ${table_name} WHERE profile_picture IS NOT NULL ORDER BY user_id LIMIT 5;""" + qt_q13 """SELECT user_id, profile_picture FROM ${table_name} WHERE profile_picture IS NOT NULL ORDER BY user_id,event_time LIMIT 5;""" // Query users by signup date and limit output - qt_q14 """SELECT * FROM ${table_name} WHERE signup_date = '2024-01-15' ORDER BY user_id LIMIT 5;""" + qt_q14 """SELECT * FROM ${table_name} WHERE signup_date = '2024-01-15' ORDER BY user_id,event_time LIMIT 5;""" // Query the total count of purchases for each user and limit output - qt_q15 """SELECT user_id, array_size(purchases) AS purchase_count FROM ${table_name} ORDER BY purchase_count DESC LIMIT 5;""" + qt_q15 """SELECT user_id, array_size(purchases) AS purchase_count FROM ${table_name} ORDER BY purchase_count,event_time DESC LIMIT 5;""" } test_hudi_snapshot_querys("user_activity_log_cow_non_partition") diff --git a/regression-test/suites/external_table_p2/maxcompute/test_max_compute_partition_prune.groovy b/regression-test/suites/external_table_p2/maxcompute/test_max_compute_partition_prune.groovy new file mode 100644 index 00000000000000..e34569117a167f --- /dev/null +++ b/regression-test/suites/external_table_p2/maxcompute/test_max_compute_partition_prune.groovy @@ -0,0 +1,282 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + + +/* +CREATE TABLE one_partition_tb ( + id INT, + name string +) +PARTITIONED BY (part1 INT); +INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (1, 'Alice'); +INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (2, 'Bob'); +INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (3, 'Charlie'); +INSERT INTO one_partition_tb PARTITION (part1=2025) VALUES (4, 'David'); +INSERT INTO one_partition_tb PARTITION (part1=2025) VALUES (5, 'Eva'); +CREATE TABLE two_partition_tb ( + id INT, + name string +) +PARTITIONED BY (part1 STRING, part2 int); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=1) VALUES (1, 'Alice'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=1) VALUES (2, 'Bob'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=1) VALUES (3, 'Charlie'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=2) VALUES (4, 'David'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=2) VALUES (5, 'Eva'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=1) VALUES (6, 'Frank'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=1) VALUES (7, 'Grace'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (8, 'Hannah'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (9, 'Ivy'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (10, 'Jack'); +CREATE TABLE three_partition_tb ( + id INT, + name string +) +PARTITIONED BY (part1 STRING, part2 INT, part3 STRING); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1') VALUES (1, 'Alice'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1') VALUES (2, 'Bob'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1') VALUES (3, 'Charlie'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q2') VALUES (4, 'David'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q2') VALUES (5, 'Eva'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2025, part3='Q1') VALUES (6, 'Frank'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2025, part3='Q2') VALUES (7, 'Grace'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2024, part3='Q1') VALUES (8, 'Hannah'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2024, part3='Q1') VALUES (9, 'Ivy'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q2') VALUES (10, 'Jack'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q2') VALUES (11, 'Leo'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q3') VALUES (12, 'Mia'); +INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q1') VALUES (13, 'Nina'); +INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q2') VALUES (14, 'Oscar'); +INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q3') VALUES (15, 'Paul'); +select * from one_partition_tb; +select * from two_partition_tb; +select * from three_partition_tb; +show partitions one_partition_tb; +show partitions two_partition_tb; +show partitions three_partition_tb; +*/ + +suite("test_max_compute_partition_prune", "p2,external,maxcompute,external_remote,external_remote_maxcompute") { + + + def one_partition_1_1 = """SELECT * FROM one_partition_tb WHERE part1 = 2024 ORDER BY id;""" + def one_partition_2_1 = """SELECT * FROM one_partition_tb WHERE part1 = 2025 ORDER BY id;""" + def one_partition_3_all = """SELECT * FROM one_partition_tb ORDER BY id;""" + def one_partition_4_all = """SELECT * FROM one_partition_tb WHERE id = 5 ORDER BY id;""" + def one_partition_5_1 = """SELECT * FROM one_partition_tb WHERE part1 = 2024 AND id >= 3 ORDER BY id;""" + + def two_partition_1_1 = """SELECT * FROM two_partition_tb WHERE part1 = 'US' AND part2 = 1 ORDER BY id;""" + def two_partition_2_1 = """SELECT * FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" + def two_partition_3_2 = """SELECT * FROM two_partition_tb WHERE part1 = 'US' ORDER BY id;""" + def two_partition_4_all = """SELECT * FROM two_partition_tb ORDER BY id;""" + def two_partition_5_1 = """SELECT * FROM two_partition_tb WHERE part1 = 'US' AND part2 = 2 AND id > 5 ORDER BY id;""" + def two_partition_6_1 = """SELECT * FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" + + def three_partition_1_1 = """SELECT * FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" + def three_partition_2_1 = """SELECT * FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2025 AND part3 = 'Q2' ORDER BY id;""" + def three_partition_3_3 = """SELECT * FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 ORDER BY id;""" + def three_partition_4_2 = """SELECT * FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q1' ORDER BY id;""" + def three_partition_5_all = """SELECT * FROM three_partition_tb ORDER BY id;""" + def three_partition_6_1 = """SELECT * FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" + def three_partition_7_7 = """SELECT * FROM three_partition_tb WHERE part2 = 2025 ORDER BY id;""" + def three_partition_8_2 = """SELECT * FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q2' AND id BETWEEN 6 AND 10 ORDER BY id;""" + + + String enabled = context.config.otherConfigs.get("enableMaxComputeTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String ak = context.config.otherConfigs.get("ak") + String sk = context.config.otherConfigs.get("sk"); + String mc_db = "mc_datalake" + String mc_catalog_name = "test_max_compute_partition_prune" + + sql """drop catalog if exists ${mc_catalog_name};""" + sql """ + create catalog if not exists ${mc_catalog_name} properties ( + "type" = "max_compute", + "mc.default.project" = "${mc_db}", + "mc.access_key" = "${ak}", + "mc.secret_key" = "${sk}", + "mc.endpoint" = "http://service.cn-beijing-vpc.maxcompute.aliyun-inc.com/api" + ); + """ + sql """ switch ${mc_catalog_name} """ + sql """ use ${mc_db}""" + + qt_one_partition_1_1 one_partition_1_1 + explain { + sql("${one_partition_1_1}") + contains "partition=1/2" + } + + qt_one_partition_2_1 one_partition_2_1 + explain { + sql("${one_partition_2_1}") + contains "partition=1/2" + } + + qt_one_partition_3_all one_partition_3_all + explain { + sql("${one_partition_3_all}") + contains "partition=2/2" + } + + qt_one_partition_4_all one_partition_4_all + explain { + sql("${one_partition_4_all}") + contains "partition=2/2" + } + + qt_one_partition_5_1 one_partition_5_1 + explain { + sql("${one_partition_5_1}") + contains "partition=1/2" + } + + + qt_two_partition_1_1 two_partition_1_1 + explain { + sql("${two_partition_1_1}") + contains "partition=1/4" + } + + qt_two_partition_2_1 two_partition_2_1 + explain { + sql("${two_partition_2_1}") + contains "partition=1/4" + } + + qt_two_partition_3_2 two_partition_3_2 + explain { + sql("${two_partition_3_2}") + contains "partition=2/4" + } + + qt_two_partition_4_all two_partition_4_all + explain { + sql("${two_partition_4_all}") + contains "partition=4/4" + } + + qt_two_partition_5_1 two_partition_5_1 + explain { + sql("${two_partition_5_1}") + contains "partition=1/4" + } + + qt_two_partition_6_1 two_partition_6_1 + explain { + sql("${two_partition_6_1}") + contains "partition=1/4" + } + + + + qt_three_partition_1_1 three_partition_1_1 + explain { + sql("${three_partition_1_1}") + contains "partition=1/10" + } + + qt_three_partition_2_1 three_partition_2_1 + explain { + sql("${three_partition_2_1}") + contains "partition=1/10" + } + + qt_three_partition_3_3 three_partition_3_3 + explain { + sql("${three_partition_3_3}") + contains "partition=3/10" + } + + qt_three_partition_4_2 three_partition_4_2 + explain { + sql("${three_partition_4_2}") + contains "partition=2/10" + } + + qt_three_partition_5_all three_partition_5_all + explain { + sql("${three_partition_5_all}") + contains "partition=10/10" + } + + qt_three_partition_6_1 three_partition_6_1 + explain { + sql("${three_partition_6_1}") + contains "partition=1/10" + } + + qt_three_partition_7_7 three_partition_7_7 + explain { + sql("${three_partition_7_7}") + contains "partition=7/10" + } + + qt_three_partition_8_2 three_partition_8_2 + explain { + sql("${three_partition_8_2}") + contains "partition=2/10" + } + + + // 0 partitions + def one_partition_6_0 = """SELECT * FROM one_partition_tb WHERE part1 = 2023 ORDER BY id;""" + qt_one_partition_6_0 one_partition_6_0 + explain { + sql("${one_partition_6_0}") + contains "partition=0/2" + } + + def two_partition_7_0 = """SELECT * FROM two_partition_tb WHERE part1 = 'CN' AND part2 = 1 ORDER BY id;""" + qt_two_partition_7_0 two_partition_7_0 + explain { + sql("${two_partition_7_0}") + contains "partition=0/4" + } + + def two_partition_8_0 = """SELECT * FROM two_partition_tb WHERE part1 = 'US' AND part2 = 3 ORDER BY id;""" + qt_two_partition_8_0 two_partition_8_0 + explain { + sql("${two_partition_8_0}") + contains "partition=0/4" + } + + def three_partition_9_0 = """SELECT * FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2023 AND part3 = 'Q1' ORDER BY id;""" + qt_three_partition_9_0 three_partition_9_0 + explain { + sql("${three_partition_9_0}") + contains "partition=0/10" + } + + def three_partition_10_0 = """SELECT * FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q4' ORDER BY id;""" + qt_three_partition_10_0 three_partition_10_0 + explain { + sql("${three_partition_10_0}") + contains "partition=0/10" + } + + def three_partition_11_0 = """SELECT * FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 AND part3 = 'Q4' ORDER BY id;""" + qt_three_partition_11_0 three_partition_11_0 + explain { + sql("${three_partition_11_0}") + contains "partition=0/10" + } + + } +} \ No newline at end of file From daaa56c21e948245bf477f204bc2f5f561aede6a Mon Sep 17 00:00:00 2001 From: daidai Date: Wed, 27 Nov 2024 11:27:13 +0800 Subject: [PATCH 2/5] revert hudi --- .../datasource/hive/HMSExternalTable.java | 26 -- .../doris/datasource/hudi/HudiUtils.java | 51 ---- .../datasource/hudi/source/HudiScanNode.java | 71 +++-- .../translator/PhysicalPlanTranslator.java | 2 - .../trees/plans/logical/LogicalFileScan.java | 2 +- .../trees/plans/logical/LogicalHudiScan.java | 2 +- .../hudi/test_hudi_partition_prune.out | 151 ---------- .../hudi/test_hudi_snapshot.out | Bin 350438 -> 348526 bytes .../hudi/test_hudi_partition_prune.groovy | 275 ------------------ .../hudi/test_hudi_snapshot.groovy | 10 +- 10 files changed, 54 insertions(+), 536 deletions(-) delete mode 100644 regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out delete mode 100644 regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 34abb11665f02e..7defec7485c5c2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -17,7 +17,6 @@ package org.apache.doris.datasource.hive; -import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.ListPartitionItem; @@ -32,7 +31,6 @@ import org.apache.doris.common.DdlException; import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.SchemaCacheValue; -import org.apache.doris.datasource.TablePartitionValues; import org.apache.doris.datasource.hudi.HudiUtils; import org.apache.doris.datasource.iceberg.IcebergUtils; import org.apache.doris.datasource.mvcc.MvccSnapshot; @@ -43,7 +41,6 @@ import org.apache.doris.mtmv.MTMVSnapshotIf; import org.apache.doris.mtmv.MTMVTimestampSnapshot; import org.apache.doris.nereids.exceptions.NotSupportedException; -import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions; import org.apache.doris.qe.GlobalVariable; import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; @@ -308,29 +305,6 @@ public boolean supportInternalPartitionPruned() { return getDlaType() == DLAType.HIVE || getDlaType() == DLAType.HUDI; } - public SelectedPartitions initHudiSelectedPartitions(Optional tableSnapshot) { - if (getDlaType() != DLAType.HUDI) { - return SelectedPartitions.NOT_PRUNED; - } - - if (getPartitionColumns().isEmpty()) { - return SelectedPartitions.NOT_PRUNED; - } - TablePartitionValues tablePartitionValues = HudiUtils.getPartitionValues(tableSnapshot, this); - - - Map idToPartitionItem = tablePartitionValues.getIdToPartitionItem(); - Map idToNameMap = tablePartitionValues.getPartitionIdToNameMap(); - - Map nameToPartitionItems = Maps.newHashMapWithExpectedSize(idToPartitionItem.size()); - for (Entry entry : idToPartitionItem.entrySet()) { - nameToPartitionItems.put(idToNameMap.get(entry.getKey()), entry.getValue()); - } - - return new SelectedPartitions(nameToPartitionItems.size(), nameToPartitionItems, false); - } - - @Override public Map getNameToPartitionItems(Optional snapshot) { return getNameToPartitionItems(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java index 7db2fc13098d9b..d7803b1a516f9e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java @@ -17,35 +17,24 @@ package org.apache.doris.datasource.hudi; -import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.catalog.ArrayType; -import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.StructField; import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.Type; -import org.apache.doris.datasource.TablePartitionValues; -import org.apache.doris.datasource.hive.HMSExternalTable; -import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper; -import org.apache.doris.datasource.hudi.source.HudiCachedPartitionProcessor; import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; -import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; -import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; -import org.apache.hudi.common.table.timeline.HoodieTimeline; -import org.apache.hudi.common.util.Option; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.List; -import java.util.Optional; import java.util.stream.Collectors; public class HudiUtils { @@ -242,44 +231,4 @@ private static Type handleUnionType(Schema avroSchema) { } return Type.UNSUPPORTED; } - - public static TablePartitionValues getPartitionValues(Optional tableSnapshot, - HMSExternalTable hmsTable) { - TablePartitionValues partitionValues = new TablePartitionValues(); - if (hmsTable.getPartitionColumns().isEmpty()) { - //isn't partition table. - return partitionValues; - } - - HoodieTableMetaClient hudiClient = HiveMetaStoreClientHelper.getHudiClient(hmsTable); - HudiCachedPartitionProcessor processor = (HudiCachedPartitionProcessor) Env.getCurrentEnv() - .getExtMetaCacheMgr().getHudiPartitionProcess(hmsTable.getCatalog()); - boolean useHiveSyncPartition = hmsTable.useHiveSyncPartition(); - - if (tableSnapshot.isPresent()) { - if (tableSnapshot.get().getType() == TableSnapshot.VersionType.VERSION) { - // Hudi does not support `FOR VERSION AS OF`, please use `FOR TIME AS OF`"; - return partitionValues; - } - String queryInstant = tableSnapshot.get().getTime().replaceAll("[-: ]", ""); - - partitionValues = - HiveMetaStoreClientHelper.ugiDoAs( - HiveMetaStoreClientHelper.getConfiguration(hmsTable), - () -> processor.getSnapshotPartitionValues( - hmsTable, hudiClient, queryInstant, useHiveSyncPartition)); - } else { - HoodieTimeline timeline = hudiClient.getCommitsAndCompactionTimeline().filterCompletedInstants(); - Option snapshotInstant = timeline.lastInstant(); - if (!snapshotInstant.isPresent()) { - return partitionValues; - } - partitionValues = - HiveMetaStoreClientHelper.ugiDoAs( - HiveMetaStoreClientHelper.getConfiguration(hmsTable), - () -> processor.getPartitionValues(hmsTable, hudiClient, useHiveSyncPartition)); - } - return partitionValues; - } - } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java index 14f3656fb69d42..a8f2a362bfde8d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java @@ -21,7 +21,6 @@ import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.ListPartitionItem; import org.apache.doris.catalog.PartitionItem; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; @@ -30,10 +29,12 @@ import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.FileSplit; import org.apache.doris.datasource.TableFormatType; +import org.apache.doris.datasource.TablePartitionValues; import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper; import org.apache.doris.datasource.hive.HivePartition; import org.apache.doris.datasource.hive.source.HiveScanNode; import org.apache.doris.datasource.hudi.HudiUtils; +import org.apache.doris.planner.ListPartitionPrunerV2; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.qe.ConnectContext; import org.apache.doris.spi.Split; @@ -67,6 +68,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Locale; @@ -257,29 +259,50 @@ private void setHudiParams(TFileRangeDesc rangeDesc, HudiSplit hudiSplit) { rangeDesc.setTableFormatParams(tableFormatFileDesc); } - private List getPrunedPartitions(HoodieTableMetaClient metaClient) { + private List getPrunedPartitions( + HoodieTableMetaClient metaClient, Option snapshotTimestamp) throws AnalysisException { List partitionColumnTypes = hmsTable.getPartitionColumnTypes(); if (!partitionColumnTypes.isEmpty()) { - this.totalPartitionNum = selectedPartitions.totalPartitionNum; - Map prunedPartitions = selectedPartitions.selectedPartitions; - this.selectedPartitionNum = prunedPartitions.size(); - - String dbName = hmsTable.getDbName(); - String tblName = hmsTable.getName(); - String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat(); - String basePath = metaClient.getBasePathV2().toString(); - - List hivePartitions = Lists.newArrayList(); - prunedPartitions.forEach( - (key, value) -> { - String path = basePath + "/" + key; - hivePartitions.add(new HivePartition( - dbName, tblName, false, inputFormat, path, - ((ListPartitionItem) value).getItems().get(0).getPartitionValuesAsStringList(), - Maps.newHashMap())); - } - ); - return hivePartitions; + HudiCachedPartitionProcessor processor = (HudiCachedPartitionProcessor) Env.getCurrentEnv() + .getExtMetaCacheMgr().getHudiPartitionProcess(hmsTable.getCatalog()); + TablePartitionValues partitionValues; + if (snapshotTimestamp.isPresent()) { + partitionValues = processor.getSnapshotPartitionValues( + hmsTable, metaClient, snapshotTimestamp.get(), useHiveSyncPartition); + } else { + partitionValues = processor.getPartitionValues(hmsTable, metaClient, useHiveSyncPartition); + } + if (partitionValues != null) { + // 2. prune partitions by expr + partitionValues.readLock().lock(); + try { + Map idToPartitionItem = partitionValues.getIdToPartitionItem(); + this.totalPartitionNum = idToPartitionItem.size(); + ListPartitionPrunerV2 pruner = new ListPartitionPrunerV2(idToPartitionItem, + hmsTable.getPartitionColumns(), columnNameToRange, + partitionValues.getUidToPartitionRange(), + partitionValues.getRangeToId(), + partitionValues.getSingleColumnRangeMap(), + true); + Collection filteredPartitionIds = pruner.prune(); + this.selectedPartitionNum = filteredPartitionIds.size(); + // 3. get partitions from cache + String dbName = hmsTable.getDbName(); + String tblName = hmsTable.getName(); + String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat(); + String basePath = metaClient.getBasePathV2().toString(); + Map partitionIdToNameMap = partitionValues.getPartitionIdToNameMap(); + Map> partitionValuesMap = partitionValues.getPartitionValuesMap(); + return filteredPartitionIds.stream().map(id -> { + String path = basePath + "/" + partitionIdToNameMap.get(id); + return new HivePartition( + dbName, tblName, false, inputFormat, path, partitionValuesMap.get(id), + Maps.newHashMap()); + }).collect(Collectors.toList()); + } finally { + partitionValues.readLock().unlock(); + } + } } // unpartitioned table, create a dummy partition to save location and inputformat, // so that we can unify the interface. @@ -369,7 +392,7 @@ public List getSplits() throws UserException { if (!partitionInit) { prunedPartitions = HiveMetaStoreClientHelper.ugiDoAs( HiveMetaStoreClientHelper.getConfiguration(hmsTable), - () -> getPrunedPartitions(hudiClient)); + () -> getPrunedPartitions(hudiClient, snapshotTimestamp)); partitionInit = true; } List splits = Collections.synchronizedList(new ArrayList<>()); @@ -431,7 +454,7 @@ public boolean isBatchMode() { // Non partition table will get one dummy partition prunedPartitions = HiveMetaStoreClientHelper.ugiDoAs( HiveMetaStoreClientHelper.getConfiguration(hmsTable), - () -> getPrunedPartitions(hudiClient)); + () -> getPrunedPartitions(hudiClient, snapshotTimestamp)); partitionInit = true; } int numPartitions = ConnectContext.get().getSessionVariable().getNumPartitionsInBatchMode(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 5a9370246fd461..c474e6bd56e1d7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -653,8 +653,6 @@ public PlanFragment visitPhysicalHudiScan(PhysicalHudiScan fileScan, PlanTransla if (fileScan.getTableSnapshot().isPresent()) { ((FileQueryScanNode) scanNode).setQueryTableSnapshot(fileScan.getTableSnapshot().get()); } - HudiScanNode hudiScanNode = (HudiScanNode) scanNode; - hudiScanNode.setSelectedPartitions(fileScan.getSelectedPartitions()); return getPlanFragmentForPhysicalFileScan(fileScan, context, scanNode, table, tupleDescriptor); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java index 0822bfbc5fa830..96b8e032d11274 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java @@ -42,7 +42,7 @@ */ public class LogicalFileScan extends LogicalCatalogRelation { - protected SelectedPartitions selectedPartitions; + protected final SelectedPartitions selectedPartitions; protected final Optional tableSample; protected final Optional tableSnapshot; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java index 51e68eb07631ae..629690889432b3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java @@ -84,7 +84,7 @@ protected LogicalHudiScan(RelationId id, ExternalTable table, List quali public LogicalHudiScan(RelationId id, ExternalTable table, List qualifier, Optional tableSample, Optional tableSnapshot) { this(id, table, qualifier, Optional.empty(), Optional.empty(), - ((HMSExternalTable) table).initHudiSelectedPartitions(tableSnapshot), tableSample, tableSnapshot, + SelectedPartitions.NOT_PRUNED, tableSample, tableSnapshot, Optional.empty(), Optional.empty()); } diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out deleted file mode 100644 index 2e827d3fc5b9cf..00000000000000 --- a/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out +++ /dev/null @@ -1,151 +0,0 @@ --- This file is automatically generated. You should know what you did if you want to edit this --- !one_partition_1_1 -- -1 Alice 2024 -2 Bob 2024 -3 Charlie 2024 - --- !one_partition_2_1 -- -4 David 2025 -5 Eva 2025 - --- !one_partition_3_all -- -1 Alice 2024 -2 Bob 2024 -3 Charlie 2024 -4 David 2025 -5 Eva 2025 - --- !one_partition_4_all -- -5 Eva 2025 - --- !one_partition_5_1 -- -3 Charlie 2024 - --- !two_partition_1_1 -- -1 Alice US 1 -2 Bob US 1 -3 Charlie US 1 - --- !two_partition_2_1 -- -8 Hannah EU 2 -9 Ivy EU 2 -10 Jack EU 2 - --- !two_partition_3_2 -- -1 Alice US 1 -2 Bob US 1 -3 Charlie US 1 -4 David US 2 -5 Eva US 2 - --- !two_partition_4_all -- -1 Alice US 1 -2 Bob US 1 -3 Charlie US 1 -4 David US 2 -5 Eva US 2 -6 Frank EU 1 -7 Grace EU 1 -8 Hannah EU 2 -9 Ivy EU 2 -10 Jack EU 2 - --- !two_partition_5_1 -- - --- !two_partition_6_1 -- -8 Hannah EU 2 -9 Ivy EU 2 -10 Jack EU 2 - --- !three_partition_1_1 -- -1 Alice US 2024 Q1 -2 Bob US 2024 Q1 -3 Charlie US 2024 Q1 - --- !three_partition_2_1 -- -10 Jack EU 2025 Q2 -11 Leo EU 2025 Q2 - --- !three_partition_3_3 -- -13 Nina AS 2025 Q1 -14 Oscar AS 2025 Q2 -15 Paul AS 2025 Q3 - --- !three_partition_4_2 -- -1 Alice US 2024 Q1 -2 Bob US 2024 Q1 -3 Charlie US 2024 Q1 -6 Frank US 2025 Q1 - --- !three_partition_5_all -- -1 Alice US 2024 Q1 -2 Bob US 2024 Q1 -3 Charlie US 2024 Q1 -4 David US 2024 Q2 -5 Eva US 2024 Q2 -6 Frank US 2025 Q1 -7 Grace US 2025 Q2 -8 Hannah EU 2024 Q1 -9 Ivy EU 2024 Q1 -10 Jack EU 2025 Q2 -11 Leo EU 2025 Q2 -12 Mia EU 2025 Q3 -13 Nina AS 2025 Q1 -14 Oscar AS 2025 Q2 -15 Paul AS 2025 Q3 - --- !three_partition_6_1 -- -8 Hannah EU 2024 Q1 -9 Ivy EU 2024 Q1 - --- !three_partition_7_7 -- -6 Frank US 2025 Q1 -7 Grace US 2025 Q2 -10 Jack EU 2025 Q2 -11 Leo EU 2025 Q2 -12 Mia EU 2025 Q3 -13 Nina AS 2025 Q1 -14 Oscar AS 2025 Q2 -15 Paul AS 2025 Q3 - --- !three_partition_8_2 -- -7 Grace US 2025 Q2 - --- !one_partition_6_0 -- - --- !two_partition_7_0 -- - --- !two_partition_8_0 -- - --- !three_partition_9_0 -- - --- !three_partition_10_0 -- - --- !three_partition_11_0 -- - --- !time_travel_two_partition_1_3 -- -1 Alice US 1 -2 Bob US 1 -3 Charlie US 1 -4 David US 2 -5 Eva US 2 -6 Frank EU 1 - --- !time_travel_two_partition_2_2 -- -1 Alice US 1 -2 Bob US 1 -3 Charlie US 1 -4 David US 2 -5 Eva US 2 - --- !time_travel_two_partition_3_1 -- -4 David US 2 -5 Eva US 2 - --- !time_travel_two_partition_4_0 -- - --- !time_travel_two_partition_5_0 -- - --- !time_travel_two_partition_6_1 -- -1 Alice US 1 - diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_snapshot.out b/regression-test/data/external_table_p2/hudi/test_hudi_snapshot.out index 2ce13dc272b739a7c028e2f8dbbe303148e312e5..efad67ffbfa8c407fa42d0dd28f569f492bd1d12 100644 GIT binary patch delta 11237 zcmeHNdypJQea_78$zH2)ES(}FTgH8*zO5>iy9 z$k($gdvzv-{*P7j+SS}nPxtTf{l4$_gVP&-_Qx9^`ZMd`m7SktTXs+?O3L>*Pol_S zJdPbs1a-oIDu-~zeL|TEU43TQx^4LqZd9d^2Ap{%Y%X!RL#adDBZSS+aE6kK7lw>@ zrFL^sm3So#OQSD6^EBP-uldI>;{w|V_G1G$6uO@`(!YNKS9R>m+l!6&wc5Da*Y5r} z;f@=W?wpj3rS^QaF*o_<@?<=pWi4f{ZZ?O-?zODdvsQcZ?$Y}v{Wc%4}9w37iw8!uG(Hq)Z~oA{EF}~Et-DL8lGZ2@_gnd4v!@c zX%stB`iT=$u7V(-e&Xu-CyTo(GO1S^)mFPH+tsXLhSjRp7rFzC7x7kRpv`KGM+8`_ zS3YZvS)pF{fKBw*hX#sUWUIU%FJ%6(H~0ftvt2$Qo5xgB?-?j;vN`WeY@gKkuC>Qg z8A(Q^>%;*wV^e_>@icL$Br1xTpZX*)KUHn3`q750Axehw(@+aviGt*bSmMOq92J%Gy~&pPjWf6jRx1>oYeM1`8e``nmh< zu})(z-MYUzxA^O4P;5wIkCp_X85ZeO-nFPt({DL@6hV z1Z*=Qc*{hewTpvJ*W%;8r8=24kC`DxwJb~er-pj^>^JQ3KUiNN8Qd?`1Ue9k=@mb{=B8vH!&kQvIc zRVeX@=k~=NL=hYNG3*n=Wd|p0d!PTnbPMO-$|h#J-&EiI(lfU|eeha*>iw>8C_%_+ zO1PV`Y*<7!t}e)~X)R=}YP)&@#)U|cHH=G6o}91NRDbB$FPB<~D~WcTf$fD*aDAp; z+&bk1ZiO-GhGbRTpdPIdAvoh5`l+FYAB9{+JaPn!jo}l_o5$g$K^g>#66LdKwY8}e z2y7T`|B^2>vt;pAcZ*c5io5!kJ|v_4WJ(dZ0ZEjj5)n9@at{V6LdO?w9P=<#O6b#f6bG%kK7LPe za424^wHKQgjlrXS1twmzhdbmmMg8+hy8_o-Y|7W0;XZjxm2b(Cs+JEIH|Pm>U|N6u zbH%Y`UkMf>x+Y2}pyEvef(SYfac#RyzR+K#oD&)c}#d*BH=~Cceo4_N2G)~(vPK#65m7SD0x@F z^Ldc?fdf;3PkEu|$S_Lr!mdbT*H?r@xJ`)2Be5@?gu&r>fHjGN1o;Qc6RR*Ggmc^$ z5rx&`)JbOjlsMcEr6Xd7&%#4s;Y!3QHTNY79&5k@%nygeN6bNT&cl)Cr7Dj7lJ~)v z4_hlQAGUwFPT%n-_Ruay!lPlyWz=UPj2%QLtd1*U1b~=0f+qq`@dZ=*Q9iHO)eg=Sxhxcu-1ZtpS9XDckW2d%M(?TX%T!P86QLNnMb64aPw=hC3lh=V8@L-0oAR^qK{eCr#@2{ zpY#CGD1o8DaQkr!B|*h!A#<5Nv#~flhZqy2EO2l{05S_7r4e^L#o=hm1#%Iq9jd8n zIeKZ$R~J@0mW--3lS%sg#|wiB>70K5t!r**szxI9%OAIf^wwV$M)G(`zgU>kU-%p3 z_VIChWJh=uC^m`1KnB7ANP!I|1cUOJ>$t8@SRBNGpX$?71A{=J4kIO(rjZw>2%<#7 zp9I1KTmt??ViCI$myt+eN_ZCsSk8fs`b0l9ZjUYh=I6+ghp0?>iMzeU^BmVZLc$py z7-I@tfV<%0WOV1~8mxA5Y$uN8zx_iT2k$pVg>B(2IIQ{Z7w>AAU)v8%oxj1I0COj* zwJxU3QR>NTv6e@zM5U?$;5#!}Z6r!1{mBO?<^=vTYv6BdJ>)8wOLOG9m6HmmArrwW z6QEvD0p!u2imcIT54lt%+;_Yn@(?{R=@Ii0LR2DE$l>~aq9?XkgL@>#w<_y2)$zqD zVm!WX1827{7;%=SNKt(zKm)%*^}52qh<;~Oq@#Cai;cPR4U5gWF3#*?k9~`=Y;{g; z-c)pnALh{G@oTJ+<&Yxer9jgmq6y-qqKI4oXJHy6+7KcWFCRlciiH`zZnyy2O}LsF zZcBF0b7t86BZUK1a;%SF4{X^|M-!brXq7sTT((A+_uG}NH#biLI2qKPPe*U)CST2B zg_iX7mc6OS;dxsA8^GLEhemvf_H_ zgOhVvKE3ijbE;(k=6&-jJip?0PTfD&`T2KtkGuzkL3w|*)#_Ciy@a$sThfoVfiM|> zv_2aS?9d+_wKvDimBgpQNmD;Udcpk4I0_t>5HEDySR$RS9u(4DK7IYrBU@O&Tr4(L zc|CGL#n-QF7#P#h!`ALK2Y{Ta3;LA<_NLN>z}40dH42*x0rxtmyvdLMwY^?{e%c;h zk;m<42lR)TJ-RDA>IwkCP$kp_7?uH2oR2Jq)J2?Bd4WucO9K|^C;qdz39v8@fR7{O z6-kldcx*su%$dXRLTOBVWCoL0T$l9n3Ss)d2as1T;TebpL?D@7z@FxL7 zczomiINpQZ6w)^ufV+&nyO|(^&HT)?2{KEjsK$0j_tZU}=$^iH>eg4k1sHyM=lgmw z4N!!-$VhNp0AN(p7i=v*=k>vo>XzJ*ahRFOx{~_*f;Bj>ryi+hXX1O?i%9)}mXWo! zOlgICE(40s|NhmYnYk5!kQau%%Ml2Q20Sns1(8psDf)_o z)C{NJ*c$0W=JY6(*Y>k5Sftb@=Vx?Z3H-S}j2=bJ7h=lQIyYMO2y*f1b` zQ4#RK5zE&PtrSS-k*kZFR+s39GR#q1AN-%7NLLVlT$|UeG!CyIU+bAI#`p*<32$gizQY zW@ocTqpI}jm+Z|21Rs62U7XMl+17?x>IxBbK@+8vQx8^QzBwAwqac@PNH`0agd1{T z1p_isr?`&iO3(Mhgn0=yZnzOqrIe;5ijz3PtRklX&iRg@L^_z$gacim7$ERNQ$ZMb z7lb6x^l{|mOL$005EMz?YmUgk9UXD$3PtNctV|@*Vp|k2PWO7d#h8rkB8cxZda+YD)bo&XJRok9QskLx9%uzE1+D}KfB5vDj;0z^FIOw!#c;`M%LaSYl+EGs7v&@zpyu0$Mo5+ z6^4rfY@RW_9i2z^Z<X*!r(;|#GWfU@~7MMBfE;l^%o^c zPxy%K>vVBUp9>1Rr{1ojtSRTqhk*iAyWiFhWl7VJANT8ncf;I35$H#sx2HPy%8_Z; z4LG4doqj@*fYJp1@B6M3fTaw8S^z$%KK1YRptc^i2B$FljjUdc&Afv?JD~uVaM>fv zXdr+v_B>#XY3pu#_vN5`RQVlQQ=k32!eHn8w}%U$c{=AmF=Z36vezBB+}7uwvqo0F z`YHR-ft7EbFN6hs?s|J1}biT3zY z_R8Z=6@Kw1{kG=|qdPB=9#;C)^M(Cy@WcJC{02Y#20#1;Kb%V?{}(^3-P88Yl_$Sh z_^2U>uh7?j+upMBi8F;=MNP&EmrfFQqt6>G@f$4h|4Wv5#jwQNmH`t|XiW)NR|fbI zhk!;Q3Um}tBnI*YBZUlq1rv!c0^n%md~m3qQF|$HL7IpNN*d~yNZ~dgJQ!gl07}Vm zJAPNDGIUaiHWFYKTyl!r+`vx)l29j}4dYyM!VeRIn_ZVh%rJt1xi4ap5I12?oInoX zF^G66W3Y3957k-vUTO$u4TgB=1N(|!-ZyZ5oqi@Lj_h(dIASiKY=PQ#M38{hP9yL* zE(w8~e8`g+3`+uS{m>c=jp|bhXzWRkafi795SB|K0I?*2EGAtgK@=*~PN0ch7rZ#Q z6_6KHpB^fXE?>%nN*rR652pAvat}-~MLADrK&^NXqG&(4*Qi~MBI32udg5+;9K&u1 z;a)AB5Btu^t06Zpb%h#83iZ5#Q1?M|G1aqdf&k0uz4vC-hJNMy_NMW!IGW3HMo=;^ z7nMlcFn>2gUwYz?ikpfaWnND2mR(wL!Qy(W$vtqbzDpskni6{|l|E&W1p666zvm2B z62%v)eHFMqh?J0Z77hdAf7%`x#{*CHq~-2|``|U`!mA*^bbj?5E#A5WuJORR!iKf^ zkPoQ{tESI9UEB=%8RBaW@QsEkl!X2T@)is#Dwz~zt>DlefbJm+G8!mT;D5*3U@ht= zzF`k;X)Uz?G|ZcyG#Xsr^QjB5_(#QIP2Op((O=Tw;r2E`hLvZ*oA>0fE)#c~I$3^4 zQ#Lex(H<-^<_Uf3$>NkH^ns5Qcda};HSnd&^oO?$4DS+0LF)#Rr<6*FSrNJ>xD1%7 zzcE^prk)UzfD$+6xCvrH6r;*@q1MpI@Q@*RNU)Cr;uA|FU-%M59Bh%om|==63@(`B zr97mB3L@knvsoV}bA*Z+vxOjR84qcHO$&>gNO8dSU2VO0&pKRq*sr$^_e_~Hj~aTE5tmd!8@XTA4Apr9zd6BQZb##+aw`y#)X zH=L`v0+9+nJX!2A9uh2VS0Kri*yfyURPXQhPZ(F~8cfyncI-h7UyVVa_cwP`3x<|G z9To>KH^y<3tS{u`$3Q?q9O`xNwzf`$6`#YMUZ<9Xv=U$)8S3UrVR$N$zVuPY`3j68 zSDsT)jKlY0?ou8hm zOz&yU*VGbJ(PQ1I>DhEH%g)O7JYKJ%<$v2FMTqmkt6dArv76!_w7MXK`GNHe5aOXI zgO!0m>m^`UUe~n%D*SaivVxS*%o>Ks@6LTItE+akuDV4%qE;L2AbpOBz6}W7diK0M ztRLD@q?g?YO*Ct_^`2+#!L?mIc@H`?YWnP1^n5^gdF34tL=RabQy|0zsBeTM7Ngqr zO&*J4&+)y4C<^CGMCa^{gI9O;4`V2OJr3O#Abi=Y|3e<4{-0QDZPt(cN0D#62}+Y` zWN4N5)>N0{>CWXL8P8)h$B)@V9WuUM{}(B)(;wP0fc_~`V#q4ogXR>aV32vPwj%qY z0|d1>4j|-wefIO#M*aOQ1BaF`<@S2LsTJ-F{888HC8AHrqw{-^1XF-SC^ z_c)K>8L^L5^|6+aqnVTe#G8P2p4+rz>s+;&v&(bM>|}es-;$7zDy|`wwrG3Qu8ed^ z)AAv>YaSfB%V%^K{e@rJLq+fzU~!MwQ;%IYl1d6bBr%Ml4|Kx?WDRK;7Q-+3GK!&t zz0UqmR8_b0rO&KP3rH^Jw=#5R_1n20z`o8_kpm&Mln+%Q4mA@ix!7{!^>gvln=xWKuW+a+Xxi&j(uZKI+p*u(*m*5ZCLdw*yC)p~Meo6J zyFpP#!Z5c#a0CaVj7;>NK^?pm6w#XOcA>Qvdi^tXF5-_ofOSTB7#6P#7O~-HI=~)$u=sV1XV-rl0s3 zqQ8&kuQWw(8i+km+F${lP(=`MdCa&_AQ)NR0`r#Fw`3h_WG8DX={B4!8TIou$Lq5) zj%0A#7Jct_i>^Jq*piL%ktTFkePy$--43Wee!DfTe|69r4ba0tsDw%Zl@4ykB$n6k z#cl$5J91U*Dc3xrUs6Qa(DEyV%~xIA6h;w~e^?mPFHPE1ANYZ_W@r}qHt!(o@7voW zn;rV(h9V8Uuw#8_5`BB4N6|K73IIUxKpK-nwE{?lejE=1kcTF=NU+tz$hyLIe>6uA zWVMFIhrX%sLQw&oT^Z?o>tl3$HcOVuH#D=Xe^|MUco=?&ac#6c-Z_14qC-YYEBoAm zf8Mq%5*C7k^&I5~@FInZ1+5Lhpi!8}FmM&R{7}xsPy}Uw!YE{^aW*6*e8wB`7>-B5 z@DkHM1AItgCBVZoydMW8i>4jZz7xk1%}*TdH34ozLuJfB4?FZmkiIBb(6%4~=A&WB oOT$Qz6lQ|^7#QZ1;pZU3;xTaFqyZ@ImkgRCe)i9)-y6>Fy*%@gXd` zZXO11)j9Xv+g0aObH4MP`}OZH`~0QL9(*;}+?$=ecu)3N)(ck zIwY8)LNUXYqS=O(zg#?*7Yj;Kj7fpQg(*XXAj+mGnWAb6vl5qt;J8rM)ut=(l2V1W z8@FwHV9S=t3(9t~^39Py_rq9;V1jGvlx&+f-o2QB%*Nj1ItF43i3K-XQjp8nln_#qk8 zT1`bsNWqg>z^c`HZML$o+q&WuAtj<1Wh^7l56+pyIxMt7B5@3HmSCi{P$<<{#}qTJ zQqys3!OhLo^c*ZuOe+g*lhF%qw%!0|?kU#i7b^8y+xlBwt+wSJr*{;$gEHT!82A+v zC6cXvYdC_ivt759iqmy#$2aOmT5oh#J3FjRdUvDZGX0OC0HXO*ZX)}5`^Xx@xw8pJ zmT-eOXDO1x8sy5>>ezunr2d7&x&A9_v+lFV6L*wR}}s62cnS)I3uex;fSU&oL0;dq^yz%r!FlkCR8eL zc6@x&KX)*u{0M;{FM{AhT2j;c8J zeTRZ8J^pn#FoC&{iNIx)2uBewON^wE5-CoSL^-RiHvV5PM-yYVZWg-d>2#yI=j?zf z_2kK5%pd$LAjRn&^||?Gy*9qJQmyJ!$JPA@w(6Q|Hns1&I~WYG-}QPh5s+;0y7jHA zCWCdk?8wc-5fQ@wZ9ZI^-L!I3>sPx%na_IPA8R!Sg2#LPu@lktUOp7!*1)yFI}`q? z!{KB|di)_9v}JbR;N%^o$1-EE69R(J5EDrO2OMZ+XOdL1lyRmCq(@=toJ|wN2!I6F zR-lwyf^^IiKny1}h1dMilhNS5qtij_=ydpLpMUH(;o#MTk{NnVfa72ID(Ii;CL};p z92-s%iD?2GLP99(G_tWQ$IduN4ubFufrl}dh$M=~e(4Xv$TDCp1J*KNoiBjpzyE7+ zO{8XG|LkP2*7uL(`=$s+m=?-br8u=1Q4F<&t1>|<7BSK!HI^_cG-sUzmuC`Ob`rdG z`0eQB$m9LNQ0v%y_-5YUc_JA0tNQaBcv(<}6Rc63v}=KN5A=pUV3~rD&w}@L!@QdRuM!H~Djb%|0M~t)nM%-}U+JALfSsL%t`ZQ&w}} zp$cjOPoRZNEnuxmlqf5j@E90im+wg_plg*P_Nz;|WxluEpe{G4=S_p^mt<~To{*nn ze5r^PAxzP%-Y=^#`_(?3?{9kUMdwTv29+zkh diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy deleted file mode 100644 index 9947496ee46f24..00000000000000 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy +++ /dev/null @@ -1,275 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -suite("test_hudi_partition_prune", "p2,external,hudi,external_remote,external_remote_hudi") { - String enabled = context.config.otherConfigs.get("enableExternalHudiTest") - if (enabled == null || !enabled.equalsIgnoreCase("true")) { - logger.info("disable hudi test") - } - - String catalog_name = "test_hudi_partition_prune" - String props = context.config.otherConfigs.get("hudiEmrCatalog") - sql """drop catalog if exists ${catalog_name};""" - sql """ - create catalog if not exists ${catalog_name} properties ( - ${props} - ,"use_hive_sync_partition"="true" - ); - """ - - sql """ switch ${catalog_name};""" - sql """ use regression_hudi;""" - sql """ set enable_fallback_to_original_planner=false """ - - - - def one_partition_1_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2024 ORDER BY id;""" - def one_partition_2_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2025 ORDER BY id;""" - def one_partition_3_all = """SELECT id,name,part1 FROM one_partition_tb ORDER BY id;""" - def one_partition_4_all = """SELECT id,name,part1 FROM one_partition_tb WHERE id = 5 ORDER BY id;""" - def one_partition_5_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2024 AND id >= 3 ORDER BY id;""" - - def two_partition_1_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 1 ORDER BY id;""" - def two_partition_2_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" - def two_partition_3_2 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' ORDER BY id;""" - def two_partition_4_all = """SELECT id,name,part1,part2 FROM two_partition_tb ORDER BY id;""" - def two_partition_5_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 2 AND id > 5 ORDER BY id;""" - def two_partition_6_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" - - def three_partition_1_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" - def three_partition_2_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2025 AND part3 = 'Q2' ORDER BY id;""" - def three_partition_3_3 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 ORDER BY id;""" - def three_partition_4_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q1' ORDER BY id;""" - def three_partition_5_all = """SELECT id,name,part1,part2,part3 FROM three_partition_tb ORDER BY id;""" - def three_partition_6_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" - def three_partition_7_7 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part2 = 2025 ORDER BY id;""" - def three_partition_8_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q2' AND id BETWEEN 6 AND 10 ORDER BY id;""" - - - - qt_one_partition_1_1 one_partition_1_1 - explain { - sql("${one_partition_1_1}") - contains "partition=1/2" - } - - qt_one_partition_2_1 one_partition_2_1 - explain { - sql("${one_partition_2_1}") - contains "partition=1/2" - } - - qt_one_partition_3_all one_partition_3_all - explain { - sql("${one_partition_3_all}") - contains "partition=2/2" - } - - qt_one_partition_4_all one_partition_4_all - explain { - sql("${one_partition_4_all}") - contains "partition=2/2" - } - - qt_one_partition_5_1 one_partition_5_1 - explain { - sql("${one_partition_5_1}") - contains "partition=1/2" - } - - - qt_two_partition_1_1 two_partition_1_1 - explain { - sql("${two_partition_1_1}") - contains "partition=1/4" - } - - qt_two_partition_2_1 two_partition_2_1 - explain { - sql("${two_partition_2_1}") - contains "partition=1/4" - } - - qt_two_partition_3_2 two_partition_3_2 - explain { - sql("${two_partition_3_2}") - contains "partition=2/4" - } - - qt_two_partition_4_all two_partition_4_all - explain { - sql("${two_partition_4_all}") - contains "partition=4/4" - } - - qt_two_partition_5_1 two_partition_5_1 - explain { - sql("${two_partition_5_1}") - contains "partition=1/4" - } - - qt_two_partition_6_1 two_partition_6_1 - explain { - sql("${two_partition_6_1}") - contains "partition=1/4" - } - - - - qt_three_partition_1_1 three_partition_1_1 - explain { - sql("${three_partition_1_1}") - contains "partition=1/10" - } - - qt_three_partition_2_1 three_partition_2_1 - explain { - sql("${three_partition_2_1}") - contains "partition=1/10" - } - - qt_three_partition_3_3 three_partition_3_3 - explain { - sql("${three_partition_3_3}") - contains "partition=3/10" - } - - qt_three_partition_4_2 three_partition_4_2 - explain { - sql("${three_partition_4_2}") - contains "partition=2/10" - } - - qt_three_partition_5_all three_partition_5_all - explain { - sql("${three_partition_5_all}") - contains "partition=10/10" - } - - qt_three_partition_6_1 three_partition_6_1 - explain { - sql("${three_partition_6_1}") - contains "partition=1/10" - } - - qt_three_partition_7_7 three_partition_7_7 - explain { - sql("${three_partition_7_7}") - contains "partition=7/10" - } - - qt_three_partition_8_2 three_partition_8_2 - explain { - sql("${three_partition_8_2}") - contains "partition=2/10" - } - - - // 0 partitions - def one_partition_6_0 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2023 ORDER BY id;""" - qt_one_partition_6_0 one_partition_6_0 - explain { - sql("${one_partition_6_0}") - contains "partition=0/2" - } - - def two_partition_7_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'CN' AND part2 = 1 ORDER BY id;""" - qt_two_partition_7_0 two_partition_7_0 - explain { - sql("${two_partition_7_0}") - contains "partition=0/4" - } - - def two_partition_8_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 3 ORDER BY id;""" - qt_two_partition_8_0 two_partition_8_0 - explain { - sql("${two_partition_8_0}") - contains "partition=0/4" - } - - def three_partition_9_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2023 AND part3 = 'Q1' ORDER BY id;""" - qt_three_partition_9_0 three_partition_9_0 - explain { - sql("${three_partition_9_0}") - contains "partition=0/10" - } - - def three_partition_10_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q4' ORDER BY id;""" - qt_three_partition_10_0 three_partition_10_0 - explain { - sql("${three_partition_10_0}") - contains "partition=0/10" - } - - def three_partition_11_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 AND part3 = 'Q4' ORDER BY id;""" - qt_three_partition_11_0 three_partition_11_0 - explain { - sql("${three_partition_11_0}") - contains "partition=0/10" - } - - - //time travel - def time_travel_two_partition_1_3 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' order by id;" - def time_travel_two_partition_2_2 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' where part1='US' order by id;" - def time_travel_two_partition_3_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' where part2=2 order by id;" - def time_travel_two_partition_4_0 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' where part2=10 order by id;" - - qt_time_travel_two_partition_1_3 time_travel_two_partition_1_3 - explain { - sql("${time_travel_two_partition_1_3}") - contains "partition=3/3" - } - - - qt_time_travel_two_partition_2_2 time_travel_two_partition_2_2 - explain { - sql("${time_travel_two_partition_2_2}") - contains "partition=2/3" - } - - qt_time_travel_two_partition_3_1 time_travel_two_partition_3_1 - explain { - sql("${time_travel_two_partition_3_1}") - contains "partition=1/3" - } - - qt_time_travel_two_partition_4_0 time_travel_two_partition_4_0 - explain { - sql("${time_travel_two_partition_4_0}") - contains "partition=0/3" - } - - - - - def time_travel_two_partition_5_0 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20231126012025218' order by id;" - qt_time_travel_two_partition_5_0 time_travel_two_partition_5_0 - explain { - sql("${time_travel_two_partition_5_0}") - contains "partition=0/0" - } - - def time_travel_two_partition_6_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012011733' order by id;" - qt_time_travel_two_partition_6_1 time_travel_two_partition_6_1 - explain { - sql("${time_travel_two_partition_6_1}") - contains "partition=1/1" - } - - sql """drop catalog if exists ${catalog_name};""" -} \ No newline at end of file diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy index 274b7efdbbb5ab..53c09e6d5a9031 100644 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy @@ -52,7 +52,7 @@ suite("test_hudi_snapshot", "p2,external,hudi,external_remote,external_remote_hu qt_q05 """SELECT age, COUNT(*) AS user_count FROM ${table_name} GROUP BY age ORDER BY user_count DESC LIMIT 5;""" // Query users with purchase records and limit output - qt_q06 """SELECT user_id, purchases FROM ${table_name} WHERE array_size(purchases) > 0 ORDER BY user_id,event_time LIMIT 5;""" + qt_q06 """SELECT user_id, purchases FROM ${table_name} WHERE array_size(purchases) > 0 ORDER BY user_id LIMIT 5;""" // Query users with a specific tag and limit output qt_q07 """SELECT * FROM ${table_name} WHERE array_contains(tags, 'others') ORDER BY event_time LIMIT 5;""" @@ -64,7 +64,7 @@ suite("test_hudi_snapshot", "p2,external,hudi,external_remote,external_remote_hu qt_q09 """SELECT * FROM ${table_name} WHERE struct_element(struct_element(address, 'coordinates'), 'latitude') BETWEEN 0 AND 100 AND struct_element(struct_element(address, 'coordinates'), 'longitude') BETWEEN 0 AND 100 ORDER BY event_time LIMIT 5;""" // Query records with ratings above a specific value and limit output - qt_q10 """SELECT * FROM ${table_name} WHERE rating > 4.5 ORDER BY rating DESC,event_time LIMIT 5;""" + qt_q10 """SELECT * FROM ${table_name} WHERE rating > 4.5 ORDER BY rating DESC LIMIT 5;""" // Query all users' signup dates and limit output qt_q11 """SELECT user_id, signup_date FROM ${table_name} ORDER BY signup_date DESC LIMIT 10;""" @@ -73,13 +73,13 @@ suite("test_hudi_snapshot", "p2,external,hudi,external_remote,external_remote_hu qt_q12 """SELECT * FROM ${table_name} WHERE struct_element(address, 'postal_code') = '80312' ORDER BY event_time LIMIT 5;""" // Query users with profile pictures and limit output - qt_q13 """SELECT user_id, profile_picture FROM ${table_name} WHERE profile_picture IS NOT NULL ORDER BY user_id,event_time LIMIT 5;""" + qt_q13 """SELECT user_id, profile_picture FROM ${table_name} WHERE profile_picture IS NOT NULL ORDER BY user_id LIMIT 5;""" // Query users by signup date and limit output - qt_q14 """SELECT * FROM ${table_name} WHERE signup_date = '2024-01-15' ORDER BY user_id,event_time LIMIT 5;""" + qt_q14 """SELECT * FROM ${table_name} WHERE signup_date = '2024-01-15' ORDER BY user_id LIMIT 5;""" // Query the total count of purchases for each user and limit output - qt_q15 """SELECT user_id, array_size(purchases) AS purchase_count FROM ${table_name} ORDER BY purchase_count,event_time DESC LIMIT 5;""" + qt_q15 """SELECT user_id, array_size(purchases) AS purchase_count FROM ${table_name} ORDER BY purchase_count DESC LIMIT 5;""" } test_hudi_snapshot_querys("user_activity_log_cow_non_partition") From 5590d6fceb3722b1332286f9aef4efcf340d8bc6 Mon Sep 17 00:00:00 2001 From: daidai Date: Wed, 27 Nov 2024 11:48:55 +0800 Subject: [PATCH 3/5] fix revert2 --- .../java/org/apache/doris/datasource/hive/HMSExternalTable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 7defec7485c5c2..2115f47d777b80 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -302,7 +302,7 @@ public List getPartitionColumns(Optional snapshot) { @Override public boolean supportInternalPartitionPruned() { - return getDlaType() == DLAType.HIVE || getDlaType() == DLAType.HUDI; + return getDlaType() == DLAType.HIVE; } @Override From 7b7c30061ea36c3e608245b3e58288139ad31b97 Mon Sep 17 00:00:00 2001 From: morningman Date: Fri, 29 Nov 2024 22:20:40 +0800 Subject: [PATCH 4/5] rebase MvccSnaphost --- .../datasource/maxcompute/MaxComputeExternalTable.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java index fdd9f977c9119c..8db54fc1ce546b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java @@ -29,6 +29,7 @@ import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.SchemaCacheValue; import org.apache.doris.datasource.TablePartitionValues; +import org.apache.doris.datasource.mvcc.MvccSnapshot; import org.apache.doris.thrift.TMCTable; import org.apache.doris.thrift.TTableDescriptor; import org.apache.doris.thrift.TTableType; @@ -53,7 +54,6 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Optional; -import java.util.OptionalLong; import java.util.stream.Collectors; /** @@ -74,12 +74,13 @@ protected synchronized void makeSureInitialized() { } } + @Override public boolean supportInternalPartitionPruned() { return true; } - - public List getPartitionColumns(OptionalLong snapshotId) { + @Override + public List getPartitionColumns(Optional snapshot) { return getPartitionColumns(); } @@ -90,7 +91,8 @@ public List getPartitionColumns() { .orElse(Collections.emptyList()); } - public Map getNameToPartitionItems(OptionalLong snapshotId) { + @Override + public Map getNameToPartitionItems(Optional snapshot) { if (getPartitionColumns().isEmpty()) { return Collections.emptyMap(); } From 2b3e29a54baaebf3ab4ed98cc1ab22626931d156 Mon Sep 17 00:00:00 2001 From: morningman Date: Fri, 29 Nov 2024 22:31:21 +0800 Subject: [PATCH 5/5] fix --- .../java/org/apache/doris/datasource/ExternalTable.java | 2 +- .../apache/doris/datasource/hive/HMSExternalTable.java | 2 +- .../datasource/maxcompute/MaxComputeExternalTable.java | 5 ++--- .../datasource/maxcompute/source/MaxComputeScanNode.java | 8 ++++++-- .../java/org/apache/doris/planner/SingleNodePlanner.java | 1 - 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java index aebe980afb9f37..6c72d0a67d4732 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java @@ -394,7 +394,7 @@ public SelectedPartitions initSelectedPartitions(Optional snapshot * @param snapshot if not support mvcc, ignore this * @return partitionName ==> PartitionItem */ - public Map getNameToPartitionItems(Optional snapshot) { + protected Map getNameToPartitionItems(Optional snapshot) { return Collections.emptyMap(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 2115f47d777b80..134ad362fa1eed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -306,7 +306,7 @@ public boolean supportInternalPartitionPruned() { } @Override - public Map getNameToPartitionItems(Optional snapshot) { + protected Map getNameToPartitionItems(Optional snapshot) { return getNameToPartitionItems(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java index 8db54fc1ce546b..0f748f59e927bc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java @@ -92,13 +92,12 @@ public List getPartitionColumns() { } @Override - public Map getNameToPartitionItems(Optional snapshot) { + protected Map getNameToPartitionItems(Optional snapshot) { if (getPartitionColumns().isEmpty()) { return Collections.emptyMap(); } TablePartitionValues tablePartitionValues = getPartitionValues(); - Map idToPartitionItem = tablePartitionValues.getIdToPartitionItem(); Map idToNameMap = tablePartitionValues.getPartitionIdToNameMap(); @@ -109,7 +108,7 @@ public Map getNameToPartitionItems(Optional return nameToPartitionItem; } - public TablePartitionValues getPartitionValues() { + private TablePartitionValues getPartitionValues() { makeSureInitialized(); Optional schemaCacheValue = getSchemaCacheValue(); if (!schemaCacheValue.isPresent()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java index d3f02e4207b80a..e177e9d8b7c88c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java @@ -92,19 +92,21 @@ public class MaxComputeScanNode extends FileQueryScanNode { @Setter private SelectedPartitions selectedPartitions = null; + // For new planner public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, SelectedPartitions selectedPartitions, boolean needCheckColumnPriv) { this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE, selectedPartitions, needCheckColumnPriv); } + // For old planner public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, boolean needCheckColumnPriv) { this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE, SelectedPartitions.NOT_PRUNED, needCheckColumnPriv); } - public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, - StatisticalType statisticalType, SelectedPartitions selectedPartitions, + private MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, + StatisticalType statisticalType, SelectedPartitions selectedPartitions, boolean needCheckColumnPriv) { super(id, desc, planNodeName, statisticalType, needCheckColumnPriv); table = (MaxComputeExternalTable) desc.getTable(); @@ -132,6 +134,8 @@ private void setScanParams(TFileRangeDesc rangeDesc, MaxComputeSplit maxComputeS rangeDesc.setSize(maxComputeSplit.getLength()); } + // Return false if no need to read any partition data. + // Return true if need to read partition data. boolean createTableBatchReadSession() throws UserException { List requiredPartitionColumns = new ArrayList<>(); List orderedRequiredDataColumns = new ArrayList<>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index a82ab4248ec600..4091640066c1d7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -1992,7 +1992,6 @@ private PlanNode createScanNode(Analyzer analyzer, TableRef tblRef, SelectStmt s scanNode = new TrinoConnectorScanNode(ctx.getNextNodeId(), tblRef.getDesc(), true); break; case MAX_COMPUTE_EXTERNAL_TABLE: - // TODO: support max compute scan node scanNode = new MaxComputeScanNode(ctx.getNextNodeId(), tblRef.getDesc(), true); break; case ES_EXTERNAL_TABLE: