From bb0dcf21775608d6237200cd890c0338771bea1e Mon Sep 17 00:00:00 2001 From: JingsongLi Date: Mon, 3 Nov 2025 23:15:06 +0800 Subject: [PATCH 1/2] [core][format] Format Table plan partitions should ignore hidden & illegal dirs --- .../paimon/table/format/FormatTableScan.java | 13 ++- .../paimon/utils/PartitionPathUtils.java | 25 ++++-- .../table/format/FormatTableScanTest.java | 79 ++++++++++++++++++- 3 files changed, 102 insertions(+), 15 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java index f9a3f0067c9a..ac4c29d134a5 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java @@ -151,6 +151,7 @@ public List splits() { } private List, Path>> findPartitions() { + boolean onlyValueInPath = coreOptions.formatTablePartitionOnlyValueInPath(); if (partitionFilter instanceof MultiplePartitionPredicate) { // generate partitions directly Set partitions = ((MultiplePartitionPredicate) partitionFilter).partitions(); @@ -160,7 +161,7 @@ private List, Path>> findPartitions() { table.defaultPartName(), new Path(table.location()), partitions, - coreOptions.formatTablePartitionOnlyValueInPath()); + onlyValueInPath); } else { // search paths Pair scanPathAndLevel = @@ -169,15 +170,13 @@ private List, Path>> findPartitions() { table.partitionKeys(), partitionFilter, table.partitionType(), - coreOptions.formatTablePartitionOnlyValueInPath()); - Path scanPath = scanPathAndLevel.getLeft(); - int level = scanPathAndLevel.getRight(); + onlyValueInPath); return searchPartSpecAndPaths( table.fileIO(), - scanPath, - level, + scanPathAndLevel.getLeft(), + scanPathAndLevel.getRight(), table.partitionKeys(), - coreOptions.formatTablePartitionOnlyValueInPath()); + onlyValueInPath); } } diff --git a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java index 1aade5fbda88..fa4a20762b49 100644 --- a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java +++ b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java @@ -24,8 +24,6 @@ import org.apache.paimon.types.DataField; import org.apache.paimon.types.RowType; -import javax.annotation.Nullable; - import java.io.IOException; import java.util.ArrayList; import java.util.BitSet; @@ -272,8 +270,8 @@ public static List, Path>> searchPartSpecAndP FileIO fileIO, Path path, int partitionNumber, - @Nullable List partitionKeys, - boolean enablePartitionOnlyValueInPath) { + List partitionKeys, + boolean onlyValueInPath) { FileStatus[] generatedParts = getFileStatusRecurse(path, partitionNumber, fileIO); List, Path>> ret = new ArrayList<>(); for (FileStatus part : generatedParts) { @@ -281,14 +279,19 @@ public static List, Path>> searchPartSpecAndP if (isHiddenFile(part)) { continue; } - if (enablePartitionOnlyValueInPath && partitionKeys != null) { + if (onlyValueInPath) { ret.add( Pair.of( extractPartitionSpecFromPathOnlyValue( part.getPath(), partitionKeys), part.getPath())); } else { - ret.add(Pair.of(extractPartitionSpecFromPath(part.getPath()), part.getPath())); + LinkedHashMap spec = extractPartitionSpecFromPath(part.getPath()); + if (spec.isEmpty() && !partitionKeys.isEmpty()) { + // illegal path, for example: /path/to/table/tmp/unknown, path without "=" + continue; + } + ret.add(Pair.of(spec, part.getPath())); } } return ret; @@ -314,6 +317,10 @@ private static void listStatusRecursively( int expectLevel, List results) throws IOException { + if (isHiddenFile(fileStatus.getPath())) { + return; + } + if (expectLevel == level) { results.add(fileStatus); return; @@ -327,7 +334,11 @@ private static void listStatusRecursively( } private static boolean isHiddenFile(FileStatus fileStatus) { - String name = fileStatus.getPath().getName(); + return isHiddenFile(fileStatus.getPath()); + } + + private static boolean isHiddenFile(Path path) { + String name = path.getName(); return name.startsWith("_") || name.startsWith("."); } } diff --git a/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java b/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java index 5bf13d92093c..ac49164ea85b 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java @@ -161,7 +161,7 @@ void testComputeScanPathWithoutFilter() throws IOException { partitionType, enablePartitionValueOnly); - // Should optimize to specific partition path for first key + // Should not be optimized because of greater than assertThat(result.getLeft()).isEqualTo(tableLocation); assertThat(result.getRight()).isEqualTo(2); @@ -202,6 +202,7 @@ void testGetScanPathAndLevelWithEqualityFilter() throws IOException { partitionType, enablePartitionValueOnly); String partitionPath = enablePartitionValueOnly ? "2023/12" : "year=2023/month=12"; + // Should optimize to specific partition path assertThat(result.getLeft().toString()).isEqualTo(tableLocation + partitionPath); assertThat(result.getRight()).isEqualTo(0); @@ -265,6 +266,82 @@ void testComputeScanPathWithFirstLevel() throws IOException { assertThat(searched.size()).isEqualTo(1); } + @TestTemplate + void testNoOptimizationWithSecondEquality() throws IOException { + Path tableLocation = new Path(tmpPath.toUri()); + // Create equality predicate for only the second partition key + PredicateBuilder builder = new PredicateBuilder(partitionType); + Predicate predicate = + PredicateBuilder.and(builder.greaterOrEqual(0, 2023), builder.equal(1, 12)); + PartitionPredicate partitionFilter = + PartitionPredicate.fromPredicate(partitionType, predicate); + + Pair result = + FormatTableScan.computeScanPathAndLevel( + tableLocation, + partitionKeys, + partitionFilter, + partitionType, + enablePartitionValueOnly); + + // Should not optimize with second equality filter + assertThat(result.getLeft()).isEqualTo(tableLocation); + assertThat(result.getRight()).isEqualTo(2); + + // test searchPartSpecAndPaths + LocalFileIO fileIO = LocalFileIO.create(); + String partitionPath = enablePartitionValueOnly ? "2023/12" : "year=2023/month=12"; + fileIO.mkdirs(new Path(tableLocation, partitionPath)); + List, Path>> searched = + searchPartSpecAndPaths( + fileIO, + result.getLeft(), + result.getRight(), + partitionKeys, + enablePartitionValueOnly); + LinkedHashMap expectPartitionSpec = + new LinkedHashMap<>(partitionKeys.size()); + expectPartitionSpec.put("year", "2023"); + expectPartitionSpec.put("month", "12"); + assertThat(searched.get(0).getLeft()).isEqualTo(expectPartitionSpec); + assertThat(searched.size()).isEqualTo(1); + } + + @TestTemplate + void testSkipIllegalPath() throws IOException { + Path tableLocation = new Path(tmpPath.toUri()); + PartitionPredicate partitionFilter = PartitionPredicate.fromPredicate(partitionType, null); + Pair result = + FormatTableScan.computeScanPathAndLevel( + tableLocation, + partitionKeys, + partitionFilter, + partitionType, + enablePartitionValueOnly); + + LocalFileIO fileIO = LocalFileIO.create(); + String illegalPath = + enablePartitionValueOnly + ? "_unknown-year/unknown-month" + : "unknown-year/unknown-month"; + fileIO.mkdirs(new Path(tableLocation, illegalPath)); + String partitionPath = enablePartitionValueOnly ? "2023/12" : "year=2023/month=12"; + fileIO.mkdirs(new Path(tableLocation, partitionPath)); + List, Path>> searched = + searchPartSpecAndPaths( + fileIO, + result.getLeft(), + result.getRight(), + partitionKeys, + enablePartitionValueOnly); + LinkedHashMap expectPartitionSpec = + new LinkedHashMap<>(partitionKeys.size()); + expectPartitionSpec.put("year", "2023"); + expectPartitionSpec.put("month", "12"); + assertThat(searched.get(0).getLeft()).isEqualTo(expectPartitionSpec); + assertThat(searched.size()).isEqualTo(1); + } + @TestTemplate void testComputeScanPathAndLevel() { Path tableLocation = new Path(tmpPath.toUri()); From 78cc9c12b44a117c7a0fd7eeb18d64625614ccd9 Mon Sep 17 00:00:00 2001 From: JingsongLi Date: Tue, 4 Nov 2025 06:34:00 +0800 Subject: [PATCH 2/2] fix --- .../main/java/org/apache/paimon/utils/PartitionPathUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java index fa4a20762b49..880e4dfe67b4 100644 --- a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java +++ b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java @@ -287,7 +287,7 @@ public static List, Path>> searchPartSpecAndP part.getPath())); } else { LinkedHashMap spec = extractPartitionSpecFromPath(part.getPath()); - if (spec.isEmpty() && !partitionKeys.isEmpty()) { + if (spec.size() != partitionKeys.size()) { // illegal path, for example: /path/to/table/tmp/unknown, path without "=" continue; }