From 30b74f05315fa270e6f92e25f00c1c982daaae48 Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Mon, 6 Jan 2025 23:26:11 +0800 Subject: [PATCH] v1 --- docs/content/concepts/system-tables.md | 24 +++++------ .../apache/paimon/casting/CastExecutors.java | 11 +++++ .../paimon/table/system/BucketsTable.java | 41 +++++++----------- .../paimon/table/system/FilesTable.java | 22 +++++----- .../paimon/table/system/PartitionsTable.java | 39 +++++++---------- .../table/system/TableIndexesTable.java | 18 ++++---- .../paimon/table/system/BucketsTableTest.java | 4 +- .../paimon/table/system/FilesTableTest.java | 13 +++--- .../table/system/PartitionsTableTest.java | 16 +++---- .../apache/paimon/flink/BranchSqlITCase.java | 6 +-- .../paimon/flink/CatalogTableITCase.java | 42 +++++++++---------- .../paimon/flink/SystemTableITCase.java | 4 +- .../apache/paimon/spark/SparkWriteITCase.java | 2 +- .../spark/sql/PaimonSystemTableTest.scala | 24 +++++++++-- 14 files changed, 139 insertions(+), 127 deletions(-) diff --git a/docs/content/concepts/system-tables.md b/docs/content/concepts/system-tables.md index 92119874e266..c6fc76807133 100644 --- a/docs/content/concepts/system-tables.md +++ b/docs/content/concepts/system-tables.md @@ -190,12 +190,12 @@ SELECT * FROM my_table$files; +-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+ | partition | bucket | file_path | file_format | schema_id | level | record_count | file_size_in_bytes | min_key | max_key | null_value_counts | min_value_stats | max_value_stats | min_sequence_number | max_sequence_number | creation_time | +-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+ -| [3] | 0 | data-8f64af95-29cc-4342-adc... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} | 1691551246234 | 1691551246637 |2023-02-24T16:06:21.166| -| [2] | 0 | data-8b369068-0d37-4011-aa5... | orc | 0 | 0 | 1 | 593 | [b] | [b] | {cnt=0, val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} | 1691551246233 | 1691551246732 |2023-02-24T16:06:21.166| -| [2] | 0 | data-83aa7973-060b-40b6-8c8... | orc | 0 | 0 | 1 | 605 | [d] | [d] | {cnt=0, val=0, word=0} | {cnt=2, val=32, word=d} | {cnt=2, val=32, word=d} | 1691551246267 | 1691551246798 |2023-02-24T16:06:21.166| -| [5] | 0 | data-3d304f4a-bcea-44dc-a13... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=5, val=51, word=c} | {cnt=5, val=51, word=c} | 1691551246788 | 1691551246152 |2023-02-24T16:06:21.166| -| [1] | 0 | data-10abb5bc-0170-43ae-b6a... | orc | 0 | 0 | 1 | 595 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} | 1691551246722 | 1691551246273 |2023-02-24T16:06:21.166| -| [4] | 0 | data-2c9b7095-65b7-4013-a7a... | orc | 0 | 0 | 1 | 593 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=4, val=12, word=a} | {cnt=4, val=12, word=a} | 1691551246321 | 1691551246109 |2023-02-24T16:06:21.166| +| {3} | 0 | data-8f64af95-29cc-4342-adc... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} | 1691551246234 | 1691551246637 |2023-02-24T16:06:21.166| +| {2} | 0 | data-8b369068-0d37-4011-aa5... | orc | 0 | 0 | 1 | 593 | [b] | [b] | {cnt=0, val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} | 1691551246233 | 1691551246732 |2023-02-24T16:06:21.166| +| {2} | 0 | data-83aa7973-060b-40b6-8c8... | orc | 0 | 0 | 1 | 605 | [d] | [d] | {cnt=0, val=0, word=0} | {cnt=2, val=32, word=d} | {cnt=2, val=32, word=d} | 1691551246267 | 1691551246798 |2023-02-24T16:06:21.166| +| {5} | 0 | data-3d304f4a-bcea-44dc-a13... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=5, val=51, word=c} | {cnt=5, val=51, word=c} | 1691551246788 | 1691551246152 |2023-02-24T16:06:21.166| +| {1} | 0 | data-10abb5bc-0170-43ae-b6a... | orc | 0 | 0 | 1 | 595 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} | 1691551246722 | 1691551246273 |2023-02-24T16:06:21.166| +| {4} | 0 | data-2c9b7095-65b7-4013-a7a... | orc | 0 | 0 | 1 | 593 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=4, val=12, word=a} | {cnt=4, val=12, word=a} | 1691551246321 | 1691551246109 |2023-02-24T16:06:21.166| +-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+ 6 rows in set */ @@ -207,9 +207,9 @@ SELECT * FROM my_table$files /*+ OPTIONS('scan.snapshot-id'='1') */; +-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+ | partition | bucket | file_path | file_format | schema_id | level | record_count | file_size_in_bytes | min_key | max_key | null_value_counts | min_value_stats | max_value_stats | min_sequence_number | max_sequence_number | creation_time | +-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+ -| [3] | 0 | data-8f64af95-29cc-4342-adc... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} | 1691551246234 | 1691551246637 |2023-02-24T16:06:21.166| -| [2] | 0 | data-8b369068-0d37-4011-aa5... | orc | 0 | 0 | 1 | 593 | [b] | [b] | {cnt=0, val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} | 1691551246233 | 1691551246732 |2023-02-24T16:06:21.166| -| [1] | 0 | data-10abb5bc-0170-43ae-b6a... | orc | 0 | 0 | 1 | 595 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} | 1691551246267 | 1691551246798 |2023-02-24T16:06:21.166| +| {3} | 0 | data-8f64af95-29cc-4342-adc... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} | 1691551246234 | 1691551246637 |2023-02-24T16:06:21.166| +| {2} | 0 | data-8b369068-0d37-4011-aa5... | orc | 0 | 0 | 1 | 593 | [b] | [b] | {cnt=0, val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} | 1691551246233 | 1691551246732 |2023-02-24T16:06:21.166| +| {1} | 0 | data-10abb5bc-0170-43ae-b6a... | orc | 0 | 0 | 1 | 595 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} | 1691551246267 | 1691551246798 |2023-02-24T16:06:21.166| +-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+ 3 rows in set */ @@ -352,7 +352,7 @@ SELECT * FROM my_table$partitions; +---------------+----------------+--------------------+--------------------+------------------------+ | partition | record_count | file_size_in_bytes| file_count| last_update_time| +---------------+----------------+--------------------+--------------------+------------------------+ -| [1] | 1 | 645 | 1 | 2024-06-24 10:25:57.400| +| {1} | 1 | 645 | 1 | 2024-06-24 10:25:57.400| +---------------+----------------+--------------------+--------------------+------------------------+ */ ``` @@ -401,8 +401,8 @@ SELECT * FROM my_table$table_indexes; +--------------------------------+-------------+--------------------------------+--------------------------------+----------------------+----------------------+--------------------------------+ | partition | bucket | index_type | file_name | file_size | row_count | dv_ranges | +--------------------------------+-------------+--------------------------------+--------------------------------+----------------------+----------------------+--------------------------------+ -| [2024-10-01] | 0 | HASH | index-70abfebf-149e-4796-9f... | 12 | 3 | | -| [2024-10-01] | 0 | DELETION_VECTORS | index-633857e7-cdce-47d2-87... | 33 | 1 | [(data-346cb9c8-4032-4d66-a... | +| {2024-10-01} | 0 | HASH | index-70abfebf-149e-4796-9f... | 12 | 3 | | +| {2024-10-01} | 0 | DELETION_VECTORS | index-633857e7-cdce-47d2-87... | 33 | 1 | [(data-346cb9c8-4032-4d66-a... | +--------------------------------+-------------+--------------------------------+--------------------------------+----------------------+----------------------+--------------------------------+ 2 rows in set */ diff --git a/paimon-common/src/main/java/org/apache/paimon/casting/CastExecutors.java b/paimon-common/src/main/java/org/apache/paimon/casting/CastExecutors.java index 8b95f00210b8..5971f80c4d23 100644 --- a/paimon-common/src/main/java/org/apache/paimon/casting/CastExecutors.java +++ b/paimon-common/src/main/java/org/apache/paimon/casting/CastExecutors.java @@ -21,6 +21,7 @@ import org.apache.paimon.types.DataType; import org.apache.paimon.types.DataTypeFamily; import org.apache.paimon.types.DataTypeRoot; +import org.apache.paimon.types.DataTypes; import javax.annotation.Nullable; @@ -101,6 +102,16 @@ public class CastExecutors { return rule.create(inputType, outputType); } + /** Resolve a {@link CastExecutor} for the provided input type to StringType. */ + public static CastExecutor resolveToString(DataType inputType) { + CastExecutor castExecutor = resolve(inputType, DataTypes.STRING()); + if (castExecutor == null) { + throw new UnsupportedOperationException( + "Cast " + inputType + " to StringType is not supported."); + } + return castExecutor; + } + public static CastExecutor identityCastExecutor() { return IDENTITY_CAST_EXECUTOR; } diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/BucketsTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/BucketsTable.java index ccc260ef0b79..a81babf5aba9 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/BucketsTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/BucketsTable.java @@ -18,6 +18,8 @@ package org.apache.paimon.table.system; +import org.apache.paimon.casting.CastExecutor; +import org.apache.paimon.casting.CastExecutors; import org.apache.paimon.data.BinaryString; import org.apache.paimon.data.GenericRow; import org.apache.paimon.data.InternalRow; @@ -40,9 +42,7 @@ import org.apache.paimon.types.DataTypes; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.IteratorRecordReader; -import org.apache.paimon.utils.Pair; import org.apache.paimon.utils.ProjectedRow; -import org.apache.paimon.utils.RowDataToObjectArrayConverter; import org.apache.paimon.utils.SerializationUtils; import org.apache.paimon.shade.guava30.com.google.common.collect.Iterators; @@ -50,14 +50,12 @@ import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneId; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import static org.apache.paimon.catalog.Catalog.SYSTEM_TABLE_SPLITTER; @@ -180,31 +178,21 @@ public RecordReader createReader(Split split) { List buckets = fileStoreTable.newSnapshotReader().bucketEntries(); - RowDataToObjectArrayConverter converter = - new RowDataToObjectArrayConverter( - fileStoreTable.schema().logicalPartitionType()); + @SuppressWarnings("unchecked") + CastExecutor partitionCastExecutor = + (CastExecutor) + CastExecutors.resolveToString( + fileStoreTable.schema().logicalPartitionType()); // sorted by partition and bucket - List> bucketList = + Iterator iterator = buckets.stream() - .map( - entry -> - Pair.of( - Arrays.toString( - converter.convert(entry.partition())), - entry)) + .map(bucketEntry -> toRow(bucketEntry, partitionCastExecutor)) .sorted( - Comparator.comparing( - (Pair p) -> p.getLeft()) - .thenComparing(p -> p.getRight().bucket())) - .collect(Collectors.toList()); - - List results = new ArrayList<>(buckets.size()); - for (Pair pair : bucketList) { - results.add(toRow(pair.getLeft(), pair.getRight())); - } + Comparator.comparing((InternalRow row) -> row.getString(0)) + .thenComparing(row -> row.getInt(1))) + .iterator(); - Iterator iterator = results.iterator(); if (readType != null) { iterator = Iterators.transform( @@ -216,9 +204,10 @@ public RecordReader createReader(Split split) { return new IteratorRecordReader<>(iterator); } - private GenericRow toRow(String partStr, BucketEntry entry) { + private InternalRow toRow( + BucketEntry entry, CastExecutor partitionCastExecutor) { return GenericRow.of( - BinaryString.fromString(partStr), + partitionCastExecutor.cast(entry.partition()), entry.bucket(), entry.recordCount(), entry.fileSizeInBytes(), diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java index 5c7ccd4809c2..34673b6a0f91 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java @@ -18,6 +18,8 @@ package org.apache.paimon.table.system; +import org.apache.paimon.casting.CastExecutor; +import org.apache.paimon.casting.CastExecutors; import org.apache.paimon.data.BinaryRow; import org.apache.paimon.data.BinaryString; import org.apache.paimon.data.GenericRow; @@ -182,10 +184,10 @@ public Plan innerPlan() { SnapshotReader snapshotReader = fileStoreTable.newSnapshotReader(); if (partitionPredicate != null && partitionPredicate.function() instanceof Equal) { String partitionStr = partitionPredicate.literals().get(0).toString(); - if (partitionStr.startsWith("[")) { + if (partitionStr.startsWith("{")) { partitionStr = partitionStr.substring(1); } - if (partitionStr.endsWith("]")) { + if (partitionStr.endsWith("}")) { partitionStr = partitionStr.substring(0, partitionStr.length() - 1); } String[] partFields = partitionStr.split(", "); @@ -318,8 +320,11 @@ public RecordReader createReader(Split split) { new SimpleStatsEvolutions( sid -> schemaManager.schema(sid).fields(), storeTable.schema().id()); - RowDataToObjectArrayConverter partitionConverter = - new RowDataToObjectArrayConverter(storeTable.schema().logicalPartitionType()); + @SuppressWarnings("unchecked") + CastExecutor partitionCastExecutor = + (CastExecutor) + CastExecutors.resolveToString( + storeTable.schema().logicalPartitionType()); Function keyConverters = new Function() { @@ -349,7 +354,7 @@ public RowDataToObjectArrayConverter apply(Long schemaId) { file -> toRow( (DataSplit) dataSplit, - partitionConverter, + partitionCastExecutor, keyConverters, file, simpleStatsEvolutions))); @@ -368,7 +373,7 @@ public RowDataToObjectArrayConverter apply(Long schemaId) { private LazyGenericRow toRow( DataSplit dataSplit, - RowDataToObjectArrayConverter partitionConverter, + CastExecutor partitionCastExecutor, Function keyConverters, DataFileMeta file, SimpleStatsEvolutions simpleStatsEvolutions) { @@ -379,10 +384,7 @@ private LazyGenericRow toRow( () -> dataSplit.partition() == null ? null - : BinaryString.fromString( - Arrays.toString( - partitionConverter.convert( - dataSplit.partition()))), + : partitionCastExecutor.cast(dataSplit.partition()), dataSplit::bucket, () -> BinaryString.fromString( diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java index 7e0b1f1d7568..7f43acc087da 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java @@ -18,6 +18,8 @@ package org.apache.paimon.table.system; +import org.apache.paimon.casting.CastExecutor; +import org.apache.paimon.casting.CastExecutors; import org.apache.paimon.data.BinaryString; import org.apache.paimon.data.GenericRow; import org.apache.paimon.data.InternalRow; @@ -40,9 +42,7 @@ import org.apache.paimon.types.DataTypes; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.IteratorRecordReader; -import org.apache.paimon.utils.Pair; import org.apache.paimon.utils.ProjectedRow; -import org.apache.paimon.utils.RowDataToObjectArrayConverter; import org.apache.paimon.utils.SerializationUtils; import org.apache.paimon.shade.guava30.com.google.common.collect.Iterators; @@ -51,14 +51,12 @@ import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneId; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import static org.apache.paimon.catalog.Catalog.SYSTEM_TABLE_SPLITTER; @@ -180,28 +178,19 @@ public RecordReader createReader(Split split) throws IOException { List partitions = fileStoreTable.newScan().listPartitionEntries(); - RowDataToObjectArrayConverter converter = - new RowDataToObjectArrayConverter( - fileStoreTable.schema().logicalPartitionType()); + @SuppressWarnings("unchecked") + CastExecutor partitionCastExecutor = + (CastExecutor) + CastExecutors.resolveToString( + fileStoreTable.schema().logicalPartitionType()); // sorted by partition - List> partitionList = + Iterator iterator = partitions.stream() - .map( - entry -> - Pair.of( - Arrays.toString( - converter.convert(entry.partition())), - entry)) - .sorted(Comparator.comparing(Pair::getLeft)) - .collect(Collectors.toList()); - - List results = new ArrayList<>(partitions.size()); - for (Pair pair : partitionList) { - results.add(toRow(pair.getLeft(), pair.getRight())); - } + .map(partitionEntry -> toRow(partitionEntry, partitionCastExecutor)) + .sorted(Comparator.comparing(row -> row.getString(0))) + .iterator(); - Iterator iterator = results.iterator(); if (readType != null) { iterator = Iterators.transform( @@ -213,9 +202,11 @@ public RecordReader createReader(Split split) throws IOException { return new IteratorRecordReader<>(iterator); } - private GenericRow toRow(String partStr, PartitionEntry entry) { + private InternalRow toRow( + PartitionEntry entry, + CastExecutor partitionCastExecutor) { return GenericRow.of( - BinaryString.fromString(partStr), + partitionCastExecutor.cast(entry.partition()), entry.recordCount(), entry.fileSizeInBytes(), entry.fileCount(), diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/TableIndexesTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/TableIndexesTable.java index 08731e768a93..24ba1e793b2b 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/TableIndexesTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/TableIndexesTable.java @@ -19,6 +19,8 @@ package org.apache.paimon.table.system; import org.apache.paimon.Snapshot; +import org.apache.paimon.casting.CastExecutor; +import org.apache.paimon.casting.CastExecutors; import org.apache.paimon.data.BinaryString; import org.apache.paimon.data.GenericRow; import org.apache.paimon.data.InternalRow; @@ -46,7 +48,6 @@ import org.apache.paimon.types.RowType; import org.apache.paimon.utils.IteratorRecordReader; import org.apache.paimon.utils.ProjectedRow; -import org.apache.paimon.utils.RowDataToObjectArrayConverter; import org.apache.paimon.utils.SerializationUtils; import org.apache.paimon.shade.guava30.com.google.common.collect.Iterators; @@ -182,13 +183,16 @@ public RecordReader createReader(Split split) { } List manifestFileMetas = allIndexEntries(dataTable); - RowDataToObjectArrayConverter partitionConverter = - new RowDataToObjectArrayConverter(dataTable.schema().logicalPartitionType()); + @SuppressWarnings("unchecked") + CastExecutor partitionCastExecutor = + (CastExecutor) + CastExecutors.resolveToString( + dataTable.schema().logicalPartitionType()); Iterator rows = Iterators.transform( manifestFileMetas.iterator(), - indexManifestEntry -> toRow(indexManifestEntry, partitionConverter)); + indexManifestEntry -> toRow(indexManifestEntry, partitionCastExecutor)); if (readType != null) { rows = Iterators.transform( @@ -202,13 +206,11 @@ public RecordReader createReader(Split split) { private InternalRow toRow( IndexManifestEntry indexManifestEntry, - RowDataToObjectArrayConverter partitionConverter) { + CastExecutor partitionCastExecutor) { LinkedHashMap dvMetas = indexManifestEntry.indexFile().deletionVectorMetas(); return GenericRow.of( - BinaryString.fromString( - Arrays.toString( - partitionConverter.convert(indexManifestEntry.partition()))), + partitionCastExecutor.cast(indexManifestEntry.partition()), indexManifestEntry.bucket(), BinaryString.fromString(indexManifestEntry.indexFile().indexType()), BinaryString.fromString(indexManifestEntry.indexFile().fileName()), diff --git a/paimon-core/src/test/java/org/apache/paimon/table/system/BucketsTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/system/BucketsTableTest.java index b6bd71087412..1415baefcaae 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/system/BucketsTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/system/BucketsTableTest.java @@ -74,7 +74,7 @@ public void before() throws Exception { public void testBucketsTable() throws Exception { assertThat(read(bucketsTable, new int[] {0, 1, 2, 4})) .containsExactlyInAnyOrder( - GenericRow.of(BinaryString.fromString("[1]"), 0, 2L, 2L), - GenericRow.of(BinaryString.fromString("[2]"), 0, 2L, 2L)); + GenericRow.of(BinaryString.fromString("{1}"), 0, 2L, 2L), + GenericRow.of(BinaryString.fromString("{2}"), 0, 2L, 2L)); } } diff --git a/paimon-core/src/test/java/org/apache/paimon/table/system/FilesTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/system/FilesTableTest.java index f0280560c267..82aefcbdd352 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/system/FilesTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/system/FilesTableTest.java @@ -106,15 +106,15 @@ public void testReadWithFilter() throws Exception { write(table, GenericRow.of(3, 1, 10, 1)); assertThat(readPartBucketLevel(null)) .containsExactlyInAnyOrder( - "[1, 10]-0-0", "[1, 10]-0-0", "[1, 10]-1-0", "[2, 20]-0-5"); + "{1, 10}-0-0", "{1, 10}-0-0", "{1, 10}-1-0", "{2, 20}-0-5"); PredicateBuilder builder = new PredicateBuilder(FilesTable.TABLE_TYPE); - assertThat(readPartBucketLevel(builder.equal(0, "[2, 20]"))) - .containsExactlyInAnyOrder("[2, 20]-0-5"); + assertThat(readPartBucketLevel(builder.equal(0, "{2, 20}"))) + .containsExactlyInAnyOrder("{2, 20}-0-5"); assertThat(readPartBucketLevel(builder.equal(1, 1))) - .containsExactlyInAnyOrder("[1, 10]-1-0"); + .containsExactlyInAnyOrder("{1, 10}-1-0"); assertThat(readPartBucketLevel(builder.equal(5, 5))) - .containsExactlyInAnyOrder("[2, 20]-0-5"); + .containsExactlyInAnyOrder("{2, 20}-0-5"); } private List readPartBucketLevel(Predicate predicate) throws IOException { @@ -188,8 +188,7 @@ private List getExpectedResult(long snapshotId) { String maxCol1 = String.valueOf(file.valueStats().maxValues().getInt(3)); expectedRow.add( GenericRow.of( - BinaryString.fromString( - Arrays.toString(new String[] {partition1, partition2})), + BinaryString.fromString("{" + partition1 + ", " + partition2 + "}"), fileEntry.bucket(), BinaryString.fromString( table.location() diff --git a/paimon-core/src/test/java/org/apache/paimon/table/system/PartitionsTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/system/PartitionsTableTest.java index 74d9a5eb7b1f..a286b187c687 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/system/PartitionsTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/system/PartitionsTableTest.java @@ -85,9 +85,9 @@ public void before() throws Exception { @Test public void testPartitionRecordCount() throws Exception { List expectedRow = new ArrayList<>(); - expectedRow.add(GenericRow.of(BinaryString.fromString("[1]"), 2L)); - expectedRow.add(GenericRow.of(BinaryString.fromString("[2]"), 1L)); - expectedRow.add(GenericRow.of(BinaryString.fromString("[3]"), 1L)); + expectedRow.add(GenericRow.of(BinaryString.fromString("{1}"), 2L)); + expectedRow.add(GenericRow.of(BinaryString.fromString("{2}"), 1L)); + expectedRow.add(GenericRow.of(BinaryString.fromString("{3}"), 1L)); // Only read partition and record count, record size may not stable. List result = read(partitionsTable, new int[] {0, 1}); @@ -97,8 +97,8 @@ public void testPartitionRecordCount() throws Exception { @Test public void testPartitionTimeTravel() throws Exception { List expectedRow = new ArrayList<>(); - expectedRow.add(GenericRow.of(BinaryString.fromString("[1]"), 1L)); - expectedRow.add(GenericRow.of(BinaryString.fromString("[3]"), 1L)); + expectedRow.add(GenericRow.of(BinaryString.fromString("{1}"), 1L)); + expectedRow.add(GenericRow.of(BinaryString.fromString("{3}"), 1L)); // Only read partition and record count, record size may not stable. List result = @@ -113,9 +113,9 @@ public void testPartitionTimeTravel() throws Exception { public void testPartitionValue() throws Exception { write(table, GenericRow.of(2, 1, 3), GenericRow.of(3, 1, 4)); List expectedRow = new ArrayList<>(); - expectedRow.add(GenericRow.of(BinaryString.fromString("[1]"), 4L, 3L)); - expectedRow.add(GenericRow.of(BinaryString.fromString("[2]"), 1L, 1L)); - expectedRow.add(GenericRow.of(BinaryString.fromString("[3]"), 1L, 1L)); + expectedRow.add(GenericRow.of(BinaryString.fromString("{1}"), 4L, 3L)); + expectedRow.add(GenericRow.of(BinaryString.fromString("{2}"), 1L, 1L)); + expectedRow.add(GenericRow.of(BinaryString.fromString("{3}"), 1L, 1L)); List result = read(partitionsTable, new int[] {0, 1, 3}); assertThat(result).containsExactlyInAnyOrderElementsOf(expectedRow); diff --git a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/BranchSqlITCase.java b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/BranchSqlITCase.java index 2566fbe92e4c..18dc02377154 100644 --- a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/BranchSqlITCase.java +++ b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/BranchSqlITCase.java @@ -531,15 +531,15 @@ public void testBranchPartitionsTable() throws Exception { sql("INSERT INTO t$branch_b1 VALUES (1, 4, 'S3'), (2, 2, 'S4')"); assertThat(collectResult("SELECT `partition`, record_count, file_count FROM t$partitions")) - .containsExactlyInAnyOrder("+I[[1], 3, 3]", "+I[[2], 3, 2]"); + .containsExactlyInAnyOrder("+I[{1}, 3, 3]", "+I[{2}, 3, 2]"); assertThat( collectResult( "SELECT `partition`, record_count, file_count FROM t$branch_b1$partitions")) - .containsExactlyInAnyOrder("+I[[1], 2, 2]", "+I[[2], 3, 2]"); + .containsExactlyInAnyOrder("+I[{1}, 2, 2]", "+I[{2}, 3, 2]"); assertThat( collectResult( "SELECT `partition`, record_count, file_count FROM t$partitions /*+ OPTIONS('branch'='b1') */")) - .containsExactlyInAnyOrder("+I[[1], 2, 2]", "+I[[2], 3, 2]"); + .containsExactlyInAnyOrder("+I[{1}, 2, 2]", "+I[{2}, 3, 2]"); } @Test diff --git a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/CatalogTableITCase.java b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/CatalogTableITCase.java index 10b03b7139ae..b82b511b6330 100644 --- a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/CatalogTableITCase.java +++ b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/CatalogTableITCase.java @@ -794,7 +794,7 @@ private void assertFilesTable(String tableName) { assertThat(getRowStringList(rows1)) .containsExactlyInAnyOrder( String.format( - "[2],0,parquet,4,0,2,%s,{a=0, bb=0, dd=0, f=0, p=0},{a=23, bb=24, dd=25, f=26, p=2},{a=27, bb=28, dd=29, f=30, p=2}", + "{2},0,parquet,4,0,2,%s,{a=0, bb=0, dd=0, f=0, p=0},{a=23, bb=24, dd=25, f=26, p=2},{a=27, bb=28, dd=29, f=30, p=2}", StringUtils.endsWith(tableName, "VALUE_COUNT") // value count table use all fields as min/max key ? "[23, 2, 24, 25, 26],[27, 2, 28, 29, 30]" @@ -804,21 +804,21 @@ private void assertFilesTable(String tableName) { // with key table use primary key trimmed partition : "[23],[27]")), String.format( - "[1],0,parquet,0,0,2,%s,{a=0, bb=0, dd=2, f=2, p=0},{a=1, bb=2, dd=null, f=null, p=1},{a=3, bb=4, dd=null, f=null, p=1}", + "{1},0,parquet,0,0,2,%s,{a=0, bb=0, dd=2, f=2, p=0},{a=1, bb=2, dd=null, f=null, p=1},{a=3, bb=4, dd=null, f=null, p=1}", StringUtils.endsWith(tableName, "VALUE_COUNT") ? "[1, 1, 2, S1],[3, 1, 4, S2]" : (StringUtils.endsWith(tableName, "APPEND_ONLY") ? "," : "[1],[3]")), String.format( - "[1],0,parquet,1,0,2,%s,{a=0, bb=0, dd=0, f=0, p=0},{a=5, bb=6, dd=7, f=9, p=1},{a=10, bb=11, dd=12, f=14, p=1}", + "{1},0,parquet,1,0,2,%s,{a=0, bb=0, dd=0, f=0, p=0},{a=5, bb=6, dd=7, f=9, p=1},{a=10, bb=11, dd=12, f=14, p=1}", StringUtils.endsWith(tableName, "VALUE_COUNT") ? "[5, 1, 6, S3, 7, 8, 9],[10, 1, 11, S4, 12, 13, 14]" : (StringUtils.endsWith(tableName, "APPEND_ONLY") ? "," : "[5],[10]")), String.format( - "[1],0,parquet,4,0,2,%s,{a=0, bb=0, dd=0, f=0, p=0},{a=15, bb=16, dd=17, f=18, p=1},{a=19, bb=20, dd=21, f=22, p=1}", + "{1},0,parquet,4,0,2,%s,{a=0, bb=0, dd=0, f=0, p=0},{a=15, bb=16, dd=17, f=18, p=1},{a=19, bb=20, dd=21, f=22, p=1}", StringUtils.endsWith(tableName, "VALUE_COUNT") ? "[15, 1, 16, 17, 18],[19, 1, 20, 21, 22]" : (StringUtils.endsWith(tableName, "APPEND_ONLY") @@ -839,14 +839,14 @@ private void assertFilesTable(String tableName) { assertThat(getRowStringList(rows2)) .containsExactlyInAnyOrder( String.format( - "[1],0,parquet,0,0,2,%s,{a=0, b=0, c=0, d=2, e=2, f=2, p=0},{a=1, b=2, c=S1, d=null, e=null, f=null, p=1},{a=3, b=4, c=S2, d=null, e=null, f=null, p=1}", + "{1},0,parquet,0,0,2,%s,{a=0, b=0, c=0, d=2, e=2, f=2, p=0},{a=1, b=2, c=S1, d=null, e=null, f=null, p=1},{a=3, b=4, c=S2, d=null, e=null, f=null, p=1}", StringUtils.endsWith(tableName, "VALUE_COUNT") ? "[1, 1, 2, S1],[3, 1, 4, S2]" : (StringUtils.endsWith(tableName, "APPEND_ONLY") ? "," : "[1],[3]")), String.format( - "[1],0,parquet,1,0,2,%s,{a=0, b=0, c=0, d=0, e=0, f=0, p=0},{a=5, b=6, c=S3, d=7, e=8, f=9, p=1},{a=10, b=11, c=S4, d=12, e=13, f=14, p=1}", + "{1},0,parquet,1,0,2,%s,{a=0, b=0, c=0, d=0, e=0, f=0, p=0},{a=5, b=6, c=S3, d=7, e=8, f=9, p=1},{a=10, b=11, c=S4, d=12, e=13, f=14, p=1}", StringUtils.endsWith(tableName, "VALUE_COUNT") ? "[5, 1, 6, S3, 7, 8, 9],[10, 1, 11, S4, 12, 13, 14]" : (StringUtils.endsWith(tableName, "APPEND_ONLY") @@ -866,18 +866,18 @@ public void testFilesTableWithFilter() { assertThat(sql("SELECT `partition`, bucket, level FROM T_WITH_FILTER$files")) .containsExactlyInAnyOrder( - Row.of("[2]", 0, 5), Row.of("[5]", 0, 5), Row.of("[8]", 1, 0)); + Row.of("{2}", 0, 5), Row.of("{5}", 0, 5), Row.of("{8}", 1, 0)); assertThat( sql( - "SELECT `partition`, bucket, level FROM T_WITH_FILTER$files WHERE `partition`='[2]'")) - .containsExactlyInAnyOrder(Row.of("[2]", 0, 5)); + "SELECT `partition`, bucket, level FROM T_WITH_FILTER$files WHERE `partition`='{2}'")) + .containsExactlyInAnyOrder(Row.of("{2}", 0, 5)); assertThat(sql("SELECT `partition`, bucket, level FROM T_WITH_FILTER$files WHERE bucket=0")) - .containsExactlyInAnyOrder(Row.of("[2]", 0, 5), Row.of("[5]", 0, 5)); + .containsExactlyInAnyOrder(Row.of("{2}", 0, 5), Row.of("{5}", 0, 5)); assertThat(sql("SELECT `partition`, bucket, level FROM T_WITH_FILTER$files WHERE level=0")) - .containsExactlyInAnyOrder(Row.of("[8]", 1, 0)); + .containsExactlyInAnyOrder(Row.of("{8}", 1, 0)); } @Nonnull @@ -1012,7 +1012,7 @@ public void testPartitionsTable() { sql("INSERT INTO %s VALUES (3, 1, 4, 'S3'), (1, 2, 2, 'S4')", table); List result = sql("SELECT `partition`, record_count, file_count FROM %s$partitions", table); - assertThat(result).containsExactlyInAnyOrder(Row.of("[1]", 2L, 2L), Row.of("[2]", 3L, 2L)); + assertThat(result).containsExactlyInAnyOrder(Row.of("{1}", 2L, 2L), Row.of("{2}", 3L, 2L)); // assert new files in partition sql("INSERT INTO %s VALUES (3, 4, 4, 'S3'), (1, 3, 2, 'S4')", table); @@ -1024,10 +1024,10 @@ public void testPartitionsTable() { table)); assertThat(result) .containsExactlyInAnyOrder( - Row.of("[1]", 3L, 3L), - Row.of("[2]", 4L, 3L), - Row.of("[3]", 1L, 1L), - Row.of("[4]", 1L, 1L)); + Row.of("{1}", 3L, 3L), + Row.of("{2}", 4L, 3L), + Row.of("{3}", 1L, 1L), + Row.of("{4}", 1L, 1L)); // assert delete partitions sql("ALTER TABLE %s DROP PARTITION (p = 2)", table); @@ -1038,7 +1038,7 @@ public void testPartitionsTable() { table)); assertThat(result) .containsExactlyInAnyOrder( - Row.of("[1]", 3L, 3L), Row.of("[3]", 1L, 1L), Row.of("[4]", 1L, 1L)); + Row.of("{1}", 3L, 3L), Row.of("{3}", 1L, 1L), Row.of("{4}", 1L, 1L)); // add new file to p 2 sql("INSERT INTO %s VALUES (1, 2, 2, 'S1')", table); @@ -1049,10 +1049,10 @@ public void testPartitionsTable() { table)); assertThat(result) .containsExactlyInAnyOrder( - Row.of("[1]", 3L, 3L), - Row.of("[2]", 1L, 1L), - Row.of("[3]", 1L, 1L), - Row.of("[4]", 1L, 1L)); + Row.of("{1}", 3L, 3L), + Row.of("{2}", 1L, 1L), + Row.of("{3}", 1L, 1L), + Row.of("{4}", 1L, 1L)); } @Test diff --git a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/SystemTableITCase.java b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/SystemTableITCase.java index 98ec635e85f7..e28078052b5b 100644 --- a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/SystemTableITCase.java +++ b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/SystemTableITCase.java @@ -76,7 +76,7 @@ public void testIndexesTable() { List rows = sql("SELECT * FROM `T$table_indexes` WHERE index_type = 'HASH'"); assertThat(rows.size()).isEqualTo(1); Row row = rows.get(0); - assertThat(row.getField(0)).isEqualTo("[2024-10-01]"); + assertThat(row.getField(0)).isEqualTo("{2024-10-01}"); assertThat(row.getField(1)).isEqualTo(0); assertThat(row.getField(2)).isEqualTo("HASH"); assertThat(row.getField(3).toString().startsWith("index-")).isTrue(); @@ -87,7 +87,7 @@ public void testIndexesTable() { rows = sql("SELECT * FROM `T$table_indexes` WHERE index_type = 'DELETION_VECTORS'"); assertThat(rows.size()).isEqualTo(1); row = rows.get(0); - assertThat(row.getField(0)).isEqualTo("[2024-10-01]"); + assertThat(row.getField(0)).isEqualTo("{2024-10-01}"); assertThat(row.getField(1)).isEqualTo(0); assertThat(row.getField(2)).isEqualTo("DELETION_VECTORS"); assertThat(row.getField(3).toString().startsWith("index-")).isTrue(); diff --git a/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkWriteITCase.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkWriteITCase.java index b0d5b380c1f2..fff94ce0374d 100644 --- a/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkWriteITCase.java +++ b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkWriteITCase.java @@ -210,7 +210,7 @@ public void testWriteDynamicBucketPartitionedTable() { spark.sql( "SELECT partition, max(bucket) FROM `T$FILES` GROUP BY partition ORDER BY partition") .collectAsList(); - assertThat(rows.toString()).isEqualTo("[[[1],2], [[2],0]]"); + assertThat(rows.toString()).isEqualTo("[[{1},2], [{2},0]]"); } @Test diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonSystemTableTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonSystemTableTest.scala index 7baa57a54d90..0c645191f63e 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonSystemTableTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonSystemTableTest.scala @@ -58,8 +58,8 @@ class PaimonSystemTableTest extends PaimonSparkTestBase { checkAnswer(spark.sql("select count(*) from `T$partitions`"), Row(5) :: Nil) checkAnswer( spark.sql("select partition from `T$partitions`"), - Row("[2024-10-09, 01]") :: Row("[2024-10-09, 02]") :: Row("[2024-10-10, 01]") :: Row( - "[2024-10-10, 12]") :: Row("[2024-10-10, 23]") :: Nil + Row("{2024-10-09, 01}") :: Row("{2024-10-09, 02}") :: Row("{2024-10-10, 01}") :: Row( + "{2024-10-10, 12}") :: Row("{2024-10-10, 23}") :: Nil ) } @@ -79,7 +79,25 @@ class PaimonSystemTableTest extends PaimonSparkTestBase { checkAnswer(spark.sql("select count(*) from `T$partitions`"), Row(1) :: Nil) checkAnswer( spark.sql("select partition,bucket from `T$buckets`"), - Row("[2024-10-10, 01]", 0) :: Row("[2024-10-10, 01]", 1) :: Row("[2024-10-10, 01]", 2) :: Nil) + Row("{2024-10-10, 01}", 0) :: Row("{2024-10-10, 01}", 1) :: Row("{2024-10-10, 01}", 2) :: Nil) + } + + test("system table: date partition table") { + sql(s""" + |CREATE TABLE T (a INT, p1 DATE, p2 INT) + |PARTITIONED BY (p1, p2) + |""".stripMargin) + + sql("INSERT INTO T VALUES(1, cast('2024-10-10' as date), 1)") + sql("INSERT INTO T VALUES(2, null, 1)") + + checkAnswer( + sql("SELECT partition FROM `T$partitions`"), + Seq(Row("{2024-10-10, 1}"), Row("{null, 1}"))) + + checkAnswer( + sql("SELECT partition, bucket FROM `T$buckets`"), + Seq(Row("{2024-10-10, 1}", 0), Row("{null, 1}", 0))) } test("system table: binlog table") {