Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions docs/content/concepts/system-tables.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,12 @@ SELECT * FROM my_table$files;
+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
| partition | bucket | file_path | file_format | schema_id | level | record_count | file_size_in_bytes | min_key | max_key | null_value_counts | min_value_stats | max_value_stats | min_sequence_number | max_sequence_number | creation_time |
+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
| [3] | 0 | data-8f64af95-29cc-4342-adc... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} | 1691551246234 | 1691551246637 |2023-02-24T16:06:21.166|
| [2] | 0 | data-8b369068-0d37-4011-aa5... | orc | 0 | 0 | 1 | 593 | [b] | [b] | {cnt=0, val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} | 1691551246233 | 1691551246732 |2023-02-24T16:06:21.166|
| [2] | 0 | data-83aa7973-060b-40b6-8c8... | orc | 0 | 0 | 1 | 605 | [d] | [d] | {cnt=0, val=0, word=0} | {cnt=2, val=32, word=d} | {cnt=2, val=32, word=d} | 1691551246267 | 1691551246798 |2023-02-24T16:06:21.166|
| [5] | 0 | data-3d304f4a-bcea-44dc-a13... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=5, val=51, word=c} | {cnt=5, val=51, word=c} | 1691551246788 | 1691551246152 |2023-02-24T16:06:21.166|
| [1] | 0 | data-10abb5bc-0170-43ae-b6a... | orc | 0 | 0 | 1 | 595 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} | 1691551246722 | 1691551246273 |2023-02-24T16:06:21.166|
| [4] | 0 | data-2c9b7095-65b7-4013-a7a... | orc | 0 | 0 | 1 | 593 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=4, val=12, word=a} | {cnt=4, val=12, word=a} | 1691551246321 | 1691551246109 |2023-02-24T16:06:21.166|
| {3} | 0 | data-8f64af95-29cc-4342-adc... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} | 1691551246234 | 1691551246637 |2023-02-24T16:06:21.166|
| {2} | 0 | data-8b369068-0d37-4011-aa5... | orc | 0 | 0 | 1 | 593 | [b] | [b] | {cnt=0, val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} | 1691551246233 | 1691551246732 |2023-02-24T16:06:21.166|
| {2} | 0 | data-83aa7973-060b-40b6-8c8... | orc | 0 | 0 | 1 | 605 | [d] | [d] | {cnt=0, val=0, word=0} | {cnt=2, val=32, word=d} | {cnt=2, val=32, word=d} | 1691551246267 | 1691551246798 |2023-02-24T16:06:21.166|
| {5} | 0 | data-3d304f4a-bcea-44dc-a13... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=5, val=51, word=c} | {cnt=5, val=51, word=c} | 1691551246788 | 1691551246152 |2023-02-24T16:06:21.166|
| {1} | 0 | data-10abb5bc-0170-43ae-b6a... | orc | 0 | 0 | 1 | 595 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} | 1691551246722 | 1691551246273 |2023-02-24T16:06:21.166|
| {4} | 0 | data-2c9b7095-65b7-4013-a7a... | orc | 0 | 0 | 1 | 593 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=4, val=12, word=a} | {cnt=4, val=12, word=a} | 1691551246321 | 1691551246109 |2023-02-24T16:06:21.166|
+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
6 rows in set
*/
Expand All @@ -207,9 +207,9 @@ SELECT * FROM my_table$files /*+ OPTIONS('scan.snapshot-id'='1') */;
+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
| partition | bucket | file_path | file_format | schema_id | level | record_count | file_size_in_bytes | min_key | max_key | null_value_counts | min_value_stats | max_value_stats | min_sequence_number | max_sequence_number | creation_time |
+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
| [3] | 0 | data-8f64af95-29cc-4342-adc... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} | 1691551246234 | 1691551246637 |2023-02-24T16:06:21.166|
| [2] | 0 | data-8b369068-0d37-4011-aa5... | orc | 0 | 0 | 1 | 593 | [b] | [b] | {cnt=0, val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} | 1691551246233 | 1691551246732 |2023-02-24T16:06:21.166|
| [1] | 0 | data-10abb5bc-0170-43ae-b6a... | orc | 0 | 0 | 1 | 595 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} | 1691551246267 | 1691551246798 |2023-02-24T16:06:21.166|
| {3} | 0 | data-8f64af95-29cc-4342-adc... | orc | 0 | 0 | 1 | 593 | [c] | [c] | {cnt=0, val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} | 1691551246234 | 1691551246637 |2023-02-24T16:06:21.166|
| {2} | 0 | data-8b369068-0d37-4011-aa5... | orc | 0 | 0 | 1 | 593 | [b] | [b] | {cnt=0, val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} | 1691551246233 | 1691551246732 |2023-02-24T16:06:21.166|
| {1} | 0 | data-10abb5bc-0170-43ae-b6a... | orc | 0 | 0 | 1 | 595 | [a] | [a] | {cnt=0, val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} | 1691551246267 | 1691551246798 |2023-02-24T16:06:21.166|
+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
3 rows in set
*/
Expand Down Expand Up @@ -352,7 +352,7 @@ SELECT * FROM my_table$partitions;
+---------------+----------------+--------------------+--------------------+------------------------+
| partition | record_count | file_size_in_bytes| file_count| last_update_time|
+---------------+----------------+--------------------+--------------------+------------------------+
| [1] | 1 | 645 | 1 | 2024-06-24 10:25:57.400|
| {1} | 1 | 645 | 1 | 2024-06-24 10:25:57.400|
+---------------+----------------+--------------------+--------------------+------------------------+
*/
```
Expand Down Expand Up @@ -401,8 +401,8 @@ SELECT * FROM my_table$table_indexes;
+--------------------------------+-------------+--------------------------------+--------------------------------+----------------------+----------------------+--------------------------------+
| partition | bucket | index_type | file_name | file_size | row_count | dv_ranges |
+--------------------------------+-------------+--------------------------------+--------------------------------+----------------------+----------------------+--------------------------------+
| [2024-10-01] | 0 | HASH | index-70abfebf-149e-4796-9f... | 12 | 3 | <NULL> |
| [2024-10-01] | 0 | DELETION_VECTORS | index-633857e7-cdce-47d2-87... | 33 | 1 | [(data-346cb9c8-4032-4d66-a... |
| {2024-10-01} | 0 | HASH | index-70abfebf-149e-4796-9f... | 12 | 3 | <NULL> |
| {2024-10-01} | 0 | DELETION_VECTORS | index-633857e7-cdce-47d2-87... | 33 | 1 | [(data-346cb9c8-4032-4d66-a... |
+--------------------------------+-------------+--------------------------------+--------------------------------+----------------------+----------------------+--------------------------------+
2 rows in set
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.DataTypeFamily;
import org.apache.paimon.types.DataTypeRoot;
import org.apache.paimon.types.DataTypes;

import javax.annotation.Nullable;

Expand Down Expand Up @@ -101,6 +102,16 @@ public class CastExecutors {
return rule.create(inputType, outputType);
}

/** Resolve a {@link CastExecutor} for the provided input type to StringType. */
public static CastExecutor<?, ?> resolveToString(DataType inputType) {
CastExecutor<?, ?> castExecutor = resolve(inputType, DataTypes.STRING());
if (castExecutor == null) {
throw new UnsupportedOperationException(
"Cast " + inputType + " to StringType is not supported.");
}
return castExecutor;
}

public static CastExecutor<?, ?> identityCastExecutor() {
return IDENTITY_CAST_EXECUTOR;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

package org.apache.paimon.table.system;

import org.apache.paimon.casting.CastExecutor;
import org.apache.paimon.casting.CastExecutors;
import org.apache.paimon.data.BinaryString;
import org.apache.paimon.data.GenericRow;
import org.apache.paimon.data.InternalRow;
Expand All @@ -40,24 +42,20 @@
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.IteratorRecordReader;
import org.apache.paimon.utils.Pair;
import org.apache.paimon.utils.ProjectedRow;
import org.apache.paimon.utils.RowDataToObjectArrayConverter;
import org.apache.paimon.utils.SerializationUtils;

import org.apache.paimon.shade.guava30.com.google.common.collect.Iterators;

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static org.apache.paimon.catalog.Catalog.SYSTEM_TABLE_SPLITTER;

Expand Down Expand Up @@ -180,31 +178,21 @@ public RecordReader<InternalRow> createReader(Split split) {

List<BucketEntry> buckets = fileStoreTable.newSnapshotReader().bucketEntries();

RowDataToObjectArrayConverter converter =
new RowDataToObjectArrayConverter(
fileStoreTable.schema().logicalPartitionType());
@SuppressWarnings("unchecked")
CastExecutor<InternalRow, BinaryString> partitionCastExecutor =
(CastExecutor<InternalRow, BinaryString>)
CastExecutors.resolveToString(
fileStoreTable.schema().logicalPartitionType());

// sorted by partition and bucket
List<Pair<String, BucketEntry>> bucketList =
Iterator<InternalRow> iterator =
buckets.stream()
.map(
entry ->
Pair.of(
Arrays.toString(
converter.convert(entry.partition())),
entry))
.map(bucketEntry -> toRow(bucketEntry, partitionCastExecutor))
.sorted(
Comparator.comparing(
(Pair<String, BucketEntry> p) -> p.getLeft())
.thenComparing(p -> p.getRight().bucket()))
.collect(Collectors.toList());

List<InternalRow> results = new ArrayList<>(buckets.size());
for (Pair<String, BucketEntry> pair : bucketList) {
results.add(toRow(pair.getLeft(), pair.getRight()));
}
Comparator.comparing((InternalRow row) -> row.getString(0))
.thenComparing(row -> row.getInt(1)))
.iterator();

Iterator<InternalRow> iterator = results.iterator();
if (readType != null) {
iterator =
Iterators.transform(
Expand All @@ -216,9 +204,10 @@ public RecordReader<InternalRow> createReader(Split split) {
return new IteratorRecordReader<>(iterator);
}

private GenericRow toRow(String partStr, BucketEntry entry) {
private InternalRow toRow(
BucketEntry entry, CastExecutor<InternalRow, BinaryString> partitionCastExecutor) {
return GenericRow.of(
BinaryString.fromString(partStr),
partitionCastExecutor.cast(entry.partition()),
entry.bucket(),
entry.recordCount(),
entry.fileSizeInBytes(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

package org.apache.paimon.table.system;

import org.apache.paimon.casting.CastExecutor;
import org.apache.paimon.casting.CastExecutors;
import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.data.BinaryString;
import org.apache.paimon.data.GenericRow;
Expand Down Expand Up @@ -182,10 +184,10 @@ public Plan innerPlan() {
SnapshotReader snapshotReader = fileStoreTable.newSnapshotReader();
if (partitionPredicate != null && partitionPredicate.function() instanceof Equal) {
String partitionStr = partitionPredicate.literals().get(0).toString();
if (partitionStr.startsWith("[")) {
if (partitionStr.startsWith("{")) {
partitionStr = partitionStr.substring(1);
}
if (partitionStr.endsWith("]")) {
if (partitionStr.endsWith("}")) {
partitionStr = partitionStr.substring(0, partitionStr.length() - 1);
}
String[] partFields = partitionStr.split(", ");
Expand Down Expand Up @@ -318,8 +320,11 @@ public RecordReader<InternalRow> createReader(Split split) {
new SimpleStatsEvolutions(
sid -> schemaManager.schema(sid).fields(), storeTable.schema().id());

RowDataToObjectArrayConverter partitionConverter =
new RowDataToObjectArrayConverter(storeTable.schema().logicalPartitionType());
@SuppressWarnings("unchecked")
CastExecutor<InternalRow, BinaryString> partitionCastExecutor =
(CastExecutor<InternalRow, BinaryString>)
CastExecutors.resolveToString(
storeTable.schema().logicalPartitionType());

Function<Long, RowDataToObjectArrayConverter> keyConverters =
new Function<Long, RowDataToObjectArrayConverter>() {
Expand Down Expand Up @@ -349,7 +354,7 @@ public RowDataToObjectArrayConverter apply(Long schemaId) {
file ->
toRow(
(DataSplit) dataSplit,
partitionConverter,
partitionCastExecutor,
keyConverters,
file,
simpleStatsEvolutions)));
Expand All @@ -368,7 +373,7 @@ public RowDataToObjectArrayConverter apply(Long schemaId) {

private LazyGenericRow toRow(
DataSplit dataSplit,
RowDataToObjectArrayConverter partitionConverter,
CastExecutor<InternalRow, BinaryString> partitionCastExecutor,
Function<Long, RowDataToObjectArrayConverter> keyConverters,
DataFileMeta file,
SimpleStatsEvolutions simpleStatsEvolutions) {
Expand All @@ -379,10 +384,7 @@ private LazyGenericRow toRow(
() ->
dataSplit.partition() == null
? null
: BinaryString.fromString(
Arrays.toString(
partitionConverter.convert(
dataSplit.partition()))),
: partitionCastExecutor.cast(dataSplit.partition()),
dataSplit::bucket,
() ->
BinaryString.fromString(
Expand Down
Loading
Loading