diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run07.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run07.sql
new file mode 100644
index 00000000000000..0eae7bed67b043
--- /dev/null
+++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run07.sql
@@ -0,0 +1,143 @@
+use paimon;
+
+create database if not exists test_paimon_partition;
+
+use test_paimon_partition;
+-- ============================================
+-- 1. Create Date Partition Table
+-- ============================================
+DROP TABLE IF EXISTS sales_by_date;
+CREATE TABLE test_paimon_partition.sales_by_date (
+    id BIGINT,
+    product_name STRING,
+    price DECIMAL(10,2),
+    quantity INT,
+    sale_date DATE,
+    created_at TIMESTAMP
+)
+PARTITIONED BY (sale_date)
+TBLPROPERTIES (
+    'primary-key' = 'id,sale_date',
+    'file.format' = 'parquet',
+    'bucket' = '2'
+);
+
+INSERT INTO test_paimon_partition.sales_by_date VALUES
+    (1, 'iPhone 15', 999.99, 2, DATE '2024-01-15', TIMESTAMP '2024-01-15 10:30:00'),
+    (2, 'MacBook Pro', 2499.99, 1, DATE '2024-01-15', TIMESTAMP '2024-01-15 11:00:00'),
+    (3, 'iPad Air', 599.99, 3, DATE '2024-01-16', TIMESTAMP '2024-01-16 09:15:00'),
+    (4, 'Apple Watch', 399.99, 2, DATE '2024-01-16', TIMESTAMP '2024-01-16 14:20:00'),
+    (5, 'AirPods Pro', 249.99, 5, DATE '2024-01-17', TIMESTAMP '2024-01-17 16:45:00');
+
+-- ============================================
+-- 2. Create Region Partition Table
+-- ============================================
+DROP TABLE IF EXISTS sales_by_region;
+CREATE TABLE test_paimon_partition.sales_by_region (
+    id BIGINT,
+    customer_name STRING,
+    product_name STRING,
+    price DECIMAL(10,2),
+    quantity INT,
+    region STRING,
+    created_at TIMESTAMP
+)
+PARTITIONED BY (region)
+TBLPROPERTIES (
+    'primary-key' = 'id,region',
+    'bucket' = '2',
+    'file.format' = 'parquet'
+);
+
+INSERT INTO test_paimon_partition.sales_by_region VALUES
+    (1, 'Zhang Wei', 'iPhone 15', 999.99, 1, 'China-Beijing', TIMESTAMP '2024-01-15 10:30:00'),
+    (2, 'John Smith', 'MacBook Pro', 2499.99, 1, 'USA-California', TIMESTAMP '2024-01-15 11:00:00'),
+    (3, 'Tanaka Taro', 'iPad Air', 599.99, 2, 'Japan-Tokyo', TIMESTAMP '2024-01-16 09:15:00');
+
+-- ============================================
+-- 3. Create Date and Region Mixed Partition Table
+-- ============================================
+DROP TABLE IF EXISTS sales_by_date_region;
+CREATE TABLE test_paimon_partition.sales_by_date_region (
+    id BIGINT,
+    customer_name STRING,
+    product_name STRING,
+    price DECIMAL(10,2),
+    quantity INT,
+    sale_date DATE,
+    region STRING,
+    created_at TIMESTAMP
+)
+PARTITIONED BY (sale_date, region)
+TBLPROPERTIES (
+    'primary-key' = 'id,sale_date,region',
+    'bucket' = '2',
+    'file.format' = 'parquet'
+);
+
+INSERT INTO test_paimon_partition.sales_by_date_region VALUES
+    (1, 'Wang Qiang', 'iPhone 15', 999.99, 1, DATE '2024-01-15', 'China-Beijing', TIMESTAMP '2024-01-15 10:30:00'),
+    (2, 'Alice Brown', 'MacBook Pro', 2499.99, 1, DATE '2024-01-15', 'USA-California', TIMESTAMP '2024-01-15 11:00:00'),
+    (3, 'Yamada Taro', 'iPad Air', 599.99, 2, DATE '2024-01-15', 'Japan-Tokyo', TIMESTAMP '2024-01-15 09:15:00'),
+    (4, 'Zhao Mei', 'Apple Watch', 399.99, 3, DATE '2024-01-16', 'China-Shanghai', TIMESTAMP '2024-01-16 14:20:00'),
+    (5, 'Bob Johnson', 'AirPods Pro', 249.99, 2, DATE '2024-01-16', 'USA-New York', TIMESTAMP '2024-01-16 16:45:00'),
+    (6, 'Suzuki Ichiro', 'iPhone 15', 999.99, 1, DATE '2024-01-16', 'Japan-Osaka', TIMESTAMP '2024-01-16 12:30:00');
+
+-- ============================================
+-- 4. Create Timestamp Partition Table (Hourly Partition)
+-- ============================================
+DROP TABLE IF EXISTS events_by_hour;
+CREATE TABLE test_paimon_partition.events_by_hour (
+    id BIGINT,
+    event_type STRING,
+    user_id STRING,
+    event_data STRING,
+    event_timestamp TIMESTAMP,
+    hour_partition STRING
+)
+PARTITIONED BY (hour_partition)
+TBLPROPERTIES (
+    'primary-key' = 'id,hour_partition',
+    'bucket' = '2',
+    'file.format' = 'parquet'
+);
+
+INSERT INTO test_paimon_partition.events_by_hour VALUES
+    (1, 'login', 'user001', 'successful login', TIMESTAMP '2024-01-15 10:30:00', '2024-01-15-10'),
+    (2, 'purchase', 'user002', 'bought iPhone', TIMESTAMP '2024-01-15 10:45:00', '2024-01-15-10'),
+    (3, 'logout', 'user001', 'session ended', TIMESTAMP '2024-01-15 11:15:00', '2024-01-15-11'),
+    (4, 'login', 'user003', 'successful login', TIMESTAMP '2024-01-15 11:30:00', '2024-01-15-11'),
+    (5, 'view_product', 'user002', 'viewed MacBook', TIMESTAMP '2024-01-15 14:20:00', '2024-01-15-14'),
+    (6, 'purchase', 'user003', 'bought iPad', TIMESTAMP '2024-01-15 14:35:00', '2024-01-15-14');
+
+-- ============================================
+-- 5. Create Composite Time Partition Table (Year-Month-Day Hierarchical Partition)
+-- ============================================
+DROP TABLE IF EXISTS logs_by_date_hierarchy;
+CREATE TABLE test_paimon_partition.logs_by_date_hierarchy (
+    log_id BIGINT,
+    log_level STRING,
+    message STRING,
+    service_name STRING,
+    log_timestamp TIMESTAMP,
+    year_val INT,
+    month_val INT,
+    day_val INT
+)
+PARTITIONED BY (year_val, month_val, day_val)
+TBLPROPERTIES (
+    'primary-key' = 'log_id,year_val,month_val,day_val',
+    'bucket' = '2',
+    'file.format' = 'parquet'
+);
+
+INSERT INTO test_paimon_partition.logs_by_date_hierarchy VALUES
+    (1, 'INFO', 'Service started successfully', 'user-service', TIMESTAMP '2024-01-15 08:00:00', 2024, 1, 15),
+    (2, 'WARN', 'High memory usage detected', 'order-service', TIMESTAMP '2024-01-15 10:30:00', 2024, 1, 15),
+    (3, 'ERROR', 'Database connection failed', 'payment-service', TIMESTAMP '2024-01-16 09:15:00', 2024, 1, 16),
+    (4, 'INFO', 'User login successful', 'auth-service', TIMESTAMP '2024-01-16 14:20:00', 2024, 1, 16),
+    (5, 'DEBUG', 'Cache miss for user data', 'user-service', TIMESTAMP '2024-01-17 11:45:00', 2024, 1, 17),
+    (6, 'ERROR', 'Payment processing failed', 'payment-service', TIMESTAMP '2024-02-01 13:30:00', 2024, 2, 1);
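
The five fixtures above cover the partition layouts the new SHOW PARTITIONS path has to render: a single DATE column, a single STRING column, a DATE+STRING pair, an hour-granularity STRING, and a three-level year/month/day hierarchy. As a quick orientation, a hedged sketch of partition-pruned reads against these tables (plain SQL; only tables and columns created above are referenced):

    -- prunes to the single partition sale_date=2024-01-16
    SELECT product_name, price * quantity AS revenue
    FROM test_paimon_partition.sales_by_date
    WHERE sale_date = DATE '2024-01-16';

    -- prunes to one leaf partition of the two-column layout
    SELECT customer_name, product_name
    FROM test_paimon_partition.sales_by_date_region
    WHERE sale_date = DATE '2024-01-15' AND region = 'China-Beijing';

    -- fixing year and month prunes to that month's day partitions
    SELECT log_level, COUNT(*) AS cnt
    FROM test_paimon_partition.logs_by_date_hierarchy
    WHERE year_val = 2024 AND month_val = 1
    GROUP BY log_level;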
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java
index 371ce3864dd1dc..cb229406b856a1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java
@@ -171,7 +171,7 @@ public List<Column> getPartitionColumns(Optional<MvccSnapshot> snapshot) {
         return getPaimonSchemaCacheValue(snapshot).getPartitionColumns();
     }
 
-    private boolean isPartitionInvalid(Optional<MvccSnapshot> snapshot) {
+    public boolean isPartitionInvalid(Optional<MvccSnapshot> snapshot) {
         PaimonSnapshotCacheValue paimonSnapshotCacheValue = getOrFetchSnapshotCacheValue(snapshot);
         return paimonSnapshotCacheValue.getPartitionInfo().isPartitionInvalid();
     }
@@ -194,6 +194,13 @@ public MTMVSnapshotIf getTableSnapshot(MTMVRefreshContext context, Optional<MvccSnapshot>
+    public Map<String, Partition> getPartitionSnapshot(
+            Optional<MvccSnapshot> snapshot) {
+
+        return getOrFetchSnapshotCacheValue(snapshot).getPartitionInfo()
+                .getNameToPartition();
+    }
+
     @Override
     public MTMVSnapshotIf getTableSnapshot(Optional<MvccSnapshot> snapshot) throws AnalysisException {
         PaimonSnapshotCacheValue paimonSnapshot = getOrFetchSnapshotCacheValue(snapshot);
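
Two changes here: `isPartitionInvalid` becomes public so that ShowPartitionsCommand (below) can reject non-partitioned Paimon tables during analysis, and the new `getPartitionSnapshot` exposes the cached name-to-Partition map that the command turns into result rows. A hedged sketch of the rejection path (`plain_table` is a hypothetical non-partitioned Paimon table, not one of the fixtures above):

    SHOW PARTITIONS FROM plain_table;
    -- expected to fail analysis with:
    --   Table plain_table is not a partitioned table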
+ tbl + "' does not exist")); + + Map partitionSnapshot = paimonTable.getPartitionSnapshot(Optional.empty()); + if (partitionSnapshot == null) { + partitionSnapshot = Collections.emptyMap(); + } + + LinkedHashSet partitionColumnNames = paimonTable + .getPartitionColumns(Optional.empty()) + .stream() + .map(Column::getName) + .collect(Collectors.toCollection(LinkedHashSet::new)); + String partitionColumnsStr = String.join(",", partitionColumnNames); + + List> rows = partitionSnapshot + .entrySet() + .stream() + .map(entry -> { + List row = new ArrayList<>(5); + row.add(entry.getKey()); + row.add(partitionColumnsStr); + row.add(String.valueOf(entry.getValue().recordCount())); + row.add(String.valueOf(entry.getValue().fileSizeInBytes())); + row.add(String.valueOf(entry.getValue().fileCount())); + return row; + }).collect(Collectors.toList()); + // sort by partition name + if (orderByPairs != null && orderByPairs.get(0).isDesc()) { + rows.sort(Comparator.comparing(x -> x.get(0), Comparator.reverseOrder())); + } else { + rows.sort(Comparator.comparing(x -> x.get(0))); + } + + rows = applyLimit(limit, offset, rows); + + return new ShowResultSet(getMetaData(), rows); + } + private ShowResultSet handleShowHMSTablePartitions() throws AnalysisException { HMSExternalCatalog hmsCatalog = (HMSExternalCatalog) catalog; List> rows = new ArrayList<>(); @@ -427,6 +488,8 @@ protected ShowResultSet handleShowPartitions(ConnectContext ctx, StmtExecutor ex return handleShowMaxComputeTablePartitions(); } else if (catalog instanceof IcebergExternalCatalog) { return handleShowIcebergTablePartitions(); + } else if (catalog instanceof PaimonExternalCatalog) { + return handleShowPaimonTablePartitions(); } else { return handleShowHMSTablePartitions(); } @@ -450,6 +513,13 @@ public ShowResultSetMetaData getMetaData() { builder.addColumn(new Column("Partition", ScalarType.createVarchar(60))); builder.addColumn(new Column("Lower Bound", ScalarType.createVarchar(100))); builder.addColumn(new Column("Upper Bound", ScalarType.createVarchar(100))); + } else if (catalog instanceof PaimonExternalCatalog) { + builder.addColumn(new Column("Partition", ScalarType.createVarchar(300))) + .addColumn(new Column("PartitionKey", ScalarType.createVarchar(300))) + .addColumn(new Column("RecordCount", ScalarType.createVarchar(300))) + .addColumn(new Column("FileSizeInBytes", ScalarType.createVarchar(300))) + .addColumn(new Column("FileCount", ScalarType.createVarchar(300))); + } else { builder.addColumn(new Column("Partition", ScalarType.createVarchar(60))); } diff --git a/regression-test/data/external_table_p0/paimon/test_paimon_partition_table.out b/regression-test/data/external_table_p0/paimon/test_paimon_partition_table.out new file mode 100644 index 00000000000000..f5a5af51ad005d --- /dev/null +++ b/regression-test/data/external_table_p0/paimon/test_paimon_partition_table.out @@ -0,0 +1,30 @@ +-- This file is automatically generated. 
diff --git a/regression-test/data/external_table_p0/paimon/test_paimon_partition_table.out b/regression-test/data/external_table_p0/paimon/test_paimon_partition_table.out
new file mode 100644
index 00000000000000..f5a5af51ad005d
--- /dev/null
+++ b/regression-test/data/external_table_p0/paimon/test_paimon_partition_table.out
@@ -0,0 +1,30 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !show_partition_sales_by_date --
+sale_date=2024-01-15	sale_date	2	2051	1
+sale_date=2024-01-16	sale_date	2	3899	2
+sale_date=2024-01-17	sale_date	1	1959	1
+
+-- !show_partition_sales_by_region --
+region=China-Beijing	region	1	2240	1
+region=Japan-Tokyo	region	1	2233	1
+region=USA-California	region	1	2268	1
+
+-- !show_partition_sales_by_date_region --
+sale_date=2024-01-15/region=China-Beijing	sale_date,region	1	2426	1
+sale_date=2024-01-15/region=Japan-Tokyo	sale_date,region	1	2412	1
+sale_date=2024-01-15/region=USA-California	sale_date,region	1	2454	1
+sale_date=2024-01-16/region=China-Shanghai	sale_date,region	1	2433	1
+sale_date=2024-01-16/region=Japan-Osaka	sale_date,region	1	2433	1
+sale_date=2024-01-16/region=USA-New York	sale_date,region	1	2440	1
+
+-- !show_partition_events_by_hour --
+hour_partition=2024-01-15-10	hour_partition	2	2181	1
+hour_partition=2024-01-15-11	hour_partition	2	4170	2
+hour_partition=2024-01-15-14	hour_partition	2	2190	1
+
+-- !show_partition_logs_by_date_hierarchy --
+year_val=2024/month_val=1/day_val=15	year_val,month_val,day_val	2	2628	1
+year_val=2024/month_val=1/day_val=16	year_val,month_val,day_val	2	4918	2
+year_val=2024/month_val=1/day_val=17	year_val,month_val,day_val	1	2456	1
+year_val=2024/month_val=2/day_val=1	year_val,month_val,day_val	1	2485	1
+
diff --git a/regression-test/suites/external_table_p0/paimon/test_paimon_partition_table.groovy b/regression-test/suites/external_table_p0/paimon/test_paimon_partition_table.groovy
new file mode 100644
index 00000000000000..f1073977b1a591
--- /dev/null
+++ b/regression-test/suites/external_table_p0/paimon/test_paimon_partition_table.groovy
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_paimon_partition_table", "p0,external,doris,external_docker,external_docker_doris") {
+    logger.info("start paimon test")
+    String enabled = context.config.otherConfigs.get("enablePaimonTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("disable paimon test.")
+        return
+    }
+    String minio_port = context.config.otherConfigs.get("iceberg_minio_port")
+    String catalog_name = "test_paimon_partition_catalog"
+    String db_name = "test_paimon_partition"
+    List<String> tableList = new ArrayList<>(Arrays.asList("sales_by_date", "sales_by_region",
+            "sales_by_date_region", "events_by_hour", "logs_by_date_hierarchy"))
+    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+    try {
+        sql """drop catalog if exists ${catalog_name}"""
+
+        sql """
+            CREATE CATALOG ${catalog_name} PROPERTIES (
+                'type' = 'paimon',
+                'warehouse' = 's3://warehouse/wh',
+                's3.endpoint' = 'http://${externalEnvIp}:${minio_port}',
+                's3.access_key' = 'admin',
+                's3.secret_key' = 'password',
+                's3.path.style.access' = 'true'
+            );
+        """
+        sql """switch `${catalog_name}`"""
+
+        sql """use ${db_name}"""
+        sql """refresh database ${db_name}"""
+        tableList.eachWithIndex { String tableName, int index ->
+            logger.info("show partitions command ${index + 1}: ${tableName}")
+            String baseQueryName = "qt_show_partition_${tableName}"
+            "$baseQueryName" """show partitions from ${tableName};"""
+        }
+    } finally {
+        sql """drop catalog if exists ${catalog_name}"""
+    }
+}
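
For a manual run outside the regression harness, the suite reduces to a few statements against an FE attached to the same docker environment (the angle-bracket placeholders stand in for externalEnvIp and iceberg_minio_port; credentials mirror the suite above):

    CREATE CATALOG test_paimon_partition_catalog PROPERTIES (
        'type' = 'paimon',
        'warehouse' = 's3://warehouse/wh',
        's3.endpoint' = 'http://<externalEnvIp>:<minio_port>',
        's3.access_key' = 'admin',
        's3.secret_key' = 'password',
        's3.path.style.access' = 'true'
    );
    SWITCH test_paimon_partition_catalog;
    USE test_paimon_partition;
    SHOW PARTITIONS FROM logs_by_date_hierarchy;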