From 3b76ae6368029b11d38c5f50010471084ffc86da Mon Sep 17 00:00:00 2001 From: daidai Date: Mon, 17 Mar 2025 10:13:24 +0800 Subject: [PATCH] [Enhancement](paimon)support native read paimon top level schema change table. (#48723) Problem Summary: Supports native reader reading tables after the top-level schema of paimon is changed, but does not support tables after the internal schema of struct is changed. change top-level schema(support): ```sql --spark sql ALTER TABLE table_name ADD COLUMNS (c1 INT,c2 STRING); ALTER TABLE table_name RENAME COLUMN c0 TO c1; ALTER TABLE table_name DROP COLUMNS (c1, c2); ALTER TABLE table_name ADD COLUMN c INT FIRST; ALTER TABLE table_name ADD COLUMN c INT AFTER b; ALTER TABLE table_name ALTER COLUMN col_a FIRST; ALTER TABLE table_name ALTER COLUMN col_a AFTER col_b; ``` change internal schema of struct schema(not support, will support in the next PR): ```sql --spark sql ALTER TABLE table_name ADD COLUMN v.value.f3 STRING; ALTER TABLE table_name RENAME COLUMN v.f1 to f100; ALTER TABLE table_name DROP COLUMN v.value.f3 ; ALTER TABLE table_name ALTER COLUMN v.col_a FIRST; ``` --- .../vec/exec/format/table/iceberg_reader.cpp | 4 +- be/src/vec/exec/format/table/iceberg_reader.h | 6 +- .../vec/exec/format/table/paimon_reader.cpp | 87 ++++++- be/src/vec/exec/format/table/paimon_reader.h | 57 +++++ be/src/vec/exec/scan/vfile_scanner.cpp | 24 +- be/src/vec/exec/scan/vfile_scanner.h | 2 +- .../paimon/paimon_schema_change_test.cpp | 136 ++++++++++ .../paimon/run02.sql | 159 ++++++++++++ .../paimon/PaimonExternalTable.java | 2 +- .../paimon/PaimonSchemaCacheValue.java | 12 +- .../paimon/source/PaimonScanNode.java | 19 +- .../datasource/paimon/source/PaimonSplit.java | 9 + gensrc/thrift/PlanNodes.thrift | 2 + .../paimon/test_paimon_schema_change.out | 232 ++++++++++++++++++ .../paimon/test_paimon_schema_change.groovy | 88 +++++++ 15 files changed, 817 insertions(+), 22 deletions(-) create mode 100644 be/test/vec/exec/format/paimon/paimon_schema_change_test.cpp create mode 100644 docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run02.sql create mode 100644 regression-test/data/external_table_p0/paimon/test_paimon_schema_change.out create mode 100644 regression-test/suites/external_table_p0/paimon/test_paimon_schema_change.groovy diff --git a/be/src/vec/exec/format/table/iceberg_reader.cpp b/be/src/vec/exec/format/table/iceberg_reader.cpp index b7666bda68874a..d209eb7d271916 100644 --- a/be/src/vec/exec/format/table/iceberg_reader.cpp +++ b/be/src/vec/exec/format/table/iceberg_reader.cpp @@ -502,7 +502,7 @@ void IcebergTableReader::_gen_position_delete_file_range(Block& block, DeleteFil Status IcebergParquetReader::init_reader( const std::vector& file_col_names, - const std::unordered_map& col_id_name_map, + const std::unordered_map& col_id_name_map, const std::unordered_map* colname_to_value_range, const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, @@ -575,7 +575,7 @@ Status IcebergParquetReader ::_read_position_delete_file(const TFileRangeDesc* d Status IcebergOrcReader::init_reader( const std::vector& file_col_names, - const std::unordered_map& col_id_name_map, + const std::unordered_map& col_id_name_map, const std::unordered_map* colname_to_value_range, const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, diff --git a/be/src/vec/exec/format/table/iceberg_reader.h b/be/src/vec/exec/format/table/iceberg_reader.h index 48236021ce9432..8a2b253d16f9c5 100644 --- a/be/src/vec/exec/format/table/iceberg_reader.h +++ b/be/src/vec/exec/format/table/iceberg_reader.h @@ -148,7 +148,7 @@ class IcebergTableReader : public TableFormatReader { // copy from _colname_to_value_range with new column name that is in parquet/orc file, to support schema evolution. std::unordered_map _new_colname_to_value_range; // column id to name map. Collect from FE slot descriptor. - std::unordered_map _col_id_name_map; + std::unordered_map _col_id_name_map; // col names in the parquet,orc file std::vector _all_required_col_names; // col names in table but not in parquet,orc file @@ -194,7 +194,7 @@ class IcebergParquetReader final : public IcebergTableReader { kv_cache, io_ctx) {} Status init_reader( const std::vector& file_col_names, - const std::unordered_map& col_id_name_map, + const std::unordered_map& col_id_name_map, const std::unordered_map* colname_to_value_range, const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, @@ -240,7 +240,7 @@ class IcebergOrcReader final : public IcebergTableReader { Status init_reader( const std::vector& file_col_names, - const std::unordered_map& col_id_name_map, + const std::unordered_map& col_id_name_map, const std::unordered_map* colname_to_value_range, const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, diff --git a/be/src/vec/exec/format/table/paimon_reader.cpp b/be/src/vec/exec/format/table/paimon_reader.cpp index cd8b7c0060f6d8..e3fba810bbaf72 100644 --- a/be/src/vec/exec/format/table/paimon_reader.cpp +++ b/be/src/vec/exec/format/table/paimon_reader.cpp @@ -37,6 +37,68 @@ PaimonReader::PaimonReader(std::unique_ptr file_format_reader, ADD_CHILD_TIMER(_profile, "DeleteFileReadTime", paimon_profile); } +Status PaimonReader::gen_file_col_name( + const std::vector& read_table_col_names, + const std::unordered_map& table_col_id_table_name_map, + const std::unordered_map* + table_col_name_to_value_range) { + // It is a bit similar to iceberg. I will consider integrating it when I write hudi schema change later. + _table_col_to_file_col.clear(); + _file_col_to_table_col.clear(); + + if (!_params.__isset.paimon_schema_info) [[unlikely]] { + return Status::RuntimeError("miss paimon schema info."); + } + + if (!_params.paimon_schema_info.contains(_range.table_format_params.paimon_params.schema_id)) + [[unlikely]] { + return Status::InternalError("miss paimon schema info."); + } + + const auto& table_id_to_file_name = + _params.paimon_schema_info.at(_range.table_format_params.paimon_params.schema_id); + for (auto [table_col_id, file_col_name] : table_id_to_file_name) { + if (table_col_id_table_name_map.find(table_col_id) == table_col_id_table_name_map.end()) { + continue; + } + auto& table_col_name = table_col_id_table_name_map.at(table_col_id); + + _table_col_to_file_col.emplace(table_col_name, file_col_name); + _file_col_to_table_col.emplace(file_col_name, table_col_name); + if (table_col_name != file_col_name) { + _has_schema_change = true; + } + } + + _all_required_col_names.clear(); + _not_in_file_col_names.clear(); + for (auto name : read_table_col_names) { + auto iter = _table_col_to_file_col.find(name); + if (iter == _table_col_to_file_col.end()) { + auto name_low = to_lower(name); + _all_required_col_names.emplace_back(name_low); + + _table_col_to_file_col.emplace(name, name_low); + _file_col_to_table_col.emplace(name_low, name); + if (name != name_low) { + _has_schema_change = true; + } + } else { + _all_required_col_names.emplace_back(iter->second); + } + } + + for (auto& it : *table_col_name_to_value_range) { + auto iter = _table_col_to_file_col.find(it.first); + if (iter == _table_col_to_file_col.end()) { + _new_colname_to_value_range.emplace(it.first, it.second); + } else { + _new_colname_to_value_range.emplace(iter->second, it.second); + } + } + return Status::OK(); +} + Status PaimonReader::init_row_filters() { const auto& table_desc = _range.table_format_params.paimon_params; if (!table_desc.__isset.deletion_file) { @@ -106,6 +168,29 @@ Status PaimonReader::init_row_filters() { } Status PaimonReader::get_next_block_inner(Block* block, size_t* read_rows, bool* eof) { - return _file_format_reader->get_next_block(block, read_rows, eof); + if (_has_schema_change) { + for (int i = 0; i < block->columns(); i++) { + ColumnWithTypeAndName& col = block->get_by_position(i); + auto iter = _table_col_to_file_col.find(col.name); + if (iter != _table_col_to_file_col.end()) { + col.name = iter->second; + } + } + block->initialize_index_by_name(); + } + + RETURN_IF_ERROR(_file_format_reader->get_next_block(block, read_rows, eof)); + + if (_has_schema_change) { + for (int i = 0; i < block->columns(); i++) { + ColumnWithTypeAndName& col = block->get_by_position(i); + auto iter = _file_col_to_table_col.find(col.name); + if (iter != _file_col_to_table_col.end()) { + col.name = iter->second; + } + } + block->initialize_index_by_name(); + } + return Status::OK(); } } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/paimon_reader.h b/be/src/vec/exec/format/table/paimon_reader.h index dcc7bf9d700ff6..11fffca943788f 100644 --- a/be/src/vec/exec/format/table/paimon_reader.h +++ b/be/src/vec/exec/format/table/paimon_reader.h @@ -30,12 +30,19 @@ class PaimonReader : public TableFormatReader { PaimonReader(std::unique_ptr file_format_reader, RuntimeProfile* profile, RuntimeState* state, const TFileScanRangeParams& params, const TFileRangeDesc& range, io::IOContext* io_ctx); + ~PaimonReader() override = default; Status init_row_filters() final; Status get_next_block_inner(Block* block, size_t* read_rows, bool* eof) final; + Status gen_file_col_name( + const std::vector& read_table_col_names, + const std::unordered_map& table_col_id_table_name_map, + const std::unordered_map* + table_col_name_to_value_range); + protected: struct PaimonProfile { RuntimeProfile::Counter* num_delete_rows; @@ -44,6 +51,16 @@ class PaimonReader : public TableFormatReader { std::vector _delete_rows; PaimonProfile _paimon_profile; + std::unordered_map _new_colname_to_value_range; + + std::unordered_map _file_col_to_table_col; + std::unordered_map _table_col_to_file_col; + + std::vector _all_required_col_names; + std::vector _not_in_file_col_names; + + bool _has_schema_change = false; + virtual void set_delete_rows() = 0; }; @@ -60,6 +77,25 @@ class PaimonOrcReader final : public PaimonReader { (reinterpret_cast(_file_format_reader.get())) ->set_position_delete_rowids(&_delete_rows); } + + Status init_reader( + const std::vector& read_table_col_names, + const std::unordered_map& table_col_id_table_name_map, + const std::unordered_map* + table_col_name_to_value_range, + const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, + const RowDescriptor* row_descriptor, + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts) { + RETURN_IF_ERROR(gen_file_col_name(read_table_col_names, table_col_id_table_name_map, + table_col_name_to_value_range)); + auto* orc_reader = static_cast(_file_format_reader.get()); + orc_reader->set_table_col_to_file_col(_table_col_to_file_col); + return orc_reader->init_reader(&_all_required_col_names, &_new_colname_to_value_range, + conjuncts, false, tuple_descriptor, row_descriptor, + not_single_slot_filter_conjuncts, + slot_id_to_filter_conjuncts); + } }; class PaimonParquetReader final : public PaimonReader { @@ -75,5 +111,26 @@ class PaimonParquetReader final : public PaimonReader { (reinterpret_cast(_file_format_reader.get())) ->set_delete_rows(&_delete_rows); } + + Status init_reader( + const std::vector& read_table_col_names, + const std::unordered_map& table_col_id_table_name_map, + const std::unordered_map* + table_col_name_to_value_range, + const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, + const RowDescriptor* row_descriptor, + const std::unordered_map* colname_to_slot_id, + const VExprContextSPtrs* not_single_slot_filter_conjuncts, + const std::unordered_map* slot_id_to_filter_conjuncts) { + RETURN_IF_ERROR(gen_file_col_name(read_table_col_names, table_col_id_table_name_map, + table_col_name_to_value_range)); + auto* parquet_reader = static_cast(_file_format_reader.get()); + parquet_reader->set_table_to_file_col_map(_table_col_to_file_col); + + return parquet_reader->init_reader( + _all_required_col_names, _not_in_file_col_names, &_new_colname_to_value_range, + conjuncts, tuple_descriptor, row_descriptor, colname_to_slot_id, + not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts); + } }; } // namespace doris::vectorized diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 985111cd04051d..1c17e788b005be 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -961,15 +961,14 @@ Status VFileScanner::_get_next_reader() { _cur_reader = std::move(iceberg_reader); } else if (range.__isset.table_format_params && range.table_format_params.table_format_type == "paimon") { - std::vector place_holder; - init_status = parquet_reader->init_reader( - _file_col_names, place_holder, _colname_to_value_range, - _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), - _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, - &_slot_id_to_filter_conjuncts); std::unique_ptr paimon_reader = PaimonParquetReader::create_unique(std::move(parquet_reader), _profile, _state, *_params, range, _io_ctx.get()); + init_status = paimon_reader->init_reader( + _file_col_names, _col_id_name_map, _colname_to_value_range, + _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), + _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); RETURN_IF_ERROR(paimon_reader->init_row_filters()); _cur_reader = std::move(paimon_reader); } else { @@ -1027,12 +1026,13 @@ Status VFileScanner::_get_next_reader() { _cur_reader = std::move(iceberg_reader); } else if (range.__isset.table_format_params && range.table_format_params.table_format_type == "paimon") { - init_status = orc_reader->init_reader( - &_file_col_names, _colname_to_value_range, _push_down_conjuncts, false, - _real_tuple_desc, _default_val_row_desc.get(), - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); std::unique_ptr paimon_reader = PaimonOrcReader::create_unique( std::move(orc_reader), _profile, _state, *_params, range, _io_ctx.get()); + + init_status = paimon_reader->init_reader( + _file_col_names, _col_id_name_map, _colname_to_value_range, + _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), + &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); RETURN_IF_ERROR(paimon_reader->init_row_filters()); _cur_reader = std::move(paimon_reader); } else { @@ -1237,7 +1237,9 @@ Status VFileScanner::_init_expr_ctxes() { if (slot_info.is_file_slot) { _file_slot_descs.emplace_back(it->second); _file_col_names.push_back(it->second->col_name()); - if (it->second->col_unique_id() > 0) { + if (it->second->col_unique_id() >= 0) { + // Iceberg field unique ID starts from 1, Paimon/Hudi field unique ID starts from 0. + // For other data sources, all columns are set to -1. _col_id_name_map.emplace(it->second->col_unique_id(), it->second->col_name()); } } else { diff --git a/be/src/vec/exec/scan/vfile_scanner.h b/be/src/vec/exec/scan/vfile_scanner.h index ca7a03c68c022d..9bd1f3e2aa1533 100644 --- a/be/src/vec/exec/scan/vfile_scanner.h +++ b/be/src/vec/exec/scan/vfile_scanner.h @@ -107,7 +107,7 @@ class VFileScanner : public VScanner { // col names from _file_slot_descs std::vector _file_col_names; // column id to name map. Collect from FE slot descriptor. - std::unordered_map _col_id_name_map; + std::unordered_map _col_id_name_map; // Partition source slot descriptors std::vector _partition_slot_descs; diff --git a/be/test/vec/exec/format/paimon/paimon_schema_change_test.cpp b/be/test/vec/exec/format/paimon/paimon_schema_change_test.cpp new file mode 100644 index 00000000000000..8dafed48a97bf4 --- /dev/null +++ b/be/test/vec/exec/format/paimon/paimon_schema_change_test.cpp @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include + +#include "io/file_factory.h" +#include "io/fs/file_reader.h" +#include "io/io_common.h" +#include "runtime/runtime_state.h" +#include "util/runtime_profile.h" +#include "vec/exec/format/table/paimon_reader.h" + +namespace doris::vectorized { + +class PaimonMockReader final : public PaimonReader { +public: + PaimonMockReader(std::unique_ptr file_format_reader, RuntimeProfile* profile, + RuntimeState* state, const TFileScanRangeParams& params, + const TFileRangeDesc& range, io::IOContext* io_ctx) + : PaimonReader(std::move(file_format_reader), profile, state, params, range, io_ctx) {}; + ~PaimonMockReader() final = default; + + void set_delete_rows() final { + (reinterpret_cast(_file_format_reader.get())) + ->set_position_delete_rowids(&_delete_rows); + } + + void check() { + ASSERT_TRUE(_has_schema_change == true); + ASSERT_TRUE(_new_colname_to_value_range.empty()); + std::unordered_map table_col_to_file_col_ans; + table_col_to_file_col_ans["b"] = "map_col"; + table_col_to_file_col_ans["e"] = "array_col"; + table_col_to_file_col_ans["d"] = "struct_col"; + table_col_to_file_col_ans["a"] = "vvv"; + table_col_to_file_col_ans["c"] = "k"; + table_col_to_file_col_ans["nonono"] = "nonono"; + for (auto [table_col, file_col] : table_col_to_file_col_ans) { + ASSERT_TRUE(_table_col_to_file_col[table_col] == file_col); + ASSERT_TRUE(_file_col_to_table_col[file_col] == table_col); + } + } +}; + +class PaimonReaderTest : public ::testing::Test { +protected: + void SetUp() override { + _profile = new RuntimeProfile("test_profile"); + _state = new RuntimeState(TQueryGlobals()); + _io_ctx = new io::IOContext(); + _schema_file_path = "./be/test/exec/test_data/paimon_scanner/schema-0"; + } + + void TearDown() override { + delete _profile; + delete _state; + delete _io_ctx; + } + + RuntimeProfile* _profile; + RuntimeState* _state; + io::IOContext* _io_ctx; + std::string _schema_file_path; +}; + +TEST_F(PaimonReaderTest, ReadSchemaFile) { + std::map file_id_to_name; + file_id_to_name[0] = "k"; + file_id_to_name[1] = "vvv"; + file_id_to_name[2] = "array_col"; + file_id_to_name[3] = "struct_col"; + file_id_to_name[6] = "map_col"; + + TFileScanRangeParams params; + params.file_type = TFileType::FILE_LOCAL; + params.properties = {}; + params.hdfs_params = {}; + params.__isset.paimon_schema_info = true; + params.paimon_schema_info[0] = file_id_to_name; + TFileRangeDesc range; + range.table_format_params.paimon_params.schema_id = 0; + + PaimonMockReader reader(nullptr, _profile, _state, params, range, _io_ctx); + + // create table tmp5 ( + // k int, + // vVV string, + // array_col array, + // struct_COL struct, + // map_COL map + // ) tblproperties ( + // 'primary-key' = 'k', + // "file.format" = "parquet" + // ); + + std::vector read_table_col_names; + read_table_col_names.emplace_back("a"); + read_table_col_names.emplace_back("b"); + read_table_col_names.emplace_back("c"); + read_table_col_names.emplace_back("d"); + read_table_col_names.emplace_back("e"); + read_table_col_names.emplace_back("nonono"); + + std::unordered_map table_col_id_table_name_map; + table_col_id_table_name_map[1] = "a"; + table_col_id_table_name_map[6] = "b"; + table_col_id_table_name_map[0] = "c"; + table_col_id_table_name_map[3] = "d"; + table_col_id_table_name_map[2] = "e"; + table_col_id_table_name_map[10] = "nonono"; + + std::unordered_map table_col_name_to_value_range; + Status status = reader.gen_file_col_name(read_table_col_names, table_col_id_table_name_map, + &table_col_name_to_value_range); + ASSERT_TRUE(status.ok()); + reader.check(); +} + +} // namespace doris::vectorized \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run02.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run02.sql new file mode 100644 index 00000000000000..1d199a2bec82bf --- /dev/null +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run02.sql @@ -0,0 +1,159 @@ +use paimon; + +create database if not exists test_paimon_schema_change; + +use test_paimon_schema_change; + +CREATE TABLE sc_orc_pk ( + id INT, + name STRING, + age INT +) USING paimon +TBLPROPERTIES ('primary-key' = 'id', "file.format" = "orc",'deletion-vectors.enabled' = 'true'); + +INSERT INTO sc_orc_pk (id, name, age) VALUES (1, 'Alice', 30), (2, 'Bob', 25); +INSERT INTO sc_orc_pk (id, name, age) VALUES (3, 'Charlie', 28); +ALTER TABLE sc_orc_pk ADD COLUMNS (city STRING); +INSERT INTO sc_orc_pk (id, name, age, city) VALUES (4, 'Charlie', 28, 'New York'); +INSERT INTO sc_orc_pk (id, name, age, city) VALUES (5, 'David', 32, 'Los Angeles'); +ALTER TABLE sc_orc_pk RENAME COLUMN name TO full_name; +INSERT INTO sc_orc_pk (id, full_name, age, city) VALUES (6, 'David', 35, 'Los Angeles'); +INSERT INTO sc_orc_pk (id, full_name, age, city) VALUES (7, 'Eve', 27, 'San Francisco'); +ALTER TABLE sc_orc_pk DROP COLUMN age; +INSERT INTO sc_orc_pk (id, full_name, city) VALUES (8, 'Eve', 'San Francisco'); +INSERT INTO sc_orc_pk (id, full_name, city) VALUES (9, 'Frank', 'Chicago'); +ALTER TABLE sc_orc_pk CHANGE COLUMN id id BIGINT; +INSERT INTO sc_orc_pk (id, full_name, city) VALUES (10000000000, 'Frank', 'Chicago'); +INSERT INTO sc_orc_pk (id, full_name, city) VALUES (10, 'Grace', 'Seattle'); + +ALTER TABLE sc_orc_pk ADD COLUMN salary DECIMAL(10,2) FIRST; +INSERT INTO sc_orc_pk (id, full_name, city, salary) VALUES (11, 'Grace', 'Seattle', 5000.00); +INSERT INTO sc_orc_pk (id, full_name, city, salary) VALUES (12, 'Heidi', 'Boston', 6000.00); + +ALTER TABLE sc_orc_pk RENAME COLUMN city TO location; +INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (13, 'Heidi', 'Boston', 6000.00); +INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (14, 'Ivan', 'Miami', 7000.00); + +ALTER TABLE sc_orc_pk CHANGE COLUMN salary salary DECIMAL(12,2); +INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (15, 'Ivan', 'Miami', 7000.00); +INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (16, 'Judy', 'Denver', 8000.00); + +ALTER TABLE sc_orc_pk ALTER COLUMN salary AFTER location; +INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (17, 'Stm', 'ttttt', 8000.00); +INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (18, 'Ken', 'Austin', 9000.00); + +ALTER TABLE sc_orc_pk ALTER COLUMN full_name FIRST; +INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (19, 'AAAA', 'BBBB', 9000.00); +INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (20, 'Laura', 'Portland', 10000.00); + + + + + +CREATE TABLE sc_parquet_pk ( + id INT, + name STRING, + age INT +) USING paimon +TBLPROPERTIES ('primary-key' = 'id',"file.format" = "parquet",'deletion-vectors.enabled' = 'true'); + +INSERT INTO sc_parquet_pk (id, name, age) VALUES (1, 'Alice', 30), (2, 'Bob', 25); +INSERT INTO sc_parquet_pk (id, name, age) VALUES (3, 'Charlie', 28); + +ALTER TABLE sc_parquet_pk ADD COLUMNS (city STRING); +INSERT INTO sc_parquet_pk (id, name, age, city) VALUES (3, 'Charlie', 28, 'New York'); +INSERT INTO sc_parquet_pk (id, name, age, city) VALUES (4, 'David', 32, 'Los Angeles'); + +ALTER TABLE sc_parquet_pk RENAME COLUMN name TO full_name; +INSERT INTO sc_parquet_pk (id, full_name, age, city) VALUES (4, 'David', 35, 'Los Angeles'); +INSERT INTO sc_parquet_pk (id, full_name, age, city) VALUES (5, 'Eve', 27, 'San Francisco'); + +ALTER TABLE sc_parquet_pk DROP COLUMN age; +INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (5, 'Eve', 'San Francisco'); +INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (6, 'Frank', 'Chicago'); + +ALTER TABLE sc_parquet_pk CHANGE COLUMN id id BIGINT; +INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (10000000000, 'Frank', 'Chicago'); +INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (7, 'Grace', 'Seattle'); + +ALTER TABLE sc_parquet_pk ADD COLUMN salary DECIMAL(10,2) FIRST; +INSERT INTO sc_parquet_pk (id, full_name, city, salary) VALUES (6, 'Grace', 'Seattle', 5000.00); +INSERT INTO sc_parquet_pk (id, full_name, city, salary) VALUES (8, 'Heidi', 'Boston', 6000.00); + +ALTER TABLE sc_parquet_pk RENAME COLUMN city TO location; +INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (7, 'Heidi', 'Boston', 6000.00); +INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (9, 'Ivan', 'Miami', 7000.00); + +ALTER TABLE sc_parquet_pk CHANGE COLUMN salary salary DECIMAL(12,2); +INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (8, 'Ivan', 'Miami', 7000.00); +INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (10, 'Judy', 'Denver', 8000.00); + +ALTER TABLE sc_parquet_pk ALTER COLUMN salary AFTER location; +INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (9, 'Stm', 'ttttt', 8000.00); +INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (11, 'Ken', 'Austin', 9000.00); + +ALTER TABLE sc_parquet_pk ALTER COLUMN full_name FIRST; +INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (10, 'AAAA', 'BBBB', 9000.00); +INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (12, 'Laura', 'Portland', 10000.00); + + + + + + + + +create table sc_parquet ( + k int, + vVV string, + col1 array, + col2 struct, + col3 map +) tblproperties ( + "file.format" = "parquet" +); +INSERT INTO sc_parquet (k,vVV,col1,col2,col3) VALUES + (1, 'hello', array(1,2,3), named_struct('a', 10, 'b', 'world'), map('key1', 100, 'key2', 200)); + +ALTER TABLE sc_parquet RENAME COLUMN col1 TO new_col1; +ALTER TABLE sc_parquet RENAME COLUMN col2 TO new_col2; +ALTER TABLE sc_parquet RENAME COLUMN col3 TO new_col3; +ALTER TABLE sc_parquet RENAME COLUMN vVV to vv; +alter table sc_parquet ALTER COLUMN new_col2 AFTER new_col3; +alter table sc_parquet ALTER COLUMN new_col1 AFTER new_col2; + +INSERT INTO sc_parquet (k,vv,new_col1,new_col2,new_col3) VALUES + (2, 'test', array(4,5,6), named_struct('a', 20, 'b', 'spark'), map('key3', 300)), + (3, 'example', array(7,8,9), named_struct('a', 30, 'b', 'hive'), map('key4', 400, 'key5', 500)); + + + + + + + +create table sc_orc ( + k int, + vVV string, + col1 array, + col2 struct, + col3 map +) tblproperties ( + "file.format" = "orc" +); + +INSERT INTO sc_orc (k,vVV,col1,col2,col3) VALUES + (1, 'hello', array(1,2,3), named_struct('a', 10, 'b', 'world'), map('key1', 100, 'key2', 200)); + +ALTER TABLE sc_orc RENAME COLUMN col1 TO new_col1; +ALTER TABLE sc_orc RENAME COLUMN col2 TO new_col2; +ALTER TABLE sc_orc RENAME COLUMN col3 TO new_col3; +ALTER TABLE sc_orc RENAME COLUMN vVV to vv; +alter table sc_orc ALTER COLUMN new_col2 AFTER new_col3; +alter table sc_orc ALTER COLUMN new_col1 AFTER new_col2; + +INSERT INTO sc_orc (k,vv,new_col1,new_col2,new_col3) VALUES + (2, 'test', array(4,5,6), named_struct('a', 20, 'b', 'spark'), map('key3', 300)), + (3, 'example', array(7,8,9), named_struct('a', 30, 'b', 'hive'), map('key4', 400, 'key5', 500)); + + diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java index 4878d2bdfb2c17..80710a5fb2ab7b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java @@ -254,7 +254,7 @@ public Optional initSchema(SchemaCacheKey key) { partitionColumns.add(column); } } - return Optional.of(new PaimonSchemaCacheValue(dorisColumns, partitionColumns)); + return Optional.of(new PaimonSchemaCacheValue(dorisColumns, partitionColumns, tableSchema)); } catch (Exception e) { throw new CacheException("failed to initSchema for: %s.%s.%s.%s", null, getCatalog().getName(), key.getDbName(), key.getTblName(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonSchemaCacheValue.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonSchemaCacheValue.java index ccb530a3cbccc7..e931b52336ba8f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonSchemaCacheValue.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonSchemaCacheValue.java @@ -20,18 +20,28 @@ import org.apache.doris.catalog.Column; import org.apache.doris.datasource.SchemaCacheValue; +import org.apache.paimon.schema.TableSchema; + import java.util.List; public class PaimonSchemaCacheValue extends SchemaCacheValue { private List partitionColumns; - public PaimonSchemaCacheValue(List schema, List partitionColumns) { + private TableSchema tableSchema; + // Caching TableSchema can reduce the reading of schema files and json parsing. + + public PaimonSchemaCacheValue(List schema, List partitionColumns, TableSchema tableSchema) { super(schema); this.partitionColumns = partitionColumns; + this.tableSchema = tableSchema; } public List getPartitionColumns() { return partitionColumns; } + + public TableSchema getTableSchema() { + return tableSchema; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java index 1b4edfa5ed9d10..25539c8247704c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java @@ -48,10 +48,12 @@ import org.apache.paimon.CoreOptions; import org.apache.paimon.data.BinaryRow; import org.apache.paimon.predicate.Predicate; +import org.apache.paimon.schema.TableSchema; import org.apache.paimon.table.source.DataSplit; import org.apache.paimon.table.source.DeletionFile; import org.apache.paimon.table.source.RawFile; import org.apache.paimon.table.source.ReadBuilder; +import org.apache.paimon.types.DataField; import org.apache.paimon.utils.InstantiationUtil; import java.io.IOException; @@ -130,6 +132,7 @@ protected void doInitialize() throws UserException { source = new PaimonSource(desc); serializedTable = encodeObjectToString(source.getPaimonTable()); Preconditions.checkNotNull(source); + params.setPaimonSchemaInfo(new HashMap<>()); } @VisibleForTesting @@ -167,6 +170,17 @@ protected Optional getSerializedTable() { return Optional.of(serializedTable); } + private Map getSchemaInfo(Long schemaId) { + PaimonExternalTable table = (PaimonExternalTable) source.getTargetTable(); + TableSchema tableSchema = table.getPaimonSchemaCacheValue(schemaId).getTableSchema(); + Map columnIdToName = new HashMap<>(tableSchema.fields().size()); + for (DataField dataField : tableSchema.fields()) { + columnIdToName.put((long) dataField.id(), dataField.name().toLowerCase()); + } + + return columnIdToName; + } + private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit) { TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); tableFormatFileDesc.setTableFormatType(paimonSplit.getTableFormatType().value()); @@ -188,8 +202,9 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit) } else { throw new RuntimeException("Unsupported file format: " + fileFormat); } + fileDesc.setSchemaId(paimonSplit.getSchemaId()); + params.paimon_schema_info.computeIfAbsent(paimonSplit.getSchemaId(), this::getSchemaInfo); } - fileDesc.setFileFormat(fileFormat); fileDesc.setPaimonPredicate(encodeObjectToString(predicates)); fileDesc.setPaimonColumnNames(source.getDesc().getSlots().stream().map(slot -> slot.getColumn().getName()) @@ -250,7 +265,6 @@ public List getSplits(int numBackends) throws UserException { for (DataSplit dataSplit : dataSplits) { SplitStat splitStat = new SplitStat(); splitStat.setRowCount(dataSplit.rowCount()); - BinaryRow partitionValue = dataSplit.partition(); selectedPartitionValues.add(partitionValue); Optional> optRawFiles = dataSplit.convertToRawFiles(); @@ -283,6 +297,7 @@ public List getSplits(int numBackends) throws UserException { null, PaimonSplit.PaimonSplitCreator.DEFAULT); for (Split dorisSplit : dorisSplits) { + ((PaimonSplit) dorisSplit).setSchemaId(file.schemaId()); // try to set deletion file if (optDeletionFiles.isPresent() && optDeletionFiles.get().get(i) != null) { ((PaimonSplit) dorisSplit).setDeletionFile(optDeletionFiles.get().get(i)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSplit.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSplit.java index 224960e5c96778..822d4b28dcce19 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSplit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSplit.java @@ -37,6 +37,7 @@ public class PaimonSplit extends FileSplit { private TableFormatType tableFormatType; private Optional optDeletionFile = Optional.empty(); private Optional optRowCount = Optional.empty(); + private Optional schemaId = Optional.empty(); public PaimonSplit(DataSplit split) { super(DUMMY_PATH, 0, 0, 0, 0, null, null); @@ -93,6 +94,14 @@ public void setRowCount(long rowCount) { this.optRowCount = Optional.of(rowCount); } + public void setSchemaId(long schemaId) { + this.schemaId = Optional.of(schemaId); + } + + public Long getSchemaId() { + return schemaId.orElse(null); + } + public static class PaimonSplitCreator implements SplitCreator { static final PaimonSplitCreator DEFAULT = new PaimonSplitCreator(); diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index a8f10a3f053fdf..c4a01bf2ec4e79 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -340,6 +340,7 @@ struct TPaimonFileDesc { 13: optional map hadoop_conf // deprecated 14: optional string paimon_table // deprecated 15: optional i64 row_count // deprecated + 16: optional i64 schema_id; // for schema change. } struct TTrinoConnectorFileDesc { @@ -466,6 +467,7 @@ struct TFileScanRangeParams { // 1. Reduce the access to HMS and HDFS on the JNI side. // 2. There will be no inconsistency between the fe and be tables. 24: optional string serialized_table + 25: optional map> paimon_schema_info //paimon map> : for schema change. } struct TFileRangeDesc { diff --git a/regression-test/data/external_table_p0/paimon/test_paimon_schema_change.out b/regression-test/data/external_table_p0/paimon/test_paimon_schema_change.out new file mode 100644 index 00000000000000..5d33ed9f7e6066 --- /dev/null +++ b/regression-test/data/external_table_p0/paimon/test_paimon_schema_change.out @@ -0,0 +1,232 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !desc_1 -- +full_name text Yes true \N +id bigint Yes true \N +location text Yes true \N +salary decimal(12,2) Yes true \N + +-- !parquet_pk_1 -- +Alice 1 \N \N +Bob 2 \N \N +Charlie 3 New York \N +David 4 Los Angeles \N +Eve 5 San Francisco \N +Grace 6 Seattle 5000.00 +Heidi 7 Boston 6000.00 +Ivan 8 Miami 7000.00 +Stm 9 ttttt 8000.00 +AAAA 10 BBBB 9000.00 +Ken 11 Austin 9000.00 +Laura 12 Portland 10000.00 +Frank 10000000000 Chicago \N + +-- !parquet_pk_2 -- +Alice \N +Bob \N +Charlie New York +David Los Angeles +Eve San Francisco +Grace Seattle +Heidi Boston +Ivan Miami +Stm ttttt +AAAA BBBB +Ken Austin +Laura Portland +Frank Chicago + +-- !parquet_pk_3 -- +Alice 1 \N \N +Bob 2 \N \N +Charlie 3 New York \N +David 4 Los Angeles \N +Eve 5 San Francisco \N +Frank 10000000000 Chicago \N + +-- !parquet_pk_4 -- +Grace 6 Seattle 5000.00 +Heidi 7 Boston 6000.00 +Ivan 8 Miami 7000.00 +Stm 9 ttttt 8000.00 +AAAA 10 BBBB 9000.00 +Ken 11 Austin 9000.00 +Laura 12 Portland 10000.00 + +-- !parquet_pk_5 -- +Charlie 3 New York \N +David 4 Los Angeles \N + +-- !parquet_pk_6 -- +Grace 6 Seattle 5000.00 +Heidi 7 Boston 6000.00 +Ivan 8 Miami 7000.00 +Stm 9 ttttt 8000.00 +AAAA 10 BBBB 9000.00 +Ken 11 Austin 9000.00 +Laura 12 Portland 10000.00 +Frank 10000000000 Chicago \N + +-- !parquet_pk_7 -- +Ivan 8 Miami 7000.00 +Stm 9 ttttt 8000.00 +AAAA 10 BBBB 9000.00 +Ken 11 Austin 9000.00 +Laura 12 Portland 10000.00 + +-- !desc_2 -- +full_name text Yes true \N +id bigint Yes true \N +location text Yes true \N +salary decimal(12,2) Yes true \N + +-- !orc_pk_1 -- +Alice 1 \N \N +Bob 2 \N \N +Charlie 3 \N \N +Charlie 4 New York \N +David 5 Los Angeles \N +David 6 Los Angeles \N +Eve 7 San Francisco \N +Eve 8 San Francisco \N +Frank 9 Chicago \N +Grace 10 Seattle \N +Grace 11 Seattle 5000.00 +Heidi 12 Boston 6000.00 +Heidi 13 Boston 6000.00 +Ivan 14 Miami 7000.00 +Ivan 15 Miami 7000.00 +Judy 16 Denver 8000.00 +Stm 17 ttttt 8000.00 +Ken 18 Austin 9000.00 +AAAA 19 BBBB 9000.00 +Laura 20 Portland 10000.00 +Frank 10000000000 Chicago \N + +-- !orc_pk_2 -- +Alice \N +Bob \N +Charlie \N +Charlie New York +David Los Angeles +David Los Angeles +Eve San Francisco +Eve San Francisco +Frank Chicago +Grace Seattle +Grace Seattle +Heidi Boston +Heidi Boston +Ivan Miami +Ivan Miami +Judy Denver +Stm ttttt +Ken Austin +AAAA BBBB +Laura Portland +Frank Chicago + +-- !orc_pk_3 -- +Alice 1 \N \N +Bob 2 \N \N +Charlie 3 \N \N +Charlie 4 New York \N +David 5 Los Angeles \N +David 6 Los Angeles \N +Eve 7 San Francisco \N +Eve 8 San Francisco \N +Frank 9 Chicago \N +Grace 10 Seattle \N +Frank 10000000000 Chicago \N + +-- !orc_pk_4 -- +Grace 11 Seattle 5000.00 +Heidi 12 Boston 6000.00 +Heidi 13 Boston 6000.00 +Ivan 14 Miami 7000.00 +Ivan 15 Miami 7000.00 +Judy 16 Denver 8000.00 +Stm 17 ttttt 8000.00 +Ken 18 Austin 9000.00 +AAAA 19 BBBB 9000.00 +Laura 20 Portland 10000.00 + +-- !orc_pk_5 -- +Charlie 4 New York \N +David 5 Los Angeles \N +David 6 Los Angeles \N + +-- !orc_pk_6 -- +David 6 Los Angeles \N +Eve 7 San Francisco \N +Eve 8 San Francisco \N +Frank 9 Chicago \N +Grace 10 Seattle \N +Grace 11 Seattle 5000.00 +Heidi 12 Boston 6000.00 +Heidi 13 Boston 6000.00 +Ivan 14 Miami 7000.00 +Ivan 15 Miami 7000.00 +Judy 16 Denver 8000.00 +Stm 17 ttttt 8000.00 +Ken 18 Austin 9000.00 +AAAA 19 BBBB 9000.00 +Laura 20 Portland 10000.00 +Frank 10000000000 Chicago \N + +-- !orc_pk_7 -- +Ivan 14 Miami 7000.00 +Ivan 15 Miami 7000.00 +Judy 16 Denver 8000.00 +Stm 17 ttttt 8000.00 +Ken 18 Austin 9000.00 +AAAA 19 BBBB 9000.00 +Laura 20 Portland 10000.00 + +-- !desc_3 -- +k int Yes true \N +vv text Yes true \N +new_col3 map Yes true \N +new_col2 struct Yes true \N +new_col1 array Yes true \N + +-- !parquet_1 -- +1 hello {"key1":100, "key2":200} {"a":10, "b":"world"} [1, 2, 3] +2 test {"key3":300} {"a":20, "b":"spark"} [4, 5, 6] +3 example {"key4":400, "key5":500} {"a":30, "b":"hive"} [7, 8, 9] + +-- !parquet_2 -- +3 example {"key4":400, "key5":500} {"a":30, "b":"hive"} [7, 8, 9] + +-- !parquet_3 -- +1 hello {"key1":100, "key2":200} {"a":10, "b":"world"} [1, 2, 3] + +-- !desc_4 -- +k int Yes true \N +vv text Yes true \N +new_col3 map Yes true \N +new_col2 struct Yes true \N +new_col1 array Yes true \N + +-- !orc_1 -- +1 hello {"key1":100, "key2":200} {"a":10, "b":"world"} [1, 2, 3] +2 test {"key3":300} {"a":20, "b":"spark"} [4, 5, 6] +3 example {"key4":400, "key5":500} {"a":30, "b":"hive"} [7, 8, 9] + +-- !orc_2 -- +3 example {"key4":400, "key5":500} {"a":30, "b":"hive"} [7, 8, 9] + +-- !orc_3 -- +1 hello {"key1":100, "key2":200} {"a":10, "b":"world"} [1, 2, 3] + +-- !count_1 -- +13 + +-- !count_2 -- +21 + +-- !count_3 -- +3 + +-- !count_4 -- +3 + diff --git a/regression-test/suites/external_table_p0/paimon/test_paimon_schema_change.groovy b/regression-test/suites/external_table_p0/paimon/test_paimon_schema_change.groovy new file mode 100644 index 00000000000000..2e9f9790a28583 --- /dev/null +++ b/regression-test/suites/external_table_p0/paimon/test_paimon_schema_change.groovy @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_paimon_schema_change", "p0,external,doris,external_docker,external_docker_doris") { + String enabled = context.config.otherConfigs.get("enablePaimonTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String minio_port = context.config.otherConfigs.get("iceberg_minio_port") + String catalog_name = "test_paimon_schema_change" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + String table_name = "ts_scale_orc" + + sql """drop catalog if exists ${catalog_name}""" + + sql """ + CREATE CATALOG ${catalog_name} PROPERTIES ( + 'type' = 'paimon', + 'warehouse' = 's3://warehouse/wh', + 's3.endpoint' = 'http://${externalEnvIp}:${minio_port}', + 's3.access_key' = 'admin', + 's3.secret_key' = 'password', + 's3.path.style.access' = 'true' + ); + """ + sql """switch `${catalog_name}`""" + sql """show databases; """ + sql """use test_paimon_schema_change """ + + + qt_desc_1 """ desc sc_parquet_pk """ + + qt_parquet_pk_1 """SELECT * FROM sc_parquet_pk order by id;""" + qt_parquet_pk_2 """SELECT full_name, location FROM sc_parquet_pk order by id;""" + qt_parquet_pk_3 """SELECT * FROM sc_parquet_pk WHERE salary IS NULL order by id;""" + qt_parquet_pk_4 """SELECT * FROM sc_parquet_pk WHERE salary IS NOT NULL order by id;""" + qt_parquet_pk_5 """SELECT * FROM sc_parquet_pk WHERE location = 'New York' OR location = 'Los Angeles' order by id;""" + qt_parquet_pk_6 """SELECT * FROM sc_parquet_pk WHERE id > 5 order by id;""" + qt_parquet_pk_7 """SELECT * FROM sc_parquet_pk WHERE salary > 6000 order by id;""" + + + qt_desc_2 """ desc sc_orc_pk """ + qt_orc_pk_1 """SELECT * FROM sc_orc_pk order by id;""" + qt_orc_pk_2 """SELECT full_name, location FROM sc_orc_pk order by id;""" + qt_orc_pk_3 """SELECT * FROM sc_orc_pk WHERE salary IS NULL order by id;""" + qt_orc_pk_4 """SELECT * FROM sc_orc_pk WHERE salary IS NOT NULL order by id;""" + qt_orc_pk_5 """SELECT * FROM sc_orc_pk WHERE location = 'New York' OR location = 'Los Angeles' order by id;""" + qt_orc_pk_6 """SELECT * FROM sc_orc_pk WHERE id > 5 order by id;""" + qt_orc_pk_7 """SELECT * FROM sc_orc_pk WHERE salary > 6000 order by id;""" + + + + qt_desc_3 """ desc sc_parquet """ + + qt_parquet_1 """select * from sc_parquet order by k;""" + qt_parquet_2 """select * from sc_parquet where k >= 3;""" + qt_parquet_3 """select * from sc_parquet where k <= 1;""" + + + qt_desc_4 """ desc sc_orc """ + qt_orc_1 """select * from sc_orc order by k;""" + qt_orc_2 """select * from sc_orc where k >= 3;""" + qt_orc_3 """select * from sc_orc where k <= 1;""" + + + qt_count_1 """ select count(*) from sc_parquet_pk;""" + qt_count_2 """ select count(*) from sc_orc_pk;""" + qt_count_3 """ select count(*) from sc_parquet;""" + qt_count_4 """ select count(*) from sc_orc;""" + + + } +} + +