From 4ae8b5a9af1537cd50e07292658487ea45f30ffe Mon Sep 17 00:00:00 2001 From: Socrates Date: Tue, 6 Jan 2026 11:46:00 +0800 Subject: [PATCH 1/4] fix --- .../format/parquet/vparquet_column_reader.cpp | 12 ++- .../iceberg/run24.sql | 54 ++++++++++ ...est_iceberg_struct_schema_evolution.groovy | 100 ++++++++++++++++++ 3 files changed, 162 insertions(+), 4 deletions(-) create mode 100644 docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql create mode 100644 regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index 0917ca7cd06fb2..dac6b76aecef54 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -932,10 +932,14 @@ Status StructColumnReader::read_column_data( size_t field_rows = 0; bool field_eof = false; - // Use root_node to get the correct child node for the reference column - // reference_file_column_name is the file column name, use get_children_node_by_file_column_name - auto ref_child_node = - root_node->get_children_node_by_file_column_name(reference_file_column_name); + // Use ConstNode for the reference column instead of looking up from root_node. + // The reference column is only used to get RL/DL information for determining the number + // of elements in the struct. It may be a column that has been dropped from the table + // schema (e.g., 'removed' field), but still exists in older parquet files. + // Since we don't need schema mapping for this column (we just need its RL/DL levels), + // using ConstNode is safe and avoids the issue where the reference column doesn't exist + // in root_node (because it was dropped from table schema). 
+ auto ref_child_node = TableSchemaChangeHelper::ConstNode::get_instance(); not_missing_orig_column_size = temp_column->size(); RETURN_IF_ERROR((*reference_reader) diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql new file mode 100644 index 00000000000000..147c604d1cd491 --- /dev/null +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql @@ -0,0 +1,54 @@ +use demo.test_db; + +DROP TABLE IF EXISTS test_struct_evolution; + +-- Test case for struct schema evolution bug +-- Bug scenario: When querying a struct field after schema evolution, if all queried fields are missing +-- in old Parquet files, the code tries to find a reference column from file schema. However, if the +-- reference column (e.g., 'removed') was dropped from table schema, accessing it via root_node will fail. +-- +-- Steps to reproduce: +-- 1. Create table with struct containing: removed, rename, keep, drop_and_add +-- 2. Insert data (creates Parquet file with these fields) +-- 3. DROP a_struct.removed - removes field from table schema +-- 4. DROP a_struct.drop_and_add then ADD a_struct.drop_and_add - gets new field ID +-- 5. ADD a_struct.added - adds new field +-- 6. 
Query struct_element(a_struct, 'drop_and_add') or struct_element(a_struct, 'added') +-- -> This will fail because all queried fields are missing in old file, and the reference +-- column 'removed' doesn't exist in root_node (it was dropped from table schema) + +-- Step 1: Create table +CREATE TABLE test_struct_evolution ( + id BIGINT, + a_struct STRUCT<removed: BIGINT, rename: BIGINT, keep: BIGINT, drop_and_add: BIGINT> +) USING ICEBERG +TBLPROPERTIES ('write.format.default' = 'parquet', 'format-version' = 2); + +-- Step 2: Insert data (creates Parquet file with original schema) +INSERT INTO test_struct_evolution +SELECT 1, named_struct('removed', 10, 'rename', 11, 'keep', 12, 'drop_and_add', 13); + +-- Step 3: Schema evolution - drop removed field +ALTER TABLE test_struct_evolution DROP COLUMN a_struct.removed; + +-- Step 4: Rename field (field ID stays the same) +ALTER TABLE test_struct_evolution RENAME COLUMN a_struct.rename TO renamed; + +-- Step 5: Drop and add drop_and_add (new field ID) +ALTER TABLE test_struct_evolution DROP COLUMN a_struct.drop_and_add; +ALTER TABLE test_struct_evolution ADD COLUMN a_struct.drop_and_add BIGINT; + +-- Step 6: Add new field +ALTER TABLE test_struct_evolution ADD COLUMN a_struct.added BIGINT; + +-- Step 7: Insert new data after schema evolution (creates new Parquet file) +INSERT INTO test_struct_evolution +SELECT 2, named_struct('renamed', 21, 'keep', 22, 'drop_and_add', 23, 'added', 24); + +-- Now the table contains two Parquet files: +-- - Old file: contains removed, rename, keep, drop_and_add (old field ID) +-- - New file: contains renamed, keep, drop_and_add (new field ID), added +-- +-- Querying struct_element(a_struct, 'drop_and_add') or struct_element(a_struct, 'added') +-- on the old file will trigger the bug + diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy new file mode 100644 index 00000000000000..8b5418123d8cb8 --- /dev/null +++
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Test for struct field schema evolution in Iceberg tables. +// This test case verifies the fix for the bug where querying a struct field +// that was added after schema evolution fails when all queried columns are +// missing in the original file, and the reference column used for RL/DL +// was dropped from the table schema. 
+// +// Bug: "File column name 'removed' not found in struct children" +// Fix: Use ConstNode for reference column when reading RL/DL information +// +// Prerequisites: +// - Tables created by run24.sql in docker iceberg scripts + +suite("test_iceberg_struct_schema_evolution", "p0,external,doris,external_docker,external_docker_doris") { + + String enabled = context.config.otherConfigs.get("enableIcebergTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable iceberg test.") + return + } + + String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port") + String minio_port = context.config.otherConfigs.get("iceberg_minio_port") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String catalog_name = "test_iceberg_struct_schema_evolution" + + sql """drop catalog if exists ${catalog_name}""" + sql """ + CREATE CATALOG ${catalog_name} PROPERTIES ( + 'type'='iceberg', + 'iceberg.catalog.type'='rest', + 'uri' = 'http://${externalEnvIp}:${rest_port}', + "s3.access_key" = "admin", + "s3.secret_key" = "password", + "s3.endpoint" = "http://${externalEnvIp}:${minio_port}", + "s3.region" = "us-east-1" + );""" + + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + sql """use test_db;""" + + sql """set enable_fallback_to_original_planner=false;""" + + def table_name = "test_struct_evolution" + + // Verify table schema after evolution + qt_desc """DESC ${table_name}""" + + // Test 1: Query all columns - should work + qt_select_all """SELECT * FROM ${table_name} ORDER BY id""" + + // Test 2: Query struct field that exists in both old and new files + qt_struct_keep """SELECT struct_element(a_struct, 'keep') FROM ${table_name} ORDER BY id""" + qt_struct_renamed """SELECT struct_element(a_struct, 'renamed') FROM ${table_name} ORDER BY id""" + + // Test 3: Query struct field that was dropped and re-added (BUG FIX TEST) + 
// This query would crash before the fix with: + // "Not support read struct 'a_struct' which columns are all missing" + // or "File column name 'removed' not found in struct children" + qt_struct_drop_and_add """SELECT struct_element(a_struct, 'drop_and_add') FROM ${table_name} ORDER BY id""" + + // Test 4: Query struct field that was newly added (BUG FIX TEST) + qt_struct_added """SELECT struct_element(a_struct, 'added') FROM ${table_name} ORDER BY id""" + + // Test 5: Query entire struct column + qt_struct_full """SELECT a_struct FROM ${table_name} ORDER BY id""" + + // Test 6: Query with predicate on struct field + qt_struct_predicate_1 """SELECT id FROM ${table_name} WHERE struct_element(a_struct, 'renamed') = 11 ORDER BY id""" + qt_struct_predicate_2 """SELECT id FROM ${table_name} WHERE struct_element(a_struct, 'drop_and_add') IS NULL ORDER BY id""" + qt_struct_predicate_3 """SELECT id FROM ${table_name} WHERE struct_element(a_struct, 'added') IS NULL ORDER BY id""" + qt_struct_predicate_4 """SELECT id FROM ${table_name} WHERE struct_element(a_struct, 'added') IS NOT NULL ORDER BY id""" + + // Test 7: Multiple struct fields in one query + qt_struct_multi """SELECT struct_element(a_struct, 'renamed'), struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'), struct_element(a_struct, 'added') FROM ${table_name} ORDER BY id""" + + // Test 8: DISTINCT query on struct fields + qt_struct_distinct """SELECT DISTINCT struct_element(a_struct, 'renamed'), struct_element(a_struct, 'added'), struct_element(a_struct, 'keep') FROM ${table_name} ORDER BY 1, 2, 3""" + + // Clean up + sql """drop catalog if exists ${catalog_name}""" +} From 7c57aae8c17e7b1257c40e0b4f2a6daf1b092697 Mon Sep 17 00:00:00 2001 From: Socrates Date: Tue, 6 Jan 2026 11:56:22 +0800 Subject: [PATCH 2/4] flush out --- .../test_iceberg_struct_schema_evolution.out | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 
regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out new file mode 100644 index 00000000000000..6d378d679067af --- /dev/null +++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out @@ -0,0 +1,49 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !desc -- +id bigint Yes true \N +a_struct struct Yes true \N + +-- !select_all -- +1 {"renamed":11, "keep":12, "drop_and_add":null, "added":null} +2 {"renamed":21, "keep":22, "drop_and_add":23, "added":24} + +-- !struct_keep -- +12 +22 + +-- !struct_renamed -- +11 +21 + +-- !struct_drop_and_add -- +\N +23 + +-- !struct_added -- +\N +24 + +-- !struct_full -- +{"renamed":11, "keep":12, "drop_and_add":null, "added":null} +{"renamed":21, "keep":22, "drop_and_add":23, "added":24} + +-- !struct_predicate_1 -- +1 + +-- !struct_predicate_2 -- +1 + +-- !struct_predicate_3 -- +1 + +-- !struct_predicate_4 -- +2 + +-- !struct_multi -- +11 12 \N \N +21 22 23 24 + +-- !struct_distinct -- +11 \N 12 +21 24 22 + From 43c14fccc88ddf91c76c3401bf92fbfd8afa95d4 Mon Sep 17 00:00:00 2001 From: Socrates Date: Tue, 6 Jan 2026 12:12:43 +0800 Subject: [PATCH 3/4] add test for orc --- .../iceberg/run24.sql | 27 ++++++++++++++++ .../test_iceberg_struct_schema_evolution.out | 32 +++++++++++++++++++ ...est_iceberg_struct_schema_evolution.groovy | 27 ++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql index 147c604d1cd491..d504036014350e 100644 --- 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql @@ -52,3 +52,30 @@ SELECT 2, named_struct('renamed', 21, 'keep', 22, 'drop_and_add', 23, 'added', 2 -- Querying struct_element(a_struct, 'drop_and_add') or struct_element(a_struct, 'added') -- on the old file will trigger the bug +-- ============================================================ +-- ORC format test table (for completeness, though ORC doesn't have the same bug) +-- ============================================================ +DROP TABLE IF EXISTS test_struct_evolution_orc; + +-- Create ORC format table with same schema evolution scenario +CREATE TABLE test_struct_evolution_orc ( + id BIGINT, + a_struct STRUCT<removed: BIGINT, rename: BIGINT, keep: BIGINT, drop_and_add: BIGINT> +) USING ICEBERG +TBLPROPERTIES ('write.format.default' = 'orc', 'format-version' = 2); + +-- Insert initial data (creates ORC file with original schema) +INSERT INTO test_struct_evolution_orc +SELECT 1, named_struct('removed', 10, 'rename', 11, 'keep', 12, 'drop_and_add', 13); + +-- Schema evolution - same operations as Parquet table +ALTER TABLE test_struct_evolution_orc DROP COLUMN a_struct.removed; +ALTER TABLE test_struct_evolution_orc RENAME COLUMN a_struct.rename TO renamed; +ALTER TABLE test_struct_evolution_orc DROP COLUMN a_struct.drop_and_add; +ALTER TABLE test_struct_evolution_orc ADD COLUMN a_struct.drop_and_add BIGINT; +ALTER TABLE test_struct_evolution_orc ADD COLUMN a_struct.added BIGINT; + +-- Insert new data after schema evolution (creates new ORC file) +INSERT INTO test_struct_evolution_orc +SELECT 2, named_struct('renamed', 21, 'keep', 22, 'drop_and_add', 23, 'added', 24); + diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out index 6d378d679067af..877713408a1db0 100644 ---
a/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out +++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out @@ -47,3 +47,35 @@ a_struct struct Yes 11 \N 12 21 24 22 +-- !orc_desc -- +id bigint Yes true \N +a_struct struct Yes true \N + +-- !orc_select_all -- +1 {"renamed":11, "keep":12, "drop_and_add":null, "added":null} +2 {"renamed":21, "keep":22, "drop_and_add":23, "added":24} + +-- !orc_struct_keep -- +12 +22 + +-- !orc_struct_renamed -- +11 +21 + +-- !orc_struct_drop_and_add -- +\N +23 + +-- !orc_struct_added -- +\N +24 + +-- !orc_struct_full -- +{"renamed":11, "keep":12, "drop_and_add":null, "added":null} +{"renamed":21, "keep":22, "drop_and_add":23, "added":24} + +-- !orc_struct_multi -- +11 12 \N \N +21 22 23 24 + diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy index 8b5418123d8cb8..d5aa211b962b60 100644 --- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy +++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy @@ -95,6 +95,33 @@ suite("test_iceberg_struct_schema_evolution", "p0,external,doris,external_docker // Test 8: DISTINCT query on struct fields qt_struct_distinct """SELECT DISTINCT struct_element(a_struct, 'renamed'), struct_element(a_struct, 'added'), struct_element(a_struct, 'keep') FROM ${table_name} ORDER BY 1, 2, 3""" + // ============================================================ + // Test with ORC format (for completeness) + // ============================================================ + def orc_table_name = "test_struct_evolution_orc" + + // Verify ORC table schema after evolution + qt_orc_desc """DESC ${orc_table_name}""" + + // Test 1: Query all columns - should work + qt_orc_select_all """SELECT * FROM ${orc_table_name} 
ORDER BY id""" + + // Test 2: Query struct field that exists in both old and new files + qt_orc_struct_keep """SELECT struct_element(a_struct, 'keep') FROM ${orc_table_name} ORDER BY id""" + qt_orc_struct_renamed """SELECT struct_element(a_struct, 'renamed') FROM ${orc_table_name} ORDER BY id""" + + // Test 3: Query struct field that was dropped and re-added + qt_orc_struct_drop_and_add """SELECT struct_element(a_struct, 'drop_and_add') FROM ${orc_table_name} ORDER BY id""" + + // Test 4: Query struct field that was newly added + qt_orc_struct_added """SELECT struct_element(a_struct, 'added') FROM ${orc_table_name} ORDER BY id""" + + // Test 5: Query entire struct column + qt_orc_struct_full """SELECT a_struct FROM ${orc_table_name} ORDER BY id""" + + // Test 6: Multiple struct fields in one query + qt_orc_struct_multi """SELECT struct_element(a_struct, 'renamed'), struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'), struct_element(a_struct, 'added') FROM ${orc_table_name} ORDER BY id""" + // Clean up sql """drop catalog if exists ${catalog_name}""" } From bd31370edb4e0d78719d98000f2613682e7a021f Mon Sep 17 00:00:00 2001 From: Socrates Date: Wed, 7 Jan 2026 16:43:53 +0800 Subject: [PATCH 4/4] Add case sensitivity tests for struct schema evolution in Iceberg This commit introduces tests for handling mixed case field names in struct schema evolution. It includes the creation of test tables with mixed case fields, schema evolution operations, and corresponding data insertions for both Parquet and ORC formats. The tests verify that case sensitivity is correctly managed during schema evolution and querying operations. 
--- .../iceberg/run24.sql | 70 ++++++++++++++++ .../test_iceberg_struct_schema_evolution.out | 80 +++++++++++++++++++ ...est_iceberg_struct_schema_evolution.groovy | 67 ++++++++++++++++ 3 files changed, 217 insertions(+) diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql index d504036014350e..b5b19b1f15c717 100644 --- a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql @@ -79,3 +79,73 @@ ALTER TABLE test_struct_evolution_orc ADD COLUMN a_struct.added BIGINT; INSERT INTO test_struct_evolution_orc SELECT 2, named_struct('renamed', 21, 'keep', 22, 'drop_and_add', 23, 'added', 24); +-- ============================================================ +-- Case sensitivity test table (mixed case field names) +-- ============================================================ +DROP TABLE IF EXISTS test_struct_evolution_case; + +-- Test case for struct schema evolution with mixed case field names +-- This tests that case sensitivity is handled correctly when: +-- - Field names have mixed case (e.g., REMOVED, rename, keep, drop_and_add) +-- - Schema evolution operations are performed +-- - Querying struct fields with different case patterns + +-- Step 1: Create table with mixed case field names +CREATE TABLE test_struct_evolution_case ( + id BIGINT, + a_struct STRUCT<REMOVED: BIGINT, rename: BIGINT, keep: BIGINT, drop_and_add: BIGINT> +) USING ICEBERG +TBLPROPERTIES ('write.format.default' = 'parquet', 'format-version' = 2); + +-- Step 2: Insert data (creates Parquet file with original schema) +INSERT INTO test_struct_evolution_case +SELECT 1, named_struct('REMOVED', 10, 'rename', 11, 'keep', 12, 'drop_and_add', 13); + +-- Step 3: Schema evolution - drop REMOVED field (uppercase) +ALTER TABLE test_struct_evolution_case DROP COLUMN
a_struct.REMOVED; + +-- Step 4: Rename field (field ID stays the same) +ALTER TABLE test_struct_evolution_case RENAME COLUMN a_struct.rename TO renamed; + +-- Step 5: Drop and add drop_and_add with case change (new field ID) +-- Initial: drop_and_add (lowercase), after re-add: DROP_AND_ADD (uppercase) +ALTER TABLE test_struct_evolution_case DROP COLUMN a_struct.drop_and_add; +ALTER TABLE test_struct_evolution_case ADD COLUMN a_struct.DROP_AND_ADD BIGINT; + +-- Step 6: Add new field +ALTER TABLE test_struct_evolution_case ADD COLUMN a_struct.added BIGINT; + +-- Step 7: Insert new data after schema evolution (creates new Parquet file) +-- Note: Use DROP_AND_ADD (uppercase) in the new data +INSERT INTO test_struct_evolution_case +SELECT 2, named_struct('renamed', 21, 'keep', 22, 'DROP_AND_ADD', 23, 'added', 24); + +-- ============================================================ +-- ORC format test table with mixed case (for completeness) +-- ============================================================ +DROP TABLE IF EXISTS test_struct_evolution_case_orc; + +-- Create ORC format table with same schema evolution scenario and mixed case +CREATE TABLE test_struct_evolution_case_orc ( + id BIGINT, + a_struct STRUCT<REMOVED: BIGINT, rename: BIGINT, keep: BIGINT, drop_and_add: BIGINT> +) USING ICEBERG +TBLPROPERTIES ('write.format.default' = 'orc', 'format-version' = 2); + +-- Insert initial data (creates ORC file with original schema) +INSERT INTO test_struct_evolution_case_orc +SELECT 1, named_struct('REMOVED', 10, 'rename', 11, 'keep', 12, 'drop_and_add', 13); + +-- Schema evolution - same operations as Parquet table +ALTER TABLE test_struct_evolution_case_orc DROP COLUMN a_struct.REMOVED; +ALTER TABLE test_struct_evolution_case_orc RENAME COLUMN a_struct.rename TO renamed; +-- Drop and add with case change: drop_and_add (lowercase) -> DROP_AND_ADD (uppercase) +ALTER TABLE test_struct_evolution_case_orc DROP COLUMN a_struct.drop_and_add; +ALTER TABLE test_struct_evolution_case_orc ADD COLUMN a_struct.DROP_AND_ADD BIGINT; +ALTER TABLE
test_struct_evolution_case_orc ADD COLUMN a_struct.added BIGINT; + +-- Insert new data after schema evolution (creates new ORC file) +-- Note: Use DROP_AND_ADD (uppercase) in the new data +INSERT INTO test_struct_evolution_case_orc +SELECT 2, named_struct('renamed', 21, 'keep', 22, 'DROP_AND_ADD', 23, 'added', 24); + diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out index 877713408a1db0..a364316df427a9 100644 --- a/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out +++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out @@ -79,3 +79,83 @@ a_struct struct Yes 11 12 \N \N 21 22 23 24 +-- !case_desc -- +id bigint Yes true \N +a_struct struct Yes true \N + +-- !case_select_all -- +1 {"renamed":11, "keep":12, "drop_and_add":null, "added":null} +2 {"renamed":21, "keep":22, "drop_and_add":23, "added":24} + +-- !case_struct_keep -- +12 +22 + +-- !case_struct_renamed -- +11 +21 + +-- !case_struct_drop_and_add -- +\N +23 + +-- !case_struct_added -- +\N +24 + +-- !case_struct_full -- +{"renamed":11, "keep":12, "drop_and_add":null, "added":null} +{"renamed":21, "keep":22, "drop_and_add":23, "added":24} + +-- !case_struct_predicate_1 -- +1 + +-- !case_struct_predicate_2 -- +1 + +-- !case_struct_predicate_3 -- +1 + +-- !case_struct_predicate_4 -- +2 + +-- !case_struct_multi -- +11 12 \N \N +21 22 23 24 + +-- !case_struct_distinct -- +11 \N 12 +21 24 22 + +-- !case_orc_desc -- +id bigint Yes true \N +a_struct struct Yes true \N + +-- !case_orc_select_all -- +1 {"renamed":11, "keep":12, "drop_and_add":null, "added":null} +2 {"renamed":21, "keep":22, "drop_and_add":23, "added":24} + +-- !case_orc_struct_keep -- +12 +22 + +-- !case_orc_struct_renamed -- +11 +21 + +-- !case_orc_struct_drop_and_add -- +\N +23 + +-- !case_orc_struct_added -- +\N +24 + +-- 
!case_orc_struct_full -- +{"renamed":11, "keep":12, "drop_and_add":null, "added":null} +{"renamed":21, "keep":22, "drop_and_add":23, "added":24} + +-- !case_orc_struct_multi -- +11 12 \N \N +21 22 23 24 + diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy index d5aa211b962b60..f4e95fa4fbd811 100644 --- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy +++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy @@ -122,6 +122,73 @@ suite("test_iceberg_struct_schema_evolution", "p0,external,doris,external_docker // Test 6: Multiple struct fields in one query qt_orc_struct_multi """SELECT struct_element(a_struct, 'renamed'), struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'), struct_element(a_struct, 'added') FROM ${orc_table_name} ORDER BY id""" + // ============================================================ + // Test with mixed case field names (case sensitivity test) + // ============================================================ + def case_table_name = "test_struct_evolution_case" + + // Verify case-sensitive table schema after evolution + qt_case_desc """DESC ${case_table_name}""" + + // Test 1: Query all columns - should work + qt_case_select_all """SELECT * FROM ${case_table_name} ORDER BY id""" + + // Test 2: Query struct field that exists in both old and new files + qt_case_struct_keep """SELECT struct_element(a_struct, 'keep') FROM ${case_table_name} ORDER BY id""" + qt_case_struct_renamed """SELECT struct_element(a_struct, 'renamed') FROM ${case_table_name} ORDER BY id""" + + // Test 3: Query struct field that was dropped and re-added with case change + // Note: Even though we use DROP_AND_ADD (uppercase) in SQL, the system normalizes + // field names to lowercase, so we query with 'drop_and_add' 
(lowercase) + qt_case_struct_drop_and_add """SELECT struct_element(a_struct, 'drop_and_add') FROM ${case_table_name} ORDER BY id""" + + // Test 4: Query struct field that was newly added + qt_case_struct_added """SELECT struct_element(a_struct, 'added') FROM ${case_table_name} ORDER BY id""" + + // Test 5: Query entire struct column + qt_case_struct_full """SELECT a_struct FROM ${case_table_name} ORDER BY id""" + + // Test 6: Query with predicate on struct field + qt_case_struct_predicate_1 """SELECT id FROM ${case_table_name} WHERE struct_element(a_struct, 'renamed') = 11 ORDER BY id""" + qt_case_struct_predicate_2 """SELECT id FROM ${case_table_name} WHERE struct_element(a_struct, 'drop_and_add') IS NULL ORDER BY id""" + qt_case_struct_predicate_3 """SELECT id FROM ${case_table_name} WHERE struct_element(a_struct, 'added') IS NULL ORDER BY id""" + qt_case_struct_predicate_4 """SELECT id FROM ${case_table_name} WHERE struct_element(a_struct, 'added') IS NOT NULL ORDER BY id""" + + // Test 7: Multiple struct fields in one query + qt_case_struct_multi """SELECT struct_element(a_struct, 'renamed'), struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'), struct_element(a_struct, 'added') FROM ${case_table_name} ORDER BY id""" + + // Test 8: DISTINCT query on struct fields + qt_case_struct_distinct """SELECT DISTINCT struct_element(a_struct, 'renamed'), struct_element(a_struct, 'added'), struct_element(a_struct, 'keep') FROM ${case_table_name} ORDER BY 1, 2, 3""" + + // ============================================================ + // Test with ORC format and mixed case field names + // ============================================================ + def case_orc_table_name = "test_struct_evolution_case_orc" + + // Verify ORC case-sensitive table schema after evolution + qt_case_orc_desc """DESC ${case_orc_table_name}""" + + // Test 1: Query all columns - should work + qt_case_orc_select_all """SELECT * FROM ${case_orc_table_name} ORDER BY id""" + + 
// Test 2: Query struct field that exists in both old and new files + qt_case_orc_struct_keep """SELECT struct_element(a_struct, 'keep') FROM ${case_orc_table_name} ORDER BY id""" + qt_case_orc_struct_renamed """SELECT struct_element(a_struct, 'renamed') FROM ${case_orc_table_name} ORDER BY id""" + + // Test 3: Query struct field that was dropped and re-added with case change + // Note: Even though we use DROP_AND_ADD (uppercase) in SQL, the system normalizes + // field names to lowercase, so we query with 'drop_and_add' (lowercase) + qt_case_orc_struct_drop_and_add """SELECT struct_element(a_struct, 'drop_and_add') FROM ${case_orc_table_name} ORDER BY id""" + + // Test 4: Query struct field that was newly added + qt_case_orc_struct_added """SELECT struct_element(a_struct, 'added') FROM ${case_orc_table_name} ORDER BY id""" + + // Test 5: Query entire struct column + qt_case_orc_struct_full """SELECT a_struct FROM ${case_orc_table_name} ORDER BY id""" + + // Test 6: Multiple struct fields in one query + qt_case_orc_struct_multi """SELECT struct_element(a_struct, 'renamed'), struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'), struct_element(a_struct, 'added') FROM ${case_orc_table_name} ORDER BY id""" + // Clean up sql """drop catalog if exists ${catalog_name}""" }