diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp index 2049a47a891e70..1b9c516d1faa5f 100644 --- a/be/src/vec/data_types/data_type_struct.cpp +++ b/be/src/vec/data_types/data_type_struct.cpp @@ -355,14 +355,7 @@ std::optional DataTypeStruct::try_get_position_by_name(const String& nam } String DataTypeStruct::get_name_by_position(size_t i) const { - if (i == 0 || i > names.size()) { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "Index of tuple element ({}) if out range ([1, {}])", i, - names.size()); - LOG(FATAL) << fmt::to_string(error_msg); - } - - return names[i - 1]; + return names[i]; } int64_t DataTypeStruct::get_uncompressed_serialized_bytes(const IColumn& column, diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 46512170a45b8b..71e334e4b36db3 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -1274,16 +1274,15 @@ Status OrcReader::_fill_doris_array_offsets(const std::string& col_name, } template -Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, - const ColumnPtr& doris_column, +Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, ColumnPtr& doris_column, const DataTypePtr& data_type, const orc::Type* orc_column_type, orc::ColumnVectorBatch* cvb, size_t num_values) { MutableColumnPtr data_column; if (doris_column->is_nullable()) { SCOPED_RAW_TIMER(&_statistics.decode_null_map_time); - auto* nullable_column = reinterpret_cast( - (*std::move(doris_column)).mutate().get()); + auto* nullable_column = + reinterpret_cast(doris_column->assume_mutable().get()); data_column = nullable_column->get_nested_column_ptr(); NullMap& map_data_column = nullable_column->get_null_map_data(); auto origin_size = map_data_column.size(); @@ -1294,9 +1293,7 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, 
map_data_column[origin_size + i] = !cvb_nulls[i]; } } else { - for (int i = 0; i < num_values; ++i) { - map_data_column[origin_size + i] = false; - } + memset(map_data_column.data() + origin_size, 0, num_values); } } else { if (cvb->hasNulls) { @@ -1306,6 +1303,16 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, data_column = doris_column->assume_mutable(); } + return _fill_doris_data_column(col_name, data_column, data_type, orc_column_type, + cvb, num_values); +} + +template +Status OrcReader::_fill_doris_data_column(const std::string& col_name, + MutableColumnPtr& data_column, + const DataTypePtr& data_type, + const orc::Type* orc_column_type, + orc::ColumnVectorBatch* cvb, size_t num_values) { TypeIndex logical_type = remove_nullable(data_type)->get_type_id(); switch (logical_type) { #define DISPATCH(FlatType, CppType, OrcColumnType) \ @@ -1346,9 +1353,11 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, cvb, num_values); case TypeIndex::Array: { if (orc_column_type->getKind() != orc::TypeKind::LIST) { - return Status::InternalError("Wrong data type for colum '{}'", col_name); + return Status::InternalError( + "Wrong data type for column '{}', expected list, actual {}", col_name, + orc_column_type->getKind()); } - auto* orc_list = down_cast(cvb); + auto* orc_list = dynamic_cast(cvb); auto& doris_offsets = static_cast(*data_column).get_offsets(); auto& orc_offsets = orc_list->offsets; size_t element_size = 0; @@ -1358,15 +1367,17 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, reinterpret_cast(remove_nullable(data_type).get()) ->get_nested_type()); const orc::Type* nested_orc_type = orc_column_type->getSubtype(0); + std::string element_name = col_name + ".element"; return _orc_column_to_doris_column( - col_name, static_cast(*data_column).get_data_ptr(), nested_type, + element_name, static_cast(*data_column).get_data_ptr(), nested_type, nested_orc_type, orc_list->elements.get(), 
element_size); } case TypeIndex::Map: { if (orc_column_type->getKind() != orc::TypeKind::MAP) { - return Status::InternalError("Wrong data type for colum '{}'", col_name); + return Status::InternalError("Wrong data type for column '{}', expected map, actual {}", + col_name, orc_column_type->getKind()); } - auto* orc_map = down_cast(cvb); + auto* orc_map = dynamic_cast(cvb); auto& doris_map = static_cast(*data_column); size_t element_size = 0; RETURN_IF_ERROR(_fill_doris_array_offsets(col_name, doris_map.get_offsets(), @@ -1379,33 +1390,64 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, ->get_value_type()); const orc::Type* orc_key_type = orc_column_type->getSubtype(0); const orc::Type* orc_value_type = orc_column_type->getSubtype(1); - const ColumnPtr& doris_key_column = doris_map.get_keys_ptr(); - const ColumnPtr& doris_value_column = doris_map.get_values_ptr(); - RETURN_IF_ERROR(_orc_column_to_doris_column(col_name, doris_key_column, + ColumnPtr& doris_key_column = doris_map.get_keys_ptr(); + ColumnPtr& doris_value_column = doris_map.get_values_ptr(); + std::string key_col_name = col_name + ".key"; + std::string value_col_name = col_name + ".value"; + RETURN_IF_ERROR(_orc_column_to_doris_column(key_col_name, doris_key_column, doris_key_type, orc_key_type, orc_map->keys.get(), element_size)); - return _orc_column_to_doris_column(col_name, doris_value_column, + return _orc_column_to_doris_column(value_col_name, doris_value_column, doris_value_type, orc_value_type, orc_map->elements.get(), element_size); } case TypeIndex::Struct: { if (orc_column_type->getKind() != orc::TypeKind::STRUCT) { - return Status::InternalError("Wrong data type for colum '{}'", col_name); + return Status::InternalError( + "Wrong data type for column '{}', expected struct, actual {}", col_name, + orc_column_type->getKind()); } - auto* orc_struct = down_cast(cvb); + auto* orc_struct = dynamic_cast(cvb); auto& doris_struct = static_cast(*data_column); - if 
(orc_struct->fields.size() != doris_struct.tuple_size()) { - return Status::InternalError("Wrong number of struct fields for column '{}'", col_name); - } + std::map read_fields; + std::set missing_fields; const DataTypeStruct* doris_struct_type = reinterpret_cast(remove_nullable(data_type).get()); for (int i = 0; i < doris_struct.tuple_size(); ++i) { - orc::ColumnVectorBatch* orc_field = orc_struct->fields[i]; - const orc::Type* orc_type = orc_column_type->getSubtype(i); - const ColumnPtr& doris_field = doris_struct.get_column_ptr(i); - const DataTypePtr& doris_type = doris_struct_type->get_element(i); + bool is_missing_col = true; + for (int j = 0; j < orc_column_type->getSubtypeCount(); ++j) { + if (boost::iequals(doris_struct_type->get_name_by_position(i), + orc_column_type->getFieldName(j))) { + read_fields[i] = j; + is_missing_col = false; + break; + } + } + if (is_missing_col) { + missing_fields.insert(i); + } + } + + for (int missing_field : missing_fields) { + ColumnPtr& doris_field = doris_struct.get_column_ptr(missing_field); + if (!doris_field->is_nullable()) { + return Status::InternalError( + "Child field of '{}' is not nullable, but is missing in orc file", + col_name); + } + reinterpret_cast(doris_field->assume_mutable().get()) + ->insert_null_elements(num_values); + } + + for (auto read_field : read_fields) { + orc::ColumnVectorBatch* orc_field = orc_struct->fields[read_field.second]; + const orc::Type* orc_type = orc_column_type->getSubtype(read_field.second); + std::string field_name = + col_name + "." 
+ orc_column_type->getFieldName(read_field.second); + ColumnPtr& doris_field = doris_struct.get_column_ptr(read_field.first); + const DataTypePtr& doris_type = doris_struct_type->get_element(read_field.first); RETURN_IF_ERROR(_orc_column_to_doris_column( - col_name, doris_field, doris_type, orc_type, orc_field, num_values)); + field_name, doris_field, doris_type, orc_type, orc_field, num_values)); } return Status::OK(); } diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 4cb5361b8d8a7b..dba6e0f7fef040 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -267,11 +267,16 @@ class OrcReader : public GenericReader { void _init_system_properties(); void _init_file_description(); template - Status _orc_column_to_doris_column(const std::string& col_name, const ColumnPtr& doris_column, + Status _orc_column_to_doris_column(const std::string& col_name, ColumnPtr& doris_column, const DataTypePtr& data_type, const orc::Type* orc_column_type, orc::ColumnVectorBatch* cvb, size_t num_values); + template + Status _fill_doris_data_column(const std::string& col_name, MutableColumnPtr& data_column, + const DataTypePtr& data_type, const orc::Type* orc_column_type, + orc::ColumnVectorBatch* cvb, size_t num_values); + template Status _decode_flat_column(const std::string& col_name, const MutableColumnPtr& data_column, orc::ColumnVectorBatch* cvb, size_t num_values) { diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql index ad6d326823bdfe..26bfdc8bdfe287 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql @@ -1871,6 +1871,19 @@ LOCATION msck repair table test_string_dict_filter_orc; +create table test_hive_struct_add_column_orc ( + `id` int, + 
`name` string, + `details` struct, + `sex` int, + `complex` array>> +) +STORED AS ORC +LOCATION '/user/doris/preinstalled_data/orc_table/test_hive_struct_add_column_orc'; + +msck repair table test_hive_struct_add_column_orc; + + show tables; diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0 new file mode 100644 index 00000000000000..df41136523a380 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_1 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_1 new file mode 100644 index 00000000000000..d6a1045e1d6ce3 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_1 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_2 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_2 new file mode 100644 index 00000000000000..46cb7e8647c07a Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_2 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_3 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_3 new file mode 100644 index 00000000000000..a92fdd7f2d9a26 Binary files 
/dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_3 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_4 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_4 new file mode 100644 index 00000000000000..b8f4d0e0658fd5 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_4 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_5 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_5 new file mode 100644 index 00000000000000..1536bc5a9592ca Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_5 differ diff --git a/regression-test/data/external_table_p0/hive/test_hive_struct_add_column.out b/regression-test/data/external_table_p0/hive/test_hive_struct_add_column.out new file mode 100644 index 00000000000000..a87aab2d81deda --- /dev/null +++ b/regression-test/data/external_table_p0/hive/test_hive_struct_add_column.out @@ -0,0 +1,206 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !desc -- +id INT Yes true \N +name TEXT Yes true \N +details STRUCT Yes true \N +sex INT Yes true \N +complex ARRAY>> Yes true \N + +-- !test_1 -- +1 Alice {"age": 25, "city": "New York", "email": null, "phone": null} \N \N +2 Blice {"age": 26, "city": "New York New York", "email": null, "phone": null} \N \N +3 Clice {"age": 27, "city": "New York New York New York", "email": null, "phone": null} \N \N +4 Dlice {"age": 28, "city": "New York New York New York New York", "email": null, "phone": null} \N \N +5 Elice {"age": 29, "city": "New York New York New York New York New York", "email": null, "phone": null} \N \N +11 AAlice {"age": 125, "city": "acity", "email": "alice@example.com", "phone": null} \N \N +12 BBlice {"age": 126, "city": "bcity", "email": "bob@example.com", "phone": null} \N \N +13 CClice {"age": 127, "city": "ccity", "email": "alicebob@example.com", "phone": null} \N \N +14 DDlice {"age": 128, "city": "dcity", "email": "xxxxxbob@example.com", "phone": null} \N \N +15 EElice {"age": 129, "city": "ecity", "email": null, "phone": null} \N \N +21 Charlie {"age": 218, "city": "San Francisco", "email": "asdacharlie@example.com", "phone": 123} \N \N +22 Charlie {"age": 228, "city": "San-Francisco", "email": "ssscharlie@example.com", "phone": 1234} \N \N +23 Charlie {"age": 238, "city": "SanxFrancisco", "email": "333charlie@example.com", "phone": 12345} \N \N +24 Charlie {"age": 248, "city": "San888Francisco", "email": "777charlie@example.com", "phone": 123456} \N \N +25 Charlie {"age": 258, "city": "San0000Francisco", "email": "9999chasasrlie@example.com", "phone": null} \N \N +31 Alice {"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} 0 \N +32 Bob {"age": 30, "city": "Los Angeles", "email": "bob@example.com", "phone": 789012} 0 \N +33 Charlie {"age": 28, "city": "San Francisco", "email": "charlie@example.com", "phone": 456789} 1 \N +34 David {"age": 32, "city": 
"Chicago", "email": "david@example.com", "phone": 987654} 0 \N +35 Eve {"age": 27, "city": "Seattle", "email": "eve@example.com", "phone": null} \N \N +41 Alice {"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} 1 [{"a": 1, "b": null}, {"a": 1, "b": null}] +42 Bob {"age": 30, "city": "Los Angeles", "email": "bob@example.com", "phone": 789012} 1 [{"a": 2, "b": null}, {"a": 1, "b": null}] +43 Charlie {"age": 28, "city": "San Francisco", "email": "charlie@example.com", "phone": 456789} 2 [{"a": 3, "b": null}, {"a": 1, "b": null}] +44 David {"age": 32, "city": "Chicago", "email": "david@example.com", "phone": 987654} 1 [{"a": 4, "b": null}, {"a": 1, "b": null}] +45 Eve {"age": 27, "city": "Seattle", "email": "eve@example.com", "phone": 654321} 2 [{"a": 5, "b": null}, {"a": 1, "b": null}] +51 Alice {"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} 1 [{"a": 1, "b": {"aa": "foo", "bb": 100}}, {"a": 1, "b": {"aa": "foo", "bb": 100}}] +52 Bob {"age": 30, "city": "Los Angeles", "email": "bob@example.com", "phone": 789012} 2 [{"a": 2, "b": {"aa": "bar", "bb": 200}}] +53 Charlie {"age": 28, "city": "San Francisco", "email": "charlie@example.com", "phone": 456789} 1 [{"a": 3, "b": {"aa": "baz", "bb": 300}}] +54 David {"age": 32, "city": "Chicago", "email": "david@example.com", "phone": 987654} 2 [{"a": 8, "b": {"aa": "qux", "bb": 400}}] +55 Eve {"age": 27, "city": "Seattle", "email": "eve@example.com", "phone": 654321} 1 [{"a": 5, "b": {"aa": "abcd", "bb": 500}}] + +-- !test_2 -- +1 Alice {"age": 25, "city": "New York", "email": null, "phone": null} \N \N + +-- !test_3 -- +1 Alice {"age": 25, "city": "New York", "email": null, "phone": null} \N \N +2 Blice {"age": 26, "city": "New York New York", "email": null, "phone": null} \N \N +3 Clice {"age": 27, "city": "New York New York New York", "email": null, "phone": null} \N \N +4 Dlice {"age": 28, "city": "New York New York New York New York", "email": null, "phone": null} 
\N \N +5 Elice {"age": 29, "city": "New York New York New York New York New York", "email": null, "phone": null} \N \N +11 AAlice {"age": 125, "city": "acity", "email": "alice@example.com", "phone": null} \N \N +12 BBlice {"age": 126, "city": "bcity", "email": "bob@example.com", "phone": null} \N \N +13 CClice {"age": 127, "city": "ccity", "email": "alicebob@example.com", "phone": null} \N \N +14 DDlice {"age": 128, "city": "dcity", "email": "xxxxxbob@example.com", "phone": null} \N \N +15 EElice {"age": 129, "city": "ecity", "email": null, "phone": null} \N \N +21 Charlie {"age": 218, "city": "San Francisco", "email": "asdacharlie@example.com", "phone": 123} \N \N +22 Charlie {"age": 228, "city": "San-Francisco", "email": "ssscharlie@example.com", "phone": 1234} \N \N +23 Charlie {"age": 238, "city": "SanxFrancisco", "email": "333charlie@example.com", "phone": 12345} \N \N +24 Charlie {"age": 248, "city": "San888Francisco", "email": "777charlie@example.com", "phone": 123456} \N \N +25 Charlie {"age": 258, "city": "San0000Francisco", "email": "9999chasasrlie@example.com", "phone": null} \N \N +31 Alice {"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} 0 \N +32 Bob {"age": 30, "city": "Los Angeles", "email": "bob@example.com", "phone": 789012} 0 \N +33 Charlie {"age": 28, "city": "San Francisco", "email": "charlie@example.com", "phone": 456789} 1 \N +34 David {"age": 32, "city": "Chicago", "email": "david@example.com", "phone": 987654} 0 \N +35 Eve {"age": 27, "city": "Seattle", "email": "eve@example.com", "phone": null} \N \N + +-- !test_4 -- +41 Alice {"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} 1 [{"a": 1, "b": null}, {"a": 1, "b": null}] +42 Bob {"age": 30, "city": "Los Angeles", "email": "bob@example.com", "phone": 789012} 1 [{"a": 2, "b": null}, {"a": 1, "b": null}] +43 Charlie {"age": 28, "city": "San Francisco", "email": "charlie@example.com", "phone": 456789} 2 [{"a": 3, "b": null}, {"a": 1, 
"b": null}] +44 David {"age": 32, "city": "Chicago", "email": "david@example.com", "phone": 987654} 1 [{"a": 4, "b": null}, {"a": 1, "b": null}] +45 Eve {"age": 27, "city": "Seattle", "email": "eve@example.com", "phone": 654321} 2 [{"a": 5, "b": null}, {"a": 1, "b": null}] +51 Alice {"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} 1 [{"a": 1, "b": {"aa": "foo", "bb": 100}}, {"a": 1, "b": {"aa": "foo", "bb": 100}}] +52 Bob {"age": 30, "city": "Los Angeles", "email": "bob@example.com", "phone": 789012} 2 [{"a": 2, "b": {"aa": "bar", "bb": 200}}] +53 Charlie {"age": 28, "city": "San Francisco", "email": "charlie@example.com", "phone": 456789} 1 [{"a": 3, "b": {"aa": "baz", "bb": 300}}] +54 David {"age": 32, "city": "Chicago", "email": "david@example.com", "phone": 987654} 2 [{"a": 8, "b": {"aa": "qux", "bb": 400}}] +55 Eve {"age": 27, "city": "Seattle", "email": "eve@example.com", "phone": 654321} 1 [{"a": 5, "b": {"aa": "abcd", "bb": 500}}] + +-- !test_5 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !test_6 -- +[{"a": 1, "b": null}, {"a": 1, "b": null}] +[{"a": 2, "b": null}, {"a": 1, "b": null}] +[{"a": 3, "b": null}, {"a": 1, "b": null}] +[{"a": 4, "b": null}, {"a": 1, "b": null}] +[{"a": 5, "b": null}, {"a": 1, "b": null}] +[{"a": 1, "b": {"aa": "foo", "bb": 100}}, {"a": 1, "b": {"aa": "foo", "bb": 100}}] +[{"a": 2, "b": {"aa": "bar", "bb": 200}}] +[{"a": 3, "b": {"aa": "baz", "bb": 300}}] +[{"a": 8, "b": {"aa": "qux", "bb": 400}}] +[{"a": 5, "b": {"aa": "abcd", "bb": 500}}] + +-- !test_7 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !test_8 -- +[{"a": 1, "b": null}, {"a": 1, "b": null}] +[{"a": 2, "b": null}, {"a": 1, "b": null}] +[{"a": 3, "b": null}, {"a": 1, "b": null}] +[{"a": 4, "b": null}, {"a": 1, "b": null}] +[{"a": 5, "b": null}, {"a": 1, "b": null}] +[{"a": 1, "b": {"aa": "foo", "bb": 100}}, {"a": 1, "b": {"aa": "foo", "bb": 100}}] +[{"a": 
2, "b": {"aa": "bar", "bb": 200}}] +[{"a": 3, "b": {"aa": "baz", "bb": 300}}] +[{"a": 8, "b": {"aa": "qux", "bb": 400}}] +[{"a": 5, "b": {"aa": "abcd", "bb": 500}}] + +-- !test_9 -- +0 +0 +0 + +-- !test_10 -- +1 +1 +1 +1 +1 +1 +1 + +-- !test_11 -- +2 +2 +2 +2 + +-- !test_12 -- +43 Charlie {"age": 28, "city": "San Francisco", "email": "charlie@example.com", "phone": 456789} 2 [{"a": 3, "b": null}, {"a": 1, "b": null}] +45 Eve {"age": 27, "city": "Seattle", "email": "eve@example.com", "phone": 654321} 2 [{"a": 5, "b": null}, {"a": 1, "b": null}] +52 Bob {"age": 30, "city": "Los Angeles", "email": "bob@example.com", "phone": 789012} 2 [{"a": 2, "b": {"aa": "bar", "bb": 200}}] +54 David {"age": 32, "city": "Chicago", "email": "david@example.com", "phone": 987654} 2 [{"a": 8, "b": {"aa": "qux", "bb": 400}}] + +-- !test_13 -- + +-- !test_14 -- +53 Charlie {"age": 28, "city": "San Francisco", "email": "charlie@example.com", "phone": 456789} 1 [{"a": 3, "b": {"aa": "baz", "bb": 300}}] +54 David {"age": 32, "city": "Chicago", "email": "david@example.com", "phone": 987654} 2 [{"a": 8, "b": {"aa": "qux", "bb": 400}}] + +-- !test_15 -- +41 Alice {"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} 1 [{"a": 1, "b": null}, {"a": 1, "b": null}] {"a": 1, "b": null} +51 Alice {"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} 1 [{"a": 1, "b": {"aa": "foo", "bb": 100}}, {"a": 1, "b": {"aa": "foo", "bb": 100}}] {"a": 1, "b": {"aa": "foo", "bb": 100}} + +-- !test_16 -- +[{"a": 2, "b": null}, {"a": 1, "b": null}] + +-- !test_17 -- +{"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} +{"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} +{"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} + +-- !test_18 -- +{"age": 25, "city": "New York", "email": null, "phone": null} + +-- !test_19 -- +{"age": 25, "city": "New York", "email": "alice@example.com", 
"phone": 123456} 31 +{"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} 41 +{"age": 25, "city": "New York", "email": "alice@example.com", "phone": 123456} 51 + +-- !test_20 -- +{"age": 25, "city": "New York", "email": null, "phone": null} 1 + +-- !test_21 -- +0 3 +2 4 +1 7 +\N 16 + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_struct_add_column.groovy b/regression-test/suites/external_table_p0/hive/test_hive_struct_add_column.groovy new file mode 100644 index 00000000000000..91a724fc10fd27 --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_struct_add_column.groovy @@ -0,0 +1,157 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hive_struct_add_column", "all_types,p0,external,hive,external_docker,external_docker_hive") { + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + } + + try { + String hms_port = context.config.otherConfigs.get("hms_port") + String catalog_name = "test_hive_struct_add_column" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + sql """use `${catalog_name}`.`default`""" + + qt_desc """ desc test_hive_struct_add_column_orc;""" + qt_test_1 """ select * from test_hive_struct_add_column_orc order by id;""" + qt_test_2 """ select * from test_hive_struct_add_column_orc where id = 1 order by id;""" + qt_test_3 """ select * from test_hive_struct_add_column_orc where complex is null order by id;""" + qt_test_4 """ select * from test_hive_struct_add_column_orc where complex is not null order by id""" + qt_test_5 """ select complex from test_hive_struct_add_column_orc where complex is null order by id """ + qt_test_6 """ select complex from test_hive_struct_add_column_orc where complex is not null order by id """ + qt_test_7 """select complex from test_hive_struct_add_column_orc where complex is null order by id; """ + qt_test_8 """select complex from test_hive_struct_add_column_orc where complex is not null order by id;""" + qt_test_9 """select sex from test_hive_struct_add_column_orc where sex = 0 order by id;""" + qt_test_10 """select sex from test_hive_struct_add_column_orc where sex = 1 order by id;""" + qt_test_11 """select sex from test_hive_struct_add_column_orc where sex = 2 order by id;""" + qt_test_12 """select * from test_hive_struct_add_column_orc where sex = 2 order by id; """ + qt_test_13 
"""select * from test_hive_struct_add_column_orc where id =sex order by id;""" + qt_test_14 """select * from test_hive_struct_add_column_orc where id -52=sex order by id;""" + qt_test_15 """select *,complex[1] from test_hive_struct_add_column_orc where struct_element(complex[1],1) = 1 order by id;""" + qt_test_16 """ select complex from test_hive_struct_add_column_orc where struct_element(complex[1],1) = 2 and struct_element(complex[1],2) is null order by id ; """ + qt_test_17 """select details from test_hive_struct_add_column_orc where struct_element(details,1) = 25 and struct_element(details,4) is not null order by id;""" + qt_test_18 """select details from test_hive_struct_add_column_orc where struct_element(details,1) = 25 and struct_element(details,4) is null order by id;""" + qt_test_19 """ select details,id from test_hive_struct_add_column_orc where struct_element(details,1) = 25 and struct_element(details,4) is not null order by id ;""" + qt_test_20 """ select details,id from test_hive_struct_add_column_orc where struct_element(details,1) = 25 and struct_element(details,4) is null order by id;""" + qt_test_21 """ select sex,count(*) from test_hive_struct_add_column_orc group by sex order by count(*);""" + + + sql """drop catalog if exists ${catalog_name}""" + + } finally { + } + +} + + +/* +drop table user_info_orc; +CREATE TABLE user_info_orc ( + id INT, + name STRING, + details STRUCT +) +stored as orc; + +INSERT INTO TABLE user_info_orc +VALUES + (1, 'Alice', named_struct('age', 25, 'city', 'New York')), + (2, 'Blice', named_struct('age', 26, 'city', 'New York New York')), + (3, 'Clice', named_struct('age', 27, 'city', 'New York New York New York')), + (4, 'Dlice', named_struct('age', 28, 'city', 'New York New York New York New York')), + (5, 'Elice', named_struct('age', 29, 'city', 'New York New York New York New York New York')); + +ALTER TABLE user_info_orc CHANGE COLUMN details details STRUCT; + +INSERT INTO TABLE user_info_orc +VALUES + (11, 
'AAlice', named_struct('age', 125, 'city', 'acity', 'email', 'alice@example.com')), + (12, 'BBlice', named_struct('age', 126, 'city', 'bcity', 'email', 'bob@example.com')), + (13, 'CClice', named_struct('age', 127, 'city', 'ccity', 'email', 'alicebob@example.com')), + (14, 'DDlice', named_struct('age', 128, 'city', 'dcity', 'email', 'xxxxxbob@example.com')), + (15, 'EElice', named_struct('age', 129, 'city', 'ecity', 'email', NULL)); + + + +ALTER TABLE user_info_orc CHANGE COLUMN details details STRUCT<age:INT,city:STRING,email:STRING,phone:INT>; + + +INSERT INTO user_info_orc +VALUES + (21, 'Charlie', named_struct('age', 218, 'city', 'San Francisco', 'email', 'asdacharlie@example.com','phone',123)), + (22, 'Charlie', named_struct('age', 228, 'city', 'San-Francisco', 'email', 'ssscharlie@example.com','phone',1234)), + (23, 'Charlie', named_struct('age', 238, 'city', 'SanxFrancisco', 'email', '333charlie@example.com','phone',12345)), + (24, 'Charlie', named_struct('age', 248, 'city', 'San888Francisco', 'email', '777charlie@example.com','phone',123456)), + (25, 'Charlie', named_struct('age', 258, 'city', 'San0000Francisco', 'email', '9999chasasrlie@example.com','phone',NULL)); + + + +desc user_info_orc; +ALTER TABLE user_info_orc add columns (sex int); + +INSERT INTO TABLE user_info_orc +VALUES + (31, 'Alice', named_struct('age', 25, 'city', 'New York', 'email', 'alice@example.com', 'phone', 123456),0), + (32, 'Bob', named_struct('age', 30, 'city', 'Los Angeles', 'email', 'bob@example.com', 'phone', 789012),0), + (33, 'Charlie', named_struct('age', 28, 'city', 'San Francisco', 'email', 'charlie@example.com', 'phone', 456789),1), + (34, 'David', named_struct('age', 32, 'city', 'Chicago', 'email', 'david@example.com', 'phone', 987654),0), + (35, 'Eve', named_struct('age', 27, 'city', 'Seattle', 'email', 'eve@example.com', 'phone', NULL),NULL); + + +ALTER TABLE user_info_orc add columns (complex array<struct<a:int>>); + +INSERT INTO TABLE user_info_orc +VALUES + (41,'Alice', named_struct('age', 25, 'city', 'New York', 'email', 
'alice@example.com', 'phone', 123456), 1, array(named_struct('a', 1),named_struct('a', 1))), + (42,'Bob', named_struct('age', 30, 'city', 'Los Angeles', 'email', 'bob@example.com', 'phone', 789012), 1, array(named_struct('a', 2),named_struct('a', 1))), + (43,'Charlie', named_struct('age', 28, 'city', 'San Francisco', 'email', 'charlie@example.com', 'phone', 456789), 2, array(named_struct('a', 3),named_struct('a', 1))), + (44,'David', named_struct('age', 32, 'city', 'Chicago', 'email', 'david@example.com', 'phone', 987654), 1, array(named_struct('a', 4),named_struct('a', 1))), + (45,'Eve', named_struct('age', 27, 'city', 'Seattle', 'email', 'eve@example.com', 'phone', 654321), 2, array(named_struct('a', 5),named_struct('a', 1))); + +ALTER TABLE user_info_orc CHANGE COLUMN complex complex array<struct<a:int,b:struct<aa:string,bb:int>>>; + +INSERT INTO TABLE user_info_orc +VALUES + (51, 'Alice', named_struct('age', 25, 'city', 'New York', 'email', 'alice@example.com', 'phone', 123456), 1, array(named_struct('a', 1, 'b', named_struct('aa', 'foo', 'bb', 100)),named_struct('a', 1, 'b', named_struct('aa', 'foo', 'bb', 100)))), + (52, 'Bob', named_struct('age', 30, 'city', 'Los Angeles', 'email', 'bob@example.com', 'phone', 789012), 2, array(named_struct('a', 2, 'b', named_struct('aa', 'bar', 'bb', 200)))), + (53, 'Charlie', named_struct('age', 28, 'city', 'San Francisco', 'email', 'charlie@example.com', 'phone', 456789), 1, array(named_struct('a', 3, 'b', named_struct('aa', 'baz', 'bb', 300)))), + (54, 'David', named_struct('age', 32, 'city', 'Chicago', 'email', 'david@example.com', 'phone', 987654), 2, array(named_struct('a', 8, 'b', named_struct('aa', 'qux', 'bb', 400)))), + (55, 'Eve', named_struct('age', 27, 'city', 'Seattle', 'email', 'eve@example.com', 'phone', 654321), 1, array(named_struct('a', 5, 'b', named_struct('aa', 'abcd', 'bb', 500)))); + + + +cp user_info_orc/ => test_hive_struct_add_column_orc/ + +create table test_hive_struct_add_column_orc ( + `id` int, + `name` string, + `details` struct<age:int,city:string,email:string,phone:int>, + 
`sex` int, + `complex` array<struct<a:int,b:struct<aa:string,bb:int>>> +) +STORED AS ORC +LOCATION '/user/doris/preinstalled_data/orc_table/test_hive_struct_add_column_orc'; +*/