diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h index 990901a4aa724d..76dfd3bd5c5560 100644 --- a/be/src/vec/data_types/data_type.h +++ b/be/src/vec/data_types/data_type.h @@ -315,6 +315,7 @@ struct WhichDataType { bool is_uuid() const { return idx == TypeIndex::UUID; } bool is_array() const { return idx == TypeIndex::Array; } bool is_tuple() const { return idx == TypeIndex::Tuple; } + bool is_struct() const { return idx == TypeIndex::Struct; } bool is_map() const { return idx == TypeIndex::Map; } bool is_set() const { return idx == TypeIndex::Set; } bool is_interval() const { return idx == TypeIndex::Interval; } @@ -431,5 +432,10 @@ inline bool is_compilable_type(const DataTypePtr& data_type) { return data_type->is_value_represented_by_number() && !is_decimal(data_type); } +inline bool is_complex_type(const DataTypePtr& data_type) { + WhichDataType which(data_type); + return which.is_array() || which.is_map() || which.is_struct(); +} + } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp index feb8a3478db152..81bf774c12db9d 100644 --- a/be/src/vec/data_types/data_type_struct.cpp +++ b/be/src/vec/data_types/data_type_struct.cpp @@ -159,6 +159,9 @@ Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const { // here need handle the empty struct '{}' if (rb.count() == 2) { + for (size_t i = 0; i < struct_column->tuple_size(); ++i) { + struct_column->get_column(i).insert_default(); + } return Status::OK(); } diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index f3e5d6caf14093..9d6d7f29e65bff 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -367,6 +367,10 @@ struct ConvertImplGenericFromString { // Note: here we should handle the null element if (val.size == 0) { col_to->insert_default(); + // empty string('') is an invalid format for complex type, set null_map to 1 + if (is_complex_type(data_type_to)) { + (*vec_null_map_to)[i] = 1; + } continue; } ReadBuffer read_buffer((char*)(val.data), val.size); @@ -1263,6 +1267,7 @@ class FunctionCast final : public IFunctionBase { public: using WrapperType = std::function; + using ElementWrappers = std::vector; using MonotonicityForRange = std::function; @@ -1552,15 +1557,80 @@ class FunctionCast final : public IFunctionBase { } } + ElementWrappers get_element_wrappers(FunctionContext* context, + const DataTypes& from_element_types, + const DataTypes& to_element_types) const { + DCHECK(from_element_types.size() == to_element_types.size()); + ElementWrappers element_wrappers; + element_wrappers.reserve(from_element_types.size()); + for (size_t i = 0; i < from_element_types.size(); ++i) { + const DataTypePtr& from_element_type = from_element_types[i]; + const DataTypePtr& to_element_type = to_element_types[i]; + element_wrappers.push_back( + prepare_unpack_dictionaries(context, from_element_type, to_element_type)); + } + return element_wrappers; + } + // check struct value type and get to_type value // TODO: need handle another type to cast struct - WrapperType create_struct_wrapper(const DataTypePtr& from_type, + WrapperType create_struct_wrapper(FunctionContext* context, const DataTypePtr& from_type, const DataTypeStruct& to_type) const { - switch (from_type->get_type_id()) { - case TypeIndex::String: - default: + // support CAST AS Struct from string + if (from_type->get_type_id() == TypeIndex::String) { return &ConvertImplGenericFromString::execute; } + + // only support CAST AS Struct from struct or string types + auto from = check_and_get_data_type(from_type.get()); + if (!from) { + return create_unsupport_wrapper( + fmt::format("CAST AS Struct can only be performed between struct types or from " + "String. Left type: {}, right type: {}", + from_type->get_name(), to_type.get_name())); + } + + const auto& from_element_types = from->get_elements(); + const auto& to_element_types = to_type.get_elements(); + // only support CAST AS Struct from struct type with same number of elements + if (from_element_types.size() != to_element_types.size()) { + return create_unsupport_wrapper( + fmt::format("CAST AS Struct can only be performed between struct types with " + "the same number of elements. Left type: {}, right type: {}", + from_type->get_name(), to_type.get_name())); + } + + auto element_wrappers = get_element_wrappers(context, from_element_types, to_element_types); + return [element_wrappers, from_element_types, to_element_types]( + FunctionContext* context, Block& block, const ColumnNumbers& arguments, + const size_t result, size_t /*input_rows_count*/) -> Status { + auto& from_column = block.get_by_position(arguments.front()).column; + auto from_col_struct = check_and_get_column(from_column.get()); + if (!from_col_struct) { + return Status::RuntimeError("Illegal column {} for function CAST AS Struct", + from_column->get_name()); + } + + size_t elements_num = to_element_types.size(); + Columns converted_columns(elements_num); + for (size_t i = 0; i < elements_num; ++i) { + ColumnWithTypeAndName from_element_column {from_col_struct->get_column_ptr(i), + from_element_types[i], ""}; + ColumnNumbers element_arguments {block.columns()}; + block.insert(from_element_column); + + size_t element_result = block.columns(); + block.insert({to_element_types[i], ""}); + + RETURN_IF_ERROR(element_wrappers[i](context, block, element_arguments, + element_result, + from_col_struct->get_column(i).size())); + converted_columns[i] = block.get_by_position(element_result).column; + } + + block.get_by_position(result).column = ColumnStruct::create(converted_columns); + return Status::OK(); + }; } WrapperType prepare_unpack_dictionaries(FunctionContext* context, const DataTypePtr& from_type, @@ -1737,7 +1807,8 @@ class FunctionCast final : public IFunctionBase { return create_array_wrapper(context, from_type, static_cast(*to_type)); case TypeIndex::Struct: - return create_struct_wrapper(from_type, static_cast(*to_type)); + return create_struct_wrapper(context, from_type, + static_cast(*to_type)); case TypeIndex::Map: return create_map_wrapper(from_type, static_cast(*to_type)); default: diff --git a/regression-test/data/query_p0/sql_functions/cast_function/test_cast_array_functions_by_literal.out b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_array_functions_by_literal.out index a6f4a7bf99aa0c..6bfa221c574435 100644 --- a/regression-test/data/query_p0/sql_functions/cast_function/test_cast_array_functions_by_literal.out +++ b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_array_functions_by_literal.out @@ -20,3 +20,6 @@ -- !sql7 -- ['a', 'b', 'c'] +-- !sql8 -- +\N + diff --git a/regression-test/data/query_p0/sql_functions/cast_function/test_cast_struct.out b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_struct.out new file mode 100644 index 00000000000000..7a39d8170a4318 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_struct.out @@ -0,0 +1,43 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql1 -- +\N + +-- !sql2 -- +\N + +-- !sql3 -- +\N + +-- !sql4 -- +\N + +-- !sql5 -- +\N + +-- !sql6 -- +{NULL} + +-- !sql7 -- +{1, '2'} + +-- !sql8 -- +{NULL, NULL} + +-- !sql9 -- +{'a', 'b'} + +-- !sql10 -- +{1, '2'} + +-- !sql11 -- +{1, '2'} + +-- !sql12 -- +{1, '2'} + +-- !sql13 -- +{1, 2022-10-10} + +-- !sql14 -- +{1, 2022-10-10 00:00:00} + diff --git a/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_array_functions_by_literal.groovy b/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_array_functions_by_literal.groovy index e075395305a887..7f9fb604bc74cf 100644 --- a/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_array_functions_by_literal.groovy +++ b/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_array_functions_by_literal.groovy @@ -38,6 +38,8 @@ suite("test_cast_array_functions_by_literal") { qt_sql5 "select cast('[]' as array)" qt_sql6 """select cast('["a", "b", "c"]' as array)""" qt_sql7 """select cast('["a", "b", "c"]' as array)""" + // empty string is invalid array, return NULL + qt_sql8 """select cast('' as array)""" test { sql "select cast(NULL as array)" diff --git a/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_struct.groovy b/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_struct.groovy new file mode 100644 index 00000000000000..3056dab2a36480 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_struct.groovy @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_cast_struct") { + // cast NULL to struct type + qt_sql1 "select cast(NULL as struct)" + + // string with invalid struct literal format cast to struct type + qt_sql2 "select cast('' as struct)" + qt_sql3 "select cast(cast('' as char) as struct)" + qt_sql4 "select cast(cast('x' as char) as struct)" + qt_sql5 "select cast(cast('x' as string) as struct)" + + // valid string format cast to struct type + qt_sql6 "select cast('{}' as struct)" + qt_sql7 "select cast('{1,2}' as struct)" + qt_sql8 """select cast('{"a", "b"}' as struct)""" + qt_sql9 """select cast('{"a", "b"}' as struct)""" + + // struct literal cast to struct + qt_sql10 """select cast({1,2} as struct)""" + qt_sql11 """select cast({'1','2'} as struct)""" + qt_sql12 """select cast({"1","2"} as struct)""" + qt_sql13 """select cast({1,'2022-10-10'} as struct)""" + + // struct type cast to struct + qt_sql14 "select cast(cast({1,'2022-10-10'} as struct) as struct)" + + // basic types except string can not cast to struct + test { + sql "select cast(cast(1 as int) as struct)" + exception "errCode = 2," + } + test { + sql "select cast(cast(999.999 as double) as struct)" + exception "errCode = 2," + } + + // struct literal can not cast to basic types + test { + sql "select cast({1,2} as string)" + exception "errCode = 2," + } + + // struct literal cast to struct MUST with same field number + test { + sql "select cast({1,2} as struct)" + exception "errCode = 2," + } + test { + sql "select cast({1,2} as struct)" + exception "errCode = 2," + } + + // struct type cast to struct MUST with same field number + test { + sql "select cast(cast({1,'2022-10-10'} as struct) as struct)" + exception "errCode = 2," + } + test { + sql "select cast(cast({1,'2022-10-10'} as struct) as struct)" + exception "errCode = 2," + } +}