diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index ce61f91af94ef9..a927690ffd9184 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -193,6 +193,7 @@ set(VEC_FILES
   functions/array/function_array_pushfront.cpp
   functions/array/function_array_first_index.cpp
   functions/function_map.cpp
+  functions/function_struct.cpp
   exprs/table_function/vexplode_json_array.cpp
   functions/math.cpp
   functions/function_bitmap.cpp
diff --git a/be/src/vec/data_types/data_type_nullable.cpp b/be/src/vec/data_types/data_type_nullable.cpp
index 2892b051d40dc5..7fe364a2d99c4f 100644
--- a/be/src/vec/data_types/data_type_nullable.cpp
+++ b/be/src/vec/data_types/data_type_nullable.cpp
@@ -161,6 +161,18 @@ DataTypePtr make_nullable(const DataTypePtr& type) {
     return std::make_shared(type);
 }
 
+// Applies the single-type make_nullable() overload to every element of
+// `types` and returns the results in the same order.
+DataTypes make_nullable(const DataTypes& types) {
+    DataTypes nullable_types;
+    // Exactly one output entry per input entry: allocate once up front.
+    nullable_types.reserve(types.size());
+    for (const auto& type : types) {
+        nullable_types.push_back(make_nullable(type));
+    }
+    return nullable_types;
+}
+
 DataTypePtr remove_nullable(const DataTypePtr& type) {
     if (type->is_nullable()) return static_cast(*type).get_nested_type();
     return type;
diff --git a/be/src/vec/data_types/data_type_nullable.h b/be/src/vec/data_types/data_type_nullable.h
index 8ca6174a763bf6..c1c920e06808e3 100644
--- a/be/src/vec/data_types/data_type_nullable.h
+++ b/be/src/vec/data_types/data_type_nullable.h
@@ -103,6 +103,7 @@ class DataTypeNullable final : public IDataType {
 };
 
 DataTypePtr make_nullable(const DataTypePtr& type);
+DataTypes make_nullable(const DataTypes& types);
 DataTypePtr remove_nullable(const DataTypePtr& type);
 DataTypes remove_nullable(const DataTypes& types);
 bool have_nullable(const DataTypes& types);
diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
index 5e1ec60ae2a089..f0dfd34c8f1b44 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -69,6 +69,7 @@ std::string 
DataTypeStruct::do_get_name() const {
         if (i != 0) {
             s << ", ";
         }
+        s << names[i] << ":";
         s << elems[i]->get_name();
     }
     s << ")";
diff --git a/be/src/vec/functions/function_map.cpp b/be/src/vec/functions/function_map.cpp
index 9cb5c5898d2af2..129cb9f686d919 100644
--- a/be/src/vec/functions/function_map.cpp
+++ b/be/src/vec/functions/function_map.cpp
@@ -101,7 +101,7 @@ class FunctionMap : public IFunction {
             }
         }
 
-        // insert value into array
+        // insert value into map
         ColumnArray::Offset64 offset = 0;
         for (size_t row = 0; row < input_rows_count; ++row) {
             for (size_t i = 0; i < num_element; i += 2) {
diff --git a/be/src/vec/functions/function_struct.cpp b/be/src/vec/functions/function_struct.cpp
new file mode 100644
index 00000000000000..d8cac8406b6d50
--- /dev/null
+++ b/be/src/vec/functions/function_struct.cpp
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_struct.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_struct.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+// Builds a struct column out of the function arguments. Impl supplies:
+//   - name: the name returned by get_name(),
+//   - pred(i): whether argument i becomes a struct field,
+//   - check_number_of_arguments / get_return_type_impl: forwarded to below.
+template <typename Impl>
+class FunctionStruct : public IFunction {
+public:
+    static constexpr auto name = Impl::name;
+    static FunctionPtr create() { return std::make_shared<FunctionStruct>(); }
+
+    /// Get function name.
+    String get_name() const override { return name; }
+
+    bool is_variadic() const override { return true; }
+
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    size_t get_number_of_arguments() const override { return 0; }
+
+    // DCHECK-based sanity check; Impl adds its own arity rule on top.
+    void check_number_of_arguments(size_t number_of_arguments) const override {
+        DCHECK(number_of_arguments > 0)
+                << "function: " << get_name() << ", arguments should not be empty.";
+        return Impl::check_number_of_arguments(number_of_arguments);
+    }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return Impl::get_return_type_impl(arguments);
+    }
+
+    // Fills the result struct column: every argument selected by Impl::pred
+    // becomes one field, in argument order. Returns RuntimeError when the
+    // result type does not create a ColumnStruct or when the number of selected
+    // arguments differs from the number of struct fields.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto result_col = block.get_by_position(result).type->create_column();
+        auto struct_column = typeid_cast<ColumnStruct*>(result_col.get());
+        if (!struct_column) {
+            return Status::RuntimeError("unsupported types for function {} return {}", get_name(),
+                                        block.get_by_position(result).type->get_name());
+        }
+        // Positions (in the block) of the arguments that carry field values.
+        ColumnNumbers args_num;
+        for (size_t i = 0; i < arguments.size(); i++) {
+            if (Impl::pred(i)) {
+                args_num.push_back(arguments[i]);
+            }
+        }
+        size_t num_element = args_num.size();
+        if (num_element != struct_column->tuple_size()) {
+            return Status::RuntimeError(
+                    "function {} args number {} is not equal to result struct field number {}.",
+                    get_name(), num_element, struct_column->tuple_size());
+        }
+
+        // insert value into struct column by column
+        for (size_t i = 0; i < num_element; ++i) {
+            auto& nested_col = struct_column->get_column(i);
+            nested_col.reserve(input_rows_count);
+            // Take a copy of the column handle instead of a reference into the
+            // block, so that materializing / wrapping below never rewrites the
+            // argument columns stored in the block.
+            auto col = block.get_by_position(args_num[i]).column;
+            col = col->convert_to_full_column_if_const();
+            if (nested_col.is_nullable() && !col->is_nullable()) {
+                // Wrap with an all-zero null map so it matches the nullable field.
+                col = ColumnNullable::create(col, ColumnUInt8::create(col->size(), 0));
+            }
+            nested_col.insert_range_from(*col, 0, input_rows_count);
+        }
+        block.replace_by_position(result, std::move(result_col));
+        return Status::OK();
+    }
+};
+
+// struct(value1, value2, value3) -> {value1, value2, value3}
+struct StructImpl {
+    static constexpr auto name = "struct";
+    // Every argument is a field value.
+    static constexpr auto pred = [](size_t /*i*/) { return true; };
+
+    static void check_number_of_arguments(size_t number_of_arguments) {}
+
+    static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
+        return std::make_shared<DataTypeStruct>(make_nullable(arguments));
+    }
+};
+
+// named_struct(name1, value1, name2, value2) -> {name1:value1, name2:value2}
+struct NamedStructImpl {
+    static constexpr auto name = "named_struct";
+    // Field values sit at the odd argument indexes (1, 3, 5, ...).
+    static constexpr auto pred = [](size_t i) { return (i & 1) == 1; };
+
+    static void check_number_of_arguments(size_t number_of_arguments) {
+        DCHECK(number_of_arguments % 2 == 0)
+                << "function: " << name << ", arguments size should be even number.";
+    }
+
+    static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
+        DataTypes data_types(arguments.size() / 2);
+        for (size_t i = 0; i < data_types.size(); i++) {
+            data_types[i] = arguments[2 * i + 1];
+        }
+        return std::make_shared<DataTypeStruct>(make_nullable(data_types));
+    }
+};
+
+void 
register_function_struct(SimpleFunctionFactory& factory) { + factory.register_function>(); + factory.register_function>(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index 8a9213eae02bea..dcd20fdc876393 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -82,6 +82,7 @@ void register_function_least_greast(SimpleFunctionFactory& factory); void register_function_fake(SimpleFunctionFactory& factory); void register_function_array(SimpleFunctionFactory& factory); void register_function_map(SimpleFunctionFactory& factory); +void register_function_struct(SimpleFunctionFactory& factory); void register_function_geo(SimpleFunctionFactory& factory); void register_function_multi_string_position(SimpleFunctionFactory& factory); void register_function_multi_string_search(SimpleFunctionFactory& factory); @@ -232,6 +233,7 @@ class SimpleFunctionFactory { register_function_hex_variadic(instance); register_function_array(instance); register_function_map(instance); + register_function_struct(instance); register_function_geo(instance); register_function_url(instance); register_function_multi_string_position(instance); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 0dbbe6f62a0c17..dc68ce5cf504dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -1448,6 +1448,21 @@ && collectChildReturnTypes()[0].isDecimalV3()) { fn.getReturnType().getPrimitiveType().setTimeType(); } + if (fnName.getFunction().equalsIgnoreCase("named_struct")) { + if ((children.size() & 1) == 1) { + throw new AnalysisException("named_struct can't be odd parameters, need even parameters: " + + this.toSql()); + } + for (int 
i = 0; i < children.size(); i++) { + if ((i & 1) == 0) { + if (!(getChild(i) instanceof StringLiteral)) { + throw new AnalysisException( + "named_struct only allows constant string parameter in odd position: " + this.toSql()); + } + } + } + } + if (isAggregateFunction()) { final String functionName = fnName.getFunction(); // subexprs must not contain aggregates @@ -1620,6 +1635,15 @@ private void analyzeNestedFunction() { .getType()).getItemType().isDatetimeV2())) { this.type = children.get(1).getType(); } + } else if (fnName.getFunction().equalsIgnoreCase("named_struct")) { + List fieldNames = Lists.newArrayList(); + for (int i = 0; i < children.size(); i++) { + if ((i & 1) == 0) { + StringLiteral nameLiteral = (StringLiteral) children.get(i); + fieldNames.add(nameLiteral.getStringValue()); + } + } + this.type = ((StructType) type).replaceFieldsWithNames(fieldNames); } else if (fnName.getFunction().equalsIgnoreCase("array_distinct") || fnName.getFunction() .equalsIgnoreCase("array_remove") || fnName.getFunction().equalsIgnoreCase("array_sort") || fnName.getFunction().equalsIgnoreCase("array_reverse_sort") diff --git a/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out new file mode 100644 index 00000000000000..7a0afc46c185a8 Binary files /dev/null and b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out differ diff --git a/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.out b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.out new file mode 100644 index 00000000000000..84c444912dfe85 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.out @@ -0,0 +1,28 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +{1, 2, 3} + +-- !sql -- +{1, 1000, 10000000000} + +-- !sql -- +{'a', 1, 'doris', 'aaaaa', 1.32} + +-- !sql -- +{1, 'a', NULL} + +-- !sql -- +{NULL, NULL, NULL} + +-- !sql -- +{1, 2, 3} + +-- !sql -- +{1, 1000, 10000000000} + +-- !sql -- +{1, 'doris', 1.32} + +-- !sql -- +{NULL, NULL, NULL} + diff --git a/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions.groovy b/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions.groovy new file mode 100644 index 00000000000000..b627dd195c3b3b --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions.groovy @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_struct_functions") { + def tableName = "tbl_test_struct_functions" + sql """ADMIN SET FRONTEND CONFIG('enable_struct_type'='true')""" + sql """DROP TABLE IF EXISTS ${tableName}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `k1` INT(11) NULL, + `k2` STRUCT NULL, + `k3` STRUCT NULL, + `k4` STRUCT NULL, + `k5` STRUCT NOT NULL + ) + DUPLICATE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2" + ) + """ + sql """ INSERT INTO ${tableName} VALUES(1,{1,2,3,4,5},{1.0,3.33,1.001},{"2023-04-01","2023-04-01 12:00:00","2023-04-01","2023-04-01 12:00:00.999"},{'a','abc','abc'}) """ + sql """ INSERT INTO ${tableName} VALUES(2,struct(1,1000,10000000,100000000000,100000000000),struct(1.0,2.143,1.001),struct("2023-04-01","2023-04-01 12:00:00","2023-04-01","2023-04-01 12:00:00.999"),struct("hi","doris","hello doris")) """ + sql """ INSERT INTO ${tableName} VALUES(3,named_struct("f1",5,"f2",4,"f3",3,"f4",2,"f5",1),named_struct("f1",2.3,"f2",23.3,"f3",2.333),named_struct('f1','2023-04-01','f2','2023-04-01 12:00:00','f3','2023-04-01','f4','2023-04-01 12:00:00.999'),named_struct('f1','a','f2','abc','f3','abc')) """ + sql """ INSERT INTO ${tableName} VALUES(4,struct(1,NULL,3,NULL,5),{2.0,NULL,2.000},{'2023-04-01',NULL,'2023-04-01',NULL},struct('a',NULL,'abc')) """ + sql """ INSERT INTO ${tableName} VALUES(5,NULL,NULL,NULL,{NULL, NULL, NULL}) """ + sql """ INSERT INTO ${tableName} VALUES(6,NULL,NULL,NULL,{"NULL",'null',NULL}) """ + + qt_select_all "SELECT * FROM ${tableName} ORDER BY k1" +} diff --git a/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.groovy b/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.groovy new file mode 100644 index 00000000000000..bb7512882e6960 --- /dev/null +++ 
b/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.groovy @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_struct_functions_by_literal") { + // struct constructor + qt_sql "select struct(1, 2, 3)" + qt_sql "select struct(1, 1000, 10000000000)" + qt_sql "select struct('a', 1, 'doris', 'aaaaa', 1.32)" + qt_sql "select struct(1, 'a', null)" + qt_sql "select struct(null, null, null)" + + qt_sql "select named_struct('f1', 1, 'f2', 2, 'f3', 3)" + qt_sql "select named_struct('f1', 1, 'f2', 1000, 'f3', 10000000000)" + qt_sql "select named_struct('f1', 1, 'f2', 'doris', 'f3', 1.32)" + qt_sql "select named_struct('f1', null, 'f2', null, 'f3', null)" +}