From 2afee54e6f67af5391e5c7ce740388c93ef80954 Mon Sep 17 00:00:00 2001 From: xy720 Date: Tue, 28 Mar 2023 18:30:25 +0800 Subject: [PATCH 01/12] save --- be/src/vec/data_types/data_type_nullable.cpp | 8 ++ be/src/vec/functions/function_map.cpp | 2 +- be/src/vec/functions/function_struct.cpp | 91 +++++++++++++++++++ .../vec/functions/simple_function_factory.h | 1 + 4 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 be/src/vec/functions/function_struct.cpp diff --git a/be/src/vec/data_types/data_type_nullable.cpp b/be/src/vec/data_types/data_type_nullable.cpp index 2892b051d40dc5..7fe364a2d99c4f 100644 --- a/be/src/vec/data_types/data_type_nullable.cpp +++ b/be/src/vec/data_types/data_type_nullable.cpp @@ -161,6 +161,14 @@ DataTypePtr make_nullable(const DataTypePtr& type) { return std::make_shared(type); } +DataTypes make_nullable(const DataTypes& types) { + DataTypes nullable_types; + for (auto& type : types) { + nullable_types.push_back(make_nullable(type)); + } + return nullable_types; +} + DataTypePtr remove_nullable(const DataTypePtr& type) { if (type->is_nullable()) return static_cast(*type).get_nested_type(); return type; diff --git a/be/src/vec/functions/function_map.cpp b/be/src/vec/functions/function_map.cpp index 9cb5c5898d2af2..129cb9f686d919 100644 --- a/be/src/vec/functions/function_map.cpp +++ b/be/src/vec/functions/function_map.cpp @@ -101,7 +101,7 @@ class FunctionMap : public IFunction { } } - // insert value into array + // insert value into map ColumnArray::Offset64 offset = 0; for (size_t row = 0; row < input_rows_count; ++row) { for (size_t i = 0; i < num_element; i += 2) { diff --git a/be/src/vec/functions/function_struct.cpp b/be/src/vec/functions/function_struct.cpp new file mode 100644 index 00000000000000..8f1ec490640f7b --- /dev/null +++ b/be/src/vec/functions/function_struct.cpp @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/columns/column_const.h" +#include "vec/columns/column_struct.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_struct.h" +#include "vec/functions/function.h" +#include "vec/functions/function_helpers.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris::vectorized { + +// construct a struct +// struct(value1, value2, value3, value4) -> {value1, value2, value3, value4} +class FunctionStruct : public IFunction { +public: + static constexpr auto name = "struct"; + static FunctionPtr create() { return std::make_shared(); } + + /// Get function name. + String get_name() const override { return name; } + + bool is_variadic() const override { return true; } + + bool use_default_implementation_for_nulls() const override { return false; } + + size_t get_number_of_arguments() const override { return 0; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + DCHECK(arguments.size() > 0) + << "function: " << get_name() << ", arguments should not be empty."; + return std::make_shared(make_nullable(arguments)); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + auto result_col = block.get_by_position(result).type->create_column(); + auto struct_column = check_and_get_column(result_col.get()); + if (!struct_column) { + return Status::RuntimeError("unsupported types for function {} return {}", get_name(), + block.get_by_position(result).type->get_name()); + } + size_t num_element = struct_column.tuple_size(); + DCHECK(arguments.size() == num_element) + << "function: " << get_name() << ", argument number should equal to return field number."; + // convert to nullable column + for (size_t i = 0; i < num_element; ++i) { + auto& col = block.get_by_position(arguments[i]).column; + col = col->convert_to_full_column_if_const(); + IColumn& nested_col = struct_column->get_column(i); + nested_col.reserve(input_rows_count); + bool is_nullable = nested_col.is_nullable(); + // for now, column in struct is always nullable + if (is_nullable && !col->is_nullable()) { + col = ColumnNullable::create(col, ColumnUInt8::create(col->size(), 0)); + } + } + + // insert value into struct + for (size_t row = 0; row < input_rows_count; ++row) { + for (size_t i = 0; i < num_element; ++i) { + struct_column->get_column(i).insert_from(*block.get_by_position(arguments[i]).column, + row); + } + } + block.replace_by_position(result, std::move(result_col)); + return Status::OK(); + } +}; + +void register_function_struct(SimpleFunctionFactory& factory) { + factory.register_function(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index 8a9213eae02bea..1b019774980fdd 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -82,6 +82,7 @@ void register_function_least_greast(SimpleFunctionFactory& factory); void register_function_fake(SimpleFunctionFactory& factory); void register_function_array(SimpleFunctionFactory& factory); void register_function_map(SimpleFunctionFactory& factory); +void register_function_struct(SimpleFunctionFactory& factory); void register_function_geo(SimpleFunctionFactory& factory); void register_function_multi_string_position(SimpleFunctionFactory& factory); void register_function_multi_string_search(SimpleFunctionFactory& factory); From aa22d9403fc65e5e5fb5e88bec58d0b5485eadf6 Mon Sep 17 00:00:00 2001 From: xy720 Date: Tue, 28 Mar 2023 22:34:46 +0800 Subject: [PATCH 02/12] save --- be/src/vec/CMakeLists.txt | 1 + be/src/vec/data_types/data_type_nullable.h | 1 + be/src/vec/functions/function_struct.cpp | 14 ++++++-------- be/src/vec/functions/simple_function_factory.h | 1 + 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index ce61f91af94ef9..a927690ffd9184 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -193,6 +193,7 @@ set(VEC_FILES functions/array/function_array_pushfront.cpp functions/array/function_array_first_index.cpp functions/function_map.cpp + functions/function_struct.cpp exprs/table_function/vexplode_json_array.cpp functions/math.cpp functions/function_bitmap.cpp diff --git a/be/src/vec/data_types/data_type_nullable.h b/be/src/vec/data_types/data_type_nullable.h index 8ca6174a763bf6..c1c920e06808e3 100644 --- a/be/src/vec/data_types/data_type_nullable.h +++ b/be/src/vec/data_types/data_type_nullable.h @@ -103,6 +103,7 @@ class DataTypeNullable final : public IDataType { }; DataTypePtr make_nullable(const DataTypePtr& type); +DataTypes make_nullable(const DataTypes& types); DataTypePtr remove_nullable(const DataTypePtr& type); DataTypes remove_nullable(const DataTypes& types); bool have_nullable(const DataTypes& types); diff --git a/be/src/vec/functions/function_struct.cpp b/be/src/vec/functions/function_struct.cpp index 8f1ec490640f7b..3ae1d1678dee55 100644 --- a/be/src/vec/functions/function_struct.cpp +++ b/be/src/vec/functions/function_struct.cpp @@ -51,19 +51,19 @@ class FunctionStruct : public IFunction { Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { auto result_col = block.get_by_position(result).type->create_column(); - auto struct_column = check_and_get_column(result_col.get()); + auto struct_column = typeid_cast(result_col.get()); if (!struct_column) { return Status::RuntimeError("unsupported types for function {} return {}", get_name(), block.get_by_position(result).type->get_name()); } - size_t num_element = struct_column.tuple_size(); + size_t num_element = struct_column->tuple_size(); DCHECK(arguments.size() == num_element) << "function: " << get_name() << ", argument number should equal to return field number."; // convert to nullable column for (size_t i = 0; i < num_element; ++i) { auto& col = block.get_by_position(arguments[i]).column; col = col->convert_to_full_column_if_const(); - IColumn& nested_col = struct_column->get_column(i); + auto& nested_col = struct_column->get_column(i); nested_col.reserve(input_rows_count); bool is_nullable = nested_col.is_nullable(); // for now, column in struct is always nullable @@ -73,11 +73,9 @@ class FunctionStruct : public IFunction { } // insert value into struct - for (size_t row = 0; row < input_rows_count; ++row) { - for (size_t i = 0; i < num_element; ++i) { - struct_column->get_column(i).insert_from(*block.get_by_position(arguments[i]).column, - row); - } + for (size_t i = 0; i < num_element; ++i) { + struct_column->get_column(i).insert_range_from(*block.get_by_position(arguments[i]).column, + 0, input_rows_count); } block.replace_by_position(result, std::move(result_col)); return Status::OK(); diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index 1b019774980fdd..dcd20fdc876393 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -233,6 +233,7 @@ class SimpleFunctionFactory { register_function_hex_variadic(instance); register_function_array(instance); register_function_map(instance); + register_function_struct(instance); register_function_geo(instance); register_function_url(instance); register_function_multi_string_position(instance); From 7159171f6105cf9e418db9d1598c243074b84ad3 Mon Sep 17 00:00:00 2001 From: xy720 Date: Tue, 28 Mar 2023 22:41:54 +0800 Subject: [PATCH 03/12] format --- be/src/vec/functions/function_struct.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/be/src/vec/functions/function_struct.cpp b/be/src/vec/functions/function_struct.cpp index 3ae1d1678dee55..08bc92773789f9 100644 --- a/be/src/vec/functions/function_struct.cpp +++ b/be/src/vec/functions/function_struct.cpp @@ -58,7 +58,8 @@ class FunctionStruct : public IFunction { } size_t num_element = struct_column->tuple_size(); DCHECK(arguments.size() == num_element) - << "function: " << get_name() << ", argument number should equal to return field number."; + << "function: " << get_name() + << ", argument number should equal to return field number."; // convert to nullable column for (size_t i = 0; i < num_element; ++i) { auto& col = block.get_by_position(arguments[i]).column; @@ -74,8 +75,8 @@ class FunctionStruct : public IFunction { // insert value into struct for (size_t i = 0; i < num_element; ++i) { - struct_column->get_column(i).insert_range_from(*block.get_by_position(arguments[i]).column, - 0, input_rows_count); + struct_column->get_column(i).insert_range_from( + *block.get_by_position(arguments[i]).column, 0, input_rows_count); } block.replace_by_position(result, std::move(result_col)); return Status::OK(); From 2c5b3111c51c2f5f5d96d7647435d765337f8149 Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 29 Mar 2023 23:31:19 +0800 Subject: [PATCH 04/12] save --- be/src/vec/functions/function_struct.cpp | 78 +++++++++++++++++++----- 1 file changed, 63 insertions(+), 15 deletions(-) diff --git a/be/src/vec/functions/function_struct.cpp b/be/src/vec/functions/function_struct.cpp index 08bc92773789f9..5b0ec3df3a2eaf 100644 --- a/be/src/vec/functions/function_struct.cpp +++ b/be/src/vec/functions/function_struct.cpp @@ -15,7 +15,10 @@ // specific language governing permissions and limitations // under the License. +#include + #include "vec/columns/column_const.h" +#include "vec/columns/column_string.h" #include "vec/columns/column_struct.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_nullable.h" @@ -27,10 +30,10 @@ namespace doris::vectorized { // construct a struct -// struct(value1, value2, value3, value4) -> {value1, value2, value3, value4} +template class FunctionStruct : public IFunction { public: - static constexpr auto name = "struct"; + static constexpr auto name = Impl::name; static FunctionPtr create() { return std::make_shared(); } /// Get function name. @@ -42,10 +45,14 @@ class FunctionStruct : public IFunction { size_t get_number_of_arguments() const override { return 0; } - DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + void check_number_of_arguments(size_t number_of_arguments) const override { DCHECK(arguments.size() > 0) << "function: " << get_name() << ", arguments should not be empty."; - return std::make_shared(make_nullable(arguments)); + return Impl::check_number_of_arguments(number_of_arguments); + } + + DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { + return Impl::get_return_type_impl(arguments); } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, @@ -56,35 +63,76 @@ class FunctionStruct : public IFunction { return Status::RuntimeError("unsupported types for function {} return {}", get_name(), block.get_by_position(result).type->get_name()); } - size_t num_element = struct_column->tuple_size(); - DCHECK(arguments.size() == num_element) - << "function: " << get_name() - << ", argument number should equal to return field number."; - // convert to nullable column + ColumnNumbers args_num; + std::copy_if(arguments.begin(), arguments.end(), std::back_inserter(args_num), + Impl::types_index); + size_t num_element = args_num.size(); + if (num_element != struct_column.tuple_size()) { + return Status::RuntimeError("function {} args number {} is not equal to result struct field number {}.", get_name(), num_element, struct_column.tuple_size()); + } for (size_t i = 0; i < num_element; ++i) { - auto& col = block.get_by_position(arguments[i]).column; - col = col->convert_to_full_column_if_const(); auto& nested_col = struct_column->get_column(i); nested_col.reserve(input_rows_count); bool is_nullable = nested_col.is_nullable(); - // for now, column in struct is always nullable + auto& col = block.get_by_position(args_num[i]).column->convert_to_full_column_if_const(); if (is_nullable && !col->is_nullable()) { col = ColumnNullable::create(col, ColumnUInt8::create(col->size(), 0)); } } - // insert value into struct + // insert value into struct column by column for (size_t i = 0; i < num_element; ++i) { struct_column->get_column(i).insert_range_from( - *block.get_by_position(arguments[i]).column, 0, input_rows_count); + *block.get_by_position(args_num[i]).column, 0, input_rows_count); } block.replace_by_position(result, std::move(result_col)); return Status::OK(); } }; +// struct(value1, value2, value3) -> {value1, value2, value3} +struct StructImpl { + static constexpr auto name = "struct"; + static auto types_index = [](size_t i) { return true; } + + static void check_number_of_arguments(size_t number_of_arguments) { + return; + } + + static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { + return std::make_shared(make_nullable(arguments)); + } +}; + +// named_struct(name1, value1, name2, value2) -> {name1:value1, name2:value2} +struct NamedStructImpl { + static constexpr auto name = "named_struct"; + static auto types_index = [](size_t i) { return i % 2 == 0; } + + static void check_number_of_arguments(size_t number_of_arguments) { + DCHECK(arguments.size() % 2 == 0) + << "function: " << get_name() << ", arguments size should be even number."; + return; + } + + static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { + Strings names; + DataTypes dataTypes; + for (size_t i = 0; i < arguments.size(); i += 2) { + const ColumnConst* const_string = + check_and_get_column_const(arguments[i].column.get()); + DCHECK(const_string) + << "Only const StringType arguments are allowed at odd position."; + names.push_back(const_string->get_value()); + dataTypes.push_back(arguments[i + 1].type); + } + return std::make_shared(make_nullable(dataTypes), names); + } +}; + void register_function_struct(SimpleFunctionFactory& factory) { - factory.register_function(); + factory.register_function>(); + factory.register_function>(); } } // namespace doris::vectorized From 4b4a5b3ba7754b67ccb42d618347976b9c1ce5f2 Mon Sep 17 00:00:00 2001 From: xy720 Date: Tue, 4 Apr 2023 20:07:18 +0800 Subject: [PATCH 05/12] save --- be/src/vec/data_types/data_type_struct.cpp | 1 + be/src/vec/functions/function_struct.cpp | 39 ++++++++++------------ 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp index 5e1ec60ae2a089..f0dfd34c8f1b44 100644 --- a/be/src/vec/data_types/data_type_struct.cpp +++ b/be/src/vec/data_types/data_type_struct.cpp @@ -69,6 +69,7 @@ std::string DataTypeStruct::do_get_name() const { if (i != 0) { s << ", "; } + s << names[i] << ":"; s << elems[i]->get_name(); } s << ")"; diff --git a/be/src/vec/functions/function_struct.cpp b/be/src/vec/functions/function_struct.cpp index 5b0ec3df3a2eaf..a99b89b2c8cb88 100644 --- a/be/src/vec/functions/function_struct.cpp +++ b/be/src/vec/functions/function_struct.cpp @@ -46,12 +46,12 @@ class FunctionStruct : public IFunction { size_t get_number_of_arguments() const override { return 0; } void check_number_of_arguments(size_t number_of_arguments) const override { - DCHECK(arguments.size() > 0) + DCHECK(number_of_arguments > 0) << "function: " << get_name() << ", arguments should not be empty."; return Impl::check_number_of_arguments(number_of_arguments); } - DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return Impl::get_return_type_impl(arguments); } @@ -67,14 +67,15 @@ class FunctionStruct : public IFunction { std::copy_if(arguments.begin(), arguments.end(), std::back_inserter(args_num), Impl::types_index); size_t num_element = args_num.size(); - if (num_element != struct_column.tuple_size()) { - return Status::RuntimeError("function {} args number {} is not equal to result struct field number {}.", get_name(), num_element, struct_column.tuple_size()); + if (num_element != struct_column->tuple_size()) { + return Status::RuntimeError("function {} args number {} is not equal to result struct field number {}.", get_name(), num_element, struct_column->tuple_size()); } for (size_t i = 0; i < num_element; ++i) { auto& nested_col = struct_column->get_column(i); nested_col.reserve(input_rows_count); bool is_nullable = nested_col.is_nullable(); - auto& col = block.get_by_position(args_num[i]).column->convert_to_full_column_if_const(); + auto& col = block.get_by_position(args_num[i]).column; + col = col->convert_to_full_column_if_const(); if (is_nullable && !col->is_nullable()) { col = ColumnNullable::create(col, ColumnUInt8::create(col->size(), 0)); } @@ -93,13 +94,13 @@ class FunctionStruct : public IFunction { // struct(value1, value2, value3) -> {value1, value2, value3} struct StructImpl { static constexpr auto name = "struct"; - static auto types_index = [](size_t i) { return true; } + static constexpr auto types_index = [](size_t i) { return true; }; static void check_number_of_arguments(size_t number_of_arguments) { return; } - static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { + static DataTypePtr get_return_type_impl(const DataTypes& arguments) { return std::make_shared(make_nullable(arguments)); } }; @@ -107,26 +108,22 @@ struct StructImpl { // named_struct(name1, value1, name2, value2) -> {name1:value1, name2:value2} struct NamedStructImpl { static constexpr auto name = "named_struct"; - static auto types_index = [](size_t i) { return i % 2 == 0; } + static constexpr auto types_index = [](size_t i) { return i % 2 == 0; }; static void check_number_of_arguments(size_t number_of_arguments) { - DCHECK(arguments.size() % 2 == 0) - << "function: " << get_name() << ", arguments size should be even number."; + DCHECK(number_of_arguments % 2 == 0) + << "function: " << name << ", arguments size should be even number."; return; } - static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { - Strings names; - DataTypes dataTypes; - for (size_t i = 0; i < arguments.size(); i += 2) { - const ColumnConst* const_string = - check_and_get_column_const(arguments[i].column.get()); - DCHECK(const_string) - << "Only const StringType arguments are allowed at odd position."; - names.push_back(const_string->get_value()); - dataTypes.push_back(arguments[i + 1].type); + static DataTypePtr get_return_type_impl(const DataTypes& arguments) { + DataTypes data_types(arguments.size() / 2); + size_t even_idx = 1; + for (size_t i = 0; i < data_types.size(); i++) { + data_types[i] = arguments[even_idx]; + even_idx += 2; } - return std::make_shared(make_nullable(dataTypes), names); + return std::make_shared(make_nullable(data_types)); } }; From 4139d2d26fe906b2fb0ead3b2750fc44dda129cc Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 5 Apr 2023 01:50:37 +0800 Subject: [PATCH 06/12] save --- be/src/vec/functions/function_struct.cpp | 2 +- fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/be/src/vec/functions/function_struct.cpp b/be/src/vec/functions/function_struct.cpp index a99b89b2c8cb88..a5ab1c4b0b6b05 100644 --- a/be/src/vec/functions/function_struct.cpp +++ b/be/src/vec/functions/function_struct.cpp @@ -108,7 +108,7 @@ struct StructImpl { // named_struct(name1, value1, name2, value2) -> {name1:value1, name2:value2} struct NamedStructImpl { static constexpr auto name = "named_struct"; - static constexpr auto types_index = [](size_t i) { return i % 2 == 0; }; + static constexpr auto types_index = [](size_t i) { return (i & 1) == 1; }; static void check_number_of_arguments(size_t number_of_arguments) { DCHECK(number_of_arguments % 2 == 0) diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java index 3479230e176bca..48037297aaf68c 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java @@ -111,7 +111,6 @@ public abstract class Type { new StructField("generic_struct", new ScalarType(PrimitiveType.NULL_TYPE)))); public static final StructType STRUCT = new StructType(); public static final VariantType VARIANT = new VariantType(); - public static final AnyType ANY_TYPE = new AnyType(); private static final Logger LOG = LogManager.getLogger(Type.class); private static final ArrayList integerTypes; From 46108ab7e6045c18cee19e75cdbc33ba94e7e1f2 Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 5 Apr 2023 01:57:56 +0800 Subject: [PATCH 07/12] rebase --- .../org/apache/doris/catalog/FunctionSet.java | 37 +------------------ gensrc/script/doris_builtins_functions.py | 1 - 2 files changed, 2 insertions(+), 36 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index be8d9444231bff..6475f3e3ec4a03 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1232,12 +1232,7 @@ public Function getFunction(Function desc, Function.CompareMode mode, boolean is List normalFunctions = Lists.newArrayList(); List templateFunctions = Lists.newArrayList(); List variadicTemplateFunctions = Lists.newArrayList(); - List inferenceFunctions = Lists.newArrayList(); for (Function fn : fns) { - if (fn.isInferenceFunction()) { - inferenceFunctions.add(fn); - continue; - } if (fn.hasTemplateArg()) { if (!fn.hasVariadicTemplateArg()) { templateFunctions.add(fn); @@ -1279,25 +1274,8 @@ public Function getFunction(Function desc, Function.CompareMode mode, boolean is } } - // try variadic template function third - fn = getFunction(desc, mode, specializedVariadicTemplateFunctions); - if (fn != null) { - return fn; - } - - List inferredFunctions = Lists.newArrayList(); - for (Function f : inferenceFunctions) { - if (f.hasTemplateArg()) { - f = specializeTemplateFunction(f, desc, f.hasVariadicTemplateArg()); - } - f = resolveInferenceFunction(f, desc); - if (f != null) { - inferredFunctions.add(f); - } - } - - // try inference function at last - return getFunction(desc, mode, inferredFunctions); + // try variadic template function + return getFunction(desc, mode, specializedVariadicTemplateFunctions); } private Function getFunction(Function desc, Function.CompareMode mode, List fns) { @@ -1406,17 +1384,6 @@ public Function specializeTemplateFunction(Function templateFunction, Function r } } - public Function resolveInferenceFunction(Function inferenceFunction, Function requestFunction) { - Type[] args = requestFunction.getArgs(); - Type newRetType = FunctionTypeDeducers.deduce(inferenceFunction.functionName(), args); - if (newRetType != null && inferenceFunction instanceof ScalarFunction) { - ScalarFunction f = (ScalarFunction) inferenceFunction; - return new ScalarFunction(f.getFunctionName(), Lists.newArrayList(f.getArgs()), newRetType, f.hasVarArgs(), - f.getSymbolName(), f.getBinaryType(), f.isUserVisible(), f.isVectorized(), f.getNullableMode()); - } - return null; - } - /** * There are essential differences in the implementation of some functions for different * types params, which should be prohibited. diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 4024cdab6ae5b6..3d175d0fa32cfa 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -81,7 +81,6 @@ # struct functions [['struct'], 'STRUCT', ['TYPES'], 'ALWAYS_NOT_NULLABLE', ['TYPES...']], - [['named_struct'], 'ANY_TYPE', ['TYPES'], 'ALWAYS_NOT_NULLABLE', ['TYPES...']], # array functions [['array'], 'ARRAY', ['BOOLEAN', '...'], 'ALWAYS_NOT_NULLABLE'], From 940a60599e5ecd958aed63ee4c46fa01e18b7c48 Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 5 Apr 2023 02:02:48 +0800 Subject: [PATCH 08/12] format --- be/src/vec/functions/function_struct.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/be/src/vec/functions/function_struct.cpp b/be/src/vec/functions/function_struct.cpp index a5ab1c4b0b6b05..fb0b7b1b1b27e1 100644 --- a/be/src/vec/functions/function_struct.cpp +++ b/be/src/vec/functions/function_struct.cpp @@ -68,7 +68,9 @@ class FunctionStruct : public IFunction { Impl::types_index); size_t num_element = args_num.size(); if (num_element != struct_column->tuple_size()) { - return Status::RuntimeError("function {} args number {} is not equal to result struct field number {}.", get_name(), num_element, struct_column->tuple_size()); + return Status::RuntimeError( + "function {} args number {} is not equal to result struct field number {}.", + get_name(), num_element, struct_column->tuple_size()); } for (size_t i = 0; i < num_element; ++i) { auto& nested_col = struct_column->get_column(i); @@ -96,9 +98,7 @@ struct StructImpl { static constexpr auto name = "struct"; static constexpr auto types_index = [](size_t i) { return true; }; - static void check_number_of_arguments(size_t number_of_arguments) { - return; - } + static void check_number_of_arguments(size_t number_of_arguments) {} static DataTypePtr get_return_type_impl(const DataTypes& arguments) { return std::make_shared(make_nullable(arguments)); @@ -113,7 +113,6 @@ struct NamedStructImpl { static void check_number_of_arguments(size_t number_of_arguments) { DCHECK(number_of_arguments % 2 == 0) << "function: " << name << ", arguments size should be even number."; - return; } static DataTypePtr get_return_type_impl(const DataTypes& arguments) { From a5973771f59f86beed89d39dafd0592d56c1b5c6 Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 5 Apr 2023 02:53:49 +0800 Subject: [PATCH 09/12] fix --- be/src/vec/functions/function_struct.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/vec/functions/function_struct.cpp b/be/src/vec/functions/function_struct.cpp index fb0b7b1b1b27e1..336ef1328ec4dc 100644 --- a/be/src/vec/functions/function_struct.cpp +++ b/be/src/vec/functions/function_struct.cpp @@ -108,7 +108,7 @@ struct StructImpl { // named_struct(name1, value1, name2, value2) -> {name1:value1, name2:value2} struct NamedStructImpl { static constexpr auto name = "named_struct"; - static constexpr auto types_index = [](size_t i) { return (i & 1) == 1; }; + static constexpr auto types_index = [](size_t i) { return (i & 1) == 0; }; static void check_number_of_arguments(size_t number_of_arguments) { DCHECK(number_of_arguments % 2 == 0) From a4ee82da9b693d969b479f3d1e6cccf7b68b2d3a Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 5 Apr 2023 04:35:31 +0800 Subject: [PATCH 10/12] add test --- .../test_struct_functions.out | Bin 0 -> 682 bytes .../test_struct_functions_by_literal.out | 28 +++++++++++ .../test_struct_functions.groovy | 45 ++++++++++++++++++ .../test_struct_functions_by_literal.groovy | 30 ++++++++++++ 4 files changed, 103 insertions(+) create mode 100644 regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out create mode 100644 regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.out create mode 100644 regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions.groovy create mode 100644 regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.groovy diff --git a/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out new file mode 100644 index 0000000000000000000000000000000000000000..b0bda1fbdbe213e0bb1a5387f603458206c63a67 GIT binary patch literal 682 zcmb_Zv2Md45bYYi;yQT&3$W9n-w9QN_RafW)FG54oJx z5FmIk+G57BgfX}ujWb<%!-2pU5u0y8K~iWav=p{6-mW!jf3#y#*&F9P z6c|Z^eq_VWG5J%Y5Q&~)MB^Hy_RqT=h3oi-5p6c2G~ujLq#wbj3XlmbKd`V@%*`{U hIlCUocJsPF?;DwsJgiQu)K@G?J`9s{m=hTZ`2inuoqYfR literal 0 HcmV?d00001 diff --git a/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.out b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.out new file mode 100644 index 00000000000000..84c444912dfe85 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.out @@ -0,0 +1,28 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +{1, 2, 3} + +-- !sql -- +{1, 1000, 10000000000} + +-- !sql -- +{'a', 1, 'doris', 'aaaaa', 1.32} + +-- !sql -- +{1, 'a', NULL} + +-- !sql -- +{NULL, NULL, NULL} + +-- !sql -- +{1, 2, 3} + +-- !sql -- +{1, 1000, 10000000000} + +-- !sql -- +{1, 'doris', 1.32} + +-- !sql -- +{NULL, NULL, NULL} + diff --git a/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions.groovy b/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions.groovy new file mode 100644 index 00000000000000..b627dd195c3b3b --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions.groovy @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_struct_functions") { + def tableName = "tbl_test_struct_functions" + sql """ADMIN SET FRONTEND CONFIG('enable_struct_type'='true')""" + sql """DROP TABLE IF EXISTS ${tableName}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `k1` INT(11) NULL, + `k2` STRUCT NULL, + `k3` STRUCT NULL, + `k4` STRUCT NULL, + `k5` STRUCT NOT NULL + ) + DUPLICATE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2" + ) + """ + sql """ INSERT INTO ${tableName} VALUES(1,{1,2,3,4,5},{1.0,3.33,1.001},{"2023-04-01","2023-04-01 12:00:00","2023-04-01","2023-04-01 12:00:00.999"},{'a','abc','abc'}) """ + sql """ INSERT INTO ${tableName} VALUES(2,struct(1,1000,10000000,100000000000,100000000000),struct(1.0,2.143,1.001),struct("2023-04-01","2023-04-01 12:00:00","2023-04-01","2023-04-01 12:00:00.999"),struct("hi","doris","hello doris")) """ + sql """ INSERT INTO ${tableName} VALUES(3,named_struct("f1",5,"f2",4,"f3",3,"f4",2,"f5",1),named_struct("f1",2.3,"f2",23.3,"f3",2.333),named_struct('f1','2023-04-01','f2','2023-04-01 12:00:00','f3','2023-04-01','f4','2023-04-01 12:00:00.999'),named_struct('f1','a','f2','abc','f3','abc')) """ + sql """ INSERT INTO ${tableName} VALUES(4,struct(1,NULL,3,NULL,5),{2.0,NULL,2.000},{'2023-04-01',NULL,'2023-04-01',NULL},struct('a',NULL,'abc')) """ + sql """ INSERT INTO ${tableName} VALUES(5,NULL,NULL,NULL,{NULL, NULL, NULL}) """ + sql """ INSERT INTO ${tableName} VALUES(6,NULL,NULL,NULL,{"NULL",'null',NULL}) """ + + qt_select_all "SELECT * FROM ${tableName} ORDER BY k1" +} diff --git a/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.groovy b/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.groovy new file mode 100644 index 00000000000000..bb7512882e6960 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions_by_literal.groovy @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_struct_functions_by_literal") { + // struct constructor + qt_sql "select struct(1, 2, 3)" + qt_sql "select struct(1, 1000, 10000000000)" + qt_sql "select struct('a', 1, 'doris', 'aaaaa', 1.32)" + qt_sql "select struct(1, 'a', null)" + qt_sql "select struct(null, null, null)" + + qt_sql "select named_struct('f1', 1, 'f2', 2, 'f3', 3)" + qt_sql "select named_struct('f1', 1, 'f2', 1000, 'f3', 10000000000)" + qt_sql "select named_struct('f1', 1, 'f2', 'doris', 'f3', 1.32)" + qt_sql "select named_struct('f1', null, 'f2', null, 'f3', null)" +} From 65336c50b4aab4ea87d0f57f3726b45693e9fc57 Mon Sep 17 00:00:00 2001 From: xy720 Date: Tue, 11 Apr 2023 20:02:21 +0800 Subject: [PATCH 11/12] revert fe --- .../java/org/apache/doris/catalog/Type.java | 1 + .../doris/analysis/FunctionCallExpr.java | 24 ++++++++++++ .../org/apache/doris/catalog/FunctionSet.java | 37 ++++++++++++++++++- gensrc/script/doris_builtins_functions.py | 1 + 4 files changed, 61 insertions(+), 2 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java index 48037297aaf68c..3479230e176bca 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java @@ -111,6 +111,7 @@ public abstract class Type { new StructField("generic_struct", new ScalarType(PrimitiveType.NULL_TYPE)))); public static final StructType STRUCT = new StructType(); public static final VariantType VARIANT = new VariantType(); + public static final AnyType ANY_TYPE = new AnyType(); private static final Logger LOG = LogManager.getLogger(Type.class); private static final ArrayList integerTypes; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 0dbbe6f62a0c17..dc68ce5cf504dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -1448,6 +1448,21 @@ && collectChildReturnTypes()[0].isDecimalV3()) { fn.getReturnType().getPrimitiveType().setTimeType(); } + if (fnName.getFunction().equalsIgnoreCase("named_struct")) { + if ((children.size() & 1) == 1) { + throw new AnalysisException("named_struct can't be odd parameters, need even parameters: " + + this.toSql()); + } + for (int i = 0; i < children.size(); i++) { + if ((i & 1) == 0) { + if (!(getChild(i) instanceof StringLiteral)) { + throw new AnalysisException( + "named_struct only allows constant string parameter in odd position: " + this.toSql()); + } + } + } + } + if (isAggregateFunction()) { final String functionName = fnName.getFunction(); // subexprs must not contain aggregates @@ -1620,6 +1635,15 @@ private void analyzeNestedFunction() { .getType()).getItemType().isDatetimeV2())) { this.type = children.get(1).getType(); } + } else if (fnName.getFunction().equalsIgnoreCase("named_struct")) { + List fieldNames = Lists.newArrayList(); + for (int i = 0; i < children.size(); i++) { + if ((i & 1) == 0) { + StringLiteral nameLiteral = (StringLiteral) children.get(i); + fieldNames.add(nameLiteral.getStringValue()); + } + } + this.type = ((StructType) type).replaceFieldsWithNames(fieldNames); } else if (fnName.getFunction().equalsIgnoreCase("array_distinct") || fnName.getFunction() .equalsIgnoreCase("array_remove") || fnName.getFunction().equalsIgnoreCase("array_sort") || fnName.getFunction().equalsIgnoreCase("array_reverse_sort") diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 6475f3e3ec4a03..be8d9444231bff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1232,7 +1232,12 @@ public Function getFunction(Function desc, Function.CompareMode mode, boolean is List normalFunctions = Lists.newArrayList(); List templateFunctions = Lists.newArrayList(); List variadicTemplateFunctions = Lists.newArrayList(); + List inferenceFunctions = Lists.newArrayList(); for (Function fn : fns) { + if (fn.isInferenceFunction()) { + inferenceFunctions.add(fn); + continue; + } if (fn.hasTemplateArg()) { if (!fn.hasVariadicTemplateArg()) { templateFunctions.add(fn); @@ -1274,8 +1279,25 @@ public Function getFunction(Function desc, Function.CompareMode mode, boolean is } } - // try variadic template function - return getFunction(desc, mode, specializedVariadicTemplateFunctions); + // try variadic template function third + fn = getFunction(desc, mode, specializedVariadicTemplateFunctions); + if (fn != null) { + return fn; + } + + List inferredFunctions = Lists.newArrayList(); + for (Function f : inferenceFunctions) { + if (f.hasTemplateArg()) { + f = specializeTemplateFunction(f, desc, f.hasVariadicTemplateArg()); + } + f = resolveInferenceFunction(f, desc); + if (f != null) { + inferredFunctions.add(f); + } + } + + // try inference function at last + return getFunction(desc, mode, inferredFunctions); } private Function getFunction(Function desc, Function.CompareMode mode, List fns) { @@ -1384,6 +1406,17 @@ public Function specializeTemplateFunction(Function templateFunction, Function r } } + public Function resolveInferenceFunction(Function inferenceFunction, Function requestFunction) { + Type[] args = requestFunction.getArgs(); + Type newRetType = FunctionTypeDeducers.deduce(inferenceFunction.functionName(), args); + if (newRetType != null && inferenceFunction instanceof ScalarFunction) { + ScalarFunction f = (ScalarFunction) inferenceFunction; + return new ScalarFunction(f.getFunctionName(), Lists.newArrayList(f.getArgs()), newRetType, f.hasVarArgs(), + f.getSymbolName(), f.getBinaryType(), f.isUserVisible(), f.isVectorized(), f.getNullableMode()); + } + return null; + } + /** * There are essential differences in the implementation of some functions for different * types params, which should be prohibited. diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 3d175d0fa32cfa..4024cdab6ae5b6 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -81,6 +81,7 @@ # struct functions [['struct'], 'STRUCT', ['TYPES'], 'ALWAYS_NOT_NULLABLE', ['TYPES...']], + [['named_struct'], 'ANY_TYPE', ['TYPES'], 'ALWAYS_NOT_NULLABLE', ['TYPES...']], # array functions [['array'], 'ARRAY', ['BOOLEAN', '...'], 'ALWAYS_NOT_NULLABLE'], From a1272fa3ee4cbac4352431546ce668c5eef312f1 Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 12 Apr 2023 14:31:46 +0800 Subject: [PATCH 12/12] save --- be/src/vec/functions/function_struct.cpp | 13 +++++++------ .../struct_functions/test_struct_functions.out | Bin 682 -> 724 bytes 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/be/src/vec/functions/function_struct.cpp b/be/src/vec/functions/function_struct.cpp index 336ef1328ec4dc..d8cac8406b6d50 100644 --- a/be/src/vec/functions/function_struct.cpp +++ b/be/src/vec/functions/function_struct.cpp @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include - #include "vec/columns/column_const.h" #include "vec/columns/column_string.h" #include "vec/columns/column_struct.h" @@ -64,8 +62,11 @@ class FunctionStruct : public IFunction { block.get_by_position(result).type->get_name()); } ColumnNumbers args_num; - std::copy_if(arguments.begin(), arguments.end(), std::back_inserter(args_num), - Impl::types_index); + for (size_t i = 0; i < arguments.size(); i++) { + if (Impl::pred(i)) { + args_num.push_back(arguments[i]); + } + } size_t num_element = args_num.size(); if (num_element != struct_column->tuple_size()) { return Status::RuntimeError( @@ -96,7 +97,7 @@ class FunctionStruct : public IFunction { // struct(value1, value2, value3) -> {value1, value2, value3} struct StructImpl { static constexpr auto name = "struct"; - static constexpr auto types_index = [](size_t i) { return true; }; + static constexpr auto pred = [](size_t i) { return true; }; static void check_number_of_arguments(size_t number_of_arguments) {} @@ -108,7 +109,7 @@ struct StructImpl { // named_struct(name1, value1, name2, value2) -> {name1:value1, name2:value2} struct NamedStructImpl { static constexpr auto name = "named_struct"; - static constexpr auto types_index = [](size_t i) { return (i & 1) == 0; }; + static constexpr auto pred = [](size_t i) { return (i & 1) == 1; }; static void check_number_of_arguments(size_t number_of_arguments) { DCHECK(number_of_arguments % 2 == 0) diff --git a/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out index b0bda1fbdbe213e0bb1a5387f603458206c63a67..7a0afc46c185a8c1698c354d41e263734553aaa1 100644 GIT binary patch delta 63 zcmZ3*dWCgE1tX*3zWm6pm6CDL(9R(vD1;bj-Y9l=m+Zash85I@$9M(+?{5#} delta 75 zcmcb@x{7r}1tX)uzOCucx1ghn%HqtW&iWq}QJ!4~IppajvkB^Q5Iysqx=?nn7 C2oj_K