diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 4da6d11a5e1ee9..8aee837ac63fbb 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -1138,7 +1138,6 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_alternative_function(); factory.register_alternative_function(); factory.register_alternative_function(); - factory.register_alternative_function(); factory.register_alternative_function(); factory.register_alternative_function(); diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 9b08248c6a2ad6..4dc68eecc83ffc 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -1440,103 +1440,6 @@ class FunctionStringRepeat : public IFunction { String get_name() const override { return name; } size_t get_number_of_arguments() const override { return 2; } - DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - return std::make_shared(); - } - Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { - DCHECK_EQ(arguments.size(), 2); - auto res = ColumnString::create(); - - ColumnPtr argument_ptr[2]; - argument_ptr[0] = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - argument_ptr[1] = block.get_by_position(arguments[1]).column; - - if (auto* col1 = check_and_get_column(*argument_ptr[0])) { - if (auto* col2 = check_and_get_column(*argument_ptr[1])) { - vector_vector(col1->get_chars(), col1->get_offsets(), col2->get_data(), - res->get_chars(), res->get_offsets(), - context->state()->repeat_max_num()); - block.replace_by_position(result, std::move(res)); - return Status::OK(); - } else if (auto* col2_const = check_and_get_column(*argument_ptr[1])) { - DCHECK(check_and_get_column(col2_const->get_data_column())); - int repeat = 0; - repeat = std::min(col2_const->get_int(0), context->state()->repeat_max_num()); - - if (repeat <= 0) { - res->insert_many_defaults(input_rows_count); - } else { - vector_const(col1->get_chars(), col1->get_offsets(), repeat, res->get_chars(), - res->get_offsets()); - } - block.replace_by_position(result, std::move(res)); - return Status::OK(); - } - } - - return Status::RuntimeError("repeat function get error param: {}, {}", - argument_ptr[0]->get_name(), argument_ptr[1]->get_name()); - } - - void vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, - const ColumnInt32::Container& repeats, ColumnString::Chars& res_data, - ColumnString::Offsets& res_offsets, const int repeat_max_num) const { - size_t input_row_size = offsets.size(); - - fmt::memory_buffer buffer; - res_offsets.resize(input_row_size); - for (ssize_t i = 0; i < input_row_size; ++i) { - buffer.clear(); - const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); - size_t size = offsets[i] - offsets[i - 1]; - int repeat = 0; - repeat = std::min(repeats[i], repeat_max_num); - - if (repeat <= 0) { - StringOP::push_empty_string(i, res_data, res_offsets); - } else { - for (int j = 0; j < repeat; ++j) { - buffer.append(raw_str, raw_str + size); - } - StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, - res_data, res_offsets); - } - } - } - - // TODO: 1. use pmr::vector replace fmt_buffer may speed up the code - // 2. abstract the `vector_vector` and `vector_const` - // 3. rethink we should use `DEFAULT_MAX_STRING_SIZE` to bigger here - void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, - int repeat, ColumnString::Chars& res_data, - ColumnString::Offsets& res_offsets) const { - size_t input_row_size = offsets.size(); - - fmt::memory_buffer buffer; - res_offsets.resize(input_row_size); - for (ssize_t i = 0; i < input_row_size; ++i) { - buffer.clear(); - const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); - size_t size = offsets[i] - offsets[i - 1]; - - for (int j = 0; j < repeat; ++j) { - buffer.append(raw_str, raw_str + size); - } - StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, - res_offsets); - } - } -}; - -class FunctionStringRepeatOld : public IFunction { -public: - static constexpr auto name = "repeat"; - static FunctionPtr create() { return std::make_shared(); } - String get_name() const override { return name; } - size_t get_number_of_arguments() const override { return 2; } - DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return make_nullable(std::make_shared()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Repeat.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Repeat.java index 918443e81613a0..b85a812197f55b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Repeat.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Repeat.java @@ -19,8 +19,8 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; -import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.IntegerType; @@ -35,7 +35,7 @@ * ScalarFunction 'repeat'. This class is generated by GenerateFunction. */ public class Repeat extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable { + implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE) diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 629f83f6a13cef..d97c499f2da30b 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1566,7 +1566,7 @@ [['null_or_empty'], 'BOOLEAN', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], [['not_null_or_empty'], 'BOOLEAN', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], [['space'], 'VARCHAR', ['INT'], ''], - [['repeat'], 'VARCHAR', ['VARCHAR', 'INT'], 'DEPEND_ON_ARGUMENT'], + [['repeat'], 'VARCHAR', ['VARCHAR', 'INT'], 'ALWAYS_NULLABLE'], [['lpad'], 'VARCHAR', ['VARCHAR', 'INT', 'VARCHAR'], 'ALWAYS_NULLABLE'], [['rpad'], 'VARCHAR', ['VARCHAR', 'INT', 'VARCHAR'], 'ALWAYS_NULLABLE'], [['append_trailing_char_if_absent'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'], @@ -1628,7 +1628,7 @@ [['null_or_empty'], 'BOOLEAN', ['STRING'], 'ALWAYS_NOT_NULLABLE'], [['not_null_or_empty'], 'BOOLEAN', ['STRING'], 'ALWAYS_NOT_NULLABLE'], [['space'], 'STRING', ['INT'], ''], - [['repeat'], 'STRING', ['STRING', 'INT'], 'DEPEND_ON_ARGUMENT'], + [['repeat'], 'STRING', ['STRING', 'INT'], 'ALWAYS_NULLABLE'], [['lpad'], 'STRING', ['STRING', 'INT', 'STRING'], 'ALWAYS_NULLABLE'], [['rpad'], 'STRING', ['STRING', 'INT', 'STRING'], 'ALWAYS_NULLABLE'], [['append_trailing_char_if_absent'], 'STRING', ['STRING', 'STRING'], 'ALWAYS_NULLABLE'],