Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion be/src/vec/functions/function_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1138,7 +1138,6 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_alternative_function<FunctionLeftOld>();
factory.register_alternative_function<FunctionRightOld>();
factory.register_alternative_function<FunctionSubstringIndexOld>();
factory.register_alternative_function<FunctionStringRepeatOld>();
factory.register_alternative_function<FunctionUnHexOld>();
factory.register_alternative_function<FunctionToBase64Old>();

Expand Down
97 changes: 0 additions & 97 deletions be/src/vec/functions/function_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -1440,103 +1440,6 @@ class FunctionStringRepeat : public IFunction {
// Reports this function's name by returning the class-level `name` constant.
String get_name() const override {
    return name;
}
// The function takes exactly two arguments; `execute_impl` reads them as the
// string column and the repeat-count column.
size_t get_number_of_arguments() const override {
    return 2;
}

// Result type of the function: always a DataTypeString (no nullable wrapper),
// regardless of the argument types passed in.
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
    DataTypePtr string_type = std::make_shared<DataTypeString>();
    return string_type;
}
// Evaluates the function over `block` and stores a freshly built ColumnString
// at position `result`.
//
// arguments[0] is the string column (materialized if it is constant);
// arguments[1] is the repeat count, accepted either as a ColumnInt32 or as a
// ColumnConst. The count is capped by `context->state()->repeat_max_num()`.
//
// Returns Status::OK() on success, or a RuntimeError naming both argument
// columns when their concrete column types are not one of the supported
// combinations.
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                    size_t result, size_t input_rows_count) const override {
    DCHECK_EQ(arguments.size(), 2);
    auto res = ColumnString::create();

    // Only the string argument is expanded to a full column; the count column
    // is taken as-is so the constant case can be handled separately below.
    ColumnPtr argument_ptr[2];
    argument_ptr[0] =
            block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
    argument_ptr[1] = block.get_by_position(arguments[1]).column;

    auto* str_col = check_and_get_column<ColumnString>(*argument_ptr[0]);
    if (str_col != nullptr) {
        // Case 1: one repeat count per row.
        if (auto* count_col = check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
            vector_vector(str_col->get_chars(), str_col->get_offsets(), count_col->get_data(),
                          res->get_chars(), res->get_offsets(),
                          context->state()->repeat_max_num());
            block.replace_by_position(result, std::move(res));
            return Status::OK();
        }

        // Case 2: a single constant repeat count shared by all rows.
        if (auto* const_count = check_and_get_column<ColumnConst>(*argument_ptr[1])) {
            DCHECK(check_and_get_column<ColumnInt32>(const_count->get_data_column()));
            const int repeat = std::min<int>(const_count->get_int(0),
                                             context->state()->repeat_max_num());

            if (repeat > 0) {
                vector_const(str_col->get_chars(), str_col->get_offsets(), repeat,
                             res->get_chars(), res->get_offsets());
            } else {
                // A non-positive count yields a default (empty) string for every row.
                res->insert_many_defaults(input_rows_count);
            }
            block.replace_by_position(result, std::move(res));
            return Status::OK();
        }
    }

    return Status::RuntimeError("repeat function get error param: {}, {}",
                                argument_ptr[0]->get_name(), argument_ptr[1]->get_name());
}

// Repeats each input string by its own per-row count.
//
// data / offsets   : character buffer and end offsets of the input strings.
// repeats          : per-row repeat counts.
// res_data /
// res_offsets      : output character buffer and offsets; res_offsets is
//                    resized to the number of input rows.
// repeat_max_num   : upper bound applied to every per-row count.
//
// Rows whose capped count is <= 0 produce an empty string.
void vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
                   const ColumnInt32::Container& repeats, ColumnString::Chars& res_data,
                   ColumnString::Offsets& res_offsets, const int repeat_max_num) const {
    const size_t row_count = offsets.size();
    res_offsets.resize(row_count);

    // Scratch buffer reused across rows to assemble the repeated string.
    fmt::memory_buffer scratch;

    // The index is signed because the first row reads offsets[-1].
    // NOTE(review): this presumably relies on the offsets container being
    // padded so that index -1 reads as 0 - confirm against its definition.
    for (ssize_t row = 0; row < row_count; ++row) {
        const char* src = reinterpret_cast<const char*>(&data[offsets[row - 1]]);
        const size_t len = offsets[row] - offsets[row - 1];
        const int times = std::min<int>(repeats[row], repeat_max_num);

        if (times <= 0) {
            StringOP::push_empty_string(row, res_data, res_offsets);
            continue;
        }

        scratch.clear();
        for (int copies = times; copies > 0; --copies) {
            scratch.append(src, src + len);
        }
        StringOP::push_value_string(std::string_view(scratch.data(), scratch.size()), row,
                                    res_data, res_offsets);
    }
}

// TODO: 1. replacing the fmt buffer with pmr::vector<char> may speed up this code
// 2. factor out the logic shared by `vector_vector` and `vector_const`
// 3. reconsider whether a larger limit such as `DEFAULT_MAX_STRING_SIZE` should be used here
// Repeats every input string the same number of times.
//
// data / offsets   : character buffer and end offsets of the input strings.
// repeat           : number of copies to emit for each row; this function
//                    does not cap or validate it, and a value <= 0 simply
//                    yields an empty string for each row.
// res_data /
// res_offsets      : output character buffer and offsets; res_offsets is
//                    resized to the number of input rows.
void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
                  int repeat, ColumnString::Chars& res_data,
                  ColumnString::Offsets& res_offsets) const {
    const size_t row_count = offsets.size();
    res_offsets.resize(row_count);

    // Scratch buffer reused across rows to assemble the repeated string.
    fmt::memory_buffer scratch;

    // The index is signed because the first row reads offsets[-1].
    // NOTE(review): this presumably relies on the offsets container being
    // padded so that index -1 reads as 0 - confirm against its definition.
    for (ssize_t row = 0; row < row_count; ++row) {
        const char* src = reinterpret_cast<const char*>(&data[offsets[row - 1]]);
        const size_t len = offsets[row] - offsets[row - 1];

        scratch.clear();
        for (int copies = repeat; copies > 0; --copies) {
            scratch.append(src, src + len);
        }
        StringOP::push_value_string(std::string_view(scratch.data(), scratch.size()), row,
                                    res_data, res_offsets);
    }
}
};

class FunctionStringRepeatOld : public IFunction {
public:
static constexpr auto name = "repeat";
static FunctionPtr create() { return std::make_shared<FunctionStringRepeatOld>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }

DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return make_nullable(std::make_shared<DataTypeString>());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.IntegerType;
Expand All @@ -35,7 +35,7 @@
* ScalarFunction 'repeat'. This class is generated by GenerateFunction.
*/
public class Repeat extends ScalarFunction
implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable {
implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable {

public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE)
Expand Down
4 changes: 2 additions & 2 deletions gensrc/script/doris_builtins_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1566,7 +1566,7 @@
[['null_or_empty'], 'BOOLEAN', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
[['not_null_or_empty'], 'BOOLEAN', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
[['space'], 'VARCHAR', ['INT'], ''],
[['repeat'], 'VARCHAR', ['VARCHAR', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['repeat'], 'VARCHAR', ['VARCHAR', 'INT'], 'ALWAYS_NULLABLE'],
[['lpad'], 'VARCHAR', ['VARCHAR', 'INT', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['rpad'], 'VARCHAR', ['VARCHAR', 'INT', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['append_trailing_char_if_absent'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'],
Expand Down Expand Up @@ -1628,7 +1628,7 @@
[['null_or_empty'], 'BOOLEAN', ['STRING'], 'ALWAYS_NOT_NULLABLE'],
[['not_null_or_empty'], 'BOOLEAN', ['STRING'], 'ALWAYS_NOT_NULLABLE'],
[['space'], 'STRING', ['INT'], ''],
[['repeat'], 'STRING', ['STRING', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['repeat'], 'STRING', ['STRING', 'INT'], 'ALWAYS_NULLABLE'],
[['lpad'], 'STRING', ['STRING', 'INT', 'STRING'], 'ALWAYS_NULLABLE'],
[['rpad'], 'STRING', ['STRING', 'INT', 'STRING'], 'ALWAYS_NULLABLE'],
[['append_trailing_char_if_absent'], 'STRING', ['STRING', 'STRING'], 'ALWAYS_NULLABLE'],
Expand Down