Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 27 additions & 12 deletions be/src/vec/functions/function_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <ostream>
#include <random>
#include <sstream>
#include <stdexcept>
#include <tuple>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -1439,6 +1440,14 @@ class FunctionStringRepeat : public IFunction {
static FunctionPtr create() { return std::make_shared<FunctionStringRepeat>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
// Builds the error text reported when the requested repeat count exceeds the
// configured limit.
//
// default_value: the limit currently in force (callers pass repeat_max_num).
// repeat_value:  the repeat count that was requested.
// Returns a message stating both values and suggesting a larger repeat_max_num.
//
// Reads no instance state, so it is static; existing unqualified calls from
// member functions keep working unchanged.
static std::string error_msg(int default_value, int repeat_value) {
    // Widen before adding: repeat_value can be as large as INT_MAX, and
    // `repeat_value + 10` evaluated in int would be signed overflow (undefined
    // behavior, typically yielding a negative suggestion).
    const long long suggested_value = static_cast<long long>(repeat_value) + 10;
    return fmt::format(
            "The second parameter of repeat function exceeded maximum default value, "
            "default_value is {}, and now input is {} . you could try change default value "
            "greater than value eg: set repeat_max_num = {}.",
            default_value, repeat_value, suggested_value);
}

DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return make_nullable(std::make_shared<DataTypeString>());
Expand All @@ -1456,17 +1465,20 @@ class FunctionStringRepeat : public IFunction {

if (auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) {
if (auto* col2 = check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
vector_vector(col1->get_chars(), col1->get_offsets(), col2->get_data(),
res->get_chars(), res->get_offsets(), null_map->get_data(),
context->state()->repeat_max_num());
RETURN_IF_ERROR(vector_vector(col1->get_chars(), col1->get_offsets(),
col2->get_data(), res->get_chars(),
res->get_offsets(), null_map->get_data(),
context->state()->repeat_max_num()));
block.replace_by_position(
result, ColumnNullable::create(std::move(res), std::move(null_map)));
return Status::OK();
} else if (auto* col2_const = check_and_get_column<ColumnConst>(*argument_ptr[1])) {
DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column()));
int repeat = 0;
repeat = std::min<int>(col2_const->get_int(0), context->state()->repeat_max_num());

int repeat = col2_const->get_int(0);
if (repeat > context->state()->repeat_max_num()) {
return Status::InvalidArgument(
error_msg(context->state()->repeat_max_num(), repeat));
}
if (repeat <= 0) {
null_map->get_data().resize_fill(input_rows_count, 0);
res->insert_many_defaults(input_rows_count);
Expand All @@ -1484,10 +1496,10 @@ class FunctionStringRepeat : public IFunction {
argument_ptr[0]->get_name(), argument_ptr[1]->get_name());
}

void vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
const ColumnInt32::Container& repeats, ColumnString::Chars& res_data,
ColumnString::Offsets& res_offsets, ColumnUInt8::Container& null_map,
const int repeat_max_num) const {
Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: method 'vector_vector' can be made static [readability-convert-member-functions-to-static]

Suggested change
Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
static Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,

be/src/vec/functions/function_string.h:1501:

-                          const int repeat_max_num) const {
+                          const int repeat_max_num) {

const ColumnInt32::Container& repeats, ColumnString::Chars& res_data,
ColumnString::Offsets& res_offsets, ColumnUInt8::Container& null_map,
const int repeat_max_num) const {
size_t input_row_size = offsets.size();

fmt::memory_buffer buffer;
Expand All @@ -1497,8 +1509,10 @@ class FunctionStringRepeat : public IFunction {
buffer.clear();
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t size = offsets[i] - offsets[i - 1];
int repeat = 0;
repeat = std::min<int>(repeats[i], repeat_max_num);
int repeat = repeats[i];
if (repeat > repeat_max_num) {
return Status::InvalidArgument(error_msg(repeat_max_num, repeat));
}

if (repeat <= 0) {
StringOP::push_empty_string(i, res_data, res_offsets);
Expand All @@ -1512,6 +1526,7 @@ class FunctionStringRepeat : public IFunction {
res_data, res_offsets);
}
}
return Status::OK();
}

// TODO: 1. using pmr::vector<char> instead of fmt_buffer may speed up the code
Expand Down
21 changes: 13 additions & 8 deletions be/test/vec/function/function_string_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,15 +174,20 @@ TEST(function_string_test, function_string_repeat_test) {
std::string func_name = "repeat";
InputTypeSet input_types = {TypeIndex::String, TypeIndex::Int32};

DataSet data_set = {
{{std::string("a"), 3}, std::string("aaa")},
{{std::string("hel lo"), 2}, std::string("hel lohel lo")},
{{std::string("hello word"), -1}, std::string("")},
{{std::string(""), 1}, std::string("")},
{{std::string("a"), 1073741825}, std::string("aaaaaaaaaa")}, // ut repeat max num 10
{{std::string("HELLO,!^%"), 2}, std::string("HELLO,!^%HELLO,!^%")},
{{std::string("你"), 2}, std::string("你你")}};
DataSet data_set = {{{std::string("a"), 3}, std::string("aaa")},
{{std::string("hel lo"), 2}, std::string("hel lohel lo")},
{{std::string("hello word"), -1}, std::string("")},
{{std::string(""), 1}, std::string("")},
{{std::string("HELLO,!^%"), 2}, std::string("HELLO,!^%HELLO,!^%")},
{{std::string("你"), 2}, std::string("你你")}};
static_cast<void>(check_function<DataTypeString, true>(func_name, input_types, data_set));

{
DataSet data_set = {{{std::string("a"), 1073741825},
std::string("aaaaaaaaaa")}}; // ut repeat max num 10
Status st = check_function<DataTypeString, true>(func_name, input_types, data_set, true);
EXPECT_NE(Status::OK(), st);
}
}

TEST(function_string_test, function_string_reverse_test) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,10 @@ suite("test_string_basic") {
(2, repeat("test1111", 131072))
"""
order_qt_select_str_tb "select k1, md5(v1), length(v1) from ${tbName}"

test {
sql """SELECT repeat("test1111", 131073 + 100);"""
exception "repeat function exceeded maximum default value"
}
sql """drop table if exists test_string_cmp;"""

sql """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ suite("max_msg_size_of_result_receiver") {
ENGINE=OLAP DISTRIBUTED BY HASH(id)
PROPERTIES("replication_num"="1")
"""

sql """set repeat_max_num=100000;"""
sql """set max_msg_size_of_result_receiver=90000;""" // so that the repeat("a", 80000) case passes and the repeat("a", 100000) case fails
sql """
INSERT INTO ${table_name} VALUES (104, repeat("a", ${MESSAGE_SIZE_BASE * 104}))
INSERT INTO ${table_name} VALUES (104, repeat("a", 80000))
"""

sql """
INSERT INTO ${table_name} VALUES (105, repeat("a", ${MESSAGE_SIZE_BASE * 105}))
INSERT INTO ${table_name} VALUES (105, repeat("a", 100000))
"""

def with_exception = false
Expand All @@ -44,10 +45,9 @@ suite("max_msg_size_of_result_receiver") {
}
assertEquals(with_exception, false)

try {
sql "SELECT * FROM ${table_name} WHERE id = 105"
} catch (Exception e) {
assertTrue(e.getMessage().contains('MaxMessageSize reached, try increase max_msg_size_of_result_receiver'))
test {
sql """SELECT * FROM ${table_name} WHERE id = 105;"""
exception "MaxMessageSize reached, try increase max_msg_size_of_result_receiver"
}

try {
Expand Down