From 7fdf56d15fce734f88cf1318d1fec551a66a8c33 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Thu, 14 Mar 2024 11:05:54 +0800 Subject: [PATCH 1/4] [improve](function) add error msg if exceeded maximum default value in repeat function --- be/src/vec/functions/function_string.h | 39 +++++++++++++------ .../string/test_string_basic.groovy | 7 +++- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 4dc68eecc83ffc..81aa2bfc5684a4 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1439,6 +1440,14 @@ class FunctionStringRepeat : public IFunction { static FunctionPtr create() { return std::make_shared(); } String get_name() const override { return name; } size_t get_number_of_arguments() const override { return 2; } + std::string error_msg(int default_value, int repeat_value) const { + auto error_msg = fmt::format( + "The second parameter of repeat function exceeded maximum default value, " + "default_value is {}, and now input is {} . you could try change default value " + "greater than value eg: set repeat_max_num = {}.", + default_value, repeat_value, repeat_value + 10); + return error_msg; + } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return make_nullable(std::make_shared()); @@ -1456,17 +1465,20 @@ class FunctionStringRepeat : public IFunction { if (auto* col1 = check_and_get_column(*argument_ptr[0])) { if (auto* col2 = check_and_get_column(*argument_ptr[1])) { - vector_vector(col1->get_chars(), col1->get_offsets(), col2->get_data(), - res->get_chars(), res->get_offsets(), null_map->get_data(), - context->state()->repeat_max_num()); + RETURN_IF_ERROR(vector_vector(col1->get_chars(), col1->get_offsets(), + col2->get_data(), res->get_chars(), + res->get_offsets(), null_map->get_data(), + context->state()->repeat_max_num())); block.replace_by_position( result, ColumnNullable::create(std::move(res), std::move(null_map))); return Status::OK(); } else if (auto* col2_const = check_and_get_column(*argument_ptr[1])) { DCHECK(check_and_get_column(col2_const->get_data_column())); - int repeat = 0; - repeat = std::min(col2_const->get_int(0), context->state()->repeat_max_num()); - + int repeat = col2_const->get_int(0); + if (repeat > context->state()->repeat_max_num()) { + return Status::InvalidArgument( + error_msg(context->state()->repeat_max_num(), repeat)); + } if (repeat <= 0) { null_map->get_data().resize_fill(input_rows_count, 0); res->insert_many_defaults(input_rows_count); @@ -1484,10 +1496,10 @@ class FunctionStringRepeat : public IFunction { argument_ptr[0]->get_name(), argument_ptr[1]->get_name()); } - void vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, - const ColumnInt32::Container& repeats, ColumnString::Chars& res_data, - ColumnString::Offsets& res_offsets, ColumnUInt8::Container& null_map, - const int repeat_max_num) const { + Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, + const ColumnInt32::Container& repeats, ColumnString::Chars& res_data, + ColumnString::Offsets& res_offsets, ColumnUInt8::Container& null_map, + const int repeat_max_num) const { size_t input_row_size = offsets.size(); fmt::memory_buffer buffer; @@ -1497,8 +1509,10 @@ class FunctionStringRepeat : public IFunction { buffer.clear(); const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); size_t size = offsets[i] - offsets[i - 1]; - int repeat = 0; - repeat = std::min(repeats[i], repeat_max_num); + int repeat = repeats[i]; + if (repeat > repeat_max_num) { + return Status::InvalidArgument(error_msg(repeat_max_num, repeat)); + } if (repeat <= 0) { StringOP::push_empty_string(i, res_data, res_offsets); @@ -1512,6 +1526,7 @@ class FunctionStringRepeat : public IFunction { res_data, res_offsets); } } + return Status::OK(); } // TODO: 1. use pmr::vector replace fmt_buffer may speed up the code diff --git a/regression-test/suites/datatype_p0/string/test_string_basic.groovy b/regression-test/suites/datatype_p0/string/test_string_basic.groovy index 2aa9f9e86e4522..dfd40b79622da5 100644 --- a/regression-test/suites/datatype_p0/string/test_string_basic.groovy +++ b/regression-test/suites/datatype_p0/string/test_string_basic.groovy @@ -129,7 +129,12 @@ suite("test_string_basic") { (2, repeat("test1111", 131072)) """ order_qt_select_str_tb "select k1, md5(v1), length(v1) from ${tbName}" - + try { + sql """ SELECT repeat("test1111", 131073 + 100); """ + } catch (Exception ex) { + log.info(ex.getMessage()); + assertTrue(ex.getMessage().contains("repeat function exceeded maximum default value")) + } sql """drop table if exists test_string_cmp;""" sql """ From 07b96b971fce7500c65affbb137e28d6be782060 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Mon, 18 Mar 2024 10:26:23 +0800 Subject: [PATCH 2/4] update case --- be/test/vec/function/function_string_test.cpp | 21 ++++++++++++------- .../max_msg_size_of_result_receiver.groovy | 4 +++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp index 39a9dca901c09b..612a6fff0ccd6f 100644 --- a/be/test/vec/function/function_string_test.cpp +++ b/be/test/vec/function/function_string_test.cpp @@ -174,15 +174,20 @@ TEST(function_string_test, function_string_repeat_test) { std::string func_name = "repeat"; InputTypeSet input_types = {TypeIndex::String, TypeIndex::Int32}; - DataSet data_set = { - {{std::string("a"), 3}, std::string("aaa")}, - {{std::string("hel lo"), 2}, std::string("hel lohel lo")}, - {{std::string("hello word"), -1}, std::string("")}, - {{std::string(""), 1}, std::string("")}, - {{std::string("a"), 1073741825}, std::string("aaaaaaaaaa")}, // ut repeat max num 10 - {{std::string("HELLO,!^%"), 2}, std::string("HELLO,!^%HELLO,!^%")}, - {{std::string("你"), 2}, std::string("你你")}}; + DataSet data_set = {{{std::string("a"), 3}, std::string("aaa")}, + {{std::string("hel lo"), 2}, std::string("hel lohel lo")}, + {{std::string("hello word"), -1}, std::string("")}, + {{std::string(""), 1}, std::string("")}, + {{std::string("HELLO,!^%"), 2}, std::string("HELLO,!^%HELLO,!^%")}, + {{std::string("你"), 2}, std::string("你你")}}; static_cast(check_function(func_name, input_types, data_set)); + + { + DataSet data_set = {{{std::string("a"), 1073741825}, + std::string("aaaaaaaaaa")}}; // ut repeat max num 10 + Status st = check_function(func_name, input_types, data_set, true); + EXPECT_NE(Status::OK(), st); + } } TEST(function_string_test, function_string_reverse_test) { diff --git a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy index e7fead33d90ec0..9f76d31c412e52 100644 --- a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy +++ b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy @@ -27,7 +27,9 @@ suite("max_msg_size_of_result_receiver") { ENGINE=OLAP DISTRIBUTED BY HASH(id) PROPERTIES("replication_num"="1") """ - + sql """ + set repeat_max_num = 1000*1000*105; + """ sql """ INSERT INTO ${table_name} VALUES (104, repeat("a", ${MESSAGE_SIZE_BASE * 104})) """ From af629919beea006ef4212188541b0da196cd5e2c Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Mon, 18 Mar 2024 14:48:49 +0800 Subject: [PATCH 3/4] update case --- .../variable_p0/max_msg_size_of_result_receiver.groovy | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy index 9f76d31c412e52..63a6e2d8a9c09b 100644 --- a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy +++ b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy @@ -27,15 +27,14 @@ suite("max_msg_size_of_result_receiver") { ENGINE=OLAP DISTRIBUTED BY HASH(id) PROPERTIES("replication_num"="1") """ + sql """set repeat_max_num=100000;""" + sql """set max_msg_size_of_result_receiver=90000;""" // so the test of repeat("a", 80000) could pass, and repeat("a", 100000) will be failed sql """ - set repeat_max_num = 1000*1000*105; - """ - sql """ - INSERT INTO ${table_name} VALUES (104, repeat("a", ${MESSAGE_SIZE_BASE * 104})) + INSERT INTO ${table_name} VALUES (104, repeat("a", 80000)) """ sql """ - INSERT INTO ${table_name} VALUES (105, repeat("a", ${MESSAGE_SIZE_BASE * 105})) + INSERT INTO ${table_name} VALUES (105, repeat("a", 100000)) """ def with_exception = false @@ -49,6 +48,7 @@ suite("max_msg_size_of_result_receiver") { try { sql "SELECT * FROM ${table_name} WHERE id = 105" } catch (Exception e) { + log.info("error msg: " + e.getMessage()) assertTrue(e.getMessage().contains('MaxMessageSize reached, try increase max_msg_size_of_result_receiver')) } From d26be07c9c3d0a027f7dea621d1a0267c1c43e74 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Mon, 18 Mar 2024 16:41:11 +0800 Subject: [PATCH 4/4] test exception --- .../suites/datatype_p0/string/test_string_basic.groovy | 8 +++----- .../variable_p0/max_msg_size_of_result_receiver.groovy | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/regression-test/suites/datatype_p0/string/test_string_basic.groovy b/regression-test/suites/datatype_p0/string/test_string_basic.groovy index dfd40b79622da5..36fbddede2dede 100644 --- a/regression-test/suites/datatype_p0/string/test_string_basic.groovy +++ b/regression-test/suites/datatype_p0/string/test_string_basic.groovy @@ -129,11 +129,9 @@ suite("test_string_basic") { (2, repeat("test1111", 131072)) """ order_qt_select_str_tb "select k1, md5(v1), length(v1) from ${tbName}" - try { - sql """ SELECT repeat("test1111", 131073 + 100); """ - } catch (Exception ex) { - log.info(ex.getMessage()); - assertTrue(ex.getMessage().contains("repeat function exceeded maximum default value")) + test { + sql """SELECT repeat("test1111", 131073 + 100);""" + exception "repeat function exceeded maximum default value" } sql """drop table if exists test_string_cmp;""" diff --git a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy index 63a6e2d8a9c09b..f9afdd8eadb358 100644 --- a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy +++ b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy @@ -45,11 +45,9 @@ suite("max_msg_size_of_result_receiver") { } assertEquals(with_exception, false) - try { - sql "SELECT * FROM ${table_name} WHERE id = 105" - } catch (Exception e) { - log.info("error msg: " + e.getMessage()) - assertTrue(e.getMessage().contains('MaxMessageSize reached, try increase max_msg_size_of_result_receiver')) + test { + sql """SELECT * FROM ${table_name} WHERE id = 105;""" + exception "MaxMessageSize reached, try increase max_msg_size_of_result_receiver" } try {