From fdcb4a4cdd966be5233513838f587b79a2ac9d11 Mon Sep 17 00:00:00 2001 From: Mryange <2319153948@qq.com> Date: Sat, 14 Sep 2024 16:59:40 +0800 Subject: [PATCH] fix --- be/src/vec/functions/function_string.h | 76 +++++++++++++------ .../nereids_function_p0/scalar_function/R.out | 6 ++ .../scalar_function/R.groovy | 2 + 3 files changed, 59 insertions(+), 25 deletions(-) diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 160cc484a74931..d2d17342783f6f 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -3176,26 +3176,39 @@ class FunctionReplace : public IFunction { size_t result, size_t input_rows_count) const override { // We need a local variable to hold a reference to the converted column. // So that the converted column will not be released before we use it. - auto col_origin = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - auto col_origin_str = assert_cast(col_origin.get()); - auto col_old = - block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); - auto col_old_str = assert_cast(col_old.get()); - auto col_new = - block.get_by_position(arguments[2]).column->convert_to_full_column_if_const(); - auto col_new_str = assert_cast(col_new.get()); + ColumnPtr col[3]; + bool col_const[3]; + for (size_t i = 0; i < 3; ++i) { + std::tie(col[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); + } + + const auto* col_origin_str = assert_cast(col[0].get()); + const auto* col_old_str = assert_cast(col[1].get()); + const auto* col_new_str = assert_cast(col[2].get()); ColumnString::MutablePtr col_res = ColumnString::create(); - for (int i = 0; i < input_rows_count; ++i) { - StringRef origin_str = col_origin_str->get_data_at(i); - StringRef old_str = col_old_str->get_data_at(i); - StringRef new_str = col_new_str->get_data_at(i); - std::string result = replace(origin_str.to_string(), old_str.to_string_view(), - new_str.to_string_view()); - col_res->insert_data(result.data(), result.length()); - } + std::visit( + [&](auto origin_str_const, auto old_str_const, auto new_str_const) { + for (int i = 0; i < input_rows_count; ++i) { + StringRef origin_str = + col_origin_str->get_data_at(index_check_const(i)); + StringRef old_str = + col_old_str->get_data_at(index_check_const(i)); + StringRef new_str = + col_new_str->get_data_at(index_check_const(i)); + + std::string result = + replace(origin_str.to_string(), old_str.to_string_view(), + new_str.to_string_view()); + + col_res->insert_data(result.data(), result.length()); + } + }, + vectorized::make_bool_variant(col_const[0]), + vectorized::make_bool_variant(col_const[1]), + vectorized::make_bool_variant(col_const[2])); block.replace_by_position(result, std::move(col_res)); return Status::OK(); @@ -3212,16 +3225,29 @@ class FunctionReplace : public IFunction { if (new_str.empty()) { return str; } - std::string result; - ColumnString::check_chars_length( - str.length() * (new_str.length() + 1) + new_str.length(), 0); - result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); - for (char c : str) { + if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) { + std::string result; + ColumnString::check_chars_length( + str.length() * (new_str.length() + 1) + new_str.length(), 0); + result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); + for (char c : str) { + result += new_str; + result += c; + } + result += new_str; + return result; + } else { + std::string result; + result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); + for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) { + utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]]; + result += new_str; + result.append(&str[i], utf8_char_len); + } result += new_str; - result += c; + ColumnString::check_chars_length(result.size(), 0); + return result; } - result += new_str; - return result; } } else { std::string::size_type pos = 0; diff --git a/regression-test/data/nereids_function_p0/scalar_function/R.out b/regression-test/data/nereids_function_p0/scalar_function/R.out index e90736971184c1..a4fe94bbeb1fa2 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/R.out +++ b/regression-test/data/nereids_function_p0/scalar_function/R.out @@ -429,6 +429,12 @@ xyz -- !sql_relace_empty08 -- abc +-- !sql_relace_empty09 -- +b你bab好bbb世bcb界b + +-- !sql_relace_empty10 -- +你a好b世c界 + -- !sql_right_Varchar_Integer -- \N 1 diff --git a/regression-test/suites/nereids_function_p0/scalar_function/R.groovy b/regression-test/suites/nereids_function_p0/scalar_function/R.groovy index bf21154192b655..2f79eeea94274c 100644 --- a/regression-test/suites/nereids_function_p0/scalar_function/R.groovy +++ b/regression-test/suites/nereids_function_p0/scalar_function/R.groovy @@ -59,6 +59,8 @@ suite("nereids_scalar_fn_R") { qt_sql_relace_empty06 "select replace_empty('xyz', 'x', '');" qt_sql_relace_empty07 "select replace_empty('xyz', '', '');" qt_sql_relace_empty08 "select replace_empty('', '', 'abc');" + qt_sql_relace_empty09 "select replace_empty('你a好b世c界','','b');" + qt_sql_relace_empty10 "select replace_empty('你a好b世c界','','');" qt_sql_right_Varchar_Integer "select right(kvchrs1, kint) from fn_test order by kvchrs1, kint" qt_sql_right_Varchar_Integer_notnull "select right(kvchrs1, kint) from fn_test_not_nullable order by kvchrs1, kint" qt_sql_right_String_Integer "select right(kstr, kint) from fn_test order by kstr, kint"