From c22c24317aba07289abf055c496aee193049f743 Mon Sep 17 00:00:00 2001 From: cambyzhu Date: Sun, 5 Jan 2025 18:31:49 +0800 Subject: [PATCH] fix heap-buffer-overflow for function convert_to --- be/src/vec/functions/function_string.h | 13 +++++++++---- .../nereids_function_p0/scalar_function/C.out | 3 +++ .../string_functions/test_string_function.out | Bin 4838 -> 4892 bytes .../scalar_function/C.groovy | 2 +- .../test_string_function.groovy | 2 ++ 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index a729af5948a73f..1eb437168069b7 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -3770,9 +3770,10 @@ class FunctionConvertTo : public IFunction { auto& res_offset = col_res->get_offsets(); auto& res_chars = col_res->get_chars(); res_offset.resize(input_rows_count); - // max pinyin size is 6, double of utf8 chinese word 3, add one char to set '~' - ColumnString::check_chars_length(str_chars.size() * 2 + input_rows_count, 0); - res_chars.resize(str_chars.size() * 2 + input_rows_count); + // max pinyin size is 6 + 1 (first '~') for utf8 chinese word 3 + size_t pinyin_size = (str_chars.size() + 2) / 3 * 7; + ColumnString::check_chars_length(pinyin_size, 0); + res_chars.resize(pinyin_size); size_t in_len = 0, out_len = 0; for (int i = 0; i < input_rows_count; ++i) { @@ -3813,7 +3814,11 @@ class FunctionConvertTo : public IFunction { } auto end = strchr(buf, ' '); - auto len = end != nullptr ? end - buf : MAX_PINYIN_LEN; + // max len for pinyin is 6 + int len = MAX_PINYIN_LEN; + if (end != nullptr && end - buf < MAX_PINYIN_LEN) { + len = end - buf; + } // set first char '~' just make sure all english word lower than chinese word *dest = 126; memcpy(dest + 1, buf, len); diff --git a/regression-test/data/nereids_function_p0/scalar_function/C.out b/regression-test/data/nereids_function_p0/scalar_function/C.out index e36fdc7f3742be..924ddf5b148105 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/C.out +++ b/regression-test/data/nereids_function_p0/scalar_function/C.out @@ -1418,3 +1418,6 @@ Doris Doris -- !convert -- 1 1 +-- !convert_1 -- +~zhuang~zhuang~zhuang~zhuang~zhuang + diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out index cadf5039794dd87ff4e1af559db1ccb3172a8b23..4af2997eda286b96c77e3edcc4991ab13d04bf13 100644 GIT binary patch delta 61 qcmaE+I!A580S<%Y{JgT%qLO$+1zlaPx~h!Q#JqHT)aG3rI$Qwt?-*?W delta 12 TcmbQE_Dprd0glbzIMlcRCHw^^ diff --git a/regression-test/suites/nereids_function_p0/scalar_function/C.groovy b/regression-test/suites/nereids_function_p0/scalar_function/C.groovy index bf072c9ad8cf74..0a14602eff9c17 100644 --- a/regression-test/suites/nereids_function_p0/scalar_function/C.groovy +++ b/regression-test/suites/nereids_function_p0/scalar_function/C.groovy @@ -197,5 +197,5 @@ suite("nereids_scalar_fn_C") { qt_bitmap_not_nullable "select count(kbitmap) from fn_test_bitmap_not_nullable" qt_char "select char(68, 111, 114, 105, 115), char(68, 111, 114, 105, 115 using utf8);" qt_convert "select convert(1 using gbk), convert(1, string);" - + qt_convert_1 "select convert('装装装装装' using gbk);" } diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy index 6e18fb57eeb4cf..c1a3e6b57fe809 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy @@ -163,6 +163,8 @@ suite("test_string_function", "arrow_flight_sql") { qt_sql "select right(\"Hello doris\", 120);" qt_sql "select right(\"Hello doris\", -6);" + qt_convert_1 "select convert('装装装装装' using gbk);" + sql """ drop table if exists left_right_test; """ sql """ create table left_right_test ( id INT NULL,