diff --git a/be/src/util/simd/vstring_function.h b/be/src/util/simd/vstring_function.h index f17768cf891199..e48d677eb4be63 100644 --- a/be/src/util/simd/vstring_function.h +++ b/be/src/util/simd/vstring_function.h @@ -20,6 +20,7 @@ #include #include +#include #include #ifdef __aarch64__ @@ -144,7 +145,15 @@ class VStringFunctions { } else { for (size_t i = 0, char_size = 0; i < str.len; i += char_size) { char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.ptr)[i]]; - std::copy(str.ptr + i, str.ptr + i + char_size, dst.ptr + str.len - i - char_size); + // There are cases where the last character is an illegal (truncated) UTF-8 sequence whose lead byte + // reports a char_size larger than the bytes actually remaining, which would make the copy range exceed the buffer. + // For example, with str.len = 4 and i = 3, if the last char reports char_size 2, then + // str.ptr + i + char_size would point past the end of the buffer, so the end offset is clamped to str.len. + size_t offset = i + char_size; + if (offset > str.len) { + offset = str.len; + } + std::copy(str.ptr + i, str.ptr + offset, dst.ptr + str.len - offset); } } } diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out b/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out index 57cb8f128ed8f4..55a6d0a6349091 100644 --- a/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out @@ -4,3 +4,6 @@ fsnnygnaw kn@8jlnuy +-- !select -- +4 + diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy index a5e3bb44ee152d..3da1ce734c5826 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy @@ -17,5 +17,6 @@ suite("test_reverse") { qt_select "select reverse(k7) from test_query_db.test order by 
k1" + qt_select "select length( cast(reverse( cast(unhex( cast(hex( cast(651603156 as bigint)) as varchar)) as varchar)) as varchar)) as c3" }