From 9533b4e221583097b0d982c512676c34ac83d9e7 Mon Sep 17 00:00:00 2001 From: Byte Yue Date: Thu, 27 Oct 2022 16:11:09 +0800 Subject: [PATCH 1/5] resolve yet another conflict --- be/src/util/simd/vstring_function.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/be/src/util/simd/vstring_function.h b/be/src/util/simd/vstring_function.h index f17768cf891199..1fbf52298b63ff 100644 --- a/be/src/util/simd/vstring_function.h +++ b/be/src/util/simd/vstring_function.h @@ -20,6 +20,7 @@ #include #include +#include #include #ifdef __aarch64__ @@ -144,7 +145,12 @@ class VStringFunctions { } else { for (size_t i = 0, char_size = 0; i < str.len; i += char_size) { char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.ptr)[i]]; - std::copy(str.ptr + i, str.ptr + i + char_size, dst.ptr + str.len - i - char_size); + // str.len = 4, i = 3, char_size = 2 might cause dynamic stack buffer overflow + size_t offset = i + char_size; + if (offset > str.len) { + offset = str.len; + } + std::copy(str.ptr + i, str.ptr + offset, dst.ptr + str.len - offset); } } } From 1b36471967a41a529e7a166d22efae76847bd8dc Mon Sep 17 00:00:00 2001 From: Byte Yue Date: Tue, 25 Oct 2022 23:18:41 +0800 Subject: [PATCH 2/5] add regression test --- .../query_p0/sql_functions/string_functions/test_reverse.groovy | 1 + 1 file changed, 1 insertion(+) diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy index a5e3bb44ee152d..d804d7b798dd6e 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy @@ -17,5 +17,6 @@ suite("test_reverse") { qt_select "select reverse(k7) from test_query_db.test order by k1" + qt_select "select reverse(cast(unhex( cast(hex(651603156) as varchar)) as varchar));" } From 567e86f4effd970dcd3c684a7588ed77693954f4 Mon Sep 17 
00:00:00 2001 From: Byte Yue Date: Wed, 26 Oct 2022 10:49:35 +0800 Subject: [PATCH 3/5] add out file for regression test --- .../query_p0/sql_functions/string_functions/test_reverse.out | 3 +++ 1 file changed, 3 insertions(+) diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out b/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out index 57cb8f128ed8f4..9be71d50c0b004 100644 --- a/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out @@ -4,3 +4,6 @@ fsnnygnaw kn@8jlnuy +-- !select -- +�֬& + From 0acbcb8438b1a2de04398ebfa3fe387673fa3274 Mon Sep 17 00:00:00 2001 From: Byte Yue Date: Wed, 26 Oct 2022 12:20:31 +0800 Subject: [PATCH 4/5] to reduce encode error --- .../query_p0/sql_functions/string_functions/test_reverse.out | 2 +- .../query_p0/sql_functions/string_functions/test_reverse.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out b/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out index 9be71d50c0b004..55a6d0a6349091 100644 --- a/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_reverse.out @@ -5,5 +5,5 @@ fsnnygnaw kn@8jlnuy -- !select -- -�֬& +4 diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy index d804d7b798dd6e..3da1ce734c5826 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_reverse.groovy @@ -17,6 +17,6 @@ suite("test_reverse") { qt_select "select reverse(k7) from test_query_db.test order by k1" - qt_select "select 
reverse(cast(unhex( cast(hex(651603156) as varchar)) as varchar));" + qt_select "select length( cast(reverse( cast(unhex( cast(hex( cast(651603156 as bigint)) as varchar)) as varchar)) as varchar)) as c3" } From a0a3f1cb67ef9abbebae27ef558e661e4e9ebe4b Mon Sep 17 00:00:00 2001 From: Byte Yue Date: Wed, 26 Oct 2022 14:51:21 +0800 Subject: [PATCH 5/5] add comment to explain the code --- be/src/util/simd/vstring_function.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/be/src/util/simd/vstring_function.h b/be/src/util/simd/vstring_function.h index 1fbf52298b63ff..e48d677eb4be63 100644 --- a/be/src/util/simd/vstring_function.h +++ b/be/src/util/simd/vstring_function.h @@ -145,7 +145,10 @@ class VStringFunctions { } else { for (size_t i = 0, char_size = 0; i < str.len; i += char_size) { char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.ptr)[i]]; - // str.len = 4, i = 3, char_size = 2 might cause dynamic stack buffer overflow + // there exists occasion where the last character is an illegal UTF-8 one which returns + // a char_size larger than the actual space, which would cause offset to exceed the buffer range + // for example, consider str.len=4, i = 3, then the last char returns char_size 2, then + // the str.ptr + offset would exceed the buffer range size_t offset = i + char_size; if (offset > str.len) { offset = str.len;