diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp
index 7214b0bd819d8d..ed001cc8459415 100644
--- a/be/src/exprs/string_functions.cpp
+++ b/be/src/exprs/string_functions.cpp
@@ -761,4 +761,78 @@ StringVal StringFunctions::money_format(FunctionContext *context, const LargeInt
     return do_money_format(context, ss.str());
 }
 
+// Returns the offset, relative to sourceOffset, of the first occurrence of
+// target[targetOffset, targetOffset + targetCount) within
+// source[sourceOffset, sourceOffset + sourceCount), starting the search at
+// fromIndex (negative values are treated as 0). Returns -1 if there is no match.
+// An empty target matches immediately, at fromIndex clamped to [0, sourceCount].
+static int indexOf(const uint8_t* source, int sourceOffset, int sourceCount,
+                   const uint8_t* target, int targetOffset, int targetCount,
+                   int fromIndex) {
+    if (fromIndex >= sourceCount) {
+        return (targetCount == 0 ? sourceCount : -1);
+    }
+    if (fromIndex < 0) {
+        fromIndex = 0;
+    }
+    if (targetCount == 0) {
+        return fromIndex;
+    }
+    const uint8_t first = target[targetOffset];
+    const int max = sourceOffset + (sourceCount - targetCount);
+    for (int i = sourceOffset + fromIndex; i <= max; i++) {
+        if (source[i] != first) { // Look for the first byte of the target.
+            while (++i <= max && source[i] != first);
+        }
+        if (i <= max) { // Found the first byte, now compare the rest of the target.
+            int j = i + 1;
+            const int end = j + targetCount - 1;
+            for (int k = targetOffset + 1; j < end && source[j] == target[k]; j++, k++);
+            if (j == end) {
+                return i - sourceOffset; // Found the whole target.
+            }
+        }
+    }
+    return -1;
+}
+
+// Splits content on delimiter and returns the field-th part (counting from 1).
+// The result points into content's buffer; nothing is copied.
+// Returns NULL when any argument is NULL, when field <= 0, when delimiter is
+// empty, when delimiter does not occur in content at all, or when content has
+// fewer than field parts.
+StringVal StringFunctions::split_part(FunctionContext* context, const StringVal& content,
+                                      const StringVal& delimiter, const IntVal& field) {
+    if (content.is_null || delimiter.is_null || field.is_null) {
+        return StringVal::null();
+    }
+    // An empty delimiter matches everywhere, so no part boundary can be defined.
+    if (field.val <= 0 || delimiter.len == 0) {
+        return StringVal::null();
+    }
+    // Only the bounds of the current part are tracked, so no buffer sized by the
+    // caller-supplied field is needed.
+    int start_pos = 0; // first byte of the current part
+    int end_pos = -1;  // offset of the delimiter closing the current part, or -1
+    for (int part = 1; part <= field.val; ++part) {
+        end_pos = indexOf(content.ptr, 0, content.len, delimiter.ptr, 0, delimiter.len,
+                          start_pos);
+        if (part == field.val) {
+            break;
+        }
+        if (end_pos == -1) { // content has fewer than field parts
+            return StringVal::null();
+        }
+        // Resume after the whole delimiter so multi-byte delimiters never overlap.
+        start_pos = end_pos + delimiter.len;
+    }
+    if (end_pos == -1) {
+        if (field.val == 1) { // delimiter does not occur in content
+            return StringVal::null();
+        }
+        end_pos = content.len; // the last part runs to the end of content
+    }
+    return StringVal(content.ptr + start_pos, end_pos - start_pos);
+}
+
 }
diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h
index a4ca3a769d951e..7b94fec81b4d9e 100644
--- a/be/src/exprs/string_functions.h
+++ b/be/src/exprs/string_functions.h
@@ -174,6 +174,11 @@ class StringFunctions {
         ss << std::put_money(v);
         return AnyValUtil::from_string_temp(context, ss.str());
     };
+
+    // Returns the field-th (1-based) part of content split by delimiter;
+    // NULL on invalid arguments or when there is no such part.
+    static StringVal split_part(FunctionContext* context, const StringVal& content,
+                                const StringVal& delimiter, const IntVal& field);
 };
 
 }
diff --git a/be/test/exprs/string_functions_test.cpp b/be/test/exprs/string_functions_test.cpp
index e0e355505df657..4aa1e8dfe60871 100644
--- a/be/test/exprs/string_functions_test.cpp
+++ b/be/test/exprs/string_functions_test.cpp
@@ -123,6 +123,49 @@ TEST_F(StringFunctionsTest, money_format_decimal_v2) {
     ASSERT_EQ(expected, result);
 }
 
+TEST_F(StringFunctionsTest, split_part) {
+    doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+    ASSERT_EQ(AnyValUtil::from_string_temp(context, std::string("hello")),
+            StringFunctions::split_part(context, StringVal("hello word"), StringVal(" "), 1));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context, std::string("word")),
+            StringFunctions::split_part(context, StringVal("hello word"), StringVal(" "), 2));
+
+    ASSERT_EQ(StringVal::null(),
+            StringFunctions::split_part(context, StringVal("hello word"), StringVal(" "), 3));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context, std::string("")),
+            StringFunctions::split_part(context, StringVal("hello word"), StringVal("hello"), 1));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context, std::string(" word")),
+            StringFunctions::split_part(context, StringVal("hello word"), StringVal("hello"), 2));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context, std::string("2019年9")),
+            StringFunctions::split_part(context, StringVal("2019年9月8日"), StringVal("月"), 1));
+    ASSERT_EQ(AnyValUtil::from_string_temp(context, std::string("")),
+            StringFunctions::split_part(context, StringVal("abcdabda"), StringVal("a"), 1));
+    ASSERT_EQ(AnyValUtil::from_string_temp(context, std::string("bcd")),
+            StringFunctions::split_part(context, StringVal("abcdabda"), StringVal("a"), 2));
+    ASSERT_EQ(AnyValUtil::from_string_temp(context, std::string("bd")),
+            StringFunctions::split_part(context, StringVal("abcdabda"), StringVal("a"), 3));
+    ASSERT_EQ(AnyValUtil::from_string_temp(context, std::string("")),
+            StringFunctions::split_part(context, StringVal("abcdabda"), StringVal("a"), 4));
+
+    // Occurrences of a multi-byte delimiter must not overlap.
+    ASSERT_EQ(AnyValUtil::from_string_temp(context, std::string("")),
+            StringFunctions::split_part(context, StringVal("aaaa"), StringVal("aa"), 2));
+    ASSERT_EQ(StringVal::null(),
+            StringFunctions::split_part(context, StringVal("aaaa"), StringVal("aa"), 4));
+
+    // Non-positive fields, an empty delimiter and NULL arguments all yield NULL.
+    ASSERT_EQ(StringVal::null(),
+            StringFunctions::split_part(context, StringVal("hello word"), StringVal(" "), 0));
+    ASSERT_EQ(StringVal::null(),
+            StringFunctions::split_part(context, StringVal("hello word"), StringVal(""), 1));
+    ASSERT_EQ(StringVal::null(),
+            StringFunctions::split_part(context, StringVal::null(), StringVal(" "), 1));
+}
+
 }
 
 int main(int argc, char** argv) {
diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/split_part.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/split_part.md
new file mode 100644
index 00000000000000..2564a57855294f
--- /dev/null
+++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/split_part.md
@@ -0,0 +1,42 @@
+# split_part
+
+## Syntax
+
+`VARCHAR split_part(VARCHAR content, VARCHAR delimiter, INT field)`
+
+## Description
+
+根据分割符拆分字符串, 返回指定的分割部分(从一开始计数)。
+
+## Examples
+
+```
+mysql> select split_part("hello word", " ", 1);
++----------------------------------+
+| split_part('hello word', ' ', 1) |
++----------------------------------+
+| hello                            |
++----------------------------------+
+
+
+mysql> select split_part("hello word", " ", 2);
++----------------------------------+
+| split_part('hello word', ' ', 2) |
++----------------------------------+
+| word                             |
++----------------------------------+
+
+mysql> select split_part("2019年7月8号", "月", 1);
++-----------------------------------------+
+| split_part('2019年7月8号', '月', 1)     |
++-----------------------------------------+
+| 2019年7                                 |
++-----------------------------------------+
+
+mysql> select split_part("abca", "a", 1);
++----------------------------+
+| split_part('abca', 'a', 1) |
++----------------------------+
+|                            |
++----------------------------+
+```
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 0ba161b0fad9ec..80a0e15184ef84 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -558,6 +558,8 @@
         '_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_10DecimalValE'],
     [['money_format'], 'VARCHAR', ['DECIMALV2'],
         '_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_12DecimalV2ValE'],
+    [['split_part'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'],
+        '_ZN5doris15StringFunctions10split_partEPN9doris_udf15FunctionContextERKNS1_9StringValES6_RKNS1_6IntValE'],
 
     # Utility functions
     [['sleep'], 'BOOLEAN', ['INT'],