From 368f95deb5139cc24e726f33f2a0898b775ebeed Mon Sep 17 00:00:00 2001 From: LiBinfeng Date: Mon, 17 Mar 2025 19:35:57 +0800 Subject: [PATCH] [fix](Nereids) initcap constant folding should upper first character in all words (#49061) --- .../executable/StringArithmetic.java | 11 +- .../fold_constant_string_arithmatic.groovy | 138 ++++++++++++++++++ 2 files changed, 142 insertions(+), 7 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java index 027f75bbc86089..532ec04d8aadef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java @@ -412,12 +412,9 @@ public static Expression characterLength(StringLikeLiteral first) { return new IntegerLiteral(first.getValue().length()); } - private static boolean isSeparator(char c) { - if (".$|()[{^?*+\\".indexOf(c) == -1) { - return false; - } else { - return true; - } + private static boolean isAlphabetic(char c) { + Pattern pattern = Pattern.compile("\\p{Alnum}"); + return pattern.matcher(String.valueOf(c)).find(); } /** @@ -429,7 +426,7 @@ public static Expression initCap(StringLikeLiteral first) { boolean capitalizeNext = true; for (char c : first.getValue().toCharArray()) { - if (Character.isWhitespace(c) || isSeparator(c)) { + if (Character.isWhitespace(c) || !isAlphabetic(c)) { result.append(c); capitalizeNext = true; // Next character should be capitalized } else if (capitalizeNext) { diff --git a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy index 08f9fca9801927..8d440289073019 100644 --- a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy +++ b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy @@ -213,6 +213,144 @@ suite("fold_constant_string_arithmatic") { testFoldConst("select initcap(' hello world')") testFoldConst("select initcap('こんにちは')") testFoldConst("select initcap('上海天津北京杭州')") + testFoldConst("select initcap('ab')") + testFoldConst("select initcap('aBc')") + testFoldConst("select initcap('a,b,c')") + testFoldConst("select initcap('a;b;c')") + testFoldConst("select initcap(null)") + testFoldConst("select initcap('')") + testFoldConst("select initcap(123)") + testFoldConst("select initcap(0)") + testFoldConst("select initcap(true)") + testFoldConst("select initcap(' a ')") + testFoldConst("select initcap('中文字')") + testFoldConst("select initcap('abc')") + testFoldConst("select initcap('2023-01-01')") + testFoldConst("select initcap('aBcDeF')") + testFoldConst("select initcap('hello world!')") + testFoldConst("select initcap('123abcDEF')") + testFoldConst("select initcap(' ')") + testFoldConst("select initcap('null')") + testFoldConst("select initcap('ärger')") + testFoldConst("select initcap('über')") + testFoldConst("select initcap('a1!b2@c3#')") + testFoldConst("select initcap('john o''connor')") + testFoldConst("select initcap('mcdonald''s')") + testFoldConst("select initcap('abc-def')") + testFoldConst("select initcap('foo_bar')") + testFoldConst("select initcap(' test ')") + testFoldConst("select initcap('xyz,zyx')") + testFoldConst("select initcap('123 456')") + testFoldConst("select initcap('.,abc')") + testFoldConst("select initcap('[]test')") + testFoldConst("select initcap('')") + testFoldConst("select initcap('aaAAaa')") + testFoldConst("select initcap(substring('abcd', 2))") + testFoldConst("select initcap(concat('a', '-test'))") + testFoldConst("select initcap('hello world')") + testFoldConst("select initcap('mixedCASE')") + testFoldConst("select initcap('UPPERCASE')") + testFoldConst("select initcap('lowercase')") + testFoldConst("select initcap('multiple spaces')") + testFoldConst("select initcap('hyphenated-word')") + testFoldConst("select initcap('under_score')") + testFoldConst("select initcap('dot.test')") + testFoldConst("select initcap('colon:test')") + testFoldConst("select initcap('semi;test')") + testFoldConst("select initcap('quote''test')") + testFoldConst("select initcap('slash/test')") + testFoldConst("select initcap('back\slash')") + testFoldConst("select initcap('emojitest')") + testFoldConst("select initcap('数字123test')") + testFoldConst("select initcap(' leading space')") + testFoldConst("select initcap('trailing space ')") + testFoldConst("select initcap(' multiple ')") + testFoldConst("select initcap('a.b.c.d')") + testFoldConst("select initcap('test-123-test')") + testFoldConst("select initcap('mixed_separators-here')") + testFoldConst("select initcap('ÄÖÜäöü')") + testFoldConst("select initcap('àçèñ')") + testFoldConst("select initcap('')") + testFoldConst("select initcap(' ')") + testFoldConst("select initcap('9am')") + testFoldConst("select initcap('sign')") + testFoldConst("select initcap('hash#tag')") + testFoldConst("select initcap('at@sign')") + testFoldConst("select initcap('caret^test')") + testFoldConst("select initcap('amp&test')") + testFoldConst("select initcap('star*test')") + testFoldConst("select initcap('plus+test')") + testFoldConst("select initcap('minus-test')") + testFoldConst("select initcap('equals=test')") + testFoldConst("select initcap('tilde~test')") + testFoldConst("select initcap('backtick`test')") + testFoldConst("select initcap('pipe|test')") + testFoldConst("select initcap('brace{test')") + testFoldConst("select initcap('bracket[test')") + testFoldConst("select initcap('lesstest')") + testFoldConst("select initcap('slash/test')") + testFoldConst("select initcap('question?test')") + testFoldConst("select initcap('space test')") + testFoldConst("select initcap('emojimix')") + testFoldConst("select initcap('unicodeñtest')") + testFoldConst("select initcap('ÆØÅtest')") + testFoldConst("select initcap('çédîñ')") + testFoldConst("select initcap('русский')") + testFoldConst("select initcap('日本語')") + testFoldConst("select initcap('한글')") + testFoldConst("select initcap('ﺎﻠﻋﺮﺒﻳﺓ')") + testFoldConst("select initcap('test')") + testFoldConst("select initcap('music')") + testFoldConst("select initcap('button')") + testFoldConst("select initcap('flag')") + testFoldConst("select initcap('family')") + testFoldConst("select initcap('fire')") + testFoldConst("select initcap('rocket')") + testFoldConst("select initcap('2023')") + testFoldConst("select initcap('√square')") + testFoldConst("select initcap('∞infinity')") + testFoldConst("select initcap('µmicro')") + testFoldConst("select initcap('¶pilcrow')") + testFoldConst("select initcap('©copyright')") + testFoldConst("select initcap('®registered')") + testFoldConst("select initcap('™trademark')") + testFoldConst("select initcap('§section')") + testFoldConst("select initcap('°degree')") + testFoldConst("select initcap('±plusminus')") + testFoldConst("select initcap('×multiply')") + testFoldConst("select initcap('÷divide')") + testFoldConst("select initcap('¹superscript')") + testFoldConst("select initcap('₂subscript')") + testFoldConst("select initcap('Ωomega')") + testFoldConst("select initcap('∆delta')") + testFoldConst("select initcap('∑sum')") + testFoldConst("select initcap('∏product')") + testFoldConst("select initcap('∫integral')") + testFoldConst("select initcap('⌘command')") + testFoldConst("select initcap('⌥option')") + testFoldConst("select initcap('⇧shift')") + testFoldConst("select initcap('⌃control')") + testFoldConst("select initcap('⌦delete')") + testFoldConst("select initcap('⇨arrow')") + testFoldConst("select initcap('★star')") + testFoldConst("select initcap('☀sun')") + testFoldConst("select initcap('☔ umbrella')") + testFoldConst("select initcap('☎phone')") + testFoldConst("select initcap('✉email')") + testFoldConst("select initcap('✓check')") + testFoldConst("select initcap('✗cross')") + testFoldConst("select initcap('⚠warning')") + testFoldConst("select initcap('⏰ clock')") + testFoldConst("select initcap('cake')") + testFoldConst("select initcap('party')") + testFoldConst("select initcap('⚡ bolt')") + testFoldConst("select initcap('⛔ forbidden')") + testFoldConst("select initcap('✅ check')") + testFoldConst("select initcap('✈plane')") + testFoldConst("select initcap('❤heart')") + testFoldConst("select initcap('⏩ fast')") + testFoldConst("select initcap('key')") // instr testFoldConst("select instr('上海天津北京杭州', '北京')")