Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,6 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.StX;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StY;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StartsWith;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StrLeft;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StrRight;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToDate;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Strcmp;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement;
Expand Down Expand Up @@ -751,7 +749,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
scalar(L2Distance.class, "l2_distance"),
scalar(LastDay.class, "last_day"),
scalar(Least.class, "least"),
scalar(Left.class, "left"),
scalar(Left.class, "left", "strleft"),
scalar(Length.class, "length"),
scalar(Crc32.class, "crc32"),
scalar(Like.class, "like"),
Expand Down Expand Up @@ -834,7 +832,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
scalar(Replace.class, "replace"),
scalar(ReplaceEmpty.class, "replace_empty"),
scalar(Reverse.class, "reverse"),
scalar(Right.class, "right"),
scalar(Right.class, "right", "strright"),
scalar(Round.class, "round"),
scalar(RoundBankers.class, "round_bankers"),
scalar(Rpad.class, "rpad"),
Expand Down Expand Up @@ -894,8 +892,6 @@ public class BuiltinScalarFunctions implements FunctionHelper {
scalar(StY.class, "st_y"),
scalar(StartsWith.class, "starts_with"),
scalar(Strcmp.class, "strcmp"),
scalar(StrLeft.class, "strleft"),
scalar(StrRight.class, "strright"),
scalar(StrToDate.class, "str_to_date"),
scalar(SubBitmap.class, "sub_bitmap"),
scalar(SubReplace.class, "sub_replace"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.apache.doris.nereids.types.ArrayType;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
Expand All @@ -60,7 +61,7 @@
* concat
*/
public class StringArithmetic {
private static Expression castStringLikeLiteral(StringLikeLiteral first, String value) {
private static Literal castStringLikeLiteral(StringLikeLiteral first, String value) {
if (first instanceof StringLiteral) {
return new StringLiteral(value);
} else if (first instanceof VarcharLiteral) {
Expand All @@ -79,7 +80,7 @@ public static Expression concatVarcharVarchar(StringLikeLiteral first, StringLik
}

private static String substringImpl(String first, int second, int third) {
int stringLength = first.length();
int stringLength = first.codePointCount(0, first.length());
if (stringLength == 0) {
return "";
}
Expand All @@ -101,8 +102,11 @@ private static String substringImpl(String first, int second, int third) {
} else {
rightIndex = third + leftIndex;
}
// at here leftIndex and rightIndex can not be exceeding boundary
int finalLeftIndex = first.codePointCount(0, (int) leftIndex);
int finalRightIndex = first.codePointCount(0, (int) rightIndex);
// left index and right index are in integer range because of definition, so we can safely cast it to int
return first.substring((int) leftIndex, (int) rightIndex);
return first.substring(finalLeftIndex, finalRightIndex);
}

/**
Expand Down Expand Up @@ -293,12 +297,14 @@ public static Expression replace(StringLikeLiteral first, StringLikeLiteral seco
*/
@ExecFunction(name = "left")
public static Expression left(StringLikeLiteral first, IntegerLiteral second) {
int inputLength = first.getValue().codePointCount(0, first.getValue().length());
if (second.getValue() <= 0) {
return castStringLikeLiteral(first, "");
} else if (second.getValue() < first.getValue().length()) {
return castStringLikeLiteral(first, first.getValue().substring(0, second.getValue()));
} else {
} else if (second.getValue() >= inputLength) {
return first;
} else {
int index = first.getValue().codePointCount(0, second.getValue());
return castStringLikeLiteral(first, first.getValue().substring(0, index));
}
}

Expand All @@ -307,17 +313,20 @@ public static Expression left(StringLikeLiteral first, IntegerLiteral second) {
*/
@ExecFunction(name = "right")
public static Expression right(StringLikeLiteral first, IntegerLiteral second) {
if (second.getValue() < (- first.getValue().length()) || Math.abs(second.getValue()) == 0) {
int inputLength = first.getValue().codePointCount(0, first.getValue().length());
if (second.getValue() < (- inputLength) || Math.abs(second.getValue()) == 0) {
return castStringLikeLiteral(first, "");
} else if (second.getValue() > first.getValue().length()) {
} else if (second.getValue() >= inputLength) {
return first;
} else {
if (second.getValue() > 0) {
if (second.getValue() >= 0) {
int index = first.getValue().codePointCount(0, second.getValue());
return castStringLikeLiteral(first, first.getValue().substring(
first.getValue().length() - second.getValue(), first.getValue().length()));
inputLength - index, inputLength));
} else {
int index = first.getValue().codePointCount(Math.abs(second.getValue()) - 1, first.getValue().length());
return castStringLikeLiteral(first, first.getValue().substring(
Math.abs(second.getValue()) - 1, first.getValue().length()));
Math.abs(index) - 1, inputLength));
}
}
}
Expand All @@ -337,7 +346,7 @@ public static Expression locate(StringLikeLiteral first, StringLikeLiteral secon
public static Expression locate(StringLikeLiteral first, StringLikeLiteral second, IntegerLiteral third) {
int result = second.getValue().indexOf(first.getValue()) + 1;
if (third.getValue() <= 0 || !substringImpl(second.getValue(), third.getValue(),
second.getValue().length()).contains(first.getValue())) {
second.getValue().codePointCount(0, second.getValue().length())).contains(first.getValue())) {
result = 0;
}
return new IntegerLiteral(result);
Expand Down Expand Up @@ -408,7 +417,7 @@ public static Expression concatWsVarcharVarchar(StringLikeLiteral first, Varchar
*/
@ExecFunction(name = "character_length")
public static Expression characterLength(StringLikeLiteral first) {
    // Count Unicode code points rather than UTF-16 code units, so a supplementary
    // character such as an emoji contributes 1 to the length instead of 2.
    String value = first.getValue();
    return new IntegerLiteral(value.codePointCount(0, value.length()));
}

private static boolean isSeparator(char c) {
Expand Down Expand Up @@ -666,6 +675,23 @@ public static Expression space(IntegerLiteral first) {
return new VarcharLiteral(sb.toString());
}

/**
 * Split a string literal into single characters, one list entry per Unicode code point,
 * so that a supplementary character such as an emoji (a surrogate pair in UTF-16)
 * is kept together in one entry.
 * Note: the split is per code point; multi-code-point grapheme clusters are not merged.
 *
 * @param str input string to be split
 * @return the code points of the input in order, each as its own string
 */
public static List<String> splitByGrapheme(StringLikeLiteral str) {
    String value = str.getValue();
    int length = value.length();
    List<String> pieces = Lists.newArrayListWithExpectedSize(length);
    int offset = 0;
    while (offset < length) {
        // a code point occupies one or two UTF-16 units; step over all of them at once
        int next = offset + Character.charCount(value.codePointAt(offset));
        pieces.add(value.substring(offset, next));
        offset = next;
    }
    return pieces;
}

/**
* Executable arithmetic functions split_by_string
*/
Expand All @@ -674,11 +700,17 @@ public static Expression splitByString(StringLikeLiteral first, StringLikeLitera
if (first.getValue().isEmpty()) {
return new ArrayLiteral(ImmutableList.of(), ArrayType.of(first.getDataType()));
}
int limit = second.getValue().isEmpty() ? 0 : -1;
String[] result = first.getValue().split(Pattern.quote(second.getValue()), limit);
if (second.getValue().isEmpty()) {
List<Literal> result = Lists.newArrayListWithExpectedSize(first.getValue().length());
for (String resultStr : splitByGrapheme(first)) {
result.add(castStringLikeLiteral(first, resultStr));
}
return new ArrayLiteral(result);
}
String[] result = first.getValue().split(Pattern.quote(second.getValue()), -1);
List<Literal> items = new ArrayList<>();
for (String s : result) {
items.add((Literal) castStringLikeLiteral(first, s));
items.add(castStringLikeLiteral(first, s));
}
return new ArrayLiteral(items);
}
Expand Down Expand Up @@ -775,40 +807,6 @@ public static Expression strcmp(StringLikeLiteral first, StringLikeLiteral secon
}
}

/**
 * Executable arithmetic functions strLeft.
 * Returns the leftmost {@code second} characters of {@code first}, counting Unicode
 * code points so that surrogate pairs are never split. A non-positive count yields an
 * empty literal; a count of at least the string's length yields {@code first} unchanged.
 */
@ExecFunction(name = "strleft")
public static Expression strLeft(StringLikeLiteral first, IntegerLiteral second) {
    String value = first.getValue();
    int count = second.getValue();
    if (count <= 0) {
        return castStringLikeLiteral(first, "");
    }
    int inputLength = value.codePointCount(0, value.length());
    if (count >= inputLength) {
        return first;
    }
    // substring() takes UTF-16 indexes: convert the code point count to an offset first
    int endIndex = value.offsetByCodePoints(0, count);
    return castStringLikeLiteral(first, value.substring(0, endIndex));
}

/**
 * Executable arithmetic functions strRight.
 * Returns the right-hand part of {@code first}, counting Unicode code points so that
 * surrogate pairs are never split. A positive {@code second} selects the last
 * {@code second} characters; a negative one selects everything from the
 * {@code abs(second)}-th character (1-based) to the end. Zero, or a negative value
 * beyond the start of the string, yields an empty literal; a count of at least the
 * string's length yields {@code first} unchanged.
 */
@ExecFunction(name = "strright")
public static Expression strRight(StringLikeLiteral first, IntegerLiteral second) {
    String value = first.getValue();
    int count = second.getValue();
    int inputLength = value.codePointCount(0, value.length());
    // The lower-bound check also rejects Integer.MIN_VALUE before it is negated below.
    if (count == 0 || count < -inputLength) {
        return castStringLikeLiteral(first, "");
    }
    if (count >= inputLength) {
        return first;
    }
    // substring() takes UTF-16 indexes: convert code point positions to offsets first
    int beginIndex;
    if (count > 0) {
        beginIndex = value.offsetByCodePoints(value.length(), -count);
    } else {
        beginIndex = value.offsetByCodePoints(0, -count - 1);
    }
    return castStringLikeLiteral(first, value.substring(beginIndex));
}

/**
* Executable arithmetic functions overlay
*/
Expand Down Expand Up @@ -949,7 +947,7 @@ public static Expression urlEncode(StringLikeLiteral first) {
*/
@ExecFunction(name = "append_trailing_char_if_absent")
public static Expression appendTrailingCharIfAbsent(StringLikeLiteral first, StringLikeLiteral second) {
if (second.getValue().length() != 1) {
if (second.getValue().codePointCount(0, second.getValue().length()) != 1) {
return new NullLiteral(first.getDataType());
}
if (first.getValue().endsWith(second.getValue())) {
Expand Down Expand Up @@ -1013,6 +1011,19 @@ public static Expression quote(StringLikeLiteral first) {
*/
@ExecFunction(name = "replace_empty")
public static Expression replaceEmpty(StringLikeLiteral first, StringLikeLiteral second, StringLikeLiteral third) {
    String source = first.getValue();
    String pattern = second.getValue();
    String replacement = third.getValue();
    // Ordinary case: a non-empty pattern is a plain literal replacement.
    if (!pattern.isEmpty()) {
        return castStringLikeLiteral(first, source.replace(pattern, replacement));
    }
    // Empty pattern on an empty input: the result is just the replacement.
    if (source.isEmpty()) {
        return castStringLikeLiteral(first, replacement);
    }
    // Empty pattern on a non-empty input: put the replacement before, between and after
    // every character, where characters come from splitByGrapheme so that surrogate
    // pairs are kept whole.
    String woven = replacement + String.join(replacement, splitByGrapheme(first)) + replacement;
    return castStringLikeLiteral(first, woven);
}

Expand Down

This file was deleted.

This file was deleted.

Loading
Loading