diff --git a/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java b/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java index 33a4e3c74d58..bf9540015d33 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java +++ b/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java @@ -21,6 +21,7 @@ import com.google.common.base.Strings; +import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; @@ -472,28 +473,34 @@ public static String repeat(String s, int count) /** * Returns the string left-padded with the string pad to a length of len characters. * If str is longer than len, the return value is shortened to len characters. - * Lpad and rpad functions are migrated from flink's scala function with minor refactor + * This function is migrated from flink's scala function with minor refactor * https://github.com/apache/flink/blob/master/flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/runtime/functions/ScalarFunctions.scala + * - Modified to handle empty pad string. + * - Padding of negative length return an empty string. * * @param base The base string to be padded * @param len The length of padded string * @param pad The pad string * - * @return the string left-padded with pad to a length of len + * @return the string left-padded with pad to a length of len or null if the pad is empty or the len is less than 0. */ - public static String lpad(String base, Integer len, String pad) + @Nonnull + public static String lpad(@Nonnull String base, int len, @Nonnull String pad) { - if (len < 0) { - return null; - } else if (len == 0) { + if (len <= 0) { return ""; } - char[] data = new char[len]; - // The length of the padding needed int pos = Math.max(len - base.length(), 0); + // short-circuit if there is no pad and we need to add a padding + if (pos > 0 && pad.isEmpty()) { + return base; + } + + char[] data = new char[len]; + // Copy the padding for (int i = 0; i < pos; i += pad.length()) { for (int j = 0; j < pad.length() && j < pos - i; j++) { @@ -512,37 +519,48 @@ public static String lpad(String base, Integer len, String pad) /** * Returns the string right-padded with the string pad to a length of len characters. * If str is longer than len, the return value is shortened to len characters. + * This function is migrated from flink's scala function with minor refactor + * https://github.com/apache/flink/blob/master/flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/runtime/functions/ScalarFunctions.scala + * - Modified to handle empty pad string. + * - Modified to only copy the pad string if needed (this implementation mimics lpad). + * - Padding of negative length return an empty string. * * @param base The base string to be padded * @param len The length of padded string * @param pad The pad string * - * @return the string right-padded with pad to a length of len + * @return the string right-padded with pad to a length of len or null if the pad is empty or the len is less than 0. */ - public static String rpad(String base, Integer len, String pad) + @Nonnull + public static String rpad(@Nonnull String base, int len, @Nonnull String pad) { - if (len < 0) { - return null; - } else if (len == 0) { + if (len <= 0) { return ""; } - char[] data = new char[len]; - - int pos = 0; + // The length of the padding needed + int paddingLen = Math.max(len - base.length(), 0); - // Copy the base - for (; pos < base.length() && pos < len; pos++) { - data[pos] = base.charAt(pos); + // short-circuit if there is no pad and we need to add a padding + if (paddingLen > 0 && pad.isEmpty()) { + return base; } + char[] data = new char[len]; + + // Copy the padding - for (; pos < len; pos += pad.length()) { - for (int i = 0; i < pad.length() && i < len - pos; i++) { - data[pos + i] = pad.charAt(i); + for (int i = len - paddingLen; i < len; i += pad.length()) { + for (int j = 0; j < pad.length() && i + j < data.length; j++) { + data[i + j] = pad.charAt(j); } } + // Copy the base + for (int i = 0; i < len && i < base.length(); i++) { + data[i] = base.charAt(i); + } + return new String(data); } diff --git a/core/src/test/java/org/apache/druid/java/util/common/StringUtilsTest.java b/core/src/test/java/org/apache/druid/java/util/common/StringUtilsTest.java index 1d4cb6dbade9..bd5c0b2ba6c1 100644 --- a/core/src/test/java/org/apache/druid/java/util/common/StringUtilsTest.java +++ b/core/src/test/java/org/apache/druid/java/util/common/StringUtilsTest.java @@ -133,13 +133,6 @@ public void fromUtf8ByteBufferDirect() Assert.assertEquals("abcd", StringUtils.fromUtf8(bytes)); } - @Test - public void testCharsetShowsUpAsDeprecated() - { - // Not actually a runnable test, just checking the IDE - Assert.assertNotNull(StringUtils.UTF8_CHARSET); - } - @SuppressWarnings("MalformedFormatString") @Test public void testNonStrictFormat() @@ -211,39 +204,51 @@ public void testRepeat() @Test public void testLpad() { - String s1 = StringUtils.lpad("abc", 7, "de"); - Assert.assertEquals(s1, "dedeabc"); + String lpad = StringUtils.lpad("abc", 7, "de"); + Assert.assertEquals("dedeabc", lpad); - String s2 = StringUtils.lpad("abc", 6, "de"); - Assert.assertEquals(s2, "dedabc"); + lpad = StringUtils.lpad("abc", 6, "de"); + Assert.assertEquals("dedabc", lpad); - String s3 = StringUtils.lpad("abc", 2, "de"); - Assert.assertEquals(s3, "ab"); + lpad = StringUtils.lpad("abc", 2, "de"); + Assert.assertEquals("ab", lpad); - String s4 = StringUtils.lpad("abc", 0, "de"); - Assert.assertEquals(s4, ""); + lpad = StringUtils.lpad("abc", 0, "de"); + Assert.assertEquals("", lpad); - String s5 = StringUtils.lpad("abc", -1, "de"); - Assert.assertEquals(s5, null); + lpad = StringUtils.lpad("abc", -1, "de"); + Assert.assertEquals("", lpad); + + lpad = StringUtils.lpad("abc", 10, ""); + Assert.assertEquals("abc", lpad); + + lpad = StringUtils.lpad("abc", 1, ""); + Assert.assertEquals("a", lpad); } @Test public void testRpad() { - String s1 = StringUtils.rpad("abc", 7, "de"); - Assert.assertEquals(s1, "abcdede"); + String rpad = StringUtils.rpad("abc", 7, "de"); + Assert.assertEquals("abcdede", rpad); + + rpad = StringUtils.rpad("abc", 6, "de"); + Assert.assertEquals("abcded", rpad); + + rpad = StringUtils.rpad("abc", 2, "de"); + Assert.assertEquals("ab", rpad); - String s2 = StringUtils.rpad("abc", 6, "de"); - Assert.assertEquals(s2, "abcded"); + rpad = StringUtils.rpad("abc", 0, "de"); + Assert.assertEquals("", rpad); - String s3 = StringUtils.rpad("abc", 2, "de"); - Assert.assertEquals(s3, "ab"); + rpad = StringUtils.rpad("abc", -1, "de"); + Assert.assertEquals("", rpad); - String s4 = StringUtils.rpad("abc", 0, "de"); - Assert.assertEquals(s4, ""); + rpad = StringUtils.rpad("abc", 10, ""); + Assert.assertEquals("abc", rpad); - String s5 = StringUtils.rpad("abc", -1, "de"); - Assert.assertEquals(s5, null); + rpad = StringUtils.rpad("abc", 1, ""); + Assert.assertEquals("a", rpad); } @Test diff --git a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java index 2fe52490c400..fc83b4f7dcdf 100644 --- a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -146,9 +146,23 @@ public void testLpad() assertExpr("lpad(x, 5, 'ab')", "abfoo"); assertExpr("lpad(x, 4, 'ab')", "afoo"); assertExpr("lpad(x, 2, 'ab')", "fo"); - assertArrayExpr("lpad(x, 0, 'ab')", null); - assertArrayExpr("lpad(x, 5, null)", null); - assertArrayExpr("lpad(null, 5, x)", null); + assertExpr("lpad(x, -1, 'ab')", NullHandling.replaceWithDefault() ? null : ""); + assertExpr("lpad(null, 5, 'ab')", null); + assertExpr("lpad(x, 2, '')", NullHandling.replaceWithDefault() ? null : "fo"); + assertExpr("lpad(x, 6, '')", NullHandling.replaceWithDefault() ? null : "foo"); + assertExpr("lpad('', 3, '*')", NullHandling.replaceWithDefault() ? null : "***"); + assertExpr("lpad(x, 2, null)", null); + assertExpr("lpad(a, 4, '*')", "[foo"); + assertExpr("lpad(a, 2, '*')", "[f"); + assertExpr("lpad(a, 2, '')", NullHandling.replaceWithDefault() ? null : "[f"); + assertExpr("lpad(b, 4, '*')", "[1, "); + assertExpr("lpad(b, 2, '')", NullHandling.replaceWithDefault() ? null : "[1"); + assertExpr("lpad(b, 2, null)", null); + assertExpr("lpad(x, 5, x)", "fofoo"); + assertExpr("lpad(x, 5, y)", "22foo"); + assertExpr("lpad(x, 5, z)", "3.foo"); + assertExpr("lpad(y, 5, x)", "foof2"); + assertExpr("lpad(z, 5, y)", "223.1"); } @Test @@ -157,9 +171,22 @@ public void testRpad() assertExpr("rpad(x, 5, 'ab')", "fooab"); assertExpr("rpad(x, 4, 'ab')", "fooa"); assertExpr("rpad(x, 2, 'ab')", "fo"); - assertArrayExpr("rpad(x, 0, 'ab')", null); - assertArrayExpr("rpad(x, 5, null)", null); - assertArrayExpr("rpad(null, 5, x)", null); + assertExpr("rpad(x, -1, 'ab')", NullHandling.replaceWithDefault() ? null : ""); + assertExpr("rpad(null, 5, 'ab')", null); + assertExpr("rpad(x, 2, '')", NullHandling.replaceWithDefault() ? null : "fo"); + assertExpr("rpad(x, 6, '')", NullHandling.replaceWithDefault() ? null : "foo"); + assertExpr("rpad('', 3, '*')", NullHandling.replaceWithDefault() ? null : "***"); + assertExpr("rpad(x, 2, null)", null); + assertExpr("rpad(a, 2, '*')", "[f"); + assertExpr("rpad(a, 2, '')", NullHandling.replaceWithDefault() ? null : "[f"); + assertExpr("rpad(b, 4, '*')", "[1, "); + assertExpr("rpad(b, 2, '')", NullHandling.replaceWithDefault() ? null : "[1"); + assertExpr("rpad(b, 2, null)", null); + assertExpr("rpad(x, 5, x)", "foofo"); + assertExpr("rpad(x, 5, y)", "foo22"); + assertExpr("rpad(x, 5, z)", "foo3."); + assertExpr("rpad(y, 5, x)", "2foof"); + assertExpr("rpad(z, 5, y)", "3.122"); } @Test diff --git a/docs/misc/math-expr.md b/docs/misc/math-expr.md index 249eb1ef209c..709715ac9c1e 100644 --- a/docs/misc/math-expr.md +++ b/docs/misc/math-expr.md @@ -91,8 +91,8 @@ The following built-in functions are available. |upper|upper(expr) converts a string to uppercase| |reverse|reverse(expr) reverses a string| |repeat|repeat(expr, N) repeats a string N times| -|lpad|lpad(expr, length, chars) returns a string of `length` from `expr` left-padded with `chars`. If `length` is shorter than the length of `expr`, the result is `expr` which is truncated to `length`. If either `expr` or `chars` are null, the result will be null.| -|rpad|rpad(expr, length, chars) returns a string of `length` from `expr` right-padded with `chars`. If `length` is shorter than the length of `expr`, the result is `expr` which is truncated to `length`. If either `expr` or `chars` are null, the result will be null.| +|lpad|lpad(expr, length, chars) returns a string of `length` from `expr` left-padded with `chars`. If `length` is shorter than the length of `expr`, the result is `expr` which is truncated to `length`. The result will be null if either `expr` or `chars` is null. If `chars` is an empty string, no padding is added, however `expr` may be trimmed if necessary.| +|rpad|rpad(expr, length, chars) returns a string of `length` from `expr` right-padded with `chars`. If `length` is shorter than the length of `expr`, the result is `expr` which is truncated to `length`. The result will be null if either `expr` or `chars` is null. If `chars` is an empty string, no padding is added, however `expr` may be trimmed if necessary.| ## Time functions diff --git a/docs/querying/sql.md b/docs/querying/sql.md index dff5c7176e1f..1f6e7e3ab716 100644 --- a/docs/querying/sql.md +++ b/docs/querying/sql.md @@ -337,8 +337,8 @@ String functions accept strings, and return a type appropriate to the function. |`UPPER(expr)`|Returns expr in all uppercase.| |`REVERSE(expr)`|Reverses expr.| |`REPEAT(expr, [N])`|Repeats expr N times| -|`LPAD(expr, length[, chars])`|Returns a string of "length" from "expr" left-padded with "chars". If "length" is shorter than the length of "expr", the result is "expr" which is truncated to "length". If either "expr" or "chars" are null, the result will be null.| -|`RPAD(expr, length[, chars])`|Returns a string of "length" from "expr" right-padded with "chars". If "length" is shorter than the length of "expr", the result is "expr" which is truncated to "length". If either "expr" or "chars" are null, the result will be null.| +|`LPAD(expr, length[, chars])`|Returns a string of `length` from `expr` left-padded with `chars`. If `length` is shorter than the length of `expr`, the result is `expr` which is truncated to `length`. The result will be null if either `expr` or `chars` is null. If `chars` is an empty string, no padding is added, however `expr` may be trimmed if necessary.| +|`RPAD(expr, length[, chars])`|Returns a string of `length` from `expr` right-padded with `chars`. If `length` is shorter than the length of `expr`, the result is `expr` which is truncated to `length`. The result will be null if either `expr` or `chars` is null. If `chars` is an empty string, no padding is added, however `expr` may be trimmed if necessary.| ### Time functions