From 9898f9104ff6edbdc521e83f657e6f8a6127106c Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 7 Aug 2017 17:56:56 -0700 Subject: [PATCH 1/6] SQL: Full TRIM support. - Support trimming arbitrary characters - Support BOTH, LEADING, and TRAILING --- .../java/io/druid/math/expr/Function.java | 20 -- .../java/io/druid/math/expr/FunctionTest.java | 6 - docs/content/misc/math-expr.md | 10 +- .../druid/query/expression/TrimExprMacro.java | 276 ++++++++++++++++++ .../druid/query/expression/ExprMacroTest.java | 37 +++ .../query/expression/TestExprMacroTable.java | 5 +- .../java/io/druid/guice/ExpressionModule.java | 4 + .../sql/calcite/expression/Expressions.java | 1 - .../expression/TrimOperatorConversion.java | 89 ++++++ .../java/io/druid/sql/guice/SqlModule.java | 2 + .../druid/sql/calcite/CalciteQueryTest.java | 36 ++- .../calcite/expression/ExpressionsTest.java | 41 +++ 12 files changed, 494 insertions(+), 33 deletions(-) create mode 100644 processing/src/main/java/io/druid/query/expression/TrimExprMacro.java create mode 100644 sql/src/main/java/io/druid/sql/calcite/expression/TrimOperatorConversion.java diff --git a/common/src/main/java/io/druid/math/expr/Function.java b/common/src/main/java/io/druid/math/expr/Function.java index b3e6ac87e047..e1510a9647f4 100644 --- a/common/src/main/java/io/druid/math/expr/Function.java +++ b/common/src/main/java/io/druid/math/expr/Function.java @@ -990,26 +990,6 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } - class TrimFunc implements Function - { - @Override - public String name() - { - return "trim"; - } - - @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) - { - if (args.size() != 1) { - throw new IAE("Function[%s] needs 1 argument", name()); - } - - final String arg = args.get(0).eval(bindings).asString(); - return ExprEval.of(Strings.nullToEmpty(arg).trim()); - } - } - class LowerFunc implements Function { @Override diff --git a/common/src/test/java/io/druid/math/expr/FunctionTest.java b/common/src/test/java/io/druid/math/expr/FunctionTest.java index 0f731d6f7e50..76f590ad875e 100644 --- a/common/src/test/java/io/druid/math/expr/FunctionTest.java +++ b/common/src/test/java/io/druid/math/expr/FunctionTest.java @@ -85,12 +85,6 @@ public void testStrlen() assertExpr("strlen(nonexistent)", 0L); } - @Test - public void testTrim() - { - assertExpr("trim(concat(' ',x,' '))", "foo"); - } - @Test public void testLower() { diff --git a/docs/content/misc/math-expr.md b/docs/content/misc/math-expr.md index fb40fcf1a1ca..0ee6d78300b6 100644 --- a/docs/content/misc/math-expr.md +++ b/docs/content/misc/math-expr.md @@ -42,10 +42,12 @@ Also, the following built-in functions are supported. |regexp_extract|regexp_extract(expr, pattern[, index]) applies a regular expression pattern and extracts a capture group index, or null if there is no match. If index is unspecified or zero, returns the substring that matched the pattern.| |replace|replace(expr, pattern, replacement) replaces pattern with replacement| |substring|substring(expr, index, length) behaves like java.lang.String's substring| -|strlen|returns length of a string in UTF-16 code units| -|trim|remove leading and trailing whitespace from a string| -|lower|convert a string to lowercase| -|upper|convert a string to uppercase| +|strlen|strlen(expr) returns length of a string in UTF-16 code units| +|trim|trim(expr[, chars]) remove leading and trailing characters from `expr` if they are present in `chars`. `chars` defaults to ' ' (space) if not provided.| +|ltrim|ltrim(expr[, chars]) remove leading characters from `expr` if they are present in `chars`. `chars` defaults to ' ' (space) if not provided.| +|rtrim|rtrim(expr[, chars]) remove trailing characters from `expr` if they are present in `chars`. `chars` defaults to ' ' (space) if not provided.| +|lower|lower(expr) converts a string to lowercase| +|upper|upper(expr) converts a string to uppercase| ## Time functions diff --git a/processing/src/main/java/io/druid/query/expression/TrimExprMacro.java b/processing/src/main/java/io/druid/query/expression/TrimExprMacro.java new file mode 100644 index 000000000000..49190f9821da --- /dev/null +++ b/processing/src/main/java/io/druid/query/expression/TrimExprMacro.java @@ -0,0 +1,276 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.expression; + +import io.druid.java.util.common.IAE; +import io.druid.math.expr.Expr; +import io.druid.math.expr.ExprEval; +import io.druid.math.expr.ExprMacroTable; + +import javax.annotation.Nonnull; +import java.util.List; + +public abstract class TrimExprMacro implements ExprMacroTable.ExprMacro +{ + private static final char[] EMPTY_CHARS = new char[0]; + private static final char[] DEFAULT_CHARS = new char[]{' '}; + + enum TrimMode + { + BOTH(true, true), + LEFT(true, false), + RIGHT(false, true); + + private final boolean left; + private final boolean right; + + TrimMode(final boolean left, final boolean right) + { + this.left = left; + this.right = right; + } + + public boolean isLeft() + { + return left; + } + + public boolean isRight() + { + return right; + } + } + + private final TrimMode mode; + private final String name; + + public TrimExprMacro(final String name, final TrimMode mode) + { + this.name = name; + this.mode = mode; + } + + @Override + public String name() + { + return name; + } + + @Override + public Expr apply(final List args) + { + if (args.size() < 1 || args.size() > 2) { + throw new IAE("Function[%s] must have 1 or 2 arguments", name()); + } + + if (args.size() == 1) { + return new TrimStaticCharsExpr(mode, args.get(0), DEFAULT_CHARS); + } else { + final Expr charsArg = args.get(1); + if (charsArg.isLiteral()) { + final String charsString = charsArg.eval(ExprUtils.nilBindings()).asString(); + final char[] chars = charsString == null ? EMPTY_CHARS : charsString.toCharArray(); + return new TrimStaticCharsExpr(mode, args.get(0), chars); + } else { + return new TrimDynamicCharsExpr(mode, args.get(0), args.get(1)); + } + } + } + + private static class TrimStaticCharsExpr implements Expr + { + private final TrimMode mode; + private final Expr stringExpr; + private final char[] chars; + + public TrimStaticCharsExpr(final TrimMode mode, final Expr stringExpr, final char[] chars) + { + this.mode = mode; + this.stringExpr = stringExpr; + this.chars = chars; + } + + @Nonnull + @Override + public ExprEval eval(final ObjectBinding bindings) + { + final ExprEval stringEval = stringExpr.eval(bindings); + + if (chars.length == 0 || stringEval.isNull()) { + return stringEval; + } + + final String s = stringEval.asString(); + + int start = 0; + int end = s.length(); + + if (mode.isLeft()) { + while (start < s.length()) { + if (arrayContains(chars, s.charAt(start))) { + start++; + } else { + break; + } + } + } + + if (mode.isRight()) { + while (end > start) { + if (arrayContains(chars, s.charAt(end - 1))) { + end--; + } else { + break; + } + } + } + + if (start == 0 && end == s.length()) { + return stringEval; + } else { + return ExprEval.of(s.substring(start, end)); + } + } + + @Override + public void visit(final Visitor visitor) + { + stringExpr.visit(visitor); + visitor.visit(this); + } + } + + private static class TrimDynamicCharsExpr implements Expr + { + private final TrimMode mode; + private final Expr stringExpr; + private final Expr charsExpr; + + public TrimDynamicCharsExpr(final TrimMode mode, final Expr stringExpr, final Expr charsExpr) + { + this.mode = mode; + this.stringExpr = stringExpr; + this.charsExpr = charsExpr; + } + + @Nonnull + @Override + public ExprEval eval(final ObjectBinding bindings) + { + final ExprEval stringEval = stringExpr.eval(bindings); + + if (stringEval.isNull()) { + return stringEval; + } + + final ExprEval charsEval = charsExpr.eval(bindings); + + if (charsEval.isNull()) { + return stringEval; + } + + final String s = stringEval.asString(); + final String chars = charsEval.asString(); + + int start = 0; + int end = s.length(); + + if (mode.isLeft()) { + while (start < s.length()) { + if (stringContains(chars, s.charAt(start))) { + start++; + } else { + break; + } + } + } + + if (mode.isRight()) { + while (end > start) { + if (stringContains(chars, s.charAt(end - 1))) { + end--; + } else { + break; + } + } + } + + if (start == 0 && end == s.length()) { + return stringEval; + } else { + return ExprEval.of(s.substring(start, end)); + } + } + + @Override + public void visit(final Visitor visitor) + { + stringExpr.visit(visitor); + charsExpr.visit(visitor); + visitor.visit(this); + } + } + + private static boolean arrayContains(char[] array, char c) + { + for (int i = 0; i < array.length; i++) { + if (array[i] == c) { + return true; + } + } + + return false; + } + + private static boolean stringContains(String string, char c) + { + for (int i = 0; i < string.length(); i++) { + if (string.charAt(i) == c) { + return true; + } + } + + return false; + } + + public static class BothTrimExprMacro extends TrimExprMacro + { + public BothTrimExprMacro() + { + super("trim", TrimMode.BOTH); + } + } + + public static class LeftTrimExprMacro extends TrimExprMacro + { + public LeftTrimExprMacro() + { + super("ltrim", TrimMode.LEFT); + } + } + + public static class RightTrimExprMacro extends TrimExprMacro + { + public RightTrimExprMacro() + { + super("rtrim", TrimMode.RIGHT); + } + } +} diff --git a/processing/src/test/java/io/druid/query/expression/ExprMacroTest.java b/processing/src/test/java/io/druid/query/expression/ExprMacroTest.java index fb8119842987..10b42157c95e 100644 --- a/processing/src/test/java/io/druid/query/expression/ExprMacroTest.java +++ b/processing/src/test/java/io/druid/query/expression/ExprMacroTest.java @@ -39,6 +39,7 @@ public class ExprMacroTest .put("y", 2) .put("z", 3.1) .put("CityOfAngels", "America/Los_Angeles") + .put("spacey", " hey there ") .build() ); @@ -135,6 +136,42 @@ public void testTimestampFormat() assertExpr("timestamp_format(t,'yyyy-MM-dd HH:mm:ss','America/Los_Angeles')", "2000-02-02 20:05:06"); } + @Test + public void testTrim() + { + assertExpr("trim('')", null); + assertExpr("trim(concat(' ',x,' '))", "foo"); + assertExpr("trim(spacey)", "hey there"); + assertExpr("trim(spacey, '')", " hey there "); + assertExpr("trim(spacey, 'he ')", "y ther"); + assertExpr("trim(spacey, spacey)", null); + assertExpr("trim(spacey, substring(spacey, 0, 4))", "y ther"); + } + + @Test + public void testLTrim() + { + assertExpr("ltrim('')", null); + assertExpr("ltrim(concat(' ',x,' '))", "foo "); + assertExpr("ltrim(spacey)", "hey there "); + assertExpr("ltrim(spacey, '')", " hey there "); + assertExpr("ltrim(spacey, 'he ')", "y there "); + assertExpr("ltrim(spacey, spacey)", null); + assertExpr("ltrim(spacey, substring(spacey, 0, 4))", "y there "); + } + + @Test + public void testRTrim() + { + assertExpr("rtrim('')", null); + assertExpr("rtrim(concat(' ',x,' '))", " foo"); + assertExpr("rtrim(spacey)", " hey there"); + assertExpr("rtrim(spacey, '')", " hey there "); + assertExpr("rtrim(spacey, 'he ')", " hey ther"); + assertExpr("rtrim(spacey, spacey)", null); + assertExpr("rtrim(spacey, substring(spacey, 0, 4))", " hey ther"); + } + private void assertExpr(final String expression, final Object expectedResult) { final Expr expr = Parser.parse(expression, TestExprMacroTable.INSTANCE); diff --git a/processing/src/test/java/io/druid/query/expression/TestExprMacroTable.java b/processing/src/test/java/io/druid/query/expression/TestExprMacroTable.java index 4321a03f1193..b1319cbe0146 100644 --- a/processing/src/test/java/io/druid/query/expression/TestExprMacroTable.java +++ b/processing/src/test/java/io/druid/query/expression/TestExprMacroTable.java @@ -48,7 +48,10 @@ private TestExprMacroTable() new TimestampFloorExprMacro(), new TimestampFormatExprMacro(), new TimestampParseExprMacro(), - new TimestampShiftExprMacro() + new TimestampShiftExprMacro(), + new TrimExprMacro.BothTrimExprMacro(), + new TrimExprMacro.LeftTrimExprMacro(), + new TrimExprMacro.RightTrimExprMacro() ) ); } diff --git a/server/src/main/java/io/druid/guice/ExpressionModule.java b/server/src/main/java/io/druid/guice/ExpressionModule.java index d9c1da61bad3..7b5cbce5f56f 100644 --- a/server/src/main/java/io/druid/guice/ExpressionModule.java +++ b/server/src/main/java/io/druid/guice/ExpressionModule.java @@ -34,6 +34,7 @@ import io.druid.query.expression.TimestampFormatExprMacro; import io.druid.query.expression.TimestampParseExprMacro; import io.druid.query.expression.TimestampShiftExprMacro; +import io.druid.query.expression.TrimExprMacro; import java.util.List; @@ -51,6 +52,9 @@ public class ExpressionModule implements DruidModule .add(TimestampFormatExprMacro.class) .add(TimestampParseExprMacro.class) .add(TimestampShiftExprMacro.class) + .add(TrimExprMacro.BothTrimExprMacro.class) + .add(TrimExprMacro.LeftTrimExprMacro.class) + .add(TrimExprMacro.RightTrimExprMacro.class) .build(); @Override diff --git a/sql/src/main/java/io/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/io/druid/sql/calcite/expression/Expressions.java index 6b8f9ed7561f..52d2176b0f5a 100644 --- a/sql/src/main/java/io/druid/sql/calcite/expression/Expressions.java +++ b/sql/src/main/java/io/druid/sql/calcite/expression/Expressions.java @@ -89,7 +89,6 @@ public class Expressions .put(SqlStdOperatorTable.POWER, "pow") .put(SqlStdOperatorTable.REPLACE, "replace") .put(SqlStdOperatorTable.SQRT, "sqrt") - .put(SqlStdOperatorTable.TRIM, "trim") .put(SqlStdOperatorTable.UPPER, "upper") .build(); diff --git a/sql/src/main/java/io/druid/sql/calcite/expression/TrimOperatorConversion.java b/sql/src/main/java/io/druid/sql/calcite/expression/TrimOperatorConversion.java new file mode 100644 index 000000000000..5e4e20b2c300 --- /dev/null +++ b/sql/src/main/java/io/druid/sql/calcite/expression/TrimOperatorConversion.java @@ -0,0 +1,89 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.sql.calcite.expression; + +import com.google.common.collect.ImmutableList; +import io.druid.sql.calcite.planner.PlannerContext; +import io.druid.sql.calcite.table.RowSignature; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.fun.SqlTrimFunction; + +public class TrimOperatorConversion implements SqlOperatorConversion +{ + @Override + public SqlOperator calciteOperator() + { + return SqlStdOperatorTable.TRIM; + } + + @Override + public DruidExpression toDruidExpression( + final PlannerContext plannerContext, + final RowSignature rowSignature, + final RexNode rexNode + ) + { + // TRIM(