diff --git a/docs/misc/math-expr.md b/docs/misc/math-expr.md index e4ad694738f8..249eb1ef209c 100644 --- a/docs/misc/math-expr.md +++ b/docs/misc/math-expr.md @@ -76,7 +76,8 @@ The following built-in functions are available. |like|like(expr, pattern[, escape]) is equivalent to SQL `expr LIKE pattern`| |lookup|lookup(expr, lookup-name) looks up expr in a registered [query-time lookup](../querying/lookups.md)| |parse_long|parse_long(string[, radix]) parses a string as a long with the given radix, or 10 (decimal) if a radix is not provided.| -|regexp_extract|regexp_extract(expr, pattern[, index]) applies a regular expression pattern and extracts a capture group index, or null if there is no match. If index is unspecified or zero, returns the substring that matched the pattern.| +|regexp_extract|regexp_extract(expr, pattern[, index]) applies a regular expression pattern and extracts a capture group index, or null if there is no match. If index is unspecified or zero, returns the substring that matched the pattern. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern.| +|regexp_like|regexp_like(expr, pattern) returns whether `expr` matches regular expression `pattern`. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. | |replace|replace(expr, pattern, replacement) replaces pattern with replacement| |substring|substring(expr, index, length) behaves like java.lang.String's substring| |right|right(expr, length) returns the rightmost length characters from a string| diff --git a/docs/querying/sql.md b/docs/querying/sql.md index 9a49009a9bcd..71faf07245d4 100644 --- a/docs/querying/sql.md +++ b/docs/querying/sql.md @@ -322,7 +322,8 @@ String functions accept strings, and return a type appropriate to the function. |`LOWER(expr)`|Returns expr in all lowercase.| |`PARSE_LONG(string[, radix])`|Parses a string into a long (BIGINT) with the given radix, or 10 (decimal) if a radix is not provided.| |`POSITION(needle IN haystack [FROM fromIndex])`|Returns the index of needle within haystack, with indexes starting from 1. The search will begin at fromIndex, or 1 if fromIndex is not specified. If the needle is not found, returns 0.| -|`REGEXP_EXTRACT(expr, pattern, [index])`|Apply regular expression pattern and extract a capture group, or null if there is no match. If index is unspecified or zero, returns the substring that matched the pattern.| +|`REGEXP_EXTRACT(expr, pattern, [index])`|Apply regular expression `pattern` to `expr` and extract a capture group, or `NULL` if there is no match. If index is unspecified or zero, returns the first substring that matched the pattern. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. Note: when `druid.generic.useDefaultValueForNull = true`, it is not possible to differentiate an empty-string match from a non-match (both will return `NULL`).| +|`REGEXP_LIKE(expr, pattern)`|Returns whether `expr` matches regular expression `pattern`. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. Similar to [`LIKE`](#comparison-operators), but uses regexps instead of LIKE patterns. Especially useful in WHERE clauses.| |`REPLACE(expr, pattern, replacement)`|Replaces pattern with replacement in expr, and returns the result.| |`STRPOS(haystack, needle)`|Returns the index of needle within haystack, with indexes starting from 1. If the needle is not found, returns 0.| |`SUBSTRING(expr, index, [length])`|Returns a substring of expr starting at index, with a max length, both measured in UTF-16 code units.| @@ -330,9 +331,9 @@ String functions accept strings, and return a type appropriate to the function. |`LEFT(expr, [length])`|Returns the leftmost length characters from expr.| |`SUBSTR(expr, index, [length])`|Synonym for SUBSTRING.| |TRIM([BOTH | LEADING | TRAILING] [ FROM] expr)|Returns expr with characters removed from the leading, trailing, or both ends of "expr" if they are in "chars". If "chars" is not provided, it defaults to " " (a space). If the directional argument is not provided, it defaults to "BOTH".| -|`BTRIM(expr[, chars])`|Alternate form of `TRIM(BOTH FROM `).| -|`LTRIM(expr[, chars])`|Alternate form of `TRIM(LEADING FROM `).| -|`RTRIM(expr[, chars])`|Alternate form of `TRIM(TRAILING FROM `).| +|`BTRIM(expr[, chars])`|Alternate form of `TRIM(BOTH FROM )`.| +|`LTRIM(expr[, chars])`|Alternate form of `TRIM(LEADING FROM )`.| +|`RTRIM(expr[, chars])`|Alternate form of `TRIM(TRAILING FROM )`.| |`UPPER(expr)`|Returns expr in all uppercase.| |`REVERSE(expr)`|Reverses expr.| |`REPEAT(expr, [N])`|Repeats expr N times| diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchEstimateWithErrorBoundsOperatorConversion.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchEstimateWithErrorBoundsOperatorConversion.java index a0435430c02c..c645ca724a57 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchEstimateWithErrorBoundsOperatorConversion.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchEstimateWithErrorBoundsOperatorConversion.java @@ -47,7 +47,7 @@ public class HllSketchEstimateWithErrorBoundsOperatorConversion extends DirectOp .operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME)) .operandTypes(SqlTypeFamily.ANY, SqlTypeFamily.INTEGER) .requiredOperands(1) - .returnType(SqlTypeName.OTHER) + .returnTypeNonNull(SqlTypeName.OTHER) .build(); diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchToStringOperatorConversion.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchToStringOperatorConversion.java index 189b3b7c916e..fe0c56b56adc 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchToStringOperatorConversion.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchToStringOperatorConversion.java @@ -44,7 +44,7 @@ public class HllSketchToStringOperatorConversion extends DirectOperatorConversio private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME)) .operandTypes(SqlTypeFamily.ANY) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); public HllSketchToStringOperatorConversion() diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchQuantileOperatorConversion.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchQuantileOperatorConversion.java index c571be13e403..2387fe8cccb9 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchQuantileOperatorConversion.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchQuantileOperatorConversion.java @@ -34,7 +34,7 @@ public class DoublesSketchQuantileOperatorConversion extends DoublesSketchSingle private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME)) .operandTypes(SqlTypeFamily.ANY, SqlTypeFamily.NUMERIC) - .returnType(SqlTypeName.DOUBLE) + .returnTypeNonNull(SqlTypeName.DOUBLE) .build(); diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchRankOperatorConversion.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchRankOperatorConversion.java index ab54cb1441e1..327f757a7182 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchRankOperatorConversion.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchRankOperatorConversion.java @@ -34,7 +34,7 @@ public class DoublesSketchRankOperatorConversion extends DoublesSketchSingleArgB private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME)) .operandTypes(SqlTypeFamily.ANY, SqlTypeFamily.NUMERIC) - .returnType(SqlTypeName.DOUBLE) + .returnTypeNonNull(SqlTypeName.DOUBLE) .build(); public DoublesSketchRankOperatorConversion() diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSummaryOperatorConversion.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSummaryOperatorConversion.java index 6465ef71f45e..4dd01fd85402 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSummaryOperatorConversion.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSummaryOperatorConversion.java @@ -44,7 +44,7 @@ public class DoublesSketchSummaryOperatorConversion extends DirectOperatorConver private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME)) .operandTypes(SqlTypeFamily.ANY) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); public DoublesSketchSummaryOperatorConversion() diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchEstimateWithErrorBoundsOperatorConversion.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchEstimateWithErrorBoundsOperatorConversion.java index 7ddd74ee80c7..c54f2c59b454 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchEstimateWithErrorBoundsOperatorConversion.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchEstimateWithErrorBoundsOperatorConversion.java @@ -46,7 +46,7 @@ public class ThetaSketchEstimateWithErrorBoundsOperatorConversion extends Direct private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME)) .operandTypes(SqlTypeFamily.ANY, SqlTypeFamily.INTEGER) - .returnType(SqlTypeName.OTHER) + .returnTypeNonNull(SqlTypeName.OTHER) .build(); diff --git a/processing/src/main/java/org/apache/druid/query/expression/ExprUtils.java b/processing/src/main/java/org/apache/druid/query/expression/ExprUtils.java index 75fe2740057c..23ef6847ec06 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/ExprUtils.java +++ b/processing/src/main/java/org/apache/druid/query/expression/ExprUtils.java @@ -98,4 +98,31 @@ static void checkLiteralArgument(String functionName, Expr arg, String argName) { Preconditions.checkArgument(arg.isLiteral(), createErrMsg(functionName, argName + " arg must be a literal")); } + + /** + * True if Expr is a string literal. + * + * In non-SQL-compliant null handling mode, this method will return true for null literals as well (because they are + * treated equivalently to empty strings, and we cannot tell the difference.) + * + * In SQL-compliant null handling mode, this method will return true for actual strings only, not nulls. + */ + static boolean isStringLiteral(final Expr expr) + { + return (expr.isLiteral() && expr.getLiteralValue() instanceof String) + || (NullHandling.replaceWithDefault() && isNullLiteral(expr)); + } + + /** + * True if Expr is a null literal. + * + * In non-SQL-compliant null handling mode, this method will return true for either a null literal or an empty string + * literal (they are treated equivalently and we cannot tell the difference). + * + * In SQL-compliant null handling mode, this method will only return true for an actual null literal. + */ + static boolean isNullLiteral(final Expr expr) + { + return expr.isLiteral() && expr.getLiteralValue() == null; + } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java index 7428ede84647..9bef704a663e 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java @@ -52,12 +52,18 @@ public Expr apply(final List args) final Expr patternExpr = args.get(1); final Expr indexExpr = args.size() > 2 ? args.get(2) : null; - if (!patternExpr.isLiteral() || (indexExpr != null && !indexExpr.isLiteral())) { - throw new IAE("Function[%s] pattern and index must be literals", name()); + if (!ExprUtils.isStringLiteral(patternExpr)) { + throw new IAE("Function[%s] pattern must be a string literal", name()); + } + + if (indexExpr != null && (!indexExpr.isLiteral() || !(indexExpr.getLiteralValue() instanceof Number))) { + throw new IAE("Function[%s] index must be a numeric literal", name()); } // Precompile the pattern. - final Pattern pattern = Pattern.compile(String.valueOf(patternExpr.getLiteralValue())); + final Pattern pattern = Pattern.compile( + StringUtils.nullToEmptyNonDruidDataString((String) patternExpr.getLiteralValue()) + ); final int index = indexExpr == null ? 0 : ((Number) indexExpr.getLiteralValue()).intValue(); @@ -72,10 +78,16 @@ private RegexpExtractExpr(Expr arg) @Override public ExprEval eval(final ObjectBinding bindings) { - String s = arg.eval(bindings).asString(); - final Matcher matcher = pattern.matcher(NullHandling.nullToEmptyIfNeeded(s)); - final String retVal = matcher.find() ? matcher.group(index) : null; - return ExprEval.of(NullHandling.emptyToNullIfNeeded(retVal)); + final String s = NullHandling.nullToEmptyIfNeeded(arg.eval(bindings).asString()); + + if (s == null) { + // True nulls do not match anything. Note: this branch only executes in SQL-compatible null handling mode. + return ExprEval.of(null); + } else { + final Matcher matcher = pattern.matcher(NullHandling.nullToEmptyIfNeeded(s)); + final String retVal = matcher.find() ? matcher.group(index) : null; + return ExprEval.of(retVal); + } } @Override diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java new file mode 100644 index 000000000000..83735e863494 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.math.expr.ExprType; + +import javax.annotation.Nonnull; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class RegexpLikeExprMacro implements ExprMacroTable.ExprMacro +{ + private static final String FN_NAME = "regexp_like"; + + @Override + public String name() + { + return FN_NAME; + } + + @Override + public Expr apply(final List args) + { + if (args.size() != 2) { + throw new IAE("Function[%s] must have 2 arguments", name()); + } + + final Expr arg = args.get(0); + final Expr patternExpr = args.get(1); + + if (!ExprUtils.isStringLiteral(patternExpr)) { + throw new IAE("Function[%s] pattern must be a string literal", name()); + } + + // Precompile the pattern. + final Pattern pattern = Pattern.compile( + StringUtils.nullToEmptyNonDruidDataString((String) patternExpr.getLiteralValue()) + ); + + class RegexpLikeExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr + { + private RegexpLikeExpr(Expr arg) + { + super(FN_NAME, arg); + } + + @Nonnull + @Override + public ExprEval eval(final ObjectBinding bindings) + { + final String s = NullHandling.nullToEmptyIfNeeded(arg.eval(bindings).asString()); + + if (s == null) { + // True nulls do not match anything. Note: this branch only executes in SQL-compatible null handling mode. + return ExprEval.of(false, ExprType.LONG); + } else { + final Matcher matcher = pattern.matcher(s); + return ExprEval.of(matcher.find(), ExprType.LONG); + } + } + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new RegexpLikeExpr(newArg)); + } + + @Override + public String stringify() + { + return StringUtils.format("%s(%s, %s)", FN_NAME, arg.stringify(), patternExpr.stringify()); + } + } + return new RegexpLikeExpr(arg); + } +} diff --git a/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressMatchExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressMatchExprMacroTest.java index 0b63a720b840..aa5bd917bf13 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressMatchExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressMatchExprMacroTest.java @@ -23,7 +23,6 @@ import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExprMacroTable; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import java.util.Arrays; @@ -42,12 +41,9 @@ public class IPv4AddressMatchExprMacroTest extends MacroTestBase private static final Expr SUBNET_10 = ExprEval.of("10.0.0.0/8").toExpr(); private static final Expr NOT_LITERAL = new NotLiteralExpr(null); - private IPv4AddressMatchExprMacro target; - - @Before - public void setUp() + public IPv4AddressMatchExprMacroTest() { - target = new IPv4AddressMatchExprMacro(); + super(new IPv4AddressMatchExprMacro()); } @Test @@ -55,7 +51,7 @@ public void testTooFewArgs() { expectException(IllegalArgumentException.class, "must have 2 arguments"); - target.apply(Collections.emptyList()); + apply(Collections.emptyList()); } @Test @@ -63,7 +59,7 @@ public void testTooManyArgs() { expectException(IllegalArgumentException.class, "must have 2 arguments"); - target.apply(Arrays.asList(IPV4, SUBNET_192_168, NOT_LITERAL)); + apply(Arrays.asList(IPV4, SUBNET_192_168, NOT_LITERAL)); } @Test @@ -71,7 +67,7 @@ public void testSubnetArgNotLiteral() { expectException(IllegalArgumentException.class, "subnet arg must be a literal"); - target.apply(Arrays.asList(IPV4, NOT_LITERAL)); + apply(Arrays.asList(IPV4, NOT_LITERAL)); } @Test @@ -80,7 +76,7 @@ public void testSubnetArgInvalid() expectException(IllegalArgumentException.class, "subnet arg has an invalid format"); Expr invalidSubnet = ExprEval.of("192.168.0.1/invalid").toExpr(); - target.apply(Arrays.asList(IPV4, invalidSubnet)); + apply(Arrays.asList(IPV4, invalidSubnet)); } @Test @@ -182,7 +178,7 @@ public void testInclusive() private boolean eval(Expr... args) { - Expr expr = target.apply(Arrays.asList(args)); + Expr expr = apply(Arrays.asList(args)); ExprEval eval = expr.eval(ExprUtils.nilBindings()); return eval.asBoolean(); } diff --git a/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressParseExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressParseExprMacroTest.java index 2bf392141d51..0d70b2cce886 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressParseExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressParseExprMacroTest.java @@ -23,7 +23,6 @@ import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import java.util.Arrays; @@ -35,12 +34,9 @@ public class IPv4AddressParseExprMacroTest extends MacroTestBase private static final long EXPECTED = 3232235521L; private static final Long NULL = NullHandling.replaceWithDefault() ? NullHandling.ZERO_LONG : null; - private IPv4AddressParseExprMacro target; - - @Before - public void setUp() + public IPv4AddressParseExprMacroTest() { - target = new IPv4AddressParseExprMacro(); + super(new IPv4AddressParseExprMacro()); } @Test @@ -48,7 +44,7 @@ public void testTooFewArgs() { expectException(IllegalArgumentException.class, "must have 1 argument"); - target.apply(Collections.emptyList()); + apply(Collections.emptyList()); } @Test @@ -56,7 +52,7 @@ public void testTooManyArgs() { expectException(IllegalArgumentException.class, "must have 1 argument"); - target.apply(Arrays.asList(VALID, VALID)); + apply(Arrays.asList(VALID, VALID)); } @Test @@ -154,7 +150,7 @@ public void testValidLongArg() private Object eval(Expr arg) { - Expr expr = target.apply(Collections.singletonList(arg)); + Expr expr = apply(Collections.singletonList(arg)); ExprEval eval = expr.eval(ExprUtils.nilBindings()); return eval.value(); } diff --git a/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressStringifyExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressStringifyExprMacroTest.java index 602d00cfcc4e..1b4235b30686 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressStringifyExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/IPv4AddressStringifyExprMacroTest.java @@ -23,7 +23,6 @@ import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import java.util.Arrays; @@ -35,12 +34,9 @@ public class IPv4AddressStringifyExprMacroTest extends MacroTestBase private static final String EXPECTED = "192.168.0.1"; private static final String NULL = NullHandling.replaceWithDefault() ? "0.0.0.0" : null; - private IPv4AddressStringifyExprMacro target; - - @Before - public void setUp() + public IPv4AddressStringifyExprMacroTest() { - target = new IPv4AddressStringifyExprMacro(); + super(new IPv4AddressStringifyExprMacro()); } @Test @@ -48,7 +44,7 @@ public void testTooFewArgs() { expectException(IllegalArgumentException.class, "must have 1 argument"); - target.apply(Collections.emptyList()); + apply(Collections.emptyList()); } @Test @@ -56,7 +52,7 @@ public void testTooManyArgs() { expectException(IllegalArgumentException.class, "must have 1 argument"); - target.apply(Arrays.asList(VALID, VALID)); + apply(Arrays.asList(VALID, VALID)); } @Test @@ -150,7 +146,7 @@ public void testValidStringArgUnsignedInt() private Object eval(Expr arg) { - Expr expr = target.apply(Collections.singletonList(arg)); + Expr expr = apply(Collections.singletonList(arg)); ExprEval eval = expr.eval(ExprUtils.nilBindings()); return eval.value(); } diff --git a/processing/src/test/java/org/apache/druid/query/expression/MacroTestBase.java b/processing/src/test/java/org/apache/druid/query/expression/MacroTestBase.java index 38e607c2b8f4..7cf79073d0c9 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/MacroTestBase.java +++ b/processing/src/test/java/org/apache/druid/query/expression/MacroTestBase.java @@ -19,18 +19,80 @@ package org.apache.druid.query.expression; +import com.google.common.collect.ImmutableSet; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.math.expr.Parser; import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; import org.junit.Rule; import org.junit.rules.ExpectedException; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; + public abstract class MacroTestBase extends InitializedNullHandlingTest { @Rule public ExpectedException expectedException = ExpectedException.none(); - void expectException(Class type, String message) + private final ExprMacroTable.ExprMacro macro; + + protected MacroTestBase(ExprMacroTable.ExprMacro macro) + { + this.macro = macro; + } + + protected void expectException(Class type, String message) { expectedException.expect(type); expectedException.expectMessage(message); } + + protected Expr apply(final List args) + { + return macro.apply(args); + } + + /** + * Evalutes {@code expr} using our macro. + * + * @param expression expression to evalute + * @param bindings bindings for evaluation + * + * @throws AssertionError if {@link ExprMacroTable.ExprMacro#apply} is not called on our macro during parsing + */ + protected ExprEval eval( + final String expression, + final Expr.ObjectBinding bindings + ) + { + // WrappedExprMacro allows us to confirm that our ExprMacro was actually called. + class WrappedExprMacro implements ExprMacroTable.ExprMacro + { + private final AtomicLong calls = new AtomicLong(); + + @Override + public String name() + { + return macro.name(); + } + + @Override + public Expr apply(List args) + { + calls.incrementAndGet(); + return macro.apply(args); + } + } + + final WrappedExprMacro wrappedMacro = new WrappedExprMacro(); + final GuiceExprMacroTable macroTable = new GuiceExprMacroTable(ImmutableSet.of(wrappedMacro)); + final Expr expr = Parser.parse(expression, macroTable); + + Assert.assertTrue("Calls made to macro.apply", wrappedMacro.calls.get() > 0); + + return expr.eval(bindings); + } } diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java new file mode 100644 index 000000000000..2f811d788b4f --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.Parser; +import org.junit.Assert; +import org.junit.Test; + +public class RegexpExtractExprMacroTest extends MacroTestBase +{ + public RegexpExtractExprMacroTest() + { + super(new RegexpExtractExprMacro()); + } + + @Test + public void testErrorZeroArguments() + { + expectException(IllegalArgumentException.class, "Function[regexp_extract] must have 2 to 3 arguments"); + eval("regexp_extract()", Parser.withMap(ImmutableMap.of())); + } + + @Test + public void testErrorFourArguments() + { + expectException(IllegalArgumentException.class, "Function[regexp_extract] must have 2 to 3 arguments"); + eval("regexp_extract('a', 'b', 'c', 'd')", Parser.withMap(ImmutableMap.of())); + } + + @Test + public void testMatch() + { + final ExprEval result = eval("regexp_extract(a, 'f(.o)')", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals("foo", result.value()); + } + + @Test + public void testMatchGroup0() + { + final ExprEval result = eval("regexp_extract(a, 'f(.o)', 0)", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals("foo", result.value()); + } + + @Test + public void testMatchGroup1() + { + final ExprEval result = eval("regexp_extract(a, 'f(.o)', 1)", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals("oo", result.value()); + } + + @Test + public void testMatchGroup2() + { + expectedException.expectMessage("No group 2"); + final ExprEval result = eval("regexp_extract(a, 'f(.o)', 2)", Parser.withMap(ImmutableMap.of("a", "foo"))); + } + + @Test + public void testNoMatch() + { + final ExprEval result = eval("regexp_extract(a, 'f(.x)')", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertNull(result.value()); + } + + @Test + public void testMatchInMiddle() + { + final ExprEval result = eval("regexp_extract(a, '.o$')", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals("oo", result.value()); + } + + @Test + public void testNullPattern() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[regexp_extract] pattern must be a string literal"); + } + + final ExprEval result = eval("regexp_extract(a, null)", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertNull(result.value()); + } + + @Test + public void testEmptyStringPattern() + { + final ExprEval result = eval("regexp_extract(a, '')", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals(NullHandling.emptyToNullIfNeeded(""), result.value()); + } + + @Test + public void testNumericPattern() + { + expectException(IllegalArgumentException.class, "Function[regexp_extract] pattern must be a string literal"); + eval("regexp_extract(a, 1)", Parser.withMap(ImmutableMap.of("a", "foo"))); + } + + @Test + public void testNonLiteralPattern() + { + expectException(IllegalArgumentException.class, "Function[regexp_extract] pattern must be a string literal"); + eval("regexp_extract(a, a)", Parser.withMap(ImmutableMap.of("a", "foo"))); + } + + @Test + public void testNullPatternOnNull() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[regexp_extract] pattern must be a string literal"); + } + + final ExprEval result = eval("regexp_extract(a, null)", Parser.withSuppliers(ImmutableMap.of("a", () -> null))); + Assert.assertNull(result.value()); + } + + @Test + public void testEmptyStringPatternOnNull() + { + final ExprEval result = eval("regexp_extract(a, '')", Parser.withSuppliers(ImmutableMap.of("a", () -> null))); + Assert.assertNull(result.value()); + } +} diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java new file mode 100644 index 000000000000..a6bdfb36a03a --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprType; +import org.apache.druid.math.expr.Parser; +import org.junit.Assert; +import org.junit.Test; + +public class RegexpLikeExprMacroTest extends MacroTestBase +{ + public RegexpLikeExprMacroTest() + { + super(new RegexpLikeExprMacro()); + } + + @Test + public void testErrorZeroArguments() + { + expectException(IllegalArgumentException.class, "Function[regexp_like] must have 2 arguments"); + eval("regexp_like()", Parser.withMap(ImmutableMap.of())); + } + + @Test + public void testErrorThreeArguments() + { + expectException(IllegalArgumentException.class, "Function[regexp_like] must have 2 arguments"); + eval("regexp_like('a', 'b', 'c')", Parser.withMap(ImmutableMap.of())); + } + + @Test + public void testMatch() + { + final ExprEval result = eval("regexp_like(a, 'f.o')", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNoMatch() + { + final ExprEval result = eval("regexp_like(a, 'f.x')", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals( + ExprEval.of(false, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNullPattern() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[regexp_like] pattern must be a string literal"); + } + + final ExprEval result = eval("regexp_like(a, null)", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testEmptyStringPattern() + { + final ExprEval result = eval("regexp_like(a, '')", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNullPatternOnEmptyString() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[regexp_like] pattern must be a string literal"); + } + + final ExprEval result = eval("regexp_like(a, null)", Parser.withMap(ImmutableMap.of("a", ""))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testEmptyStringPatternOnEmptyString() + { + final ExprEval result = eval("regexp_like(a, '')", Parser.withMap(ImmutableMap.of("a", ""))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNullPatternOnNull() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[regexp_like] pattern must be a string literal"); + } + + final ExprEval result = eval("regexp_like(a, null)", Parser.withSuppliers(ImmutableMap.of("a", () -> null))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testEmptyStringPatternOnNull() + { + final ExprEval result = eval("regexp_like(a, '')", Parser.withSuppliers(ImmutableMap.of("a", () -> null))); + Assert.assertEquals( + ExprEval.of(NullHandling.replaceWithDefault(), ExprType.LONG).value(), + result.value() + ); + } +} diff --git a/server/src/main/java/org/apache/druid/guice/ExpressionModule.java b/server/src/main/java/org/apache/druid/guice/ExpressionModule.java index f695563d352f..9e451e8f12f9 100644 --- a/server/src/main/java/org/apache/druid/guice/ExpressionModule.java +++ b/server/src/main/java/org/apache/druid/guice/ExpressionModule.java @@ -31,6 +31,7 @@ import org.apache.druid.query.expression.IPv4AddressStringifyExprMacro; import org.apache.druid.query.expression.LikeExprMacro; import org.apache.druid.query.expression.RegexpExtractExprMacro; +import org.apache.druid.query.expression.RegexpLikeExprMacro; import org.apache.druid.query.expression.TimestampCeilExprMacro; import org.apache.druid.query.expression.TimestampExtractExprMacro; import org.apache.druid.query.expression.TimestampFloorExprMacro; @@ -41,8 +42,6 @@ import java.util.List; -/** - */ public class ExpressionModule implements DruidModule { public static final List> EXPR_MACROS = @@ -52,6 +51,7 @@ public class ExpressionModule implements DruidModule .add(IPv4AddressStringifyExprMacro.class) .add(LikeExprMacro.class) .add(RegexpExtractExprMacro.class) + .add(RegexpLikeExprMacro.class) .add(TimestampCeilExprMacro.class) .add(TimestampExtractExprMacro.class) .add(TimestampFloorExprMacro.class) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/OperatorConversions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/OperatorConversions.java index 6b4a4539bee5..23c48df601b1 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/OperatorConversions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/OperatorConversions.java @@ -24,11 +24,13 @@ import com.google.common.collect.Iterables; import it.unimi.dsi.fastutil.ints.IntArraySet; import it.unimi.dsi.fastutil.ints.IntSet; +import it.unimi.dsi.fastutil.ints.IntSets; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.runtime.CalciteException; import org.apache.calcite.sql.SqlCallBinding; import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlFunctionCategory; @@ -222,6 +224,10 @@ public static PostAggregator toPostAggregator( } } + /** + * Returns a builder that helps {@link SqlOperatorConversion} implementations create the {@link SqlFunction} + * objects they need to return from {@link SqlOperatorConversion#calciteOperator()}. + */ public static OperatorBuilder operatorBuilder(final String name) { return new OperatorBuilder(name); @@ -238,6 +244,7 @@ public static class OperatorBuilder private SqlOperandTypeChecker operandTypeChecker; private List operandTypes; private Integer requiredOperands = null; + private int[] literalOperands = null; private SqlOperandTypeInference operandTypeInference; private OperatorBuilder(final String name) @@ -245,64 +252,123 @@ private OperatorBuilder(final String name) this.name = Preconditions.checkNotNull(name, "name"); } - public OperatorBuilder kind(final SqlKind kind) + /** + * Sets the return type of the operator to "typeName", marked as non-nullable. + * + * One of {@link #returnTypeNonNull}, {@link #returnTypeNullable}, or + * {@link #returnTypeInference(SqlReturnTypeInference)} must be used before calling {@link #build()}. These methods + * cannot be mixed; you must call exactly one. + */ + public OperatorBuilder returnTypeNonNull(final SqlTypeName typeName) { - this.kind = kind; - return this; - } + Preconditions.checkState(this.returnTypeInference == null, "Cannot set return type multiple times"); - public OperatorBuilder returnType(final SqlTypeName typeName) - { this.returnTypeInference = ReturnTypes.explicit( factory -> Calcites.createSqlType(factory, typeName) ); return this; } - public OperatorBuilder nullableReturnType(final SqlTypeName typeName) + /** + * Sets the return type of the operator to "typeName", marked as nullable. + * + * One of {@link #returnTypeNonNull}, {@link #returnTypeNullable}, or + * {@link #returnTypeInference(SqlReturnTypeInference)} must be used before calling {@link #build()}. These methods + * cannot be mixed; you must call exactly one. + */ + public OperatorBuilder returnTypeNullable(final SqlTypeName typeName) { + Preconditions.checkState(this.returnTypeInference == null, "Cannot set return type multiple times"); + this.returnTypeInference = ReturnTypes.explicit( factory -> Calcites.createSqlTypeWithNullability(factory, typeName, true) ); return this; } + /** + * Provides customized return type inference logic. + * + * One of {@link #returnTypeNonNull}, {@link #returnTypeNullable}, or + * {@link #returnTypeInference(SqlReturnTypeInference)} must be used before calling {@link #build()}. These methods + * cannot be mixed; you must call exactly one. + */ public OperatorBuilder returnTypeInference(final SqlReturnTypeInference returnTypeInference) { + Preconditions.checkState(this.returnTypeInference == null, "Cannot set return type multiple times"); + this.returnTypeInference = returnTypeInference; return this; } + /** + * Sets the {@link SqlKind} of the operator. + * + * The default, if not provided, is {@link SqlFunctionCategory#USER_DEFINED_FUNCTION}. + */ public OperatorBuilder functionCategory(final SqlFunctionCategory functionCategory) { this.functionCategory = functionCategory; return this; } + /** + * Provides customized operand type checking logic. + * + * One of {@link #operandTypes(SqlTypeFamily...)} or {@link #operandTypeChecker(SqlOperandTypeChecker)} must be used + * before calling {@link #build()}. These methods cannot be mixed; you must call exactly one. + */ public OperatorBuilder operandTypeChecker(final SqlOperandTypeChecker operandTypeChecker) { this.operandTypeChecker = operandTypeChecker; return this; } + /** + * Signifies that a function accepts operands of type family given by {@param operandTypes}. + * + * May be used in conjunction with {@link #requiredOperands(int)} and {@link #literalOperands(int...)} in order + * to further refine operand checking logic. + * + * For deeper control, use {@link #operandTypeChecker(SqlOperandTypeChecker)} instead. + */ public OperatorBuilder operandTypes(final SqlTypeFamily... operandTypes) { this.operandTypes = Arrays.asList(operandTypes); return this; } - public OperatorBuilder operandTypeInference(final SqlOperandTypeInference operandTypeInference) + /** + * Signifies that the first {@code requiredOperands} operands are required, and all later operands are optional. + * + * Required operands are not allowed to be null. Optional operands can either be skipped or explicitly provided as + * literal NULLs. For example, if {@code requiredOperands == 1}, then {@code F(x, NULL)} and {@code F(x)} are both + * accepted, and {@code x} must not be null. + * + * Must be used in conjunction with {@link #operandTypes(SqlTypeFamily...)}; this method is not compatible with + * {@link #operandTypeChecker(SqlOperandTypeChecker)}. + */ + public OperatorBuilder requiredOperands(final int requiredOperands) { - this.operandTypeInference = operandTypeInference; + this.requiredOperands = requiredOperands; return this; } - public OperatorBuilder requiredOperands(final int requiredOperands) + /** + * Signifies that the operands at positions given by {@code literalOperands} must be literals. + * + * Must be used in conjunction with {@link #operandTypes(SqlTypeFamily...)}; this method is not compatible with + * {@link #operandTypeChecker(SqlOperandTypeChecker)}. + */ + public OperatorBuilder literalOperands(final int... literalOperands) { - this.requiredOperands = requiredOperands; + this.literalOperands = literalOperands; return this; } + /** + * Creates a {@link SqlFunction} from this builder. + */ public SqlFunction build() { // Create "nullableOperands" set including all optional arguments. @@ -317,13 +383,14 @@ public SqlFunction build() theOperandTypeChecker = new DefaultOperandTypeChecker( operandTypes, requiredOperands == null ? operandTypes.size() : requiredOperands, - nullableOperands + nullableOperands, + literalOperands ); - } else if (operandTypes == null && requiredOperands == null) { + } else if (operandTypes == null && requiredOperands == null && literalOperands == null) { theOperandTypeChecker = operandTypeChecker; } else { throw new ISE( - "Cannot have both 'operandTypeChecker' and 'operandTypes' / 'requiredOperands'" + "Cannot have both 'operandTypeChecker' and 'operandTypes' / 'requiredOperands' / 'literalOperands'" ); } @@ -430,36 +497,56 @@ public void inferOperandTypes( /** * Operand type checker that is used in 'simple' situations: there are a particular number of operands, with - * particular types, some of which may be optional or nullable. + * particular types, some of which may be optional or nullable, and some of which may be required to be literals. */ private static class DefaultOperandTypeChecker implements SqlOperandTypeChecker { private final List operandTypes; private final int requiredOperands; private final IntSet nullableOperands; + private final IntSet literalOperands; DefaultOperandTypeChecker( final List operandTypes, final int requiredOperands, - final IntSet nullableOperands + final IntSet nullableOperands, + @Nullable final int[] literalOperands ) { Preconditions.checkArgument(requiredOperands <= operandTypes.size() && requiredOperands >= 0); this.operandTypes = Preconditions.checkNotNull(operandTypes, "operandTypes"); this.requiredOperands = requiredOperands; this.nullableOperands = Preconditions.checkNotNull(nullableOperands, "nullableOperands"); + + if (literalOperands == null) { + this.literalOperands = IntSets.EMPTY_SET; + } else { + this.literalOperands = new IntArraySet(); + Arrays.stream(literalOperands).forEach(this.literalOperands::add); + } } @Override public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { - if (operandTypes.size() != callBinding.getOperandCount()) { - // Just like FamilyOperandTypeChecker: assume this is an inapplicable sub-rule of a composite rule; don't throw - return false; - } - for (int i = 0; i < callBinding.operands().size(); i++) { final SqlNode operand = callBinding.operands().get(i); + + if (literalOperands.contains(i)) { + // Verify that 'operand' is a literal. + if (!SqlUtil.isLiteral(operand)) { + return throwOrReturn( + throwOnFailure, + callBinding, + cb -> cb.getValidator() + .newValidationError( + operand, + Static.RESOURCE.argumentMustBeLiteral(callBinding.getOperator().getName()) + ) + ); + } + } + final RelDataType operandType = callBinding.getValidator().deriveType(callBinding.getScope(), operand); final SqlTypeFamily expectedFamily = operandTypes.get(i); @@ -467,21 +554,21 @@ public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFail // ANY matches anything. This operand is all good; do nothing. } else if (expectedFamily.getTypeNames().contains(operandType.getSqlTypeName())) { // Operand came in with one of the expected types. - } else if (operandType.getSqlTypeName() == SqlTypeName.NULL) { + } else if (operandType.getSqlTypeName() == SqlTypeName.NULL || SqlUtil.isNullLiteral(operand, true)) { // Null came in, check if operand is a nullable type. if (!nullableOperands.contains(i)) { - if (throwOnFailure) { - throw callBinding.getValidator().newValidationError(operand, Static.RESOURCE.nullIllegal()); - } else { - return false; - } + return throwOrReturn( + throwOnFailure, + callBinding, + cb -> cb.getValidator().newValidationError(operand, Static.RESOURCE.nullIllegal()) + ); } } else { - if (throwOnFailure) { - throw callBinding.newValidationSignatureError(); - } else { - return false; - } + return throwOrReturn( + throwOnFailure, + callBinding, + SqlCallBinding::newValidationSignatureError + ); } } @@ -512,4 +599,17 @@ public boolean isOptional(int i) return i + 1 > requiredOperands; } } + + private static boolean throwOrReturn( + final boolean throwOnFailure, + final SqlCallBinding callBinding, + final Function exceptionMapper + ) + { + if (throwOnFailure) { + throw exceptionMapper.apply(callBinding); + } else { + return false; + } + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayLengthOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayLengthOperatorConversion.java index bcafbb605c69..3bcd6ec0de06 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayLengthOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayLengthOperatorConversion.java @@ -44,7 +44,7 @@ public class ArrayLengthOperatorConversion implements SqlOperatorConversion ) ) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.INTEGER) + .returnTypeNonNull(SqlTypeName.INTEGER) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOfOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOfOperatorConversion.java index 2e06ef9aa290..8d8da7461bb0 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOfOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOfOperatorConversion.java @@ -48,7 +48,7 @@ public class ArrayOffsetOfOperatorConversion implements SqlOperatorConversion ) ) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.INTEGER) + .returnTypeNonNull(SqlTypeName.INTEGER) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOperatorConversion.java index 42a874da5e45..2fdee5d79b24 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOperatorConversion.java @@ -48,7 +48,7 @@ public class ArrayOffsetOperatorConversion implements SqlOperatorConversion ) ) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOrdinalOfOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOrdinalOfOperatorConversion.java index 367d80ff1cc4..14fa9d61f3fd 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOrdinalOfOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOrdinalOfOperatorConversion.java @@ -48,7 +48,7 @@ public class ArrayOrdinalOfOperatorConversion implements SqlOperatorConversion ) ) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.INTEGER) + .returnTypeNonNull(SqlTypeName.INTEGER) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOrdinalOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOrdinalOperatorConversion.java index 1dd9c4a800d0..c3d1302dc41e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOrdinalOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOrdinalOperatorConversion.java @@ -48,7 +48,7 @@ public class ArrayOrdinalOperatorConversion implements SqlOperatorConversion ) ) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayToStringOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayToStringOperatorConversion.java index 3d9331fa7100..802a95e5a369 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayToStringOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayToStringOperatorConversion.java @@ -48,7 +48,7 @@ public class ArrayToStringOperatorConversion implements SqlOperatorConversion ) ) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/BTrimOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/BTrimOperatorConversion.java index 4e45c5a0fd97..d77c20b5d787 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/BTrimOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/BTrimOperatorConversion.java @@ -37,7 +37,7 @@ public class BTrimOperatorConversion implements SqlOperatorConversion private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder("BTRIM") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .functionCategory(SqlFunctionCategory.STRING) .requiredOperands(1) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/DateTruncOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/DateTruncOperatorConversion.java index 9e4bf3c3d50e..f496e0ab9671 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/DateTruncOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/DateTruncOperatorConversion.java @@ -67,7 +67,7 @@ public class DateTruncOperatorConversion implements SqlOperatorConversion .operatorBuilder("DATE_TRUNC") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.TIMESTAMP) .requiredOperands(2) - .returnType(SqlTypeName.TIMESTAMP) + .returnTypeNonNull(SqlTypeName.TIMESTAMP) .functionCategory(SqlFunctionCategory.TIMEDATE) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/IPv4AddressStringifyOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/IPv4AddressStringifyOperatorConversion.java index 6bd02b41221e..134723188d2e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/IPv4AddressStringifyOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/IPv4AddressStringifyOperatorConversion.java @@ -39,7 +39,7 @@ public class IPv4AddressStringifyOperatorConversion extends DirectOperatorConver OperandTypes.family(SqlTypeFamily.INTEGER), OperandTypes.family(SqlTypeFamily.STRING) )) - .nullableReturnType(SqlTypeName.CHAR) + .returnTypeNullable(SqlTypeName.CHAR) .functionCategory(SqlFunctionCategory.USER_DEFINED_FUNCTION) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LPadOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LPadOperatorConversion.java index 6a2d4f59b6ee..2d13b02fcbbd 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LPadOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LPadOperatorConversion.java @@ -37,7 +37,7 @@ public class LPadOperatorConversion implements SqlOperatorConversion private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder("LPAD") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER, SqlTypeFamily.CHARACTER) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .functionCategory(SqlFunctionCategory.STRING) .requiredOperands(2) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LTrimOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LTrimOperatorConversion.java index d3ac45da0fc1..70ec0c97e621 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LTrimOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LTrimOperatorConversion.java @@ -37,7 +37,7 @@ public class LTrimOperatorConversion implements SqlOperatorConversion private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder("LTRIM") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .functionCategory(SqlFunctionCategory.STRING) .requiredOperands(1) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LeftOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LeftOperatorConversion.java index 7363974eb79e..252343cddba1 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LeftOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/LeftOperatorConversion.java @@ -39,7 +39,7 @@ public class LeftOperatorConversion implements SqlOperatorConversion .operatorBuilder("LEFT") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MillisToTimestampOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MillisToTimestampOperatorConversion.java index 227279f5a2f7..e8b8e748a640 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MillisToTimestampOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MillisToTimestampOperatorConversion.java @@ -39,7 +39,7 @@ public class MillisToTimestampOperatorConversion implements SqlOperatorConversio private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder("MILLIS_TO_TIMESTAMP") .operandTypes(SqlTypeFamily.EXACT_NUMERIC) - .returnType(SqlTypeName.TIMESTAMP) + .returnTypeNonNull(SqlTypeName.TIMESTAMP) .functionCategory(SqlFunctionCategory.TIMEDATE) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringAppendOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringAppendOperatorConversion.java index fb4745425ada..d57d0fcb1a4a 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringAppendOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringAppendOperatorConversion.java @@ -47,7 +47,7 @@ public class MultiValueStringAppendOperatorConversion implements SqlOperatorConv ) ) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringConcatOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringConcatOperatorConversion.java index 03e1c7c63bca..455715fe8f5c 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringConcatOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringConcatOperatorConversion.java @@ -44,7 +44,7 @@ public class MultiValueStringConcatOperatorConversion implements SqlOperatorConv ) ) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringPrependOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringPrependOperatorConversion.java index 4b7cc968df1f..6fa04ce176cd 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringPrependOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringPrependOperatorConversion.java @@ -47,7 +47,7 @@ public class MultiValueStringPrependOperatorConversion implements SqlOperatorCon ) ) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringSliceOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringSliceOperatorConversion.java index cba6ee66eec9..f278b3bf7ebb 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringSliceOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringSliceOperatorConversion.java @@ -52,7 +52,7 @@ public class MultiValueStringSliceOperatorConversion implements SqlOperatorConve ) ) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ParseLongOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ParseLongOperatorConversion.java index 29b0906e4174..9fd710fb1cb4 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ParseLongOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ParseLongOperatorConversion.java @@ -38,7 +38,7 @@ public class ParseLongOperatorConversion implements SqlOperatorConversion private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder(NAME) .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER) - .returnType(SqlTypeName.BIGINT) + .returnTypeNonNull(SqlTypeName.BIGINT) .functionCategory(SqlFunctionCategory.STRING) .requiredOperands(1) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java index 4fa27af75037..5c1665c7083c 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java @@ -40,7 +40,7 @@ public class QueryLookupOperatorConversion implements SqlOperatorConversion private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder("LOOKUP") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .functionCategory(SqlFunctionCategory.STRING) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RPadOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RPadOperatorConversion.java index 68874ef86ba1..47c8eadc2f85 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RPadOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RPadOperatorConversion.java @@ -37,7 +37,7 @@ public class RPadOperatorConversion implements SqlOperatorConversion private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder("RPAD") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER, SqlTypeFamily.CHARACTER) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .functionCategory(SqlFunctionCategory.STRING) .requiredOperands(2) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RTrimOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RTrimOperatorConversion.java index 8361a8a5e8f6..6aa8f1b28a6f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RTrimOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RTrimOperatorConversion.java @@ -37,7 +37,7 @@ public class RTrimOperatorConversion implements SqlOperatorConversion private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder("RTRIM") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .functionCategory(SqlFunctionCategory.STRING) .requiredOperands(1) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RegexpExtractOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RegexpExtractOperatorConversion.java index 18da1b31bff5..b6469718255e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RegexpExtractOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RegexpExtractOperatorConversion.java @@ -39,7 +39,8 @@ public class RegexpExtractOperatorConversion implements SqlOperatorConversion .operatorBuilder("REGEXP_EXTRACT") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER) .requiredOperands(2) - .returnType(SqlTypeName.VARCHAR) + .literalOperands(1, 2) + .returnTypeNullable(SqlTypeName.VARCHAR) .functionCategory(SqlFunctionCategory.STRING) .build(); @@ -71,9 +72,13 @@ public DruidExpression toDruidExpression( : null; if (arg.isSimpleExtraction() && patternExpr.isLiteral() && (indexExpr == null || indexExpr.isLiteral())) { + final String pattern = (String) patternExpr.getLiteralValue(); + return arg.getSimpleExtraction().cascade( new RegexDimExtractionFn( - (String) patternExpr.getLiteralValue(), + // Undo the empty-to-null conversion from patternExpr parsing (patterns cannot be null, even in + // non-SQL-compliant null handling mode). + StringUtils.nullToEmptyNonDruidDataString(pattern), indexExpr == null ? DEFAULT_INDEX : ((Number) indexExpr.getLiteralValue()).intValue(), true, null diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RegexpLikeOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RegexpLikeOperatorConversion.java new file mode 100644 index 000000000000..ea699abe0215 --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RegexpLikeOperatorConversion.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.expression.builtin; + +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.RegexDimFilter; +import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.expression.Expressions; +import org.apache.druid.sql.calcite.expression.OperatorConversions; +import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; +import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; + +import javax.annotation.Nullable; +import java.util.List; + +public class RegexpLikeOperatorConversion implements SqlOperatorConversion +{ + private static final SqlFunction SQL_FUNCTION = OperatorConversions + .operatorBuilder("REGEXP_LIKE") + .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) + .requiredOperands(2) + .literalOperands(1) + .returnTypeNonNull(SqlTypeName.BOOLEAN) + .functionCategory(SqlFunctionCategory.STRING) + .build(); + + @Override + public SqlFunction calciteOperator() + { + return SQL_FUNCTION; + } + + @Override + public DruidExpression toDruidExpression( + final PlannerContext plannerContext, + final RowSignature rowSignature, + final RexNode rexNode + ) + { + return OperatorConversions.convertCall( + plannerContext, + rowSignature, + rexNode, + operands -> DruidExpression.fromFunctionCall("regexp_like", operands) + ); + } + + @Nullable + @Override + public DimFilter toDruidFilter( + final PlannerContext plannerContext, + final RowSignature rowSignature, + @Nullable final VirtualColumnRegistry virtualColumnRegistry, + final RexNode rexNode + ) + { + final List operands = ((RexCall) rexNode).getOperands(); + final DruidExpression druidExpression = Expressions.toDruidExpression( + plannerContext, + rowSignature, + operands.get(0) + ); + + if (druidExpression == null) { + return null; + } + + final String pattern = RexLiteral.stringValue(operands.get(1)); + + if (druidExpression.isSimpleExtraction()) { + return new RegexDimFilter( + druidExpression.getSimpleExtraction().getColumn(), + pattern, + druidExpression.getSimpleExtraction().getExtractionFn(), + null + ); + } else if (virtualColumnRegistry != null) { + VirtualColumn v = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( + plannerContext, + druidExpression, + operands.get(0).getType().getSqlTypeName() + ); + + return new RegexDimFilter(v.getOutputName(), pattern, null, null); + } else { + return null; + } + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RepeatOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RepeatOperatorConversion.java index 318e95d9b4b0..9521a0443bcc 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RepeatOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RepeatOperatorConversion.java @@ -39,7 +39,7 @@ public class RepeatOperatorConversion implements SqlOperatorConversion .operatorBuilder("REPEAT") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ReverseOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ReverseOperatorConversion.java index 7cb0f0552976..70280abf2f98 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ReverseOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ReverseOperatorConversion.java @@ -37,7 +37,7 @@ public class ReverseOperatorConversion implements SqlOperatorConversion .operatorBuilder("REVERSE") .operandTypes(SqlTypeFamily.CHARACTER) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RightOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RightOperatorConversion.java index 9fc3736d9f60..863bbccd5578 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RightOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/RightOperatorConversion.java @@ -39,7 +39,7 @@ public class RightOperatorConversion implements SqlOperatorConversion .operatorBuilder("RIGHT") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StringFormatOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StringFormatOperatorConversion.java index 3a535cdd9eab..b2aabbb2d11c 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StringFormatOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StringFormatOperatorConversion.java @@ -42,7 +42,7 @@ public class StringFormatOperatorConversion implements SqlOperatorConversion .operatorBuilder("STRING_FORMAT") .operandTypeChecker(new StringFormatOperandTypeChecker()) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StringToMultiValueStringOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StringToMultiValueStringOperatorConversion.java index bdb590888aaa..7a6442f2dc8c 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StringToMultiValueStringOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StringToMultiValueStringOperatorConversion.java @@ -45,7 +45,7 @@ public class StringToMultiValueStringOperatorConversion implements SqlOperatorCo ) ) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StrposOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StrposOperatorConversion.java index 336c9d3b3723..e18c0896a5d4 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StrposOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/StrposOperatorConversion.java @@ -38,7 +38,7 @@ public class StrposOperatorConversion implements SqlOperatorConversion .operatorBuilder("STRPOS") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) .functionCategory(SqlFunctionCategory.STRING) - .returnType(SqlTypeName.INTEGER) + .returnTypeNonNull(SqlTypeName.INTEGER) .build(); @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TextcatOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TextcatOperatorConversion.java index 09652ee08ff6..ee160d6b3ef6 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TextcatOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TextcatOperatorConversion.java @@ -36,7 +36,7 @@ public class TextcatOperatorConversion implements SqlOperatorConversion .operatorBuilder("textcat") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) .requiredOperands(2) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .functionCategory(SqlFunctionCategory.STRING) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeCeilOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeCeilOperatorConversion.java index 8b2ee0c7e78b..81b2dfa12ae2 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeCeilOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeCeilOperatorConversion.java @@ -41,7 +41,7 @@ public class TimeCeilOperatorConversion implements SqlOperatorConversion .operatorBuilder("TIME_CEIL") .operandTypes(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.CHARACTER, SqlTypeFamily.TIMESTAMP, SqlTypeFamily.CHARACTER) .requiredOperands(2) - .returnType(SqlTypeName.TIMESTAMP) + .returnTypeNonNull(SqlTypeName.TIMESTAMP) .functionCategory(SqlFunctionCategory.TIMEDATE) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeExtractOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeExtractOperatorConversion.java index ea041cfee2a9..35accd1f9b3f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeExtractOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeExtractOperatorConversion.java @@ -44,7 +44,7 @@ public class TimeExtractOperatorConversion implements SqlOperatorConversion .operatorBuilder("TIME_EXTRACT") .operandTypes(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) .requiredOperands(2) - .returnType(SqlTypeName.BIGINT) + .returnTypeNonNull(SqlTypeName.BIGINT) .functionCategory(SqlFunctionCategory.TIMEDATE) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeFloorOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeFloorOperatorConversion.java index af2716e39c63..87c07f25b7e5 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeFloorOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeFloorOperatorConversion.java @@ -56,7 +56,7 @@ public class TimeFloorOperatorConversion implements SqlOperatorConversion .operatorBuilder("TIME_FLOOR") .operandTypes(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.CHARACTER, SqlTypeFamily.TIMESTAMP, SqlTypeFamily.CHARACTER) .requiredOperands(2) - .returnType(SqlTypeName.TIMESTAMP) + .returnTypeNonNull(SqlTypeName.TIMESTAMP) .functionCategory(SqlFunctionCategory.TIMEDATE) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeFormatOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeFormatOperatorConversion.java index fa988a38a976..1f7b6f95d32e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeFormatOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeFormatOperatorConversion.java @@ -47,7 +47,7 @@ public class TimeFormatOperatorConversion implements SqlOperatorConversion .operatorBuilder("TIME_FORMAT") .operandTypes(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) .requiredOperands(1) - .returnType(SqlTypeName.VARCHAR) + .returnTypeNonNull(SqlTypeName.VARCHAR) .functionCategory(SqlFunctionCategory.TIMEDATE) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeParseOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeParseOperatorConversion.java index 8bba7321ac46..c79d981af2df 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeParseOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeParseOperatorConversion.java @@ -45,7 +45,7 @@ public class TimeParseOperatorConversion implements SqlOperatorConversion .operatorBuilder("TIME_PARSE") .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) .requiredOperands(1) - .nullableReturnType(SqlTypeName.TIMESTAMP) + .returnTypeNullable(SqlTypeName.TIMESTAMP) .functionCategory(SqlFunctionCategory.TIMEDATE) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeShiftOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeShiftOperatorConversion.java index 4ca033a29141..25b05c40f1d1 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeShiftOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimeShiftOperatorConversion.java @@ -45,7 +45,7 @@ public class TimeShiftOperatorConversion implements SqlOperatorConversion .operatorBuilder("TIME_SHIFT") .operandTypes(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER, SqlTypeFamily.CHARACTER) .requiredOperands(3) - .returnType(SqlTypeName.TIMESTAMP) + .returnTypeNonNull(SqlTypeName.TIMESTAMP) .functionCategory(SqlFunctionCategory.TIMEDATE) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimestampToMillisOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimestampToMillisOperatorConversion.java index ec1742d03c8d..ae4565579fb7 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimestampToMillisOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/TimestampToMillisOperatorConversion.java @@ -39,7 +39,7 @@ public class TimestampToMillisOperatorConversion implements SqlOperatorConversio private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder("TIMESTAMP_TO_MILLIS") .operandTypes(SqlTypeFamily.TIMESTAMP) - .returnType(SqlTypeName.BIGINT) + .returnTypeNonNull(SqlTypeName.BIGINT) .functionCategory(SqlFunctionCategory.TIMEDATE) .build(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java index 0fd811a0d28b..3f7699fa9f18 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java @@ -83,6 +83,7 @@ import org.apache.druid.sql.calcite.expression.builtin.RPadOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RTrimOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RegexpExtractOperatorConversion; +import org.apache.druid.sql.calcite.expression.builtin.RegexpLikeOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.ReinterpretOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RepeatOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.ReverseOperatorConversion; @@ -162,6 +163,7 @@ public class DruidOperatorTable implements SqlOperatorTable .add(new LTrimOperatorConversion()) .add(new PositionOperatorConversion()) .add(new RegexpExtractOperatorConversion()) + .add(new RegexpLikeOperatorConversion()) .add(new RTrimOperatorConversion()) .add(new ParseLongOperatorConversion()) .add(new StringFormatOperatorConversion()) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 94edd222dc67..9ea52b138d6a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -81,6 +81,7 @@ import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.LikeDimFilter; import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.RegexDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; @@ -7313,6 +7314,74 @@ public void testRegexpExtract() throws Exception ); } + @Test + public void testRegexpExtractFilterViaNotNullCheck() throws Exception + { + // Cannot vectorize due to extractionFn in dimension spec. + cannotVectorize(); + + testQuery( + "SELECT COUNT(*)\n" + + "FROM foo\n" + + "WHERE REGEXP_EXTRACT(dim1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || dim1, '^Z2') IS NOT NULL", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .virtualColumns( + expressionVirtualColumn("v0", "regexp_extract(concat('Z',\"dim1\"),'^Z2')", ValueType.STRING) + ) + .filters( + or( + not(selector("dim1", null, new RegexDimExtractionFn("^1", 0, true, null))), + not(selector("v0", null, null)) + ) + ) + .aggregators(new CountAggregatorFactory("a0")) + .context(TIMESERIES_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{3L} + ) + ); + } + + @Test + public void testRegexpLikeFilter() throws Exception + { + // Cannot vectorize due to usage of regex filter. + cannotVectorize(); + + testQuery( + "SELECT COUNT(*)\n" + + "FROM foo\n" + + "WHERE REGEXP_LIKE(dim1, '^1') OR REGEXP_LIKE('Z' || dim1, '^Z2')", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .virtualColumns( + expressionVirtualColumn("v0", "concat('Z',\"dim1\")", ValueType.STRING) + ) + .filters( + or( + new RegexDimFilter("dim1", "^1", null), + new RegexDimFilter("v0", "^Z2", null) + ) + ) + .aggregators(new CountAggregatorFactory("a0")) + .context(TIMESERIES_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{3L} + ) + ); + } + @Test public void testGroupBySortPushDown() throws Exception { @@ -14456,6 +14525,42 @@ public void testRepeatedIdenticalVirtualExpressionGrouping() throws Exception ); } + @Test + public void testValidationErrorNullLiteralIllegal() throws Exception + { + expectedException.expectMessage("Illegal use of 'NULL'"); + + testQuery( + "SELECT REGEXP_LIKE('x', NULL)", + ImmutableList.of(), + ImmutableList.of() + ); + } + + @Test + public void testValidationErrorNonLiteralIllegal() throws Exception + { + expectedException.expectMessage("Argument to function 'REGEXP_LIKE' must be a literal"); + + testQuery( + "SELECT REGEXP_LIKE('x', dim1) FROM foo", + ImmutableList.of(), + ImmutableList.of() + ); + } + + @Test + public void testValidationErrorWrongTypeLiteral() throws Exception + { + expectedException.expectMessage("Cannot apply 'REGEXP_LIKE' to arguments"); + + testQuery( + "SELECT REGEXP_LIKE('x', 1) FROM foo", + ImmutableList.of(), + ImmutableList.of() + ); + } + /** * This is a provider of query contexts that should be used by join tests. * It tests various configs that can be passed to join queries. All the configs provided by this provider should diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java index 3136508ee9b7..1d842175ef46 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java @@ -30,12 +30,19 @@ import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.druid.data.input.MapBasedRow; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.Parser; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.segment.RowAdapters; +import org.apache.druid.segment.RowBasedColumnSelectorFactory; +import org.apache.druid.segment.VirtualColumn; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.planner.Calcites; import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; import org.apache.druid.sql.calcite.table.RowSignatures; import org.apache.druid.sql.calcite.util.CalciteTests; import org.joda.time.DateTime; @@ -46,8 +53,10 @@ import java.math.BigDecimal; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.stream.Collectors; class ExpressionTestHelper @@ -197,11 +206,11 @@ private static String quoteIfNeeded(@Nullable Object arg) } void testExpression( - SqlTypeName sqlTypeName, - SqlOperator op, - List exprs, - DruidExpression expectedExpression, - Object expectedResult + final SqlTypeName sqlTypeName, + final SqlOperator op, + final List exprs, + final DruidExpression expectedExpression, + final Object expectedResult ) { RelDataType returnType = createSqlType(sqlTypeName); @@ -209,36 +218,79 @@ void testExpression( } void testExpression( - SqlOperator op, - RexNode expr, - DruidExpression expectedExpression, - Object expectedResult + final SqlOperator op, + final RexNode expr, + final DruidExpression expectedExpression, + final Object expectedResult ) { testExpression(op, Collections.singletonList(expr), expectedExpression, expectedResult); } void testExpression( - SqlOperator op, - List exprs, - DruidExpression expectedExpression, - Object expectedResult + final SqlOperator op, + final List exprs, + final DruidExpression expectedExpression, + final Object expectedResult ) { testExpression(rexBuilder.makeCall(op, exprs), expectedExpression, expectedResult); } void testExpression( - RexNode rexNode, - DruidExpression expectedExpression, - Object expectedResult + final RexNode rexNode, + final DruidExpression expectedExpression, + final Object expectedResult ) { DruidExpression expression = Expressions.toDruidExpression(PLANNER_CONTEXT, rowSignature, rexNode); Assert.assertEquals("Expression for: " + rexNode, expectedExpression, expression); - ExprEval result = Parser.parse(expression.getExpression(), PLANNER_CONTEXT.getExprMacroTable()) - .eval(Parser.withMap(bindings)); + ExprEval result = Parser.parse(expression.getExpression(), PLANNER_CONTEXT.getExprMacroTable()) + .eval(Parser.withMap(bindings)); + Assert.assertEquals("Result for: " + rexNode, expectedResult, result.value()); } + + void testFilter( + final SqlOperator op, + final List exprs, + final List expectedVirtualColumns, + final DimFilter expectedFilter, + final boolean expectedResult + ) + { + final RexNode rexNode = rexBuilder.makeCall(op, exprs); + final VirtualColumnRegistry virtualColumnRegistry = VirtualColumnRegistry.create(rowSignature); + + final DimFilter filter = Expressions.toFilter(PLANNER_CONTEXT, rowSignature, virtualColumnRegistry, rexNode); + Assert.assertEquals("Filter for: " + rexNode, expectedFilter, filter); + + final List virtualColumns = + filter.getRequiredColumns() + .stream() + .map(virtualColumnRegistry::getVirtualColumn) + .filter(Objects::nonNull) + .sorted(Comparator.comparing(VirtualColumn::getOutputName)) + .collect(Collectors.toList()); + + Assert.assertEquals( + "Virtual columns for: " + rexNode, + expectedVirtualColumns.stream() + .sorted(Comparator.comparing(VirtualColumn::getOutputName)) + .collect(Collectors.toList()), + virtualColumns + ); + + final ValueMatcher matcher = expectedFilter.toFilter().makeMatcher( + RowBasedColumnSelectorFactory.create( + RowAdapters.standardRow(), + () -> new MapBasedRow(0L, bindings), + rowSignature, + false + ) + ); + + Assert.assertEquals("Result for: " + rexNode, expectedResult, matcher.matches()); + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java index d2370d793e9e..25034521f99d 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java @@ -32,15 +32,19 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.extraction.RegexDimExtractionFn; +import org.apache.druid.query.filter.RegexDimFilter; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.sql.calcite.expression.builtin.DateTruncOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.LPadOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.LeftOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.ParseLongOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RPadOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RegexpExtractOperatorConversion; +import org.apache.druid.sql.calcite.expression.builtin.RegexpLikeOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RepeatOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.ReverseOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.RightOperatorConversion; @@ -59,6 +63,7 @@ import org.junit.Test; import java.math.BigDecimal; +import java.util.Collections; import java.util.Map; public class ExpressionsTest extends ExpressionTestBase @@ -75,6 +80,7 @@ public class ExpressionsTest extends ExpressionTestBase .add("hexstr", ValueType.STRING) .add("intstr", ValueType.STRING) .add("spacey", ValueType.STRING) + .add("newliney", ValueType.STRING) .add("tstr", ValueType.STRING) .add("dstr", ValueType.STRING) .build(); @@ -90,6 +96,7 @@ public class ExpressionsTest extends ExpressionTestBase .put("hexstr", "EF") .put("intstr", "-100") .put("spacey", " hey there ") + .put("newliney", "beep\nboop") .put("tstr", "2000-02-03 04:05:06") .put("dstr", "2000-02-03") .build(); @@ -131,6 +138,50 @@ public void testCharacterLength() @Test public void testRegexpExtract() { + testHelper.testExpression( + new RegexpExtractOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("x(.)"), + testHelper.makeLiteral(1) + ), + DruidExpression.of( + SimpleExtraction.of("s", new RegexDimExtractionFn("x(.)", 1, true, null)), + "regexp_extract(\"s\",'x(.)',1)" + ), + null + ); + + testHelper.testExpression( + new RegexpExtractOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("(o)"), + testHelper.makeLiteral(1) + ), + DruidExpression.of( + SimpleExtraction.of("s", new RegexDimExtractionFn("(o)", 1, true, null)), + "regexp_extract(\"s\",'(o)',1)" + ), + + // Column "s" contains an 'o', but not at the beginning; we do match this. + "o" + ); + + testHelper.testExpression( + new RegexpExtractOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeCall( + SqlStdOperatorTable.CONCAT, + testHelper.makeLiteral("Z"), + testHelper.makeInputRef("s") + ), + testHelper.makeLiteral("Zf(.)") + ), + DruidExpression.fromExpression("regexp_extract(concat('Z',\"s\"),'Zf(.)')"), + "Zfo" + ); + testHelper.testExpression( new RegexpExtractOperatorConversion().calciteOperator(), ImmutableList.of( @@ -157,6 +208,307 @@ public void testRegexpExtract() ), "fo" ); + + testHelper.testExpression( + new RegexpExtractOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("") + ), + DruidExpression.of( + SimpleExtraction.of("s", new RegexDimExtractionFn("", 0, true, null)), + "regexp_extract(\"s\",'')" + ), + NullHandling.emptyToNullIfNeeded("") + ); + + testHelper.testExpression( + new RegexpExtractOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("") + ), + DruidExpression.of( + SimpleExtraction.of("s", new RegexDimExtractionFn("", 0, true, null)), + "regexp_extract(\"s\",'')" + ), + NullHandling.emptyToNullIfNeeded("") + ); + + testHelper.testExpression( + new RegexpExtractOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeNullLiteral(SqlTypeName.VARCHAR), + testHelper.makeLiteral("(.)") + ), + DruidExpression.fromExpression("regexp_extract(null,'(.)')"), + null + ); + + testHelper.testExpression( + new RegexpExtractOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeNullLiteral(SqlTypeName.VARCHAR), + testHelper.makeLiteral("") + ), + DruidExpression.fromExpression("regexp_extract(null,'')"), + null + ); + + testHelper.testExpression( + new RegexpExtractOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeNullLiteral(SqlTypeName.VARCHAR), + testHelper.makeLiteral("null") + ), + DruidExpression.fromExpression("regexp_extract(null,'null')"), + null + ); + } + + @Test + public void testRegexpLike() + { + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("f.") + ), + DruidExpression.fromExpression("regexp_like(\"s\",'f.')"), + 1L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("o") + ), + DruidExpression.fromExpression("regexp_like(\"s\",'o')"), + + // Column "s" contains an 'o', but not at the beginning; we do match this. + 1L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("x.") + ), + DruidExpression.fromExpression("regexp_like(\"s\",'x.')"), + 0L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("") + ), + DruidExpression.fromExpression("regexp_like(\"s\",'')"), + 1L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeLiteral("beep\nboop"), + testHelper.makeLiteral("^beep$") + ), + DruidExpression.fromExpression("regexp_like('beep\\u000Aboop','^beep$')"), + 0L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeLiteral("beep\nboop"), + testHelper.makeLiteral("^beep\\nboop$") + ), + DruidExpression.fromExpression("regexp_like('beep\\u000Aboop','^beep\\u005Cnboop$')"), + 1L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("newliney"), + testHelper.makeLiteral("^beep$") + ), + DruidExpression.fromExpression("regexp_like(\"newliney\",'^beep$')"), + 0L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("newliney"), + testHelper.makeLiteral("^beep\\nboop$") + ), + DruidExpression.fromExpression("regexp_like(\"newliney\",'^beep\\u005Cnboop$')"), + 1L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("newliney"), + testHelper.makeLiteral("boo") + ), + DruidExpression.fromExpression("regexp_like(\"newliney\",'boo')"), + 1L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("newliney"), + testHelper.makeLiteral("^boo") + ), + DruidExpression.fromExpression("regexp_like(\"newliney\",'^boo')"), + 0L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeCall( + SqlStdOperatorTable.CONCAT, + testHelper.makeLiteral("Z"), + testHelper.makeInputRef("s") + ), + testHelper.makeLiteral("x(.)") + ), + DruidExpression.fromExpression("regexp_like(concat('Z',\"s\"),'x(.)')"), + 0L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeNullLiteral(SqlTypeName.VARCHAR), + testHelper.makeLiteral("(.)") + ), + DruidExpression.fromExpression("regexp_like(null,'(.)')"), + 0L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeNullLiteral(SqlTypeName.VARCHAR), + testHelper.makeLiteral("") + ), + DruidExpression.fromExpression("regexp_like(null,'')"), + + // In SQL-compatible mode, nulls don't match anything. Otherwise, they match like empty strings. + NullHandling.sqlCompatible() ? 0L : 1L + ); + + testHelper.testExpression( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeNullLiteral(SqlTypeName.VARCHAR), + testHelper.makeLiteral("null") + ), + DruidExpression.fromExpression("regexp_like(null,'null')"), + 0L + ); + } + + @Test + public void testRegexpLikeAsFilter() + { + testHelper.testFilter( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("f.") + ), + Collections.emptyList(), + new RegexDimFilter("s", "f.", null), + true + ); + + testHelper.testFilter( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("o") + ), + Collections.emptyList(), + // Column "s" contains an 'o', but not at the beginning, so we don't match + new RegexDimFilter("s", "o", null), + true + ); + + testHelper.testFilter( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("x.") + ), + Collections.emptyList(), + new RegexDimFilter("s", "x.", null), + false + ); + + testHelper.testFilter( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("s"), + testHelper.makeLiteral("") + ), + Collections.emptyList(), + new RegexDimFilter("s", "", null), + true + ); + + testHelper.testFilter( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("newliney"), + testHelper.makeLiteral("^beep$") + ), + Collections.emptyList(), + new RegexDimFilter("newliney", "^beep$", null), + false + ); + + testHelper.testFilter( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("newliney"), + testHelper.makeLiteral("^beep\\nboop$") + ), + Collections.emptyList(), + new RegexDimFilter("newliney", "^beep\\nboop$", null), + true + ); + + testHelper.testFilter( + new RegexpLikeOperatorConversion().calciteOperator(), + ImmutableList.of( + testHelper.makeCall( + SqlStdOperatorTable.CONCAT, + testHelper.makeLiteral("Z"), + testHelper.makeInputRef("s") + ), + testHelper.makeLiteral("x(.)") + ), + ImmutableList.of( + new ExpressionVirtualColumn( + "v0", + "concat('Z',\"s\")", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ), + new RegexDimFilter("v0", "x(.)", null), + false + ); } @Test diff --git a/website/.spelling b/website/.spelling index ce11862db740..7452d1403f92 100644 --- a/website/.spelling +++ b/website/.spelling @@ -1069,6 +1069,7 @@ nextafter nvl parse_long regexp_extract +regexp_like result1 result2 rint