diff --git a/core/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java b/core/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java index f5960bc7c122..8ff150fdf042 100644 --- a/core/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java +++ b/core/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java @@ -74,7 +74,7 @@ public ExprType getOutputType(InputBindingInspector inspector) @Override public boolean canVectorize(InputBindingInspector inspector) { - return inspector.areNumeric(left, right) && inspector.canVectorize(left, right); + return inspector.canVectorize(left, right); } @Override @@ -130,7 +130,7 @@ public ExprType getOutputType(InputBindingInspector inspector) @Override public boolean canVectorize(InputBindingInspector inspector) { - return inspector.areNumeric(left, right) && inspector.canVectorize(left, right); + return inspector.canVectorize(left, right); } @Override @@ -185,7 +185,7 @@ public ExprType getOutputType(InputBindingInspector inspector) @Override public boolean canVectorize(InputBindingInspector inspector) { - return inspector.areNumeric(left, right) && inspector.canVectorize(left, right); + return inspector.canVectorize(left, right); } @Override @@ -241,7 +241,7 @@ public ExprType getOutputType(InputBindingInspector inspector) @Override public boolean canVectorize(InputBindingInspector inspector) { - return inspector.areNumeric(left, right) && inspector.canVectorize(left, right); + return inspector.canVectorize(left, right); } @Override @@ -296,7 +296,7 @@ public ExprType getOutputType(InputBindingInspector inspector) @Override public boolean canVectorize(InputBindingInspector inspector) { - return inspector.areNumeric(left, right) && inspector.canVectorize(left, right); + return inspector.canVectorize(left, right); } @Override @@ -351,7 +351,7 @@ public ExprType getOutputType(InputBindingInspector inspector) @Override public boolean canVectorize(InputBindingInspector inspector) { - return inspector.areNumeric(left, right) && inspector.canVectorize(left, right); + return inspector.canVectorize(left, right); } @Override diff --git a/core/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java b/core/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java index be347e3b7427..280d99b9d095 100644 --- a/core/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java +++ b/core/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java @@ -83,12 +83,6 @@ public BindingAnalysis analyzeInputs() @Override public ExprType getOutputType(InputBindingInspector inspector) { - if (left.isNullLiteral()) { - return right.getOutputType(inspector); - } - if (right.isNullLiteral()) { - return left.getOutputType(inspector); - } return ExprTypeConversion.operator(left.getOutputType(inspector), right.getOutputType(inspector)); } diff --git a/core/src/main/java/org/apache/druid/math/expr/ConstantExpr.java b/core/src/main/java/org/apache/druid/math/expr/ConstantExpr.java index 035823ed6bcc..dd64629e2696 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ConstantExpr.java +++ b/core/src/main/java/org/apache/druid/math/expr/ConstantExpr.java @@ -35,20 +35,24 @@ * {@link Expr.ObjectBinding}. {@link ConstantExpr} are terminal nodes of an expression tree, and have no children * {@link Expr}. */ -abstract class ConstantExpr implements Expr +abstract class ConstantExpr implements Expr { final ExprType outputType; + @Nullable + final T value; - protected ConstantExpr(ExprType outputType) + protected ConstantExpr(ExprType outputType, @Nullable T value) { this.outputType = outputType; + this.value = value; } @Nullable @Override public ExprType getOutputType(InputBindingInspector inspector) { - return outputType; + // null isn't really a type, so don't claim anything + return value == null ? null : outputType; } @Override @@ -58,68 +62,47 @@ public boolean isLiteral() } @Override - public Expr visit(Shuttle shuttle) + public boolean isNullLiteral() { - return shuttle.visit(this); + return value == null; } @Override - public BindingAnalysis analyzeInputs() + public Object getLiteralValue() { - return new BindingAnalysis(); + return value; } @Override - public String stringify() - { - return toString(); - } -} - -/** - * Base class for typed 'null' value constants (or default value, depending on {@link NullHandling#sqlCompatible}) - */ -abstract class NullNumericConstantExpr extends ConstantExpr -{ - protected NullNumericConstantExpr(ExprType outputType) + public Expr visit(Shuttle shuttle) { - super(outputType); + return shuttle.visit(this); } @Override - public Object getLiteralValue() + public BindingAnalysis analyzeInputs() { - return null; + return new BindingAnalysis(); } @Override - public String toString() + public boolean canVectorize(InputBindingInspector inspector) { - return NULL_LITERAL; + return true; } - @Override - public boolean isNullLiteral() + public String stringify() { - return true; + return toString(); } } -class LongExpr extends ConstantExpr +class LongExpr extends ConstantExpr { - private final Long value; - LongExpr(Long value) { - super(ExprType.LONG); - this.value = Preconditions.checkNotNull(value, "value"); - } - - @Override - public Object getLiteralValue() - { - return value; + super(ExprType.LONG, Preconditions.checkNotNull(value, "value")); } @Override @@ -134,12 +117,6 @@ public ExprEval eval(ObjectBinding bindings) return ExprEval.ofLong(value); } - @Override - public boolean canVectorize(InputBindingInspector inspector) - { - return true; - } - @Override public ExprVectorProcessor buildVectorized(VectorInputBindingInspector inspector) { @@ -166,11 +143,11 @@ public int hashCode() } } -class NullLongExpr extends NullNumericConstantExpr +class NullLongExpr extends ConstantExpr { NullLongExpr() { - super(ExprType.LONG); + super(ExprType.LONG, null); } @Override @@ -179,12 +156,6 @@ public ExprEval eval(ObjectBinding bindings) return ExprEval.ofLong(null); } - @Override - public boolean canVectorize(InputBindingInspector inspector) - { - return true; - } - @Override public ExprVectorProcessor buildVectorized(VectorInputBindingInspector inspector) { @@ -202,22 +173,19 @@ public final boolean equals(Object obj) { return obj instanceof NullLongExpr; } -} -class LongArrayExpr extends ConstantExpr -{ - private final Long[] value; - - LongArrayExpr(Long[] value) + @Override + public String toString() { - super(ExprType.LONG_ARRAY); - this.value = Preconditions.checkNotNull(value, "value"); + return NULL_LITERAL; } +} - @Override - public Object getLiteralValue() +class LongArrayExpr extends ConstantExpr +{ + LongArrayExpr(Long[] value) { - return value; + super(ExprType.LONG_ARRAY, Preconditions.checkNotNull(value, "value")); } @Override @@ -232,6 +200,12 @@ public ExprEval eval(ObjectBinding bindings) return ExprEval.ofLongArray(value); } + @Override + public boolean canVectorize(InputBindingInspector inspector) + { + return false; + } + @Override public String stringify() { @@ -261,20 +235,11 @@ public int hashCode() } } -class DoubleExpr extends ConstantExpr +class DoubleExpr extends ConstantExpr { - private final Double value; - DoubleExpr(Double value) { - super(ExprType.DOUBLE); - this.value = Preconditions.checkNotNull(value, "value"); - } - - @Override - public Object getLiteralValue() - { - return value; + super(ExprType.DOUBLE, Preconditions.checkNotNull(value, "value")); } @Override @@ -289,17 +254,12 @@ public ExprEval eval(ObjectBinding bindings) return ExprEval.ofDouble(value); } - @Override - public boolean canVectorize(InputBindingInspector inspector) - { - return true; - } - @Override public ExprVectorProcessor buildVectorized(VectorInputBindingInspector inspector) { return VectorProcessors.constantDouble(value, inspector.getMaxVectorSize()); } + @Override public boolean equals(Object o) { @@ -320,11 +280,11 @@ public int hashCode() } } -class NullDoubleExpr extends NullNumericConstantExpr +class NullDoubleExpr extends ConstantExpr { NullDoubleExpr() { - super(ExprType.DOUBLE); + super(ExprType.DOUBLE, null); } @Override @@ -333,12 +293,6 @@ public ExprEval eval(ObjectBinding bindings) return ExprEval.ofDouble(null); } - @Override - public boolean canVectorize(InputBindingInspector inspector) - { - return true; - } - @Override public ExprVectorProcessor buildVectorized(VectorInputBindingInspector inspector) { @@ -356,22 +310,19 @@ public final boolean equals(Object obj) { return obj instanceof NullDoubleExpr; } -} -class DoubleArrayExpr extends ConstantExpr -{ - private final Double[] value; - - DoubleArrayExpr(Double[] value) + @Override + public String toString() { - super(ExprType.DOUBLE_ARRAY); - this.value = Preconditions.checkNotNull(value, "value"); + return NULL_LITERAL; } +} - @Override - public Object getLiteralValue() +class DoubleArrayExpr extends ConstantExpr +{ + DoubleArrayExpr(Double[] value) { - return value; + super(ExprType.DOUBLE_ARRAY, Preconditions.checkNotNull(value, "value")); } @Override @@ -386,6 +337,12 @@ public ExprEval eval(ObjectBinding bindings) return ExprEval.ofDoubleArray(value); } + @Override + public boolean canVectorize(InputBindingInspector inspector) + { + return false; + } + @Override public String stringify() { @@ -415,28 +372,11 @@ public int hashCode() } } -class StringExpr extends ConstantExpr +class StringExpr extends ConstantExpr { - @Nullable - private final String value; - StringExpr(@Nullable String value) { - super(ExprType.STRING); - this.value = NullHandling.emptyToNullIfNeeded(value); - } - - @Nullable - @Override - public Object getLiteralValue() - { - return value; - } - - @Override - public boolean isNullLiteral() - { - return value == null; + super(ExprType.STRING, NullHandling.emptyToNullIfNeeded(value)); } @Override @@ -451,12 +391,6 @@ public ExprEval eval(ObjectBinding bindings) return ExprEval.of(value); } - @Override - public boolean canVectorize(InputBindingInspector inspector) - { - return true; - } - @Override public ExprVectorProcessor buildVectorized(VectorInputBindingInspector inspector) { @@ -490,20 +424,11 @@ public int hashCode() } } -class StringArrayExpr extends ConstantExpr +class StringArrayExpr extends ConstantExpr { - private final String[] value; - StringArrayExpr(String[] value) { - super(ExprType.STRING_ARRAY); - this.value = Preconditions.checkNotNull(value, "value"); - } - - @Override - public Object getLiteralValue() - { - return value; + super(ExprType.STRING_ARRAY, Preconditions.checkNotNull(value, "value")); } @Override @@ -518,6 +443,12 @@ public ExprEval eval(ObjectBinding bindings) return ExprEval.ofStringArray(value); } + @Override + public boolean canVectorize(InputBindingInspector inspector) + { + return false; + } + @Override public String stringify() { diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index b80291cb0453..3c0e4c741c70 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -132,11 +132,17 @@ default String getBindingIfIdentifier() BindingAnalysis analyzeInputs(); /** - * Given an {@link InputBindingInspector}, compute what the output {@link ExprType} will be for this expression. A return - * value of null indicates that the given type information was not enough to resolve the output type, so the - * expression must be evaluated using default {@link #eval} handling where types are only known after evaluation, - * through {@link ExprEval#type}. - * @param inspector + * Given an {@link InputBindingInspector}, compute what the output {@link ExprType} will be for this expression. + * + * In the vectorized expression engine, if {@link #canVectorize(InputBindingInspector)} returns true, a return value + * of null MUST ONLY indicate that the expression has all null inputs (non-existent columns) or null constants for + * the entire expression. Otherwise, all vectorizable expressions must produce an output type to correctly operate + * with the vectorized engine. + * + * Outside of the context of vectorized expressions, a return value of null can also indicate that the given type + * information was not enough to resolve the output type, so the expression must be evaluated using default + * {@link #eval} handling where types are only known after evaluation, through {@link ExprEval#type}, such as + * transform expressions at ingestion time */ @Nullable default ExprType getOutputType(InputBindingInspector inspector) diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprTypeConversion.java b/core/src/main/java/org/apache/druid/math/expr/ExprTypeConversion.java index e5f6388f1735..a5bd093ad777 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprTypeConversion.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprTypeConversion.java @@ -26,18 +26,14 @@ public class ExprTypeConversion { - /** * Infer the output type of a list of possible 'conditional' expression outputs (where any of these could be the * output expression if the corresponding case matching expression evaluates to true) */ - static ExprType conditional(Expr.InputBindingInspector inspector, List args) + public static ExprType conditional(Expr.InputBindingInspector inspector, List args) { ExprType type = null; for (Expr arg : args) { - if (arg.isNullLiteral()) { - continue; - } if (type == null) { type = arg.getOutputType(inspector); } else { diff --git a/core/src/main/java/org/apache/druid/math/expr/Exprs.java b/core/src/main/java/org/apache/druid/math/expr/Exprs.java index 13783ee5d6eb..cdff172361f9 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Exprs.java +++ b/core/src/main/java/org/apache/druid/math/expr/Exprs.java @@ -44,6 +44,11 @@ public static UnsupportedOperationException cannotVectorize() return new UOE("Unable to vectorize expression"); } + public static UnsupportedOperationException cannotVectorize(String msg) + { + return new UOE("Unable to vectorize expression: %s", msg); + } + /** * Decomposes any expr into a list of exprs that, if ANDed together, are equivalent to the input expr. * diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index fef0776ecff3..5322cd64cd79 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -1084,8 +1084,10 @@ protected ExprEval eval(final double x, final double y) @Override public ExprType getOutputType(Expr.InputBindingInspector inspector, List args) { - return ExprTypeConversion.integerMathFunction(args.get(0).getOutputType(inspector), args.get(1).getOutputType( - inspector)); + return ExprTypeConversion.integerMathFunction( + args.get(0).getOutputType(inspector), + args.get(1).getOutputType(inspector) + ); } @Override diff --git a/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java b/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java index 9b9027fe1ea0..a1423c1590b1 100644 --- a/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java +++ b/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java @@ -28,6 +28,7 @@ import org.apache.druid.math.expr.vector.ExprVectorProcessor; import javax.annotation.Nullable; +import java.util.Arrays; import java.util.Objects; /** @@ -149,13 +150,17 @@ public ExprVectorProcessor buildVectorized(VectorInputBindingInspector inspec ExprType inputType = inspector.getType(binding); if (inputType == null) { - // nil column, we can be anything, why not be a double - return new IdentifierVectorProcessor(ExprType.DOUBLE) + // nil column, we can be anything, so be a string because it's the most flexible + // (numbers will be populated with default values in default mode and non-null) + return new IdentifierVectorProcessor(ExprType.STRING) { @Override - public ExprEvalVector evalVector(VectorInputBinding bindings) + public ExprEvalVector evalVector(VectorInputBinding bindings) { - return new ExprEvalDoubleVector(bindings.getDoubleVector(binding), bindings.getNullVector(binding)); + // need to cast to string[] because null columns come out as object[] + return new ExprEvalStringVector( + Arrays.stream(bindings.getObjectVector(binding)).map(x -> (String) x).toArray(String[]::new) + ); } }; } diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/BivariateFunctionVectorObjectProcessor.java b/core/src/main/java/org/apache/druid/math/expr/vector/BivariateFunctionVectorObjectProcessor.java new file mode 100644 index 000000000000..ff92b9f6a896 --- /dev/null +++ b/core/src/main/java/org/apache/druid/math/expr/vector/BivariateFunctionVectorObjectProcessor.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.math.expr.vector; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.Expr; + +import java.lang.reflect.Array; + +/** + * Base {@link ExprVectorProcessor} for expressions and functions with 2 'object' typed inputs (strings, arrays) + */ +public abstract class BivariateFunctionVectorObjectProcessor + implements ExprVectorProcessor +{ + final ExprVectorProcessor left; + final ExprVectorProcessor right; + final int maxVectorSize; + final TOutput outValues; + final boolean sqlCompatible = NullHandling.sqlCompatible(); + + protected BivariateFunctionVectorObjectProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + int maxVectorSize, + TOutput outValues + ) + { + this.left = left; + this.right = right; + this.maxVectorSize = maxVectorSize; + this.outValues = outValues; + } + + @Override + public ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + final ExprEvalVector lhs = left.evalVector(bindings); + final ExprEvalVector rhs = right.evalVector(bindings); + + final int currentSize = bindings.getCurrentVectorSize(); + + final TLeftInput leftInput = lhs.values(); + final TRightInput rightInput = rhs.values(); + + if (sqlCompatible) { + for (int i = 0; i < currentSize; i++) { + if (Array.get(leftInput, i) == null || Array.get(rightInput, i) == null) { + processNull(i); + } else { + processIndex(leftInput, rightInput, i); + } + } + } else { + for (int i = 0; i < currentSize; i++) { + processIndex(leftInput, rightInput, i); + } + } + return asEval(); + } + + abstract void processIndex(TLeftInput leftInput, TRightInput rightInput, int i); + + abstract void processNull(int i); + + abstract ExprEvalVector asEval(); +} diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/CastToStringVectorProcessor.java b/core/src/main/java/org/apache/druid/math/expr/vector/CastToStringVectorProcessor.java index 5903d17ac25a..7be4cadd5a6e 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/CastToStringVectorProcessor.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/CastToStringVectorProcessor.java @@ -22,6 +22,8 @@ import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprType; +import java.util.Arrays; + public final class CastToStringVectorProcessor extends CastToTypeVectorProcessor { public CastToStringVectorProcessor(ExprVectorProcessor delegate) @@ -33,7 +35,9 @@ public CastToStringVectorProcessor(ExprVectorProcessor delegate) public ExprEvalVector evalVector(Expr.VectorInputBinding bindings) { ExprEvalVector result = delegate.evalVector(bindings); - return new ExprEvalStringVector(result.asObjectVector(ExprType.STRING)); + return new ExprEvalStringVector( + Arrays.stream(result.getObjectVector()).map(x -> x != null ? x.toString() : null).toArray(String[]::new) + ); } @Override diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalDoubleVector.java b/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalDoubleVector.java index 3eae05d4df36..8817c344ab25 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalDoubleVector.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalDoubleVector.java @@ -19,7 +19,6 @@ package org.apache.druid.math.expr.vector; -import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.ExprType; import java.util.Arrays; @@ -56,19 +55,18 @@ public double[] getDoubleVector() } @Override - public E asObjectVector(ExprType type) + public Object[] getObjectVector() { - switch (type) { - case STRING: - String[] s = new String[values.length]; - if (nulls != null) { - for (int i = 0; i < values.length; i++) { - s[i] = nulls[i] ? null : String.valueOf(values[i]); - } - } - return (E) s; - default: - throw new IAE("Cannot convert %s to %s object vector", getType(), type); + Double[] objects = new Double[values.length]; + if (nulls != null) { + for (int i = 0; i < values.length; i++) { + objects[i] = nulls[i] ? null : values[i]; + } + } else { + for (int i = 0; i < values.length; i++) { + objects[i] = values[i]; + } } + return objects; } } diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalLongVector.java b/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalLongVector.java index 3f91da475445..93f2cbab54dd 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalLongVector.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalLongVector.java @@ -19,7 +19,6 @@ package org.apache.druid.math.expr.vector; -import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.ExprType; import javax.annotation.Nullable; @@ -51,19 +50,19 @@ public double[] getDoubleVector() } @Override - public E asObjectVector(ExprType type) + public Object[] getObjectVector() { - switch (type) { - case STRING: - String[] s = new String[values.length]; - if (nulls != null) { - for (int i = 0; i < values.length; i++) { - s[i] = nulls[i] ? null : String.valueOf(values[i]); - } - } - return (E) s; - default: - throw new IAE("Cannot convert %s to %s object vector", getType(), type); + Long[] objects = new Long[values.length]; + if (nulls != null) { + for (int i = 0; i < values.length; i++) { + objects[i] = nulls[i] ? null : values[i]; + } + } else { + for (int i = 0; i < values.length; i++) { + objects[i] = values[i]; + } } + return objects; } + } diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalStringVector.java b/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalStringVector.java index 73a4a9501daf..304e70e5edb4 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalStringVector.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalStringVector.java @@ -20,7 +20,6 @@ package org.apache.druid.math.expr.vector; import org.apache.druid.common.config.NullHandling; -import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExprType; @@ -91,19 +90,8 @@ public double[] getDoubleVector() } @Override - public E getObjectVector() + public Object[] getObjectVector() { - return (E) values; - } - - @Override - public E asObjectVector(ExprType type) - { - switch (type) { - case STRING: - return (E) values; - default: - throw new IAE("Cannot convert %s to %s object vector", getType(), type); - } + return values; } } diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalVector.java b/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalVector.java index 7da17783e090..ddb607d36c34 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalVector.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/ExprEvalVector.java @@ -69,11 +69,5 @@ public boolean[] getNullVector() public abstract double[] getDoubleVector(); - public E getObjectVector() - { - // non-primitives should implement this - throw new IllegalArgumentException("Object vector not available"); - } - - public abstract E asObjectVector(ExprType type); + public abstract Object[] getObjectVector(); } diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/LongOutStringsInFunctionVectorProcessor.java b/core/src/main/java/org/apache/druid/math/expr/vector/LongOutStringsInFunctionVectorProcessor.java new file mode 100644 index 000000000000..e7ddb6d5e99a --- /dev/null +++ b/core/src/main/java/org/apache/druid/math/expr/vector/LongOutStringsInFunctionVectorProcessor.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.math.expr.vector; + +import org.apache.druid.math.expr.ExprType; + +import javax.annotation.Nullable; + +public abstract class LongOutStringsInFunctionVectorProcessor extends BivariateFunctionVectorObjectProcessor +{ + private final boolean[] outNulls; + + protected LongOutStringsInFunctionVectorProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + int maxVectorSize + ) + { + super( + CastToTypeVectorProcessor.cast(left, ExprType.STRING), + CastToTypeVectorProcessor.cast(right, ExprType.STRING), + maxVectorSize, + new long[maxVectorSize] + ); + this.outNulls = new boolean[maxVectorSize]; + } + + @Nullable + abstract Long processValue(@Nullable String leftVal, @Nullable String rightVal); + + @Override + void processIndex(String[] strings, String[] strings2, int i) + { + final Long outVal = processValue(strings[i], strings2[i]); + if (outVal == null) { + outValues[i] = 0L; + outNulls[i] = true; + } else { + outValues[i] = outVal; + outNulls[i] = false; + } + } + + @Override + void processNull(int i) + { + outValues[i] = 0L; + outNulls[i] = true; + } + + @Override + ExprEvalVector asEval() + { + return new ExprEvalLongVector(outValues, outNulls); + } + + @Override + public ExprType getOutputType() + { + return ExprType.LONG; + } +} diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/StringOutStringsInFunctionVectorProcessor.java b/core/src/main/java/org/apache/druid/math/expr/vector/StringOutStringsInFunctionVectorProcessor.java new file mode 100644 index 000000000000..b744b03743a2 --- /dev/null +++ b/core/src/main/java/org/apache/druid/math/expr/vector/StringOutStringsInFunctionVectorProcessor.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.math.expr.vector; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprType; + +import javax.annotation.Nullable; + +public abstract class StringOutStringsInFunctionVectorProcessor + extends BivariateFunctionVectorObjectProcessor +{ + final boolean sqlCompatible = NullHandling.sqlCompatible(); + + protected StringOutStringsInFunctionVectorProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + int maxVectorSize + ) + { + super( + CastToTypeVectorProcessor.cast(left, ExprType.STRING), + CastToTypeVectorProcessor.cast(right, ExprType.STRING), + maxVectorSize, + new String[maxVectorSize] + ); + } + + @Nullable + abstract String processValue(@Nullable String leftVal, @Nullable String rightVal); + + @Override + void processIndex(String[] strings, String[] strings2, int i) + { + outValues[i] = processValue(strings[i], strings2[i]); + } + + @Override + void processNull(int i) + { + outValues[i] = null; + } + + @Override + ExprEvalVector asEval() + { + return new ExprEvalStringVector(outValues); + } + + @Override + public ExprType getOutputType() + { + return ExprType.STRING; + } +} diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/VectorComparisonProcessors.java b/core/src/main/java/org/apache/druid/math/expr/vector/VectorComparisonProcessors.java index cdcf2bf73358..712b6b924643 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/VectorComparisonProcessors.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/VectorComparisonProcessors.java @@ -19,21 +19,82 @@ package org.apache.druid.math.expr.vector; +import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprType; + +import javax.annotation.Nullable; +import java.util.Objects; +import java.util.function.Supplier; public class VectorComparisonProcessors { + public static ExprVectorProcessor makeComparisonProcessor( + Expr.VectorInputBindingInspector inspector, + Expr left, + Expr right, + Supplier longOutStringsInFunctionVectorProcessor, + Supplier longOutLongsInProcessor, + Supplier doubleOutLongDoubleInProcessor, + Supplier doubleOutDoubleLongInProcessor, + Supplier doubleOutDoublesInProcessor + ) + { + final ExprType leftType = left.getOutputType(inspector); + final ExprType rightType = right.getOutputType(inspector); + ExprVectorProcessor processor = null; + if (leftType == ExprType.STRING) { + if (rightType == null || rightType == ExprType.STRING) { + processor = longOutStringsInFunctionVectorProcessor.get(); + } else { + processor = doubleOutDoublesInProcessor.get(); + } + } else if (leftType == null) { + if (rightType == ExprType.STRING || rightType == null) { + processor = longOutStringsInFunctionVectorProcessor.get(); + } + } else if (leftType == ExprType.DOUBLE || rightType == ExprType.DOUBLE) { + processor = doubleOutDoublesInProcessor.get(); + } + if (processor != null) { + return (ExprVectorProcessor) processor; + } + // fall through to normal math processor logic + return VectorMathProcessors.makeMathProcessor( + inspector, + left, + right, + longOutLongsInProcessor, + doubleOutLongDoubleInProcessor, + doubleOutDoubleLongInProcessor, + doubleOutDoublesInProcessor + ); + } + public static ExprVectorProcessor equal( Expr.VectorInputBindingInspector inspector, Expr left, Expr right ) { - return VectorMathProcessors.makeMathProcessor( + return makeComparisonProcessor( inspector, left, right, + () -> new LongOutStringsInFunctionVectorProcessor( + left.buildVectorized(inspector), + right.buildVectorized(inspector), + inspector.getMaxVectorSize() + ) + { + @Nullable + @Override + Long processValue(@Nullable String leftVal, @Nullable String rightVal) + { + return Evals.asLong(Objects.equals(leftVal, rightVal)); + } + }, () -> new LongOutLongsInFunctionVectorProcessor( left.buildVectorized(inspector), right.buildVectorized(inspector), @@ -91,10 +152,23 @@ public static ExprVectorProcessor notEqual( Expr right ) { - return VectorMathProcessors.makeMathProcessor( + return makeComparisonProcessor( inspector, left, right, + () -> new LongOutStringsInFunctionVectorProcessor( + left.buildVectorized(inspector), + right.buildVectorized(inspector), + inspector.getMaxVectorSize() + ) + { + @Nullable + @Override + Long processValue(@Nullable String leftVal, @Nullable String rightVal) + { + return Evals.asLong(!Objects.equals(leftVal, rightVal)); + } + }, () -> new LongOutLongsInFunctionVectorProcessor( left.buildVectorized(inspector), right.buildVectorized(inspector), @@ -152,10 +226,23 @@ public static ExprVectorProcessor greaterThanOrEqual( Expr right ) { - return VectorMathProcessors.makeMathProcessor( + return makeComparisonProcessor( inspector, left, right, + () -> new LongOutStringsInFunctionVectorProcessor( + left.buildVectorized(inspector), + right.buildVectorized(inspector), + inspector.getMaxVectorSize() + ) + { + @Nullable + @Override + Long processValue(@Nullable String leftVal, @Nullable String rightVal) + { + return Evals.asLong(Comparators.naturalNullsFirst().compare(leftVal, rightVal) >= 0); + } + }, () -> new LongOutLongsInFunctionVectorProcessor( left.buildVectorized(inspector), right.buildVectorized(inspector), @@ -213,10 +300,23 @@ public static ExprVectorProcessor greaterThan( Expr right ) { - return VectorMathProcessors.makeMathProcessor( + return makeComparisonProcessor( inspector, left, right, + () -> new LongOutStringsInFunctionVectorProcessor( + left.buildVectorized(inspector), + right.buildVectorized(inspector), + inspector.getMaxVectorSize() + ) + { + @Nullable + @Override + Long processValue(@Nullable String leftVal, @Nullable String rightVal) + { + return Evals.asLong(Comparators.naturalNullsFirst().compare(leftVal, rightVal) > 0); + } + }, () -> new LongOutLongsInFunctionVectorProcessor( left.buildVectorized(inspector), right.buildVectorized(inspector), @@ -274,10 +374,23 @@ public static ExprVectorProcessor lessThanOrEqual( Expr right ) { - return VectorMathProcessors.makeMathProcessor( + return makeComparisonProcessor( inspector, left, right, + () -> new LongOutStringsInFunctionVectorProcessor( + left.buildVectorized(inspector), + right.buildVectorized(inspector), + inspector.getMaxVectorSize() + ) + { + @Nullable + @Override + Long processValue(@Nullable String leftVal, @Nullable String rightVal) + { + return Evals.asLong(Comparators.naturalNullsFirst().compare(leftVal, rightVal) <= 0); + } + }, () -> new LongOutLongsInFunctionVectorProcessor( left.buildVectorized(inspector), right.buildVectorized(inspector), @@ -335,10 +448,23 @@ public static ExprVectorProcessor lessThan( Expr right ) { - return VectorMathProcessors.makeMathProcessor( + return makeComparisonProcessor( inspector, left, right, + () -> new LongOutStringsInFunctionVectorProcessor( + left.buildVectorized(inspector), + right.buildVectorized(inspector), + inspector.getMaxVectorSize() + ) + { + @Nullable + @Override + Long processValue(@Nullable String leftVal, @Nullable String rightVal) + { + return Evals.asLong(Comparators.naturalNullsFirst().compare(leftVal, rightVal) < 0); + } + }, () -> new LongOutLongsInFunctionVectorProcessor( left.buildVectorized(inspector), right.buildVectorized(inspector), diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java b/core/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java index 0a170ac24969..0737f252f8b4 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java @@ -21,6 +21,7 @@ import com.google.common.math.LongMath; import com.google.common.primitives.Ints; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprType; import org.apache.druid.math.expr.Exprs; @@ -130,16 +131,24 @@ public static ExprVectorProcessor makeMathProcessor( if (leftType == ExprType.LONG) { if (rightType == null || rightType == ExprType.LONG) { processor = longOutLongsInProcessor.get(); - } else if (rightType == ExprType.DOUBLE) { + } else if (rightType == ExprType.STRING || rightType == ExprType.DOUBLE) { processor = doubleOutLongDoubleInProcessor.get(); } } else if (leftType == ExprType.DOUBLE) { if (rightType == ExprType.LONG) { processor = doubleOutDoubleLongInProcessor.get(); - } else if (rightType == null || rightType == ExprType.DOUBLE) { + } else if (rightType == null || rightType == ExprType.STRING || rightType == ExprType.DOUBLE) { processor = doubleOutDoublesInProcessor.get(); } } else if (leftType == null) { + if (rightType == ExprType.LONG) { + processor = longOutLongsInProcessor.get(); + } else if (rightType == ExprType.DOUBLE) { + processor = doubleOutLongDoubleInProcessor.get(); + } else if (rightType == null) { + processor = longOutLongsInProcessor.get(); + } + } else if (leftType == ExprType.STRING) { if (rightType == ExprType.LONG) { processor = longOutLongsInProcessor.get(); } else if (rightType == ExprType.DOUBLE) { @@ -147,7 +156,7 @@ public static ExprVectorProcessor makeMathProcessor( } } if (processor == null) { - throw Exprs.cannotVectorize(); + throw Exprs.cannotVectorize(StringUtils.format("%s %s", leftType, rightType)); } return (ExprVectorProcessor) processor; } @@ -677,9 +686,9 @@ public static ExprVectorProcessor doublePower( Expr right ) { + final ExprType leftType = left.getOutputType(inspector); + final ExprType rightType = right.getOutputType(inspector); BivariateFunctionVectorProcessor processor = null; - ExprType leftType = left.getOutputType(inspector); - ExprType rightType = right.getOutputType(inspector); if ((leftType == ExprType.LONG && (rightType == null || rightType == ExprType.LONG)) || (leftType == null && rightType == ExprType.LONG)) { processor = new DoubleOutLongsInFunctionVectorProcessor( diff --git a/core/src/test/java/org/apache/druid/math/expr/ExprTest.java b/core/src/test/java/org/apache/druid/math/expr/ExprTest.java index 21239e91bdde..25815591190b 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ExprTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ExprTest.java @@ -161,6 +161,7 @@ public void testEqualsContractForStringArrayExpr() { EqualsVerifier.forClass(StringArrayExpr.class) .withIgnoredFields("outputType") + .withPrefabValues(Object.class, new String[]{"foo"}, new String[0]) .usingGetClass() .verify(); } @@ -170,6 +171,7 @@ public void testEqualsContractForLongArrayExpr() { EqualsVerifier.forClass(LongArrayExpr.class) .withIgnoredFields("outputType") + .withPrefabValues(Object.class, new Long[]{1L}, new Long[0]) .usingGetClass() .verify(); } @@ -179,6 +181,7 @@ public void testEqualsContractForDoubleArrayExpr() { EqualsVerifier.forClass(DoubleArrayExpr.class) .withIgnoredFields("outputType") + .withPrefabValues(Object.class, new Double[]{1.0}, new Double[0]) .usingGetClass() .verify(); } @@ -198,7 +201,7 @@ public void testEqualsContractForLambdaExpr() public void testEqualsContractForNullLongExpr() { EqualsVerifier.forClass(NullLongExpr.class) - .withIgnoredFields("outputType") + .withIgnoredFields("outputType", "value") .verify(); } @@ -206,7 +209,7 @@ public void testEqualsContractForNullLongExpr() public void testEqualsContractForNullDoubleExpr() { EqualsVerifier.forClass(NullDoubleExpr.class) - .withIgnoredFields("outputType") + .withIgnoredFields("outputType", "value") .verify(); } } diff --git a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java index 4fd7c13e9afc..6998a0e93d37 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java @@ -55,7 +55,7 @@ public void testSimple() @Test public void testParseConstants() { - validateLiteral("null", ExprType.STRING, null); + validateLiteral("null", null, null); validateLiteral("'hello'", ExprType.STRING, "hello"); validateLiteral("'hello \\uD83E\\uDD18'", ExprType.STRING, "hello \uD83E\uDD18"); validateLiteral("1", ExprType.LONG, 1L); diff --git a/core/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java b/core/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java index f7d0668a6ce7..7fd55048e236 100644 --- a/core/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java @@ -70,16 +70,31 @@ public void testUnaryOperators() } @Test - public void testBinaryOperators() + public void testBinaryMathOperators() { - final String[] columns = new String[]{"d1", "d2", "l1", "l2", "1", "1.0", "nonexistent"}; + final String[] columns = new String[]{"d1", "d2", "l1", "l2", "1", "1.0", "nonexistent", "null"}; final String[] columns2 = new String[]{"d1", "d2", "l1", "l2", "1", "1.0"}; final String[][] templateInputs = makeTemplateArgs(columns, columns2); final String[] templates = Arrays.stream(templateInputs) .map(i -> StringUtils.format("%s %s %s", i[0], "%s", i[1])) .toArray(String[]::new); - final String[] args = new String[]{"+", "-", "*", "/", "^", "%", ">", ">=", "<", "<=", "==", "!="}; + final String[] args = new String[]{"+", "-", "*", "/", "^", "%"}; + + testFunctions(types, templates, args); + } + + @Test + public void testBinaryComparisonOperators() + { + final String[] columns = new String[]{"d1", "d2", "l1", "l2", "1", "1.0", "s1", "s2", "nonexistent", "null"}; + final String[] columns2 = new String[]{"d1", "d2", "l1", "l2", "1", "1.0", "s1", "s2", "null"}; + final String[][] templateInputs = makeTemplateArgs(columns, columns2); + final String[] templates = + Arrays.stream(templateInputs) + .map(i -> StringUtils.format("%s %s %s", i[0], "%s", i[1])) + .toArray(String[]::new); + final String[] args = new String[]{">", ">=", "<", "<=", "==", "!="}; testFunctions(types, templates, args); } @@ -87,7 +102,7 @@ public void testBinaryOperators() @Test public void testBinaryOperatorTrees() { - final String[] columns = new String[]{"d1", "l1", "1", "1.0", "nonexistent"}; + final String[] columns = new String[]{"d1", "l1", "1", "1.0", "nonexistent", "null"}; final String[] columns2 = new String[]{"d2", "l2", "2", "2.0"}; final String[][] templateInputs = makeTemplateArgs(columns, columns2, columns2); final String[] templates = @@ -103,7 +118,7 @@ public void testBinaryOperatorTrees() public void testUnivariateFunctions() { final String[] functions = new String[]{"parse_long"}; - final String[] templates = new String[]{"%s(s1)", "%s(l1)", "%s(d1)", "%s(nonexistent)"}; + final String[] templates = new String[]{"%s(s1)", "%s(l1)", "%s(d1)", "%s(nonexistent)", "%s(null)"}; testFunctions(types, templates, functions); } @@ -142,7 +157,7 @@ public void testUnivariateMathFunctions() "bitwiseConvertDoubleToLongBits", "bitwiseConvertLongBitsToDouble" }; - final String[] templates = new String[]{"%s(l1)", "%s(d1)", "%s(pi())"}; + final String[] templates = new String[]{"%s(l1)", "%s(d1)", "%s(pi())", "%s(null)"}; testFunctions(types, templates, functions); } @@ -230,10 +245,14 @@ private static void testExpressionWithBindings( Assert.assertTrue(StringUtils.format("Cannot vectorize %s", expr), parsed.canVectorize(bindings.rhs)); ExprType outputType = parsed.getOutputType(bindings.rhs); ExprEvalVector vectorEval = parsed.buildVectorized(bindings.rhs).evalVector(bindings.rhs); - Assert.assertEquals(outputType, vectorEval.getType()); + // 'null' expressions can have an output type of null, but still evaluate in default mode, so skip type checks + if (outputType != null) { + Assert.assertEquals(outputType, vectorEval.getType()); + } for (int i = 0; i < VECTOR_SIZE; i++) { ExprEval eval = parsed.eval(bindings.lhs[i]); - if (!eval.isNumericNull()) { + // 'null' expressions can have an output type of null, but still evaluate in default mode, so skip type checks + if (outputType != null && !eval.isNumericNull()) { Assert.assertEquals(eval.type(), outputType); } Assert.assertEquals( diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java index afdde5b650d4..d8de2d7bc110 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java @@ -111,7 +111,7 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelect @Override public boolean canVectorize(ColumnInspector columnInspector) { - return delegate.canVectorize(columnInspector) && filter.canVectorizeMatcher(); + return delegate.canVectorize(columnInspector) && filter.canVectorizeMatcher(columnInspector); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java b/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java index 738c65df9143..5d245b2fe408 100644 --- a/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java +++ b/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java @@ -58,13 +58,34 @@ public interface DimensionSpec extends Cacheable @Nullable ExtractionFn getExtractionFn(); + /** + * Decorate a {@link DimensionSelector}, allowing custom transformation of underlying behavior (e.g. performing + * extraction functions in the case of {@link ExtractionDimensionSpec}, regex filtering in the case of + * {@link RegexFilteredDimensionSpec}, and so on). + */ DimensionSelector decorate(DimensionSelector selector); + /** + * Vectorized analog of {@link #decorate(DimensionSelector)} for {@link SingleValueDimensionVectorSelector}, most + * likely produced with + * {@link org.apache.druid.segment.vector.VectorColumnSelectorFactory#makeSingleValueDimensionSelector(DimensionSpec)} + * + * Decoration allows a {@link DimensionSpec} to customize the behavior of the underlying selector, for example + * transforming or filtering values. + */ default SingleValueDimensionVectorSelector decorate(SingleValueDimensionVectorSelector selector) { throw new UOE("DimensionSpec[%s] cannot vectorize", getClass().getName()); } + /** + * Vectorized analog of {@link #decorate(DimensionSelector) for {@link MultiValueDimensionVectorSelector}, most likely + * produced with + * {@link org.apache.druid.segment.vector.VectorColumnSelectorFactory#makeMultiValueDimensionSelector(DimensionSpec)} + * + * Decoration allows a {@link DimensionSpec} to customize the behavior of the underlying selector, for example + * transforming or filtering values. + */ default MultiValueDimensionVectorSelector decorate(MultiValueDimensionVectorSelector selector) { throw new UOE("DimensionSpec[%s] cannot vectorize", getClass().getName()); @@ -84,6 +105,10 @@ default boolean canVectorize() return false; } + /** + * If the {@link #decorate} methods alter the underlying behavior of the dimension selector, does this alteration + * preserve the original ordering? + */ boolean preservesOrdering(); /** diff --git a/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java b/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java index 62d32f90610d..aff4346c4350 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java @@ -32,4 +32,20 @@ public interface DruidPredicateFactory DruidFloatPredicate makeFloatPredicate(); DruidDoublePredicate makeDoublePredicate(); + + /** + * Object predicate is currently only used by vectorized matchers for non-string object selectors. This currently + * means it will be used only if we encounter COMPLEX types, but will also include array types once they are more + * supported throughout the query engines. + * + * To preserve behavior with non-vectorized matchers which use a string predicate with null inputs for these 'nil' + * matchers, we do the same thing here. + * + * @see org.apache.druid.segment.VectorColumnProcessorFactory#makeObjectProcessor + */ + default Predicate makeObjectPredicate() + { + final Predicate stringPredicate = makeStringPredicate(); + return o -> stringPredicate.apply(null); + } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java b/processing/src/main/java/org/apache/druid/query/filter/Filter.java index 097c61ed9030..c554e8f0280f 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java @@ -25,6 +25,7 @@ import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.DefaultBitmapResultFactory; import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -154,8 +155,9 @@ default VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory /** * Returns true if this filter can produce a vectorized matcher from its "makeVectorMatcher" method. + * @param inspector Supplies type information for the selectors this filter will match against */ - default boolean canVectorizeMatcher() + default boolean canVectorizeMatcher(ColumnInspector inspector) { return false; } diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 80e2c03d8d33..ca12c2c19809 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -55,6 +55,7 @@ import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; import org.apache.druid.query.lookup.LookupExtractionFn; import org.apache.druid.query.lookup.LookupExtractor; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; @@ -321,7 +322,7 @@ public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory fa } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher(ColumnInspector inspector) { return true; } diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/NilVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/NilVectorValueMatcher.java deleted file mode 100644 index badf4b9af0e2..000000000000 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/NilVectorValueMatcher.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.filter.vector; - -import org.apache.druid.query.filter.DruidPredicateFactory; -import org.apache.druid.segment.vector.VectorSizeInspector; - -import javax.annotation.Nullable; - -/** - * Treats all rows as null. - */ -public class NilVectorValueMatcher implements VectorValueMatcherFactory -{ - private final VectorSizeInspector vectorInspector; - - public NilVectorValueMatcher(final VectorSizeInspector vectorInspector) - { - this.vectorInspector = vectorInspector; - } - - @Override - public VectorValueMatcher makeMatcher(@Nullable String value) - { - return BooleanVectorValueMatcher.of(vectorInspector, value == null); - } - - @Override - public VectorValueMatcher makeMatcher(DruidPredicateFactory predicateFactory) - { - return BooleanVectorValueMatcher.of(vectorInspector, predicateFactory.makeStringPredicate().apply(null)); - } -} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java new file mode 100644 index 000000000000..05f30ce8fbef --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import com.google.common.base.Predicate; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import javax.annotation.Nullable; + +/** + * Generic object matcher on top of a {@link VectorObjectSelector}. String object selectors specifically should use + * {@link StringObjectVectorValueMatcher} instead. + * + * Note that this matcher currently will always behave as a nil matcher when used with a string matcher, since there is + * not enough machinery in place to allow custom comparison of string values against arbitrary array or complex types. + * In other words, how do we compare a string value from a selector filter against an array or a sketch? The short + * answer is we can't right now. Perhaps complex type serde could be extended to provide a string matcher to handle the + * complex type case? For array types, we might consider creating a different filter (which would also be an option for + * complex 'selectors'), or extending selector to support arrays of values. + */ +public class ObjectVectorValueMatcher implements VectorValueMatcherFactory +{ + protected final VectorObjectSelector selector; + + public ObjectVectorValueMatcher(final VectorObjectSelector selector) + { + this.selector = selector; + } + + @Override + public VectorValueMatcher makeMatcher(@Nullable String value) + { + // return a traditional nil matcher, as is the custom of our people + return BooleanVectorValueMatcher.of(selector, value == null); + } + + @Override + public VectorValueMatcher makeMatcher(DruidPredicateFactory predicateFactory) + { + final Predicate predicate = predicateFactory.makeObjectPredicate(); + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final Object[] vector = selector.getObjectVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (predicate.apply(vector[rowNum])) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/StringObjectVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/StringObjectVectorValueMatcher.java new file mode 100644 index 000000000000..6e5e09dbc253 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/StringObjectVectorValueMatcher.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import com.google.common.base.Predicate; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import javax.annotation.Nullable; +import java.util.Objects; + +/** + * String matcher on top of a {@link VectorObjectSelector} + */ +public class StringObjectVectorValueMatcher implements VectorValueMatcherFactory +{ + protected final VectorObjectSelector selector; + + public StringObjectVectorValueMatcher(final VectorObjectSelector selector) + { + this.selector = selector; + } + + @Override + public VectorValueMatcher makeMatcher(@Nullable String value) + { + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final Object[] vector = selector.getObjectVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (Objects.equals(value, vector[rowNum])) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } + + @Override + public VectorValueMatcher makeMatcher(DruidPredicateFactory predicateFactory) + { + final Predicate predicate = predicateFactory.makeStringPredicate(); + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final Object[] vector = selector.getObjectVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (predicate.apply((String) vector[rowNum])) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java index 4d30c2d6de2e..b95ebc0c5ff0 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java @@ -21,6 +21,7 @@ import org.apache.druid.segment.VectorColumnProcessorFactory; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -91,6 +92,9 @@ public VectorValueMatcherFactory makeObjectProcessor( final VectorObjectSelector selector ) { - return new NilVectorValueMatcher(selector); + if (ValueType.STRING.equals(capabilities.getType())) { + return new StringObjectVectorValueMatcher(selector); + } + return new ObjectVectorValueMatcher(selector); } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java index 53748dfcc5df..6f332def41b3 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java @@ -65,7 +65,9 @@ public GroupByVectorColumnSelector makeMultiValueDimensionProcessor( ValueType.STRING == capabilities.getType(), "groupBy dimension processors must be STRING typed" ); - throw new UnsupportedOperationException("Multi-value dimensions not yet implemented for vectorized groupBys"); + throw new UnsupportedOperationException( + "Vectorized groupBys on multi-value dictionary-encoded dimensions are not yet implemented" + ); } @Override @@ -110,6 +112,11 @@ public GroupByVectorColumnSelector makeObjectProcessor( final VectorObjectSelector selector ) { + if (ValueType.STRING.equals(capabilities.getType())) { + throw new UnsupportedOperationException( + "Vectorized groupBys on non-dictionary encoded string columns with object selectors are not yet implemented" + ); + } return NilGroupByVectorColumnSelector.INSTANCE; } } diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java index e2857a1a630d..18307121d2e9 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java @@ -318,6 +318,14 @@ private static T makeVectorProcessorInternal( switch (capabilities.getType()) { case STRING: + // if column is not uniquely dictionary encoded, use an object selector + if (capabilities.isDictionaryEncoded().isFalse() || capabilities.areDictionaryValuesUnique().isFalse()) { + return processorFactory.makeObjectProcessor( + capabilities, + objectSelectorFn.apply(selectorFactory) + ); + } + if (mayBeMultiValue(capabilities)) { return processorFactory.makeMultiValueDimensionProcessor( capabilities, diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java index 8d9720673e6b..cbfe79b84367 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java @@ -210,8 +210,7 @@ public boolean canVectorize( { if (filter != null) { final boolean filterCanVectorize = - filter.shouldUseBitmapIndex(makeBitmapIndexSelector(virtualColumns)) - || filter.canVectorizeMatcher(); + filter.shouldUseBitmapIndex(makeBitmapIndexSelector(virtualColumns)) || filter.canVectorizeMatcher(this); if (!filterCanVectorize) { return false; diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 784986a55b6a..7b3dd448c5d3 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -177,6 +177,21 @@ public static ColumnCapabilitiesImpl createSimpleNumericColumnCapabilities(Value return builder; } + /** + * Simple, single valued, non dictionary encoded string without bitmap index or anything fancy + */ + public static ColumnCapabilitiesImpl createSimpleSingleValueStringColumnCapabilities() + { + return new ColumnCapabilitiesImpl().setType(ValueType.STRING) + .setHasMultipleValues(false) + .setHasBitmapIndexes(false) + .setDictionaryEncoded(false) + .setDictionaryValuesSorted(false) + .setDictionaryValuesUnique(false) + .setHasSpatialIndexes(false) + .setHasNulls(true); + } + /** * Similar to {@link #createSimpleNumericColumnCapabilities} except {@link #hasMultipleValues} is explicitly true * and {@link #hasNulls} is not set diff --git a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java index 9f03f254a21f..eb27ec8928ac 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java @@ -36,6 +36,7 @@ import org.apache.druid.query.filter.vector.ReadableVectorMatch; import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -132,9 +133,9 @@ public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory fa } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher(ColumnInspector inspector) { - return filters.stream().allMatch(Filter::canVectorizeMatcher); + return filters.stream().allMatch(filter -> filter.canVectorizeMatcher(inspector)); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java index 424bfe4e85e9..73c10e71a18e 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java @@ -41,6 +41,7 @@ import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; @@ -137,7 +138,7 @@ public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory fa } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher(ColumnInspector inspector) { return true; } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java index bba9d552346f..d2f85190c9a9 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java @@ -36,6 +36,7 @@ import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; @@ -107,7 +108,7 @@ public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory fa } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher(ColumnInspector inspector) { return true; } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index 51cfbf56c312..9fcaae8ffebb 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -19,24 +19,39 @@ package org.apache.druid.segment.filter; +import com.google.common.base.Predicate; import com.google.common.base.Supplier; import com.google.common.base.Suppliers; import com.google.common.collect.Iterables; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprType; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.expression.ExprUtils; import org.apache.druid.query.filter.BitmapIndexSelector; +import org.apache.druid.query.filter.DruidDoublePredicate; +import org.apache.druid.query.filter.DruidFloatPredicate; +import org.apache.druid.query.filter.DruidLongPredicate; +import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.FilterTuning; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.BooleanVectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.virtual.ExpressionSelectors; +import org.apache.druid.segment.virtual.ExpressionVectorSelectors; import java.util.Arrays; import java.util.Objects; @@ -55,6 +70,105 @@ public ExpressionFilter(final Supplier expr, final FilterTuning filterTuni this.filterTuning = filterTuning; } + @Override + public boolean canVectorizeMatcher(ColumnInspector inspector) + { + return expr.get().canVectorize(inspector); + } + + @Override + public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) + { + final Expr theExpr = expr.get(); + + DruidPredicateFactory predicateFactory = new DruidPredicateFactory() + { + @Override + public Predicate makeStringPredicate() + { + return Evals::asBoolean; + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + return Evals::asBoolean; + } + + @Override + public DruidFloatPredicate makeFloatPredicate() + { + return Evals::asBoolean; + } + + @Override + public DruidDoublePredicate makeDoublePredicate() + { + return Evals::asBoolean; + } + + // The hashcode and equals are to make SubclassesMustOverrideEqualsAndHashCodeTest stop complaining.. + // DruidPredicateFactory currently doesn't really need equals or hashcode since 'toString' method that is actually + // called when testing equality of DimensionPredicateFilter, so it's the truly required method, but that seems + // a bit strange. DimensionPredicateFilter should probably be reworked to use equals from DruidPredicateFactory + // instead of using toString. + @Override + public int hashCode() + { + return super.hashCode(); + } + + @Override + public boolean equals(Object obj) + { + return super.equals(obj); + } + }; + + + final ExprType outputType = theExpr.getOutputType(factory); + + // for all vectorizable expressions, outputType will only ever be null in cases where there is absolutely no + // input type information, so composed entirely of null constants or missing columns. the expression is + // effectively constant + if (outputType == null) { + + // in sql compatible mode, this means no matches ever because null doesn't equal anything so just use the + // false matcher + if (NullHandling.sqlCompatible()) { + return BooleanVectorValueMatcher.of(factory.getReadableVectorInspector(), false); + } + // however in default mode, we still need to evaluate the expression since it might end up... strange, from + // default values. Since it is effectively constant though, we can just do that up front and decide if it matches + // or not. + return BooleanVectorValueMatcher.of( + factory.getReadableVectorInspector(), + theExpr.eval(ExprUtils.nilBindings()).asBoolean() + ); + } + + // if we got here, we really have to evaluate the expressions to match + switch (outputType) { + case LONG: + return VectorValueMatcherColumnProcessorFactory.instance().makeLongProcessor( + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG), + ExpressionVectorSelectors.makeVectorValueSelector(factory, theExpr) + ).makeMatcher(predicateFactory); + case DOUBLE: + return VectorValueMatcherColumnProcessorFactory.instance().makeDoubleProcessor( + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE), + ExpressionVectorSelectors.makeVectorValueSelector(factory, theExpr) + ).makeMatcher(predicateFactory); + case STRING: + return VectorValueMatcherColumnProcessorFactory.instance().makeObjectProcessor( + ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities(), + ExpressionVectorSelectors.makeVectorObjectSelector(factory, theExpr) + ).makeMatcher(predicateFactory); + default: + throw new UOE("Vectorized expression matchers not implemented for type: [%s]", outputType); + } + } + @Override public ValueMatcher makeMatcher(final ColumnSelectorFactory factory) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java index 27223d92fbc2..4760b7297a00 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java @@ -25,6 +25,7 @@ import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.filter.vector.BooleanVectorValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -89,7 +90,7 @@ public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, Bitm } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher(ColumnInspector inspector) { return true; } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java index 473ef9527ac9..a0dac33a456e 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java @@ -35,6 +35,7 @@ import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; @@ -99,7 +100,7 @@ public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory fa } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher(ColumnInspector inspector) { return true; } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java index d4f7b6cfb9e9..5e239ac3ddd4 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java @@ -29,6 +29,7 @@ import org.apache.druid.query.filter.vector.VectorMatch; import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -101,9 +102,9 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher(ColumnInspector inspector) { - return baseFilter.canVectorizeMatcher(); + return baseFilter.canVectorizeMatcher(inspector); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java index 7e532c878e8a..14b1149f19e7 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java @@ -35,6 +35,7 @@ import org.apache.druid.query.filter.vector.VectorMatch; import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -103,9 +104,9 @@ public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory fa } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher(ColumnInspector inspector) { - return filters.stream().allMatch(Filter::canVectorizeMatcher); + return filters.stream().allMatch(filter -> filter.canVectorizeMatcher(inspector)); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java b/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java index b7f3f5dd1dca..0a78f3c4a7a3 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java @@ -86,9 +86,27 @@ public ValueMatcher makeLongProcessor(BaseLongColumnValueSelector selector) @Override public ValueMatcher makeComplexProcessor(BaseObjectColumnValueSelector selector) { - if (selector instanceof NilColumnValueSelector || !mayBeFilterable(selector.classOfObject())) { + if (selector instanceof NilColumnValueSelector) { // Column does not exist, or is unfilterable. Treat it as all nulls. return BooleanValueMatcher.of(predicateFactory.makeStringPredicate().apply(null)); + } else if (!isNumberOrString(selector.classOfObject())) { + // if column is definitely not a number of string, use the object predicate + final Predicate predicate = predicateFactory.makeObjectPredicate(); + return new ValueMatcher() + { + @Override + public boolean matches() + { + return predicate.apply(selector.getObject()); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("selector", selector); + inspector.visit("predicate", predicate); + } + }; } else { // Column exists but the type of value is unknown (we might have got here because "defaultType" is COMPLEX). // Return a ValueMatcher that inspects the object and does type-based comparison. @@ -140,7 +158,7 @@ public boolean matches() public void inspectRuntimeShape(RuntimeShapeInspector inspector) { inspector.visit("selector", selector); - inspector.visit("value", predicateFactory); + inspector.visit("factory", predicateFactory); } private Predicate getStringPredicate() @@ -188,7 +206,7 @@ private DruidDoublePredicate getDoublePredicate() * * @param clazz class of object */ - private static boolean mayBeFilterable(final Class clazz) + private static boolean isNumberOrString(final Class clazz) { if (Number.class.isAssignableFrom(clazz) || String.class.isAssignableFrom(clazz)) { // clazz is a Number or String. diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java index 54dcef8306cb..b4241ebac8c7 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java @@ -29,6 +29,7 @@ import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; @@ -119,7 +120,7 @@ public double estimateSelectivity(BitmapIndexSelector indexSelector) } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher(ColumnInspector inspector) { return true; } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java index 40f4923b3518..9238089c988e 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java @@ -25,6 +25,7 @@ import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.filter.vector.BooleanVectorValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -86,7 +87,7 @@ public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, Bitm } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher(ColumnInspector inspector) { return true; } diff --git a/processing/src/main/java/org/apache/druid/segment/vector/FilteredVectorOffset.java b/processing/src/main/java/org/apache/druid/segment/vector/FilteredVectorOffset.java index a6313339c664..4ced41acb0cd 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/FilteredVectorOffset.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/FilteredVectorOffset.java @@ -51,7 +51,7 @@ public static FilteredVectorOffset create( // This is not the same logic as the row-by-row FilteredOffset, which uses bitmaps whenever possible. // I am not convinced that approach is best in all cases (it's potentially too eager) and also have not implemented // it for vector matchers yet. So let's keep this method simple for now, and try to harmonize them in the future. - Preconditions.checkState(filter.canVectorizeMatcher(), "Cannot vectorize"); + Preconditions.checkState(filter.canVectorizeMatcher(baseColumnSelectorFactory), "Cannot vectorize"); final VectorValueMatcher filterMatcher = filter.makeVectorMatcher(baseColumnSelectorFactory); return new FilteredVectorOffset(baseOffset, filterMatcher); } diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java index f33ed7b08384..4928b43fe630 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java @@ -52,8 +52,12 @@ default int getMaxVectorSize() } /** - * Returns a string-typed, single-value-per-row column selector. Should only be called on columns where - * {@link #getColumnCapabilities} indicates they return STRING, or on nonexistent columns. + * Returns a dictionary encoded, string-typed, single-value-per-row column selector. Should only be called on columns + * where {@link #getColumnCapabilities} indicates they return STRING, or on nonexistent columns. Since the selector + * created with this method operates directly on the dictionary encoded input, STRING values must be translated from + * the dictionary id for a given row value using {@link SingleValueDimensionVectorSelector#lookupName(int)}, but + * this selector can prove optimal for operations which can be done directly on the underlying dictionary ids, such + * as grouping within a segment. * * If you need to write code that adapts to different input types, you should write a * {@link org.apache.druid.segment.VectorColumnProcessorFactory} and use one of the @@ -62,9 +66,9 @@ default int getMaxVectorSize() SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(DimensionSpec dimensionSpec); /** - * Returns a string-typed, multi-value-per-row column selector. Should only be called on columns where - * {@link #getColumnCapabilities} indicates they return STRING. Unlike {@link #makeSingleValueDimensionSelector}, - * this should not be called on nonexistent columns. + * Returns a dictionary encoded, string-typed, multi-value-per-row column selector. Should only be called on columns + * where {@link #getColumnCapabilities} indicates they return STRING. Unlike + * {@link #makeSingleValueDimensionSelector}, this should not be called on nonexistent columns. * * If you need to write code that adapts to different input types, you should write a * {@link org.apache.druid.segment.VectorColumnProcessorFactory} and use one of the @@ -86,6 +90,11 @@ default int getMaxVectorSize() * Returns an object selector. Should only be called on columns where {@link #getColumnCapabilities} indicates that * they return STRING or COMPLEX, or on nonexistent columns. * + * For STRING, this is needed if values are not dictionary encoded, such as computed virtual columns, or can + * optionally be used in place of {@link SingleValueDimensionVectorSelector} when using the dictionary isn't helpful. + * Currently, this should only be called on single valued STRING inputs (multi-value STRING vector object selector + * is not yet implemented). + * * If you need to write code that adapts to different input types, you should write a * {@link org.apache.druid.segment.VectorColumnProcessorFactory} and use one of the * {@link org.apache.druid.segment.ColumnProcessors#makeVectorProcessor} functions instead of using this method. diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index 931a794dbdae..67d1c24fbe62 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -45,6 +45,9 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprType; +import org.apache.druid.math.expr.Parser; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.CountAggregatorFactory; @@ -57,6 +60,7 @@ import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.Cursor; @@ -82,6 +86,8 @@ import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorCursor; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; @@ -113,7 +119,12 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest ImmutableList.of( new ExpressionVirtualColumn("expr", "1.0 + 0.1", ValueType.FLOAT, TestExprMacroTable.INSTANCE), new ExpressionVirtualColumn("exprDouble", "1.0 + 1.1", ValueType.DOUBLE, TestExprMacroTable.INSTANCE), - new ExpressionVirtualColumn("exprLong", "1 + 2", ValueType.LONG, TestExprMacroTable.INSTANCE) + new ExpressionVirtualColumn("exprLong", "1 + 2", ValueType.LONG, TestExprMacroTable.INSTANCE), + new ExpressionVirtualColumn("vdim0", "dim0", ValueType.STRING, TestExprMacroTable.INSTANCE), + new ExpressionVirtualColumn("vdim1", "dim1", ValueType.STRING, TestExprMacroTable.INSTANCE), + new ExpressionVirtualColumn("vd0", "d0", ValueType.DOUBLE, TestExprMacroTable.INSTANCE), + new ExpressionVirtualColumn("vf0", "f0", ValueType.FLOAT, TestExprMacroTable.INSTANCE), + new ExpressionVirtualColumn("vl0", "l0", ValueType.LONG, TestExprMacroTable.INSTANCE) ) ); @@ -382,12 +393,11 @@ private Sequence makeCursorSequence(final Filter filter) private VectorCursor makeVectorCursor(final Filter filter) { + return adapter.makeVectorCursor( filter, Intervals.ETERNITY, - // VirtualColumns do not support vectorization yet. Avoid passing them in, and any tests that need virtual - // columns should skip vectorization tests. - VirtualColumns.EMPTY, + VIRTUAL_COLUMNS, false, 3, // Vector size smaller than the number of rows, to ensure we use more than one. null @@ -445,7 +455,11 @@ private long selectCountUsingFilteredAggregator(final DimFilter filter) private long selectCountUsingVectorizedFilteredAggregator(final DimFilter dimFilter) { - Preconditions.checkState(makeFilter(dimFilter).canVectorizeMatcher(), "Cannot vectorize filter: %s", dimFilter); + Preconditions.checkState( + makeFilter(dimFilter).canVectorizeMatcher(adapter), + "Cannot vectorize filter: %s", + dimFilter + ); try (final VectorCursor cursor = makeVectorCursor(null)) { final FilteredAggregatorFactory aggregatorFactory = new FilteredAggregatorFactory( @@ -597,9 +611,9 @@ public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher(ColumnInspector inspector) { - return theFilter.canVectorizeMatcher(); + return theFilter.canVectorizeMatcher(inspector); } @Override @@ -664,6 +678,63 @@ private List selectColumnValuesMatchingFilterUsingVectorCursor( } } + private List selectColumnValuesMatchingFilterUsingVectorVirtualColumnCursor( + final DimFilter filter, + final String virtualColumn, + final String selectColumn + ) + { + final Expr parsedIdentifier = Parser.parse(selectColumn, TestExprMacroTable.INSTANCE); + try (final VectorCursor cursor = makeVectorCursor(makeFilter(filter))) { + + final ExprType outputType = parsedIdentifier.getOutputType(cursor.getColumnSelectorFactory()); + final List values = new ArrayList<>(); + + if (ExprType.STRING.equals(outputType)) { + final VectorObjectSelector objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector( + virtualColumn + ); + while (!cursor.isDone()) { + final Object[] rowVector = objectSelector.getObjectVector(); + for (int i = 0; i < cursor.getCurrentVectorSize(); i++) { + values.add((String) rowVector[i]); + } + cursor.advance(); + } + } else { + final VectorValueSelector valueSelector = cursor.getColumnSelectorFactory().makeValueSelector(virtualColumn); + while (!cursor.isDone()) { + final boolean[] nulls = valueSelector.getNullVector(); + if (ExprType.DOUBLE.equals(outputType)) { + final double[] doubles = valueSelector.getDoubleVector(); + for (int i = 0; i < cursor.getCurrentVectorSize(); i++) { + if (nulls != null && nulls[i]) { + values.add(null); + } else { + values.add(String.valueOf(doubles[i])); + } + } + } else { + final long[] longs = valueSelector.getLongVector(); + for (int i = 0; i < cursor.getCurrentVectorSize(); i++) { + if (nulls != null && nulls[i]) { + values.add(null); + } else { + values.add(String.valueOf(longs[i])); + } + } + } + + cursor.advance(); + } + } + + + + return values; + } + } + private List selectColumnValuesMatchingFilterUsingRowBasedColumnSelectorFactory( final DimFilter filter, final String selectColumn @@ -734,6 +805,12 @@ private void assertFilterMatches( expectedRows, selectColumnValuesMatchingFilterUsingVectorCursor(filter, "dim0") ); + + Assert.assertEquals( + "Cursor Virtual Column (vectorized): " + filter, + expectedRows, + selectColumnValuesMatchingFilterUsingVectorVirtualColumnCursor(filter, "vdim0", "dim0") + ); } Assert.assertEquals( diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java index 8263538e0386..1e6d188e4095 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java @@ -80,9 +80,13 @@ public void testLexicographicMatchEverything() { final List filters = ImmutableList.of( new BoundDimFilter("dim0", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new BoundDimFilter("vdim0", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), new BoundDimFilter("dim1", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new BoundDimFilter("vdim1", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), new BoundDimFilter("dim2", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), - new BoundDimFilter("dim3", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC) + new BoundDimFilter("vdim2", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new BoundDimFilter("dim3", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new BoundDimFilter("vdim3", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC) ); for (BoundDimFilter filter : filters) { @@ -424,12 +428,12 @@ public void testNumericMatchTooStrict() @Test public void testNumericMatchVirtualColumn() { - assertFilterMatchesSkipVectorize( + assertFilterMatches( new BoundDimFilter("expr", "1", "2", false, false, false, null, StringComparators.NUMERIC), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") ); - assertFilterMatchesSkipVectorize( + assertFilterMatches( new BoundDimFilter("expr", "2", "3", false, false, false, null, StringComparators.NUMERIC), ImmutableList.of() ); @@ -690,6 +694,54 @@ public void testNumericNullsAndZeros() ); } + @Test + public void testVirtualNumericNullsAndZeros() + { + assertFilterMatches( + new BoundDimFilter( + "vd0", + "0.0", + "1.0", + false, + false, + false, + null, + StringComparators.NUMERIC + ), + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "2", "7") : ImmutableList.of("0") + ); + + assertFilterMatches( + new BoundDimFilter( + "vf0", + "0.0", + "1.0", + false, + false, + false, + null, + StringComparators.NUMERIC + ), + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "4", "6") : ImmutableList.of("0") + ); + + assertFilterMatches( + new BoundDimFilter( + "vl0", + "0.0", + "1.0", + false, + false, + false, + null, + StringComparators.NUMERIC + ), + NullHandling.replaceWithDefault() && canTestNumericNullsAsDefaultValues + ? ImmutableList.of("0", "3", "7") + : ImmutableList.of("0") + ); + } + @Test public void testNumericNulls() { diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java index 704394ec7bfa..efa3cf5555b7 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java @@ -127,20 +127,20 @@ public static void tearDown() throws Exception @Test public void testOneSingleValuedStringColumn() { - assertFilterMatchesSkipVectorize(edf("dim3 == ''"), ImmutableList.of("0")); - assertFilterMatchesSkipVectorize(edf("dim3 == '1'"), ImmutableList.of("3", "4", "6")); - assertFilterMatchesSkipVectorize(edf("dim3 == 'a'"), ImmutableList.of("7")); - assertFilterMatchesSkipVectorize(edf("dim3 == 1"), ImmutableList.of("3", "4", "6")); - assertFilterMatchesSkipVectorize(edf("dim3 == 1.0"), ImmutableList.of("3", "4", "6")); - assertFilterMatchesSkipVectorize(edf("dim3 == 1.234"), ImmutableList.of("9")); - assertFilterMatchesSkipVectorize(edf("dim3 < '2'"), ImmutableList.of("0", "1", "3", "4", "6", "9")); + assertFilterMatches(edf("dim3 == ''"), ImmutableList.of("0")); + assertFilterMatches(edf("dim3 == '1'"), ImmutableList.of("3", "4", "6")); + assertFilterMatches(edf("dim3 == 'a'"), ImmutableList.of("7")); + assertFilterMatches(edf("dim3 == 1"), ImmutableList.of("3", "4", "6")); + assertFilterMatches(edf("dim3 == 1.0"), ImmutableList.of("3", "4", "6")); + assertFilterMatches(edf("dim3 == 1.234"), ImmutableList.of("9")); + assertFilterMatches(edf("dim3 < '2'"), ImmutableList.of("0", "1", "3", "4", "6", "9")); if (NullHandling.replaceWithDefault()) { - assertFilterMatchesSkipVectorize(edf("dim3 < 2"), ImmutableList.of("0", "3", "4", "6", "7", "9")); - assertFilterMatchesSkipVectorize(edf("dim3 < 2.0"), ImmutableList.of("0", "3", "4", "6", "7", "9")); + assertFilterMatches(edf("dim3 < 2"), ImmutableList.of("0", "3", "4", "6", "7", "9")); + assertFilterMatches(edf("dim3 < 2.0"), ImmutableList.of("0", "3", "4", "6", "7", "9")); } else { // Empty String and "a" will not match - assertFilterMatchesSkipVectorize(edf("dim3 < 2"), ImmutableList.of("3", "4", "6", "9")); - assertFilterMatchesSkipVectorize(edf("dim3 < 2.0"), ImmutableList.of("3", "4", "6", "9")); + assertFilterMatches(edf("dim3 < 2"), ImmutableList.of("3", "4", "6", "9")); + assertFilterMatches(edf("dim3 < 2.0"), ImmutableList.of("3", "4", "6", "9")); } assertFilterMatchesSkipVectorize(edf("like(dim3, '1%')"), ImmutableList.of("1", "3", "4", "6", "9")); assertFilterMatchesSkipVectorize(edf("array_contains(dim3, '1')"), ImmutableList.of("3", "4", "6")); @@ -175,16 +175,16 @@ public void testSingleAndMultiValuedStringColumn() public void testOneLongColumn() { if (NullHandling.replaceWithDefault()) { - assertFilterMatchesSkipVectorize(edf("dim1 == ''"), ImmutableList.of("0")); + assertFilterMatches(edf("dim1 == ''"), ImmutableList.of("0")); } else { // A long does not match empty string - assertFilterMatchesSkipVectorize(edf("dim1 == ''"), ImmutableList.of()); + assertFilterMatches(edf("dim1 == ''"), ImmutableList.of()); } - assertFilterMatchesSkipVectorize(edf("dim1 == '1'"), ImmutableList.of("1")); - assertFilterMatchesSkipVectorize(edf("dim1 == 2"), ImmutableList.of("2")); - assertFilterMatchesSkipVectorize(edf("dim1 < '2'"), ImmutableList.of("0", "1")); - assertFilterMatchesSkipVectorize(edf("dim1 < 2"), ImmutableList.of("0", "1")); - assertFilterMatchesSkipVectorize(edf("dim1 < 2.0"), ImmutableList.of("0", "1")); + assertFilterMatches(edf("dim1 == '1'"), ImmutableList.of("1")); + assertFilterMatches(edf("dim1 == 2"), ImmutableList.of("2")); + assertFilterMatches(edf("dim1 < '2'"), ImmutableList.of("0", "1")); + assertFilterMatches(edf("dim1 < 2"), ImmutableList.of("0", "1")); + assertFilterMatches(edf("dim1 < 2.0"), ImmutableList.of("0", "1")); assertFilterMatchesSkipVectorize(edf("like(dim1, '1%')"), ImmutableList.of("1")); } @@ -192,47 +192,47 @@ public void testOneLongColumn() public void testOneFloatColumn() { if (NullHandling.replaceWithDefault()) { - assertFilterMatchesSkipVectorize(edf("dim2 == ''"), ImmutableList.of("0")); + assertFilterMatches(edf("dim2 == ''"), ImmutableList.of("0")); } else { // A float does not match empty string - assertFilterMatchesSkipVectorize(edf("dim2 == ''"), ImmutableList.of()); + assertFilterMatches(edf("dim2 == ''"), ImmutableList.of()); } - assertFilterMatchesSkipVectorize(edf("dim2 == '1'"), ImmutableList.of("1")); - assertFilterMatchesSkipVectorize(edf("dim2 == 2"), ImmutableList.of("2")); - assertFilterMatchesSkipVectorize(edf("dim2 < '2'"), ImmutableList.of("0", "1")); - assertFilterMatchesSkipVectorize(edf("dim2 < 2"), ImmutableList.of("0", "1")); - assertFilterMatchesSkipVectorize(edf("dim2 < 2.0"), ImmutableList.of("0", "1")); + assertFilterMatches(edf("dim2 == '1'"), ImmutableList.of("1")); + assertFilterMatches(edf("dim2 == 2"), ImmutableList.of("2")); + assertFilterMatches(edf("dim2 < '2'"), ImmutableList.of("0", "1")); + assertFilterMatches(edf("dim2 < 2"), ImmutableList.of("0", "1")); + assertFilterMatches(edf("dim2 < 2.0"), ImmutableList.of("0", "1")); assertFilterMatchesSkipVectorize(edf("like(dim2, '1%')"), ImmutableList.of("1")); } @Test public void testConstantExpression() { - assertFilterMatchesSkipVectorize(edf("1 + 1"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); - assertFilterMatchesSkipVectorize(edf("'true'"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); + assertFilterMatches(edf("1 + 1"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); + assertFilterMatches(edf("'true'"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); - assertFilterMatchesSkipVectorize(edf("0 + 0"), ImmutableList.of()); - assertFilterMatchesSkipVectorize(edf("'false'"), ImmutableList.of()); + assertFilterMatches(edf("0 + 0"), ImmutableList.of()); + assertFilterMatches(edf("'false'"), ImmutableList.of()); } @Test public void testCompareColumns() { // String vs string - assertFilterMatchesSkipVectorize(edf("dim0 == dim3"), ImmutableList.of("2", "5", "8")); + assertFilterMatches(edf("dim0 == dim3"), ImmutableList.of("2", "5", "8")); if (NullHandling.replaceWithDefault()) { // String vs long - assertFilterMatchesSkipVectorize(edf("dim1 == dim3"), ImmutableList.of("0", "2", "5", "8")); + assertFilterMatches(edf("dim1 == dim3"), ImmutableList.of("0", "2", "5", "8")); // String vs float - assertFilterMatchesSkipVectorize(edf("dim2 == dim3"), ImmutableList.of("0", "2", "5", "8")); + assertFilterMatches(edf("dim2 == dim3"), ImmutableList.of("0", "2", "5", "8")); } else { // String vs long - assertFilterMatchesSkipVectorize(edf("dim1 == dim3"), ImmutableList.of("2", "5", "8")); + assertFilterMatches(edf("dim1 == dim3"), ImmutableList.of("2", "5", "8")); // String vs float - assertFilterMatchesSkipVectorize(edf("dim2 == dim3"), ImmutableList.of("2", "5", "8")); + assertFilterMatches(edf("dim2 == dim3"), ImmutableList.of("2", "5", "8")); } // String vs. multi-value string @@ -243,39 +243,48 @@ public void testCompareColumns() public void testMissingColumn() { if (NullHandling.replaceWithDefault()) { - assertFilterMatchesSkipVectorize( + assertFilterMatches( edf("missing == ''"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") ); + assertFilterMatches( + edf("missing == otherMissing"), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") + ); } else { // AS per SQL standard null == null returns false. - assertFilterMatchesSkipVectorize(edf("missing == null"), ImmutableList.of()); + assertFilterMatches(edf("missing == null"), ImmutableList.of()); + // also this madness doesn't do madness + assertFilterMatches( + edf("missing == otherMissing"), + ImmutableList.of() + ); } - assertFilterMatchesSkipVectorize(edf("missing == '1'"), ImmutableList.of()); - assertFilterMatchesSkipVectorize(edf("missing == 2"), ImmutableList.of()); + assertFilterMatches(edf("missing == '1'"), ImmutableList.of()); + assertFilterMatches(edf("missing == 2"), ImmutableList.of()); if (NullHandling.replaceWithDefault()) { // missing equivaluent to 0 - assertFilterMatchesSkipVectorize( + assertFilterMatches( edf("missing < '2'"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") ); - assertFilterMatchesSkipVectorize( + assertFilterMatches( edf("missing < 2"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") ); - assertFilterMatchesSkipVectorize( + assertFilterMatches( edf("missing < 2.0"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") ); } else { // missing equivalent to null - assertFilterMatchesSkipVectorize(edf("missing < '2'"), ImmutableList.of()); - assertFilterMatchesSkipVectorize(edf("missing < 2"), ImmutableList.of()); - assertFilterMatchesSkipVectorize(edf("missing < 2.0"), ImmutableList.of()); + assertFilterMatches(edf("missing < '2'"), ImmutableList.of()); + assertFilterMatches(edf("missing < 2"), ImmutableList.of()); + assertFilterMatches(edf("missing < 2.0"), ImmutableList.of()); } - assertFilterMatchesSkipVectorize(edf("missing > '2'"), ImmutableList.of()); - assertFilterMatchesSkipVectorize(edf("missing > 2"), ImmutableList.of()); - assertFilterMatchesSkipVectorize(edf("missing > 2.0"), ImmutableList.of()); + assertFilterMatches(edf("missing > '2'"), ImmutableList.of()); + assertFilterMatches(edf("missing > 2"), ImmutableList.of()); + assertFilterMatches(edf("missing > 2.0"), ImmutableList.of()); assertFilterMatchesSkipVectorize(edf("like(missing, '1%')"), ImmutableList.of()); } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java index aa104cedd791..81aa2efa4d87 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java @@ -71,6 +71,10 @@ public void testWithTimeExtractionFnNull() new SelectorDimFilter("dim0", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of() ); + assertFilterMatches( + new SelectorDimFilter("vdim0", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), + ImmutableList.of() + ); assertFilterMatches( new SelectorDimFilter("timeDim", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("4") @@ -102,9 +106,19 @@ public void testSingleValueStringColumnWithoutNulls() assertFilterMatches(new SelectorDimFilter("dim0", "1", null), ImmutableList.of("1")); } + @Test + public void testSingleValueVirtualStringColumnWithoutNulls() + { + assertFilterMatches(new SelectorDimFilter("vdim0", null, null), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("vdim0", "", null), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("vdim0", "0", null), ImmutableList.of("0")); + assertFilterMatches(new SelectorDimFilter("vdim0", "1", null), ImmutableList.of("1")); + } + @Test public void testSingleValueStringColumnWithNulls() { + // testSingleValueStringColumnWithoutNulls but with virtual column selector if (NullHandling.replaceWithDefault()) { assertFilterMatches(new SelectorDimFilter("dim1", null, null), ImmutableList.of("0")); } else { @@ -119,6 +133,24 @@ public void testSingleValueStringColumnWithNulls() assertFilterMatches(new SelectorDimFilter("dim1", "ab", null), ImmutableList.of()); } + @Test + public void testSingleValueVirtualStringColumnWithNulls() + { + // testSingleValueStringColumnWithNulls but with virtual column selector + if (NullHandling.replaceWithDefault()) { + assertFilterMatches(new SelectorDimFilter("vdim1", null, null), ImmutableList.of("0")); + } else { + assertFilterMatches(new SelectorDimFilter("vdim1", null, null), ImmutableList.of()); + } + assertFilterMatches(new SelectorDimFilter("vdim1", "", null), ImmutableList.of("0")); + assertFilterMatches(new SelectorDimFilter("vdim1", "10", null), ImmutableList.of("1")); + assertFilterMatches(new SelectorDimFilter("vdim1", "2", null), ImmutableList.of("2")); + assertFilterMatches(new SelectorDimFilter("vdim1", "1", null), ImmutableList.of("3")); + assertFilterMatches(new SelectorDimFilter("vdim1", "abdef", null), ImmutableList.of("4")); + assertFilterMatches(new SelectorDimFilter("vdim1", "abc", null), ImmutableList.of("5")); + assertFilterMatches(new SelectorDimFilter("vdim1", "ab", null), ImmutableList.of()); + } + @Test public void testMultiValueStringColumn() { @@ -166,11 +198,11 @@ public void testMissingColumnNotSpecifiedInDimensionList() @Test public void testExpressionVirtualColumn() { - assertFilterMatchesSkipVectorize( + assertFilterMatches( new SelectorDimFilter("expr", "1.1", null), ImmutableList.of("0", "1", "2", "3", "4", "5") ); - assertFilterMatchesSkipVectorize(new SelectorDimFilter("expr", "1.2", null), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("expr", "1.2", null), ImmutableList.of()); } @Test @@ -329,6 +361,26 @@ public void testNumericColumnNullsAndDefaults() } } + @Test + public void testVirtualNumericColumnNullsAndDefaults() + { + if (canTestNumericNullsAsDefaultValues) { + assertFilterMatches(new SelectorDimFilter("vf0", "0", null), ImmutableList.of("0", "4")); + assertFilterMatches(new SelectorDimFilter("vd0", "0", null), ImmutableList.of("0", "2")); + assertFilterMatches(new SelectorDimFilter("vl0", "0", null), ImmutableList.of("0", "3")); + assertFilterMatches(new SelectorDimFilter("vf0", null, null), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("vd0", null, null), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("vl0", null, null), ImmutableList.of()); + } else { + assertFilterMatches(new SelectorDimFilter("vf0", "0", null), ImmutableList.of("0")); + assertFilterMatches(new SelectorDimFilter("vd0", "0", null), ImmutableList.of("0")); + assertFilterMatches(new SelectorDimFilter("vl0", "0", null), ImmutableList.of("0")); + assertFilterMatches(new SelectorDimFilter("vf0", null, null), ImmutableList.of("4")); + assertFilterMatches(new SelectorDimFilter("vd0", null, null), ImmutableList.of("2")); + assertFilterMatches(new SelectorDimFilter("vl0", null, null), ImmutableList.of("3")); + } + } + @Test public void test_equals() { diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java index 566c6cb2722d..c53676bea3ae 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java @@ -129,7 +129,6 @@ public static Iterable constructorFeeder() return EXPRESSIONS.stream().map(x -> new Object[]{x}).collect(Collectors.toList()); } - @Nullable private ExprType outputType; private String expression; @@ -153,6 +152,9 @@ public ColumnCapabilities getColumnCapabilities(String column) } } ); + if (outputType == null) { + outputType = ExprType.STRING; + } } @Test @@ -207,7 +209,7 @@ public static void sanityTestVectorizedExpressionSelectors( } else { VectorValueSelector selector = null; VectorObjectSelector objectSelector = null; - if (outputType.isNumeric()) { + if (outputType != null && outputType.isNumeric()) { selector = cursor.getColumnSelectorFactory().makeValueSelector("v"); } else { objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v"); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java index ff32ac19b997..94e47d07fc8a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java @@ -244,7 +244,6 @@ public void testParamsInSelectExpressionAndLimit() throws Exception @Test public void testParamsTuckedInACast() throws Exception { - cannotVectorize(); testQuery( "SELECT dim1, m1, COUNT(*) FROM druid.foo WHERE m1 - CAST(? as INT) = dim1 GROUP BY dim1, m1", ImmutableList.of( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 0dc242a52069..e96686f778b4 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -3533,9 +3533,6 @@ public void testHavingOnFloatSum() throws Exception @Test public void testColumnComparison() throws Exception { - // Cannot vectorize due to expression filter. - cannotVectorize(); - testQuery( "SELECT dim1, m1, COUNT(*) FROM druid.foo WHERE m1 - 1 = dim1 GROUP BY dim1, m1", ImmutableList.of(