From 4672efbc9b3e4bd355f1342e3fb2dd986f1dd395 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 2 May 2019 03:52:02 -0700 Subject: [PATCH 01/48] array support for expression language for multi-value string columns --- .../org/apache/druid/math/expr/antlr/Expr.g4 | 7 + .../apache/druid/math/expr/ApplyFunction.java | 607 ++++++++++++++++++ .../apache/druid/math/expr/CartesianList.java | 135 ++++ .../java/org/apache/druid/math/expr/Expr.java | 419 +++++++++++- .../org/apache/druid/math/expr/ExprEval.java | 451 ++++++++++++- .../druid/math/expr/ExprListenerImpl.java | 79 ++- .../org/apache/druid/math/expr/ExprType.java | 2 +- .../org/apache/druid/math/expr/Function.java | 355 +++++++++- .../org/apache/druid/math/expr/Parser.java | 213 ++++++ .../druid/math/expr/ApplyFunctionTest.java | 146 +++++ .../apache/druid/math/expr/FunctionTest.java | 74 ++- .../apache/druid/math/expr/ParserTest.java | 38 ++ .../expressions/BloomFilterExprMacro.java | 16 +- .../druid/query/expression/LikeExprMacro.java | 16 +- .../query/expression/LookupExprMacro.java | 16 +- .../expression/RegexpExtractExprMacro.java | 16 +- .../expression/TimestampCeilExprMacro.java | 16 + .../expression/TimestampExtractExprMacro.java | 16 +- .../expression/TimestampFloorExprMacro.java | 17 + .../expression/TimestampFormatExprMacro.java | 16 +- .../expression/TimestampParseExprMacro.java | 16 +- .../expression/TimestampShiftExprMacro.java | 17 + .../druid/query/expression/TrimExprMacro.java | 15 + .../apache/druid/segment/IndexMergerV9.java | 4 +- .../druid/segment/column/ColumnBuilder.java | 1 + .../segment/column/ColumnCapabilities.java | 8 + .../column/ColumnCapabilitiesImpl.java | 16 + .../segment/filter/ExpressionFilter.java | 19 +- .../segment/incremental/IncrementalIndex.java | 6 +- ...MultiValueExpressionDimensionSelector.java | 172 +++++ .../ExpressionColumnValueSelector.java | 4 +- .../segment/virtual/ExpressionSelectors.java | 220 ++++++- ...ueStringExpressionColumnValueSelector.java | 38 ++ 
...ueStringExpressionColumnValueSelector.java | 81 +++ ...tCachingExpressionColumnValueSelector.java | 2 +- .../druid/query/MultiValuedDimensionTest.java | 414 +++++++++++- .../segment/filter/ExpressionFilterTest.java | 11 +- .../ExpressionColumnValueSelectorTest.java | 3 +- .../virtual/ExpressionVirtualColumnTest.java | 49 +- 39 files changed, 3630 insertions(+), 121 deletions(-) create mode 100644 core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java create mode 100644 core/src/main/java/org/apache/druid/math/expr/CartesianList.java create mode 100644 core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java create mode 100644 processing/src/main/java/org/apache/druid/segment/virtual/BaseMultiValueExpressionDimensionSelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/virtual/ForcedMultiValueStringExpressionColumnValueSelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java diff --git a/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 b/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 index 348b5037a5d1..dd72a2a2f0d5 100644 --- a/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 +++ b/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 @@ -23,13 +23,20 @@ expr : 'null' # null | expr ('<'|'<='|'>'|'>='|'=='|'!=') expr # logicalOpExpr | expr ('&&'|'||') expr # logicalAndOrExpr | '(' expr ')' # nestedExpr + | IDENTIFIER '(' lambda ',' fnArgs ')' # applyFunctionExpr | IDENTIFIER '(' fnArgs? ')' # functionExpr | IDENTIFIER # identifierExpr | DOUBLE # doubleExpr | LONG # longExpr | STRING # string + | '[' DOUBLE (','? DOUBLE)* ']' # doubleArray + | '[' LONG (','? LONG)* ']' # longArray + | '[' STRING (','? STRING)* ']' # stringArray ; +lambda : (IDENTIFIER | '(' IDENTIFIER (','? 
IDENTIFIER)* ')') '->' expr + ; + fnArgs : expr (',' expr)* # functionArgs ; diff --git a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java new file mode 100644 index 000000000000..8cc345d62a28 --- /dev/null +++ b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java @@ -0,0 +1,607 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import it.unimi.dsi.fastutil.objects.Object2IntArrayMap; +import it.unimi.dsi.fastutil.objects.Object2IntMap; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.RE; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; + +public interface ApplyFunction +{ + String name(); + + ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings); + + Set getArrayInputs(List args); + + abstract class BaseMapFunction implements ApplyFunction + { + ExprEval applyMap(LambdaExpr expr, int length, IndexableMapLambdaObjectBinding bindings) + { + String[] stringsOut = null; + Long[] longsOut = null; + Double[] doublesOut = null; + + ExprType outputType = null; + Object out = null; + for (int i = 0; i < length; i++) { + + ExprEval evaluated = expr.eval(bindings.withIndex(i)); + if (outputType == null) { + outputType = evaluated.type(); + switch (outputType) { + case STRING: + stringsOut = new String[length]; + out = stringsOut; + break; + case LONG: + longsOut = new Long[length]; + out = longsOut; + break; + case DOUBLE: + doublesOut = new Double[length]; + out = doublesOut; + break; + default: + throw new RE("Unhandled map function output type [%s]", outputType); + } + } + + switch (outputType) { + case STRING: + stringsOut[i] = evaluated.asString(); + break; + case LONG: + longsOut[i] = evaluated.asLong(); + break; + case DOUBLE: + doublesOut[i] = evaluated.asDouble(); + break; + } + } + return ExprEval.bestEffortOf(out); + } + } + + class MapFunction extends BaseMapFunction + { + static final String NAME = "map"; + + @Override + public String name() + { + return NAME; + } + + @Override + public ExprEval apply(LambdaExpr 
lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) + { + Preconditions.checkArgument(argsExpr.size() == 1); + Expr arrayExpr = argsExpr.get(0); + ExprEval arrayEval = arrayExpr.eval(bindings); + + Object[] array = arrayEval.asArray(); + if (array == null) { + return ExprEval.of(null); + } + + MapLambdaBinding lambdaBinding = new MapLambdaBinding(array, lambdaExpr, bindings); + return applyMap(lambdaExpr, array.length, lambdaBinding); + } + + @Override + public Set getArrayInputs(List args) + { + if (args.size() != 1) { + throw new IAE("ApplyFunction[%s] needs 1 argument", name()); + } + + return ImmutableSet.of(args.get(0)); + } + } + + class CartesianMapFunction extends BaseMapFunction + { + static final String NAME = "cartesian_map"; + + @Override + public String name() + { + return NAME; + } + + @Override + public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) + { + List> arrayInputs = new ArrayList<>(); + boolean hadNull = false; + boolean hadEmpty = false; + for (Expr expr : argsExpr) { + ExprEval arrayEval = expr.eval(bindings); + Object[] array = arrayEval.asArray(); + if (array == null) { + hadNull = true; + continue; + } + if (array.length == 0) { + hadEmpty = true; + continue; + } + arrayInputs.add(Arrays.asList(array)); + } + if (hadNull) { + return ExprEval.of(null); + } + if (hadEmpty) { + return ExprEval.ofStringArray(new String[0]); + } + + List> product = CartesianList.create(arrayInputs); + CartesianMapLambdaBinding lambdaBinding = new CartesianMapLambdaBinding(product, lambdaExpr, bindings); + return applyMap(lambdaExpr, product.size(), lambdaBinding); + } + + @Override + public Set getArrayInputs(List args) + { + return ImmutableSet.copyOf(args); + } + } + + abstract class BaseFoldrFunction implements ApplyFunction + { + public ExprEval applyFoldr(LambdaExpr lambdaExpr, Object accumulator, int length, IndexableFoldLambdaBinding bindings) + { + for (int i = 0; i < length; i++) { + ExprEval evaluated = 
lambdaExpr.eval(bindings.accumulateWithIndex(i, accumulator)); + accumulator = evaluated.value(); + } + return ExprEval.bestEffortOf(accumulator); + } + } + + class FoldrFunction extends BaseFoldrFunction + { + static final String NAME = "foldr"; + + @Override + public String name() + { + return NAME; + } + + @Override + public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) + { + Preconditions.checkArgument(argsExpr.size() == 2); + Expr arrayExpr = argsExpr.get(0); + Expr accExpr = argsExpr.get(1); + + ExprEval arrayEval = arrayExpr.eval(bindings); + ExprEval accEval = accExpr.eval(bindings); + + Object[] array = arrayEval.asArray(); + if (array == null) { + return ExprEval.of(null); + } + Object accumlator = accEval.value(); + + FoldLambdaBinding lambdaBinding = new FoldLambdaBinding(array, accumlator, lambdaExpr, bindings); + return applyFoldr(lambdaExpr, accumlator, array.length, lambdaBinding); + } + + @Override + public Set getArrayInputs(List args) + { + // accumulator argument cannot be inferred, so ignore it until think of something better to do + return ImmutableSet.of(args.get(0)); + } + } + + class CartesianFoldrFunction extends BaseFoldrFunction + { + static final String NAME = "cartesian_foldr"; + + @Override + public String name() + { + return NAME; + } + + @Override + public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) + { + List> arrayInputs = new ArrayList<>(); + boolean hadNull = false; + boolean hadEmpty = false; + for (int i = 0; i < argsExpr.size() - 1; i++) { + Expr expr = argsExpr.get(i); + ExprEval arrayEval = expr.eval(bindings); + Object[] array = arrayEval.asArray(); + if (array == null) { + hadNull = true; + continue; + } + if (array.length == 0) { + hadEmpty = true; + continue; + } + arrayInputs.add(Arrays.asList(array)); + } + if (hadNull) { + return ExprEval.of(null); + } + if (hadEmpty) { + return ExprEval.ofStringArray(new String[0]); + } + Expr accExpr = 
argsExpr.get(argsExpr.size() - 1); + + List> product = CartesianList.create(arrayInputs); + + ExprEval accEval = accExpr.eval(bindings); + + Object accumlator = accEval.value(); + + CartesianFoldLambdaBinding lambdaBindings = + new CartesianFoldLambdaBinding(product, accumlator, lambdaExpr, bindings); + return applyFoldr(lambdaExpr, accumlator, product.size(), lambdaBindings); + } + + @Override + public Set getArrayInputs(List args) + { + return ImmutableSet.copyOf(args); + } + } + + class FilterFunction implements ApplyFunction + { + static final String NAME = "filter"; + + @Override + public String name() + { + return NAME; + } + + @Override + public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) + { + Preconditions.checkArgument(argsExpr.size() == 1); + Expr arrayExpr = argsExpr.get(0); + ExprEval arrayEval = arrayExpr.eval(bindings); + + Object[] array = arrayEval.asArray(); + if (array == null) { + return ExprEval.of(null); + } + + SettableLambdaBinding lambdaBinding = new SettableLambdaBinding(lambdaExpr, bindings); + switch (arrayEval.type()) { + case STRING: + case STRING_ARRAY: + String[] filteredString = + this.filter(arrayEval.asStringArray(), lambdaExpr, lambdaBinding).toArray(String[]::new); + return ExprEval.ofStringArray(filteredString); + case LONG: + case LONG_ARRAY: + Long[] filteredLong = + this.filter(arrayEval.asLongArray(), lambdaExpr, lambdaBinding).toArray(Long[]::new); + return ExprEval.ofLongArray(filteredLong); + case DOUBLE: + case DOUBLE_ARRAY: + Double[] filteredDouble = + this.filter(arrayEval.asDoubleArray(), lambdaExpr, lambdaBinding).toArray(Double[]::new); + return ExprEval.ofDoubleArray(filteredDouble); + default: + throw new RE("Unhandled filter function input type [%s]", arrayEval.type()); + } + } + + @Override + public Set getArrayInputs(List args) + { + if (args.size() != 1) { + throw new IAE("ApplyFunction[%s] needs 1 argument", name()); + } + + return ImmutableSet.of(args.get(0)); + } + + 
private Stream filter(T[] array, LambdaExpr expr, SettableLambdaBinding binding) + { + return Arrays.stream(array).filter(s -> expr.eval(binding.withBinding(expr.getIdentifier(), s)).asBoolean()); + } + } + + abstract class MatchFunction implements ApplyFunction + { + @Override + public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) + { + Preconditions.checkArgument(argsExpr.size() == 1); + Expr arrayExpr = argsExpr.get(0); + ExprEval arrayEval = arrayExpr.eval(bindings); + + final Object[] array = arrayEval.asArray(); + if (array == null) { + return ExprEval.bestEffortOf(false); + } + + SettableLambdaBinding lambdaBinding = new SettableLambdaBinding(lambdaExpr, bindings); + return match(array, lambdaExpr, lambdaBinding); + } + + @Override + public Set getArrayInputs(List args) + { + if (args.size() != 1) { + throw new IAE("ApplyFunction[%s] needs 1 argument", name()); + } + + return ImmutableSet.of(args.get(0)); + } + + public abstract ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bindings); + } + + class AnyMatchFunction extends MatchFunction + { + static final String NAME = "any"; + + @Override + public String name() + { + return NAME; + } + + @Override + public ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bindings) + { + boolean anyMatch = Arrays.stream(values) + .anyMatch(o -> expr.eval(bindings.withBinding(expr.getIdentifier(), o)).asBoolean()); + return ExprEval.bestEffortOf(anyMatch); + } + } + + class AllMatchFunction extends MatchFunction + { + static final String NAME = "all"; + + @Override + public String name() + { + return NAME; + } + + @Override + public ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bindings) + { + boolean allMatch = Arrays.stream(values) + .allMatch(o -> expr.eval(bindings.withBinding(expr.getIdentifier(), o)).asBoolean()); + return ExprEval.bestEffortOf(allMatch); + } + } + + class SettableLambdaBinding implements 
Expr.ObjectBinding + { + private final Expr.ObjectBinding bindings; + private final Map lambdaBindings; + + SettableLambdaBinding(LambdaExpr expr, Expr.ObjectBinding bindings) + { + this.lambdaBindings = new HashMap<>(); + for (String lambdaIdentifier : expr.getIdentifiers()) { + lambdaBindings.put(lambdaIdentifier, null); + } + this.bindings = bindings; + } + + @Nullable + @Override + public Object get(String name) + { + if (lambdaBindings.containsKey(name)) { + return lambdaBindings.get(name); + } + return bindings.get(name); + } + + SettableLambdaBinding withBinding(String key, Object value) + { + this.lambdaBindings.put(key, value); + return this; + } + } + + interface IndexableMapLambdaObjectBinding extends Expr.ObjectBinding + { + IndexableMapLambdaObjectBinding withIndex(int index); + } + + class MapLambdaBinding implements IndexableMapLambdaObjectBinding + { + private final Expr.ObjectBinding bindings; + private final String lambdaIdentifier; + private final Object[] arrayValues; + private int index = 0; + + MapLambdaBinding(Object[] arrayValues, LambdaExpr expr, Expr.ObjectBinding bindings) + { + this.lambdaIdentifier = expr.getIdentifier(); + this.arrayValues = arrayValues; + this.bindings = bindings; + } + + @Nullable + @Override + public Object get(String name) + { + if (name.equals(lambdaIdentifier)) { + return arrayValues[index]; + } + return bindings.get(name); + } + + @Override + public MapLambdaBinding withIndex(int index) + { + this.index = index; + return this; + } + } + + class CartesianMapLambdaBinding implements IndexableMapLambdaObjectBinding + { + private final Expr.ObjectBinding bindings; + private final Object2IntMap lambdaIdentifiers; + private final List> lambdaInputs; + private int index = 0; + + CartesianMapLambdaBinding(List> inputs, LambdaExpr expr, Expr.ObjectBinding bindings) + { + this.lambdaInputs = inputs; + List ids = expr.getIdentifiers(); + this.lambdaIdentifiers = new Object2IntArrayMap<>(ids.size()); + for (int i = 0; i < 
ids.size(); i++) { + lambdaIdentifiers.put(ids.get(i), i); + } + + this.bindings = bindings; + } + + @Nullable + @Override + public Object get(String name) + { + if (lambdaIdentifiers.containsKey(name)) { + return lambdaInputs.get(index).get(lambdaIdentifiers.getInt(name)); + } + return bindings.get(name); + } + + @Override + public CartesianMapLambdaBinding withIndex(int index) + { + this.index = index; + return this; + } + } + + interface IndexableFoldLambdaBinding extends Expr.ObjectBinding + { + IndexableFoldLambdaBinding accumulateWithIndex(int index, Object accumulator); + } + + class FoldLambdaBinding implements IndexableFoldLambdaBinding + { + private final Expr.ObjectBinding bindings; + private final String elementIdentifier; + private final Object[] arrayValues; + private final String accumulatorIdentifier; + private Object accumulatorValue; + private int index; + + FoldLambdaBinding(Object[] arrayValues, Object initialAccumulator, LambdaExpr expr, Expr.ObjectBinding bindings) + { + List ids = expr.getIdentifiers(); + this.elementIdentifier = ids.get(0); + this.accumulatorIdentifier = ids.get(1); + this.arrayValues = arrayValues; + this.accumulatorValue = initialAccumulator; + this.bindings = bindings; + } + + @Nullable + @Override + public Object get(String name) + { + if (name.equals(elementIdentifier)) { + return arrayValues[index]; + } else if (name.equals(accumulatorIdentifier)) { + return accumulatorValue; + } + return bindings.get(name); + } + + @Override + public FoldLambdaBinding accumulateWithIndex(int index, Object acc) + { + this.index = index; + this.accumulatorValue = acc; + return this; + } + } + + class CartesianFoldLambdaBinding implements IndexableFoldLambdaBinding + { + private final Expr.ObjectBinding bindings; + private final Object2IntMap lambdaIdentifiers; + private final List> lambdaInputs; + private final String accumulatorIdentifier; + private Object accumulatorValue; + private int index = 0; + + CartesianFoldLambdaBinding(List> 
inputs, Object accumulatorValue, LambdaExpr expr, Expr.ObjectBinding bindings) + { + this.lambdaInputs = inputs; + List ids = expr.getIdentifiers(); + this.lambdaIdentifiers = new Object2IntArrayMap<>(ids.size()); + for (int i = 0; i < ids.size() - 1; i++) { + lambdaIdentifiers.put(ids.get(i), i); + } + this.accumulatorIdentifier = ids.get(ids.size() - 1); + this.bindings = bindings; + this.accumulatorValue = accumulatorValue; + } + + @Nullable + @Override + public Object get(String name) + { + if (lambdaIdentifiers.containsKey(name)) { + return lambdaInputs.get(index).get(lambdaIdentifiers.getInt(name)); + } else if (accumulatorIdentifier.equals(name)) { + return accumulatorValue; + } + return bindings.get(name); + } + + @Override + public CartesianFoldLambdaBinding accumulateWithIndex(int index, Object acc) + { + this.index = index; + this.accumulatorValue = acc; + return this; + } + } +} diff --git a/core/src/main/java/org/apache/druid/math/expr/CartesianList.java b/core/src/main/java/org/apache/druid/math/expr/CartesianList.java new file mode 100644 index 000000000000..d1373bd62a4b --- /dev/null +++ b/core/src/main/java/org/apache/druid/math/expr/CartesianList.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.math.expr; + +import com.google.common.base.Preconditions; +import com.google.common.math.IntMath; + +import javax.annotation.Nullable; +import java.util.AbstractList; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.ListIterator; +import java.util.RandomAccess; + +/** + * {@link CartesianList} computes the cartesian product of n lists. It is adapted from and is *nearly* identical to one + * Guava CartesianList which comes from a version from "the future" that we don't yet have, with the key difference that + * it is not {@link com.google.common.collect.ImmutableList} based, so it can hold null values to be compatible with the + * evaluation and handling of cartesian products of expression arrays with null elements, e.g. ['a', 'b', null] + */ + +public final class CartesianList extends AbstractList> implements RandomAccess +{ + private final transient List> axes; + private final transient int[] axesSizeProduct; + + public static List> create(List> lists) + { + List> axesBuilder = new ArrayList<>(lists.size()); + for (List list : lists) { + if (list.isEmpty()) { + return Collections.emptyList(); + } + axesBuilder.add(new ArrayList<>(list)); + } + return new CartesianList(axesBuilder); + } + + CartesianList(List> axes) + { + this.axes = axes; + int[] axesSizeProduct = new int[axes.size() + 1]; + axesSizeProduct[axes.size()] = 1; + try { + for (int i = axes.size() - 1; i >= 0; i--) { + axesSizeProduct[i] = IntMath.checkedMultiply(axesSizeProduct[i + 1], axes.get(i).size()); + } + } + catch (ArithmeticException e) { + throw new IllegalArgumentException( + "Cartesian product too large; must have size at most Integer.MAX_VALUE"); + } + this.axesSizeProduct = axesSizeProduct; + } + + private int getAxisIndexForProductIndex(int index, int axis) + { + return (index / 
axesSizeProduct[axis + 1]) % axes.get(axis).size(); + } + + @Override + public int indexOf(Object o) + { + if (!(o instanceof List)) { + return -1; + } + List list = (List) o; + if (list.size() != axes.size()) { + return -1; + } + ListIterator itr = list.listIterator(); + int computedIndex = 0; + while (itr.hasNext()) { + int axisIndex = itr.nextIndex(); + int elemIndex = axes.get(axisIndex).indexOf(itr.next()); + if (elemIndex == -1) { + return -1; + } + computedIndex += elemIndex * axesSizeProduct[axisIndex + 1]; + } + return computedIndex; + } + + @Override + public List get(final int index) + { + Preconditions.checkElementIndex(index, size()); + return new AbstractList() + { + @Override + public int size() + { + return axes.size(); + } + + @Override + public E get(int axis) + { + Preconditions.checkElementIndex(axis, size()); + int axisIndex = getAxisIndexForProductIndex(index, axis); + return axes.get(axis).get(axisIndex); + } + }; + } + + @Override + public int size() + { + return axesSizeProduct[0]; + } + + @Override + public boolean contains(@Nullable Object o) + { + return indexOf(o) != -1; + } +} diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index cce15b662c24..fb309b0e8ad6 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -29,10 +29,13 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.util.Arrays; import java.util.List; import java.util.Objects; +import java.util.stream.Collectors; /** + * Base interface of Druid expression language abstract syntax tree */ public interface Expr { @@ -42,6 +45,12 @@ default boolean isLiteral() return false; } + default boolean isArray() + { + // Overridden by things that are arrays. + return false; + } + /** * Returns the value of expr if expr is a literal, or throws an exception otherwise. 
* @@ -56,21 +65,61 @@ default Object getLiteralValue() throw new ISE("Not a literal"); } + /** + * Evaluate the {@link Expr} with the bindings which supply {@link IdentifierExpr} with their values, producing an + * {@link ExprEval} with the result. + */ @Nonnull ExprEval eval(ObjectBinding bindings); + /** + * Mechanism to supply values to back {@link IdentifierExpr} during expression evaluation + */ interface ObjectBinding { + /** + * Get value binding for string identifier of {@link IdentifierExpr} + */ @Nullable Object get(String name); } + /** + * Programmatically inspect the {@link Expr} tree with a {@link Visitor}. Each {@link Expr} is responsible for + * ensuring the {@link Visitor} can reach all of it's {@link Expr} children. + */ void visit(Visitor visitor); + /** + * Programatically rewrite the {@link Expr} tree with a {@link Shuttle}.Each {@link Expr} is responsible for + * ensuring the {@link Shuttle} can reach all of it's {@link Expr} children, as well as updating it's children + * {@link Expr} with the results from the {@link Shuttle}. + */ + Expr visit(Shuttle shuttle); + + /** + * Mechanism to inspect an {@link Expr}, implementing a {@link Visitor} allows visiting all children of an + * {@link Expr} + */ interface Visitor { + /** + * Provide the {@link Visitor} with an {@link Expr} to inspect + */ void visit(Expr expr); } + + /** + * Mechanism to rewrite an {@link Expr}, implementing a {@link Shuttle} allows visiting all children of an + * {@link Expr}, and replacing them as desired. + */ + interface Shuttle + { + /** + * Provide the {@link Shuttle} with an {@link Expr} to inspect and potentially rewrite. 
+ */ + Expr visit(Expr expr); + } } abstract class ConstantExpr implements Expr @@ -86,6 +135,21 @@ public void visit(Visitor visitor) { visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + return shuttle.visit(this); + } +} + +abstract class ConstantArrayExpr extends ConstantExpr +{ + @Override + public boolean isArray() + { + return true; + } } class LongExpr extends ConstantExpr @@ -118,6 +182,36 @@ public ExprEval eval(ObjectBinding bindings) } } +class LongArrayExpr extends ConstantArrayExpr +{ + private final Long[] value; + + public LongArrayExpr(Long[] value) + { + this.value = Preconditions.checkNotNull(value, "value"); + } + + @Nonnull + @Override + public Object getLiteralValue() + { + return value; + } + + @Override + public String toString() + { + return Arrays.stream(value).map(String::valueOf).collect(Collectors.joining(", ")); + } + + @Nonnull + @Override + public ExprEval eval(ObjectBinding bindings) + { + return ExprEval.ofLongArray(value); + } +} + class StringExpr extends ConstantExpr { private final String value; @@ -148,6 +242,36 @@ public ExprEval eval(ObjectBinding bindings) } } +class StringArrayExpr extends ConstantArrayExpr +{ + private final String[] value; + + public StringArrayExpr(String[] value) + { + this.value = Preconditions.checkNotNull(value, "value"); + } + + @Nonnull + @Override + public Object getLiteralValue() + { + return value; + } + + @Override + public String toString() + { + return String.join(", ", value); + } + + @Nonnull + @Override + public ExprEval eval(ObjectBinding bindings) + { + return ExprEval.ofStringArray(value); + } +} + class DoubleExpr extends ConstantExpr { private final Double value; @@ -178,6 +302,36 @@ public ExprEval eval(ObjectBinding bindings) } } +class DoubleArrayExpr extends ConstantArrayExpr +{ + private final Double[] value; + + public DoubleArrayExpr(Double[] value) + { + this.value = Preconditions.checkNotNull(value, "value"); + } + + @Nonnull + @Override + public 
Object getLiteralValue() + { + return value; + } + + @Override + public String toString() + { + return Arrays.stream(value).map(String::valueOf).collect(Collectors.joining(", ")); + } + + @Nonnull + @Override + public ExprEval eval(ObjectBinding bindings) + { + return ExprEval.ofDoubleArray(value); + } +} + class IdentifierExpr implements Expr { private final String value; @@ -205,6 +359,83 @@ public void visit(Visitor visitor) { visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + return shuttle.visit(this); + } +} + +class LambdaExpr implements Expr +{ + private final List args; + private final Expr expr; + + public LambdaExpr(List args, Expr expr) + { + this.args = args; + this.expr = expr; + } + + @Override + public String toString() + { + return "(" + args + " " + expr + ")"; + } + + public String getIdentifier() + { + Preconditions.checkState(args.size() == 1, "LambdaExpr has no or multiple arguments"); + return args.get(0).toString(); + } + + public List getIdentifiers() + { + return args.stream().map(IdentifierExpr::toString).collect(Collectors.toList()); + } + + public List getIdentifierExprs() + { + return args; + } + + public Expr getExpr() + { + return expr; + } + + @Nonnull + @Override + public ExprEval eval(ObjectBinding bindings) + { + return expr.eval(bindings); + } + + @Override + public void visit(Visitor visitor) + { + // return free variables only + expr.visit( + _expr -> { + if (_expr instanceof IdentifierExpr) { + if (args.stream().noneMatch(x -> _expr.toString().equals(x.toString()))) { + visitor.visit(_expr); + } + } + } + ); + visitor.visit(this); + } + + @Override + public Expr visit(Shuttle shuttle) + { + List newArgs = + args.stream().map(arg -> (IdentifierExpr) shuttle.visit(arg)).collect(Collectors.toList()); + Expr newBody = expr.visit(shuttle); + return shuttle.visit(new LambdaExpr(newArgs, newBody)); + } } class FunctionExpr implements Expr @@ -241,6 +472,60 @@ public void visit(Visitor visitor) } 
visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + List newArgs = args.stream().map(shuttle::visit).collect(Collectors.toList()); + return shuttle.visit(new FunctionExpr(function, name, newArgs)); + } +} + +class ApplyFunctionExpr implements Expr +{ + final ApplyFunction function; + final String name; + final LambdaExpr lambdaExpr; + final List argsExpr; + + public ApplyFunctionExpr(ApplyFunction function, String name, LambdaExpr expr, List args) + { + this.function = function; + this.name = name; + this.argsExpr = args; + this.lambdaExpr = expr; + } + + @Override + public String toString() + { + return "(" + name + " " + lambdaExpr + " " + argsExpr + ")"; + } + + @Nonnull + @Override + public ExprEval eval(ObjectBinding bindings) + { + return function.apply(lambdaExpr, argsExpr, bindings); + } + + @Override + public void visit(Visitor visitor) + { + lambdaExpr.visit(visitor); + for (Expr arg : argsExpr) { + arg.visit(visitor); + } + visitor.visit(this); + } + + @Override + public Expr visit(Shuttle shuttle) + { + LambdaExpr newLambda = (LambdaExpr) lambdaExpr.visit(shuttle); + List newArgs = argsExpr.stream().map(shuttle::visit).collect(Collectors.toList()); + return shuttle.visit(new ApplyFunctionExpr(function, name, newLambda, newArgs)); + } } abstract class UnaryExpr implements Expr @@ -252,12 +537,24 @@ abstract class UnaryExpr implements Expr this.expr = expr; } + abstract UnaryExpr copy(Expr expr); + @Override public void visit(Visitor visitor) { expr.visit(visitor); visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newExpr = expr.visit(shuttle); + if (newExpr != expr) { + return shuttle.visit(copy(newExpr)); + } + return shuttle.visit(this); + } } class UnaryMinusExpr extends UnaryExpr @@ -267,6 +564,12 @@ class UnaryMinusExpr extends UnaryExpr super(expr); } + @Override + UnaryExpr copy(Expr expr) + { + return new UnaryMinusExpr(expr); + } + @Nonnull @Override public ExprEval 
eval(ObjectBinding bindings) @@ -284,13 +587,6 @@ public ExprEval eval(ObjectBinding bindings) throw new IAE("unsupported type " + ret.type()); } - @Override - public void visit(Visitor visitor) - { - expr.visit(visitor); - visitor.visit(this); - } - @Override public String toString() { @@ -305,6 +601,12 @@ class UnaryNotExpr extends UnaryExpr super(expr); } + @Override + UnaryExpr copy(Expr expr) + { + return new UnaryNotExpr(expr); + } + @Nonnull @Override public ExprEval eval(ObjectBinding bindings) @@ -330,8 +632,8 @@ public String toString() abstract class BinaryOpExprBase implements Expr { protected final String op; - protected final Expr left; - protected final Expr right; + protected Expr left; + protected Expr right; public BinaryOpExprBase(String op, Expr left, Expr right) { @@ -348,11 +650,25 @@ public void visit(Visitor visitor) visitor.visit(this); } + @Override + public Expr visit(Shuttle shuttle) + { + Expr newLeft = left.visit(shuttle); + Expr newRight = right.visit(shuttle); + if (left != newLeft || right != newRight) { + return shuttle.visit(copy(newLeft, newRight)); + } + return shuttle.visit(this); + } + @Override public String toString() { return "(" + op + " " + left + " " + right + ")"; } + + protected abstract BinaryOpExprBase copy(Expr left, Expr right); + } abstract class BinaryEvalOpExprBase extends BinaryOpExprBase @@ -375,6 +691,7 @@ public ExprEval eval(ObjectBinding bindings) return ExprEval.of(null); } + if (leftVal.type() == ExprType.STRING && rightVal.type() == ExprType.STRING) { return evalString(leftVal.asString(), rightVal.asString()); } else if (leftVal.type() == ExprType.LONG && rightVal.type() == ExprType.LONG) { @@ -407,6 +724,12 @@ class BinMinusExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinMinusExpr(op, left, right); + } + @Override protected final long evalLong(long left, long right) { @@ -427,6 +750,12 @@ class 
BinPowExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinPowExpr(op, left, right); + } + @Override protected final long evalLong(long left, long right) { @@ -447,6 +776,12 @@ class BinMulExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinMulExpr(op, left, right); + } + @Override protected final long evalLong(long left, long right) { @@ -467,6 +802,12 @@ class BinDivExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinDivExpr(op, left, right); + } + @Override protected final long evalLong(long left, long right) { @@ -487,6 +828,12 @@ class BinModuloExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinModuloExpr(op, left, right); + } + @Override protected final long evalLong(long left, long right) { @@ -507,6 +854,12 @@ class BinPlusExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinPlusExpr(op, left, right); + } + @Override protected ExprEval evalString(@Nullable String left, @Nullable String right) { @@ -534,6 +887,12 @@ class BinLtExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinLtExpr(op, left, right); + } + @Override protected ExprEval evalString(@Nullable String left, @Nullable String right) { @@ -561,6 +920,12 @@ class BinLeqExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinLeqExpr(op, left, right); + } + @Override protected ExprEval evalString(@Nullable String left, @Nullable String 
right) { @@ -588,6 +953,12 @@ class BinGtExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinGtExpr(op, left, right); + } + @Override protected ExprEval evalString(@Nullable String left, @Nullable String right) { @@ -615,6 +986,12 @@ class BinGeqExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinGeqExpr(op, left, right); + } + @Override protected ExprEval evalString(@Nullable String left, @Nullable String right) { @@ -642,6 +1019,12 @@ class BinEqExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinEqExpr(op, left, right); + } + @Override protected ExprEval evalString(@Nullable String left, @Nullable String right) { @@ -668,6 +1051,12 @@ class BinNeqExpr extends BinaryEvalOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinNeqExpr(op, left, right); + } + @Override protected ExprEval evalString(@Nullable String left, @Nullable String right) { @@ -694,6 +1083,12 @@ class BinAndExpr extends BinaryOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinAndExpr(op, left, right); + } + @Nonnull @Override public ExprEval eval(ObjectBinding bindings) @@ -710,6 +1105,12 @@ class BinOrExpr extends BinaryOpExprBase super(op, left, right); } + @Override + protected BinaryOpExprBase copy(Expr left, Expr right) + { + return new BinOrExpr(op, left, right); + } + @Nonnull @Override public ExprEval eval(ObjectBinding bindings) diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java index 4dad8100c952..a2cbdc756c39 100644 --- 
a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -25,15 +25,13 @@ import org.apache.druid.java.util.common.IAE; import javax.annotation.Nullable; +import java.util.Arrays; +import java.util.stream.Collectors; /** */ public abstract class ExprEval { - // Cached String values. Protected so they can be used by subclasses. - private boolean stringValueValid = false; - private String stringValue; - public static ExprEval ofLong(@Nullable Number longValue) { return new LongExprEval(longValue); @@ -62,6 +60,21 @@ public static ExprEval of(@Nullable String stringValue) return new StringExprEval(stringValue); } + public static ExprEval ofLongArray(@Nullable Long[] longValue) + { + return new LongArrayExprEval(longValue); + } + + public static ExprEval ofDoubleArray(@Nullable Double[] doubleValue) + { + return new DoubleArrayExprEval(doubleValue); + } + + public static ExprEval ofStringArray(@Nullable String[] stringValue) + { + return new StringArrayExprEval(stringValue); + } + public static ExprEval of(boolean value, ExprType type) { switch (type) { @@ -87,9 +100,26 @@ public static ExprEval bestEffortOf(@Nullable Object val) } return new LongExprEval((Number) val); } + if (val instanceof Long[]) { + return new LongArrayExprEval((Long[]) val); + } + if (val instanceof Double[]) { + return new DoubleArrayExprEval((Double[]) val); + } + if (val instanceof Float[]) { + return new DoubleArrayExprEval(Arrays.stream((Float[]) val).map(x -> x.doubleValue()).toArray(Double[]::new)); + } + if (val instanceof String[]) { + return new StringArrayExprEval((String[]) val); + } + return new StringExprEval(val == null ? null : String.valueOf(val)); } + // Cached String values. Protected so they can be used by subclasses. 
+ private boolean stringValueValid = false; + private String stringValue; + @Nullable final T value; @@ -100,22 +130,11 @@ private ExprEval(@Nullable T value) public abstract ExprType type(); - public Object value() + public T value() { return value; } - /** - * returns true if numeric primitive value for this ExprEval is null, otherwise false. - */ - public abstract boolean isNumericNull(); - - public abstract int asInt(); - - public abstract long asLong(); - - public abstract double asDouble(); - @Nullable public String asString() { @@ -132,15 +151,39 @@ public String asString() return stringValue; } + /** + * returns true if numeric primitive value for this ExprEval is null, otherwise false. + */ + public abstract boolean isNumericNull(); + + public boolean isArray() + { + return false; + } + + public abstract int asInt(); + + public abstract long asLong(); + + public abstract double asDouble(); + public abstract boolean asBoolean(); + public abstract Object[] asArray(); + + public abstract String[] asStringArray(); + + public abstract Long[] asLongArray(); + + public abstract Double[] asDoubleArray(); + public abstract ExprEval castTo(ExprType castTo); public abstract Expr toExpr(); - private abstract static class NumericExprEval extends ExprEval + private abstract static class NumericExprEval extends ExprEval { - private NumericExprEval(@Nullable Number value) + private NumericExprEval(@Nullable T value) { super(value); } @@ -163,6 +206,27 @@ public final double asDouble() return value.doubleValue(); } + @Nullable + @Override + public String[] asStringArray() + { + return isNumericNull() ? null : new String[] {value.toString()}; + } + + @Nullable + @Override + public Long[] asLongArray() + { + return isNumericNull() ? null : new Long[] {value.longValue()}; + } + + @Nullable + @Override + public Double[] asDoubleArray() + { + return isNumericNull() ? 
null : new Double[] {value.doubleValue()}; + } + @Override public boolean isNumericNull() { @@ -170,11 +234,11 @@ public boolean isNumericNull() } } - private static class DoubleExprEval extends NumericExprEval + private static class DoubleExprEval extends NumericExprEval { private DoubleExprEval(@Nullable Number value) { - super(value == null ? NullHandling.defaultDoubleValue() : value); + super(value == null ? NullHandling.defaultDoubleValue() : value.doubleValue()); } @Override @@ -189,6 +253,19 @@ public final boolean asBoolean() return Evals.asBoolean(asDouble()); } + @Override + public Object[] asArray() + { + return asDoubleArray(); + } + + @Nullable + @Override + public Double[] asDoubleArray() + { + return isNumericNull() ? null : new Double[]{value}; + } + @Override public final ExprEval castTo(ExprType castTo) { @@ -203,6 +280,12 @@ public final ExprEval castTo(ExprType castTo) } case STRING: return ExprEval.of(asString()); + case DOUBLE_ARRAY: + return ExprEval.ofDoubleArray(asDoubleArray()); + case LONG_ARRAY: + return ExprEval.ofLongArray(asLongArray()); + case STRING_ARRAY: + return ExprEval.ofStringArray(asStringArray()); } throw new IAE("invalid type " + castTo); } @@ -214,11 +297,11 @@ public Expr toExpr() } } - private static class LongExprEval extends NumericExprEval + private static class LongExprEval extends NumericExprEval { private LongExprEval(@Nullable Number value) { - super(value == null ? NullHandling.defaultLongValue() : value); + super(value == null ? NullHandling.defaultLongValue() : value.longValue()); } @Override @@ -233,6 +316,19 @@ public final boolean asBoolean() return Evals.asBoolean(asLong()); } + @Override + public Object[] asArray() + { + return asLongArray(); + } + + @Nullable + @Override + public Long[] asLongArray() + { + return isNumericNull() ? 
null : new Long[]{value}; + } + @Override public final ExprEval castTo(ExprType castTo) { @@ -247,6 +343,12 @@ public final ExprEval castTo(ExprType castTo) return this; case STRING: return ExprEval.of(asString()); + case DOUBLE_ARRAY: + return ExprEval.ofDoubleArray(asDoubleArray()); + case LONG_ARRAY: + return ExprEval.ofLongArray(asLongArray()); + case STRING_ARRAY: + return ExprEval.ofStringArray(asStringArray()); } throw new IAE("invalid type " + castTo); } @@ -256,6 +358,7 @@ public Expr toExpr() { return new LongExpr(value.longValue()); } + } private static class StringExprEval extends ExprEval @@ -325,6 +428,13 @@ public String asString() return value; } + @Nullable + @Override + public Object[] asArray() + { + return asStringArray(); + } + private int computeInt() { Number number = computeNumber(); @@ -395,6 +505,27 @@ public final boolean asBoolean() return booleanValue; } + @Nullable + @Override + public String[] asStringArray() + { + return value == null ? null : new String[] {value}; + } + + @Nullable + @Override + public Long[] asLongArray() + { + return value == null ? null : new Long[] {computeLong()}; + } + + @Nullable + @Override + public Double[] asDoubleArray() + { + return value == null ? 
null : new Double[] {computeDouble()}; + } + @Override public final ExprEval castTo(ExprType castTo) { @@ -405,6 +536,12 @@ public final ExprEval castTo(ExprType castTo) return ExprEval.ofLong(computeNumber()); case STRING: return this; + case DOUBLE_ARRAY: + return ExprEval.ofDoubleArray(asDoubleArray()); + case LONG_ARRAY: + return ExprEval.ofLongArray(asLongArray()); + case STRING_ARRAY: + return ExprEval.ofStringArray(asStringArray()); } throw new IAE("invalid type " + castTo); } @@ -415,4 +552,274 @@ public Expr toExpr() return new StringExpr(value); } } + + abstract static class ArrayExprEval extends ExprEval + { + private ArrayExprEval(@Nullable T[] value) + { + super(value); + } + + @Override + public boolean isNumericNull() + { + return false; + } + + @Override + public boolean isArray() + { + return true; + } + + @Override + public int asInt() + { + return 0; + } + + @Override + public long asLong() + { + return 0; + } + + @Override + public double asDouble() + { + return 0; + } + + @Override + public boolean asBoolean() + { + return false; + } + + @Nullable + @Override + public T[] asArray() + { + return value; + } + + @Nullable + public T getIndex(int index) + { + return value == null ? null : value[index]; + } + } + + private static class LongArrayExprEval extends ArrayExprEval + { + private LongArrayExprEval(@Nullable Long[] value) + { + super(value); + } + + @Override + public ExprType type() + { + return ExprType.LONG_ARRAY; + } + + @Nullable + @Override + public String[] asStringArray() + { + return value == null ? null : Arrays.stream(value).map(String::valueOf).toArray(String[]::new); + } + + @Nullable + @Override + public Long[] asLongArray() + { + return value; + } + + @Nullable + @Override + public Double[] asDoubleArray() + { + return value == null ? 
null : Arrays.stream(value).map(Long::doubleValue).toArray(Double[]::new); + } + + @Override + public ExprEval castTo(ExprType castTo) + { + if (value == null) { + return StringExprEval.OF_NULL; + } + switch (castTo) { + case STRING: + return ExprEval.of(Arrays.stream(value).map(String::valueOf).collect(Collectors.joining(", "))); + case LONG_ARRAY: + return this; + case DOUBLE_ARRAY: + return ExprEval.ofDoubleArray(asDoubleArray()); + case STRING_ARRAY: + return ExprEval.ofStringArray(asStringArray()); + } + + throw new IAE("invalid type " + castTo); + } + + @Override + public Expr toExpr() + { + return new LongArrayExpr(value); + } + } + + private static class DoubleArrayExprEval extends ArrayExprEval + { + private DoubleArrayExprEval(@Nullable Double[] value) + { + super(value); + } + + @Override + public ExprType type() + { + return ExprType.DOUBLE_ARRAY; + } + + @Nullable + @Override + public String[] asStringArray() + { + return value == null ? null : Arrays.stream(value).map(String::valueOf).toArray(String[]::new); + } + + @Nullable + @Override + public Long[] asLongArray() + { + return value == null ? 
null : Arrays.stream(value).map(Double::longValue).toArray(Long[]::new); + } + + @Nullable + @Override + public Double[] asDoubleArray() + { + return value; + } + + @Override + public ExprEval castTo(ExprType castTo) + { + if (value == null) { + return StringExprEval.OF_NULL; + } + switch (castTo) { + case STRING: + return ExprEval.of(Arrays.stream(value).map(String::valueOf).collect(Collectors.joining(", "))); + case LONG_ARRAY: + return ExprEval.ofLongArray(asLongArray()); + case DOUBLE_ARRAY: + return this; + case STRING_ARRAY: + return ExprEval.ofStringArray(asStringArray()); + } + + throw new IAE("invalid type " + castTo); + } + + @Override + public Expr toExpr() + { + return new DoubleArrayExpr(value); + } + } + + private static class StringArrayExprEval extends ArrayExprEval + { + private boolean longValueValid = false; + private boolean doubleValueValid = false; + private Long[] longValues; + private Double[] doubleValues; + + private StringArrayExprEval(@Nullable String[] value) + { + super(value); + } + + @Override + public ExprType type() + { + return ExprType.STRING_ARRAY; + } + + @Nullable + @Override + public String[] asStringArray() + { + return value; + } + + @Nullable + @Override + public Long[] asLongArray() + { + if (!longValueValid) { + longValues = computeLongs(); + longValueValid = true; + } + return longValues; + } + + @Nullable + @Override + public Double[] asDoubleArray() + { + if (!doubleValueValid) { + doubleValues = computeDoubles(); + doubleValueValid = true; + } + return doubleValues; + } + + @Override + public ExprEval castTo(ExprType castTo) + { + if (value == null) { + return StringExprEval.OF_NULL; + } + switch (castTo) { + case STRING: + return ExprEval.of(Arrays.stream(value).map(String::valueOf).collect(Collectors.joining(", "))); + case STRING_ARRAY: + return this; + case LONG_ARRAY: + return ExprEval.ofLongArray(asLongArray()); + case DOUBLE_ARRAY: + return ExprEval.ofDoubleArray(asDoubleArray()); + } + throw new IAE("invalid 
type " + castTo); + } + + @Override + public Expr toExpr() + { + return new StringArrayExpr(value); + } + + @Nullable + private Long[] computeLongs() + { + if (value == null) { + return null; + } + return Arrays.stream(value).map(GuavaUtils::tryParseLong).toArray(Long[]::new); + } + + @Nullable + private Double[] computeDoubles() + { + if (value == null) { + return null; + } + return Arrays.stream(value).map(Doubles::tryParse).toArray(Double[]::new); + } + } } diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java b/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java index 8b33224867d3..067f685c9a8f 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java @@ -69,6 +69,22 @@ public void exitUnaryOpExpr(ExprParser.UnaryOpExprContext ctx) } } + @Override + public void exitApplyFunctionExpr(ExprParser.ApplyFunctionExprContext ctx) + { + String fnName = ctx.getChild(0).getText(); + // Built-in functions. 
+ final ApplyFunction function = Parser.getApplyFunction(fnName); + if (function == null) { + throw new RE("function '%s' is not defined.", fnName); + } + + nodes.put( + ctx, + new ApplyFunctionExpr(function, fnName, (LambdaExpr) nodes.get(ctx.lambda()), (List) nodes.get(ctx.fnArgs())) + ); + } + @Override public void exitDoubleExpr(ExprParser.DoubleExprContext ctx) { @@ -78,6 +94,16 @@ public void exitDoubleExpr(ExprParser.DoubleExprContext ctx) ); } + @Override + public void exitDoubleArray(ExprParser.DoubleArrayContext ctx) + { + Double[] values = new Double[ctx.DOUBLE().size()]; + for (int i = 0; i < values.length; i++) { + values[i] = Double.parseDouble(ctx.DOUBLE(i).getText()); + } + nodes.put(ctx, new DoubleArrayExpr(values)); + } + @Override public void exitAddSubExpr(ExprParser.AddSubExprContext ctx) { @@ -147,6 +173,16 @@ public void exitLogicalAndOrExpr(ExprParser.LogicalAndOrExprContext ctx) } } + @Override + public void exitLongArray(ExprParser.LongArrayContext ctx) + { + Long[] values = new Long[ctx.LONG().size()]; + for (int i = 0; i < values.length; i++) { + values[i] = Long.parseLong(ctx.LONG(i).getText()); + } + nodes.put(ctx, new LongArrayExpr(values)); + } + @Override public void exitNestedExpr(ExprParser.NestedExprContext ctx) { @@ -156,10 +192,7 @@ public void exitNestedExpr(ExprParser.NestedExprContext ctx) @Override public void exitString(ExprParser.StringContext ctx) { - String text = ctx.getText(); - String unquoted = text.substring(1, text.length() - 1); - String unescaped = unquoted.indexOf('\\') >= 0 ? 
StringEscapeUtils.unescapeJava(unquoted) : unquoted; - nodes.put(ctx, new StringExpr(unescaped)); + nodes.put(ctx, new StringExpr(escapeStringLiteral(ctx.getText()))); } @Override @@ -321,16 +354,28 @@ public void exitIdentifierExpr(ExprParser.IdentifierExprContext ctx) ); } + @Override + public void exitLambda(ExprParser.LambdaContext ctx) + { + List identifiers = new ArrayList<>(ctx.IDENTIFIER().size()); + for (int i = 0; i < ctx.IDENTIFIER().size(); i++) { + String text = ctx.IDENTIFIER(i).getText(); + if (text.charAt(0) == '"' && text.charAt(text.length() - 1) == '"') { + text = StringEscapeUtils.unescapeJava(text.substring(1, text.length() - 1)); + } + identifiers.add(i, new IdentifierExpr(text)); + } + + nodes.put(ctx, new LambdaExpr(identifiers, (Expr) nodes.get(ctx.expr()))); + } + @Override public void exitFunctionArgs(ExprParser.FunctionArgsContext ctx) { List args = new ArrayList<>(); - args.add((Expr) nodes.get(ctx.getChild(0))); - if (ctx.getChildCount() > 1) { - for (int i = 1; i <= ctx.getChildCount() / 2; i++) { - args.add((Expr) nodes.get(ctx.getChild(2 * i))); - } + for (ExprParser.ExprContext exprCtx : ctx.expr()) { + args.add((Expr) nodes.get(exprCtx)); } nodes.put(ctx, args); @@ -341,4 +386,20 @@ public void exitNull(ExprParser.NullContext ctx) { nodes.put(ctx, new StringExpr(null)); } + + @Override + public void exitStringArray(ExprParser.StringArrayContext ctx) + { + String[] values = new String[ctx.STRING().size()]; + for (int i = 0; i < values.length; i++) { + values[i] = escapeStringLiteral(ctx.STRING(i).getText()); + } + nodes.put(ctx, new StringArrayExpr(values)); + } + + private static String escapeStringLiteral(String text) + { + String unquoted = text.substring(1, text.length() - 1); + return unquoted.indexOf('\\') >= 0 ? 
StringEscapeUtils.unescapeJava(unquoted) : unquoted; + } } diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprType.java b/core/src/main/java/org/apache/druid/math/expr/ExprType.java index 050cc6100008..a648e07156c1 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprType.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprType.java @@ -23,5 +23,5 @@ */ public enum ExprType { - DOUBLE, LONG, STRING + DOUBLE, LONG, STRING, DOUBLE_ARRAY, LONG_ARRAY, STRING_ARRAY } diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index 31cdd8e6ca22..99fbcb17d4d3 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -19,16 +19,22 @@ package org.apache.druid.math.expr; +import com.google.common.collect.ImmutableSet; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.joda.time.DateTime; import org.joda.time.format.DateTimeFormat; import java.math.BigDecimal; import java.math.RoundingMode; +import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.Set; +import java.util.stream.Stream; /** * Do NOT remove "unused" members in this class. 
They are used by generated Antlr @@ -40,6 +46,11 @@ interface Function ExprEval apply(List args, Expr.ObjectBinding bindings); + default Set getScalarInputs(List args) + { + return ImmutableSet.copyOf(args); + } + abstract class SingleParam implements Function { @Override @@ -1382,7 +1393,7 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) if (args.size() != 3) { throw new IAE("Function[%s] needs 3 arguments", name()); } - + String base = args.get(0).eval(bindings).asString(); int len = args.get(1).eval(bindings).asInt(); String pad = args.get(2).eval(bindings).asString(); @@ -1424,4 +1435,346 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } + abstract class ArrayFunction implements Function + { + public Set getArrayInputs(List args) + { + if (args.size() != 1) { + throw new IAE("Function[%s] needs 1 argument", name()); + } + + return ImmutableSet.of(args.get(0)); + } + + @Override + public Set getScalarInputs(List args) + { + return Collections.emptySet(); + } + } + + class ArrayLengthFunction extends ArrayFunction + { + @Override + public String name() + { + return "array_length"; + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + if (args.size() != 1) { + throw new IAE("Function[%s] needs 1 argument", name()); + } + + final ExprEval expr = args.get(0).eval(bindings); + final Object[] array = expr.asArray(); + if (array == null) { + return ExprEval.of(null); + } + + return ExprEval.ofLong(array.length); + } + } + + class ArrayOffsetFunction extends ArrayFunction + { + @Override + public String name() + { + return "array_offset"; + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + + final ExprEval expr = args.get(0).eval(bindings); + final Object[] array = expr.asArray(); + if (array == null) { + return ExprEval.of(null); + } + + final int position = 
args.get(1).eval(bindings).asInt(); + + if (position >= 0 && array.length > position) { // negative offsets are out of range too: yield null, don't throw + return ExprEval.bestEffortOf(array[position]); + } + return ExprEval.of(null); + } + } + + class ArrayOrdinalFunction extends ArrayFunction + { + @Override + public String name() + { + return "array_ordinal"; + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + + final ExprEval expr = args.get(0).eval(bindings); + final Object[] array = expr.asArray(); + if (array == null) { + return ExprEval.of(null); + } + + final int position = args.get(1).eval(bindings).asInt() - 1; + + if (position >= 0 && array.length > position) { // ordinals are 1-based, so ordinal 0 (position -1) yields null + return ExprEval.bestEffortOf(array[position]); + } + return ExprEval.of(null); + } + } + + class ArrayContainsFunction extends ArrayFunction + { + @Override + public String name() + { + return "array_contains"; + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + + final ExprEval expr = args.get(0).eval(bindings); + final ExprEval toCheck = args.get(1).eval(bindings); + + final Object[] array1 = expr.asArray(); + final Object[] array2 = toCheck.asArray(); + + if (array1 == null || array2 == null) { + return ExprEval.of(null); + } + + return ExprEval.bestEffortOf(Arrays.asList(array1).containsAll(Arrays.asList(array2))); + } + } + + class ArrayOverlapFunction extends ArrayFunction + { + @Override + public String name() + { + return "array_overlap"; + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + + final ExprEval expr = args.get(0).eval(bindings); + final ExprEval toCheck = args.get(1).eval(bindings); + + final Object[] array1 = expr.asArray(); + final Object[] array2 = toCheck.asArray(); + + if (array1 == null || array2 == 
null) { + return ExprEval.of(null); + } + + List olst = Arrays.asList(array1); + List o2lst = Arrays.asList(array2); + boolean any = false; + for (Object check : olst) { + any |= o2lst.contains(check); + } + return ExprEval.bestEffortOf(any); + } + } + + class ArrayOffsetOfFunction extends ArrayFunction + { + @Override + public String name() + { + return "array_offset_of"; + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + + final ExprEval expr = args.get(0).eval(bindings); + final ExprEval toCheck = args.get(1).eval(bindings); + + final Object[] array = expr.asArray(); + if (array == null) { + return ExprEval.of(null); + } + switch (toCheck.type()) { + case STRING: + case LONG: + case DOUBLE: + int index = Arrays.asList(array).indexOf(toCheck.value()); + return ExprEval.bestEffortOf(index < 0 ? null : index); + default: + throw new IAE("Function[%s] argument must be a a scalar type", name()); + } + } + } + + class ArrayOrdinalOfFunction extends ArrayFunction + { + @Override + public String name() + { + return "array_ordinal_of"; + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + + final ExprEval expr = args.get(0).eval(bindings); + final ExprEval toCheck = args.get(1).eval(bindings); + final Object[] array = expr.asArray(); + if (array == null) { + return ExprEval.of(null); + } + + switch (toCheck.type()) { + case STRING: + case LONG: + case DOUBLE: + int index = Arrays.asList(array).indexOf(toCheck.value()); + return ExprEval.bestEffortOf(index < 0 ? 
null : index + 1); + default: + throw new IAE("Function[%s] argument must be a a scalar type", name()); + } + } + } + + class ArrayAppendFunction extends ArrayFunction + { + @Override + public String name() + { + return "array_append"; + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 arguments", name()); + } + + final ExprEval lhs = args.get(0).eval(bindings); + final ExprEval rhs = args.get(1).eval(bindings); + + final Object[] array = lhs.asArray(); + + if (array == null) { + return ExprEval.of(null); + } + + switch (lhs.type()) { + case STRING: + case STRING_ARRAY: + return ExprEval.ofStringArray(this.append(lhs.asStringArray(), rhs.asString()).toArray(String[]::new)); + case LONG: + case LONG_ARRAY: + return ExprEval.ofLongArray(this.append(lhs.asLongArray(), rhs.asLong()).toArray(Long[]::new)); + case DOUBLE: + case DOUBLE_ARRAY: + return ExprEval.ofDoubleArray(this.append(lhs.asDoubleArray(), rhs.asDouble()).toArray(Double[]::new)); + } + throw new RuntimeException("impossible"); + } + + private Stream append(T[] array, T val) + { + // Arrays.asList is a fixed-size view whose add() throws UnsupportedOperationException, + // so produce the elements of 'array' followed by 'val' by concatenating streams instead + return Stream.concat(Arrays.stream(array), Stream.of(val)); + } + + @Override + public Set getArrayInputs(List args) + { + return ImmutableSet.copyOf(args); + } + } + + class ArrayConcatFunction extends ArrayFunction + { + @Override + public String name() + { + return "array_concat"; + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 arguments", name()); + } + + final ExprEval lhs = args.get(0).eval(bindings); + final ExprEval rhs = args.get(1).eval(bindings); + + final Object[] array1 = lhs.asArray(); + final Object[] array2 = rhs.asArray(); + + if (array1 == null) { + return ExprEval.of(null); + } + if (array2 == null) { + return lhs; + } + + switch (lhs.type()) { + case STRING: + case STRING_ARRAY: + return 
ExprEval.ofStringArray(this.cat(lhs.asStringArray(), rhs.asStringArray()).toArray(String[]::new)); + case LONG: + case LONG_ARRAY: + return ExprEval.ofLongArray(this.cat(lhs.asLongArray(), rhs.asLongArray()).toArray(Long[]::new)); + case DOUBLE: + case DOUBLE_ARRAY: + return ExprEval.ofDoubleArray(this.cat(lhs.asDoubleArray(), rhs.asDoubleArray()).toArray(Double[]::new)); + } + throw new RE("Unable to concatenate unknown type %s", lhs.type()); + } + + private Stream cat(T[] array1, T[] array2) + { + // Arrays.asList is a fixed-size view whose addAll() throws UnsupportedOperationException, + // so produce the elements of 'array1' followed by those of 'array2' by concatenating streams + return Stream.concat(Arrays.stream(array1), Arrays.stream(array2)); + } + + @Override + public Set getArrayInputs(List args) + { + return ImmutableSet.copyOf(args); + } + } } diff --git a/core/src/main/java/org/apache/druid/math/expr/Parser.java b/core/src/main/java/org/apache/druid/math/expr/Parser.java index 861fc3c2d92b..cc9d9e2a9aa1 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/core/src/main/java/org/apache/druid/math/expr/Parser.java @@ -21,13 +21,16 @@ import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.base.Supplier; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.tree.ParseTree; import org.antlr.v4.runtime.tree.ParseTreeWalker; +import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.math.expr.antlr.ExprLexer; @@ -35,16 +38,20 @@ import javax.annotation.Nullable; import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; +import 
java.util.stream.Collectors; public class Parser { private static final Logger log = new Logger(Parser.class); private static final Map FUNCTIONS; + private static final Map APPLY_FUNCTIONS; static { Map functionMap = new HashMap<>(); @@ -60,6 +67,20 @@ public class Parser } } FUNCTIONS = ImmutableMap.copyOf(functionMap); + + Map applyFunctionMap = new HashMap<>(); + for (Class clazz : ApplyFunction.class.getClasses()) { + if (!Modifier.isAbstract(clazz.getModifiers()) && ApplyFunction.class.isAssignableFrom(clazz)) { + try { + ApplyFunction function = (ApplyFunction) clazz.newInstance(); + applyFunctionMap.put(StringUtils.toLowerCase(function.name()), function); + } + catch (Exception e) { + log.info("failed to instantiate " + clazz.getName() + ".. ignoring", e); + } + } + } + APPLY_FUNCTIONS = ImmutableMap.copyOf(applyFunctionMap); } public static Function getFunction(String name) @@ -67,6 +88,11 @@ public static Function getFunction(String name) return FUNCTIONS.get(StringUtils.toLowerCase(name)); } + public static ApplyFunction getApplyFunction(String name) + { + return APPLY_FUNCTIONS.get(StringUtils.toLowerCase(name)); + } + public static Expr parse(String in, ExprMacroTable macroTable) { return parse(in, macroTable, true); @@ -130,6 +156,151 @@ public static Expr flatten(Expr expr) return expr; }
+
+  /**
+   * Rewrites {@code expr} so that every identifier named in {@code unapplied} is consumed through an
+   * {@link ApplyFunctionExpr}.
+   *
+   * The expression is returned untouched when it is a bare identifier or a cast of one. Otherwise the lambdas of any
+   * nested apply functions are lifted first (see {@code liftApplyLambda}); identifiers that end up as inputs of array
+   * or apply functions are considered handled. Whatever remains is applied by wrapping the lifted expression in
+   * {@code map} (one identifier) or {@code cartesian_map} (several identifiers).
+   *
+   * @param expr      expression to rewrite
+   * @param unapplied names of the identifiers that must be applied, must not be empty
+   *
+   * @return the original expression, the lambda-lifted expression, or a new apply function wrapping the latter
+   *
+   * @throws IllegalArgumentException if {@code unapplied} is empty
+   */
+  public static Expr applyUnappliedIdentifiers(Expr expr, List<String> unapplied)
+  {
+    Preconditions.checkArgument(unapplied.size() > 0);
+
+    // special handle if expr is just array identifier or array is being directly cast, that doesn't count
+    String s = Parser.getIdentifierOrCastIdentifier(expr);
+    if (s != null) {
+      return expr;
+    }
+
+    final ApplyFunction fn;
+    final LambdaExpr lambdaExpr;
+    final List<Expr> args;
+
+    // any unapplied identifiers that are inside a lambda expression need that lambda expression to be rewritten
+    Expr newExpr = expr.visit(
+        new Expr.Shuttle()
+        {
+          @Override
+          public Expr visit(Expr expr)
+          {
+            if (expr instanceof ApplyFunctionExpr) {
+              return liftApplyLambda((ApplyFunctionExpr) expr, unapplied);
+            }
+            return expr;
+          }
+        }
+    );
+    final Set<String> expectedArrays = Parser.findArrayFnBindings(newExpr);
+    List<String> remainingUnappliedArgs =
+        unapplied.stream().filter(x -> !expectedArrays.contains(x)).collect(Collectors.toList());
+
+    // if lifting the lambdas got rid of all missing bindings, return the transformed expression
+    if (remainingUnappliedArgs.size() == 0) {
+      return newExpr;
+    }
+
+    // else, it *should be safe* to wrap in either map or cartesian_map because we still have missing bindings that
+    // were *not* referenced in a lambda body. wrap the lifted expression, not the original one, so that the lambda
+    // rewrites performed above are not thrown away
+    if (remainingUnappliedArgs.size() == 1) {
+      fn = new ApplyFunction.MapFunction();
+      IdentifierExpr lambdaArg = new IdentifierExpr(remainingUnappliedArgs.iterator().next());
+      lambdaExpr = new LambdaExpr(ImmutableList.of(lambdaArg), newExpr);
+      args = ImmutableList.of(lambdaArg);
+    } else {
+      fn = new ApplyFunction.CartesianMapFunction();
+      List<IdentifierExpr> identifiers = new ArrayList<>(remainingUnappliedArgs.size());
+      args = new ArrayList<>(remainingUnappliedArgs.size());
+      for (String remainingUnappliedArg : remainingUnappliedArgs) {
+        IdentifierExpr arg = new IdentifierExpr(remainingUnappliedArg);
+        identifiers.add(arg);
+        args.add(arg);
+      }
+      lambdaExpr = new LambdaExpr(identifiers, newExpr);
+    }
+
+    return new ApplyFunctionExpr(fn, fn.name(), lambdaExpr, args);
+  }
+
+  /**
+   * Performs partial lifting of free identifiers of the lambda expression of an {@link ApplyFunctionExpr}, constrained
+   * by a list of "unapplied" identifiers, and translating them into arguments of a new {@link LambdaExpr} and
+   * {@link ApplyFunctionExpr} as appropriate.
+   *
+   * The "unapplied" identifiers list is used to allow say only lifting array identifiers and adding it to the cartesian
+   * product to allow "magical" translation of multi-value string dimensions which are expressed as single value
+   * dimensions to function correctly and as expected.
+   *
+   * @return {@code expr} itself when its lambda references none of {@code unappliedArgs}, otherwise the rewritten
+   *         apply function
+   *
+   * @throws RE if the apply function is not one of map, cartesian_map, filter, any, all, foldr or cartesian_foldr
+   */
+  private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List<String> unappliedArgs)
+  {
+    // this will _not_ include the lambda identifiers.. anything in this list needs to be applied
+    List<IdentifierExpr> unappliedLambdaBindings = Parser.findRequiredBindings(expr.lambdaExpr)
+                                                         .stream()
+                                                         .filter(unappliedArgs::contains)
+                                                         .map(IdentifierExpr::new)
+                                                         .collect(Collectors.toList());
+
+    if (unappliedLambdaBindings.size() == 0) {
+      return expr;
+    }
+
+    final ApplyFunction newFn;
+    final ApplyFunctionExpr newExpr;
+
+    // existing arguments followed by the lifted identifiers, used by the map and match translations
+    final List<Expr> newArgs = new ArrayList<>(expr.argsExpr);
+    newArgs.addAll(unappliedLambdaBindings);
+
+    switch (expr.function.name()) {
+      case ApplyFunction.MapFunction.NAME:
+      case ApplyFunction.CartesianMapFunction.NAME:
+        // map(x -> x + y, x) => cartesian_map((x, y) -> x + y, x, y)
+        // cartesian_map((x, y) -> x + y + z, x, y) => cartesian_map((x, y, z) -> x + y + z, x, y, z)
+        final List<IdentifierExpr> lambdaIds =
+            new ArrayList<>(expr.lambdaExpr.getIdentifiers().size() + unappliedArgs.size());
+        lambdaIds.addAll(expr.lambdaExpr.getIdentifierExprs());
+        lambdaIds.addAll(unappliedLambdaBindings);
+        final LambdaExpr newLambda = new LambdaExpr(lambdaIds, expr.lambdaExpr.getExpr());
+        newFn = new ApplyFunction.CartesianMapFunction();
+        newExpr = new ApplyFunctionExpr(newFn, newFn.name(), newLambda, newArgs);
+        break;
+      case ApplyFunction.AllMatchFunction.NAME:
+      case ApplyFunction.AnyMatchFunction.NAME:
+      case ApplyFunction.FilterFunction.NAME:
+        // i'm lazy and didn't add 'cartesian_filter', 'cartesian_any', and 'cartesian_all', so instead steal the match
+        // expressions lambda and translate it into a 'cartesian_map', and apply that to the match function with a new
+        // identity expression lambda since the input is an array of boolean expression results (or should be..)
+        // filter(x -> x > y, x) => filter(x -> x, cartesian_map((x,y) -> x > y, x, y))
+        // any(x -> x > y, x) => any(x -> x, cartesian_map((x, y) -> x > y, x, y))
+        // all(x -> x > y, x) => all(x -> x, cartesian_map((x, y) -> x > y, x, y))
+        final List<IdentifierExpr> matchLambdaIds =
+            new ArrayList<>(expr.lambdaExpr.getIdentifiers().size() + unappliedArgs.size());
+        matchLambdaIds.addAll(expr.lambdaExpr.getIdentifierExprs());
+        matchLambdaIds.addAll(unappliedLambdaBindings);
+        // the cartesian_map must evaluate the original match expression over all of its (now lifted) arguments;
+        // handing it the single argument identity lambda would not match the number of array arguments
+        final LambdaExpr matchLambda = new LambdaExpr(matchLambdaIds, expr.lambdaExpr.getExpr());
+        ApplyFunction newArrayFn = new ApplyFunction.CartesianMapFunction();
+        ApplyFunctionExpr arrayExpr = new ApplyFunctionExpr(newArrayFn, newArrayFn.name(), matchLambda, newArgs);
+        IdentifierExpr identityExprIdentifier = new IdentifierExpr("_");
+        LambdaExpr identityExpr = new LambdaExpr(ImmutableList.of(identityExprIdentifier), identityExprIdentifier);
+        newExpr = new ApplyFunctionExpr(expr.function, expr.function.name(), identityExpr, ImmutableList.of(arrayExpr));
+        break;
+      case ApplyFunction.FoldrFunction.NAME:
+      case ApplyFunction.CartesianFoldrFunction.NAME:
+        // foldr((x, acc) -> acc + x + y, x, acc) => cartesian_foldr((x, y, acc) -> acc + x + y, x, y, acc)
+        // cartesian_foldr((x, y, acc) -> acc + x + y + z, x, y, acc) => cartesian_foldr((x, y, z, acc) -> acc + x + y + z, x, y, z, acc)
+
+        final List<Expr> newFoldArgs = new ArrayList<>(expr.argsExpr.size() + unappliedArgs.size());
+        final List<IdentifierExpr> newFoldLambdaIdentifiers =
+            new ArrayList<>(expr.lambdaExpr.getIdentifiers().size() + unappliedArgs.size());
+        final List<IdentifierExpr> existingFoldLambdaIdentifiers = expr.lambdaExpr.getIdentifierExprs();
+        // accumulator argument is last argument, slice it off when constructing new arg list and lambda args identifiers
+        for (int i = 0; i < expr.argsExpr.size() - 1; i++) {
+          newFoldArgs.add(expr.argsExpr.get(i));
+          newFoldLambdaIdentifiers.add(existingFoldLambdaIdentifiers.get(i));
+        }
+        newFoldArgs.addAll(unappliedLambdaBindings);
+        newFoldLambdaIdentifiers.addAll(unappliedLambdaBindings);
+        // add accumulator last, to the lambda identifiers and to the arguments alike so that the two stay aligned
+        newFoldLambdaIdentifiers.add(existingFoldLambdaIdentifiers.get(existingFoldLambdaIdentifiers.size() - 1));
+        newFoldArgs.add(expr.argsExpr.get(expr.argsExpr.size() - 1));
+        final LambdaExpr newFoldLambda = new LambdaExpr(newFoldLambdaIdentifiers, expr.lambdaExpr.getExpr());
+
+        newFn = new ApplyFunction.CartesianFoldrFunction();
+        newExpr = new ApplyFunctionExpr(newFn, newFn.name(), newFoldLambda, newFoldArgs);
+        break;
+      default:
+        throw new RE("Unable to transform apply function:[%s]", expr.function.name());
+    }
+
+    return newExpr;
+  }
+
 public static List findRequiredBindings(Expr expr) { final Set found = new LinkedHashSet<>(); @@ -148,6 +319,48 @@ public void visit(Expr expr) return Lists.newArrayList(found); }
+
+  /**
+   * Collects the names of all identifiers (or casts of identifiers) which appear as array inputs of an array
+   * {@link Function} or as arguments of an {@link ApplyFunctionExpr} anywhere in {@code expr}.
+   *
+   * @return the identifier names, in the order in which the visitor encounters them
+   */
+  public static Set<String> findArrayFnBindings(Expr expr)
+  {
+    final Set<String> arrayFnBindings = new LinkedHashSet<>();
+    expr.visit(new Expr.Visitor()
+    {
+      @Override
+      public void visit(Expr expr)
+      {
+        final Set<Expr> arrayArgs;
+        if (expr instanceof FunctionExpr && ((FunctionExpr) expr).function instanceof Function.ArrayFunction) {
+          FunctionExpr fnExpr = (FunctionExpr) expr;
+          Function.ArrayFunction fn = (Function.ArrayFunction) fnExpr.function;
+          arrayArgs = fn.getArrayInputs(fnExpr.args);
+        } else if (expr instanceof ApplyFunctionExpr) {
+          ApplyFunctionExpr applyExpr = (ApplyFunctionExpr) expr;
+          arrayArgs = applyExpr.function.getArrayInputs(applyExpr.argsExpr);
+        } else {
+          arrayArgs = Collections.emptySet();
+        }
+        // only direct identifiers and casts of identifiers count, nested expressions are visited on their own
+        for (Expr arg : arrayArgs) {
+          String s = getIdentifierOrCastIdentifier(arg);
+          if (s != null) {
+            arrayFnBindings.add(s);
+          }
+        }
+      }
+    });
+    return arrayFnBindings;
+  }
+
+  /**
+   * Returns the identifier name if {@code expr} is an {@link IdentifierExpr}, or a (possibly nested) cast function
+   * whose first argument is one; returns null for every other kind of expression.
+   */
+  @Nullable
+  public static String getIdentifierOrCastIdentifier(Expr expr)
+  {
+    if (expr instanceof IdentifierExpr) {
+      return expr.toString();
+    } else if (expr instanceof FunctionExpr && ((FunctionExpr) expr).function instanceof Function.CastFunc) {
+      FunctionExpr fn = (FunctionExpr) expr;
+      // the value being cast is the first argument of the cast function
+      return getIdentifierOrCastIdentifier(fn.args.get(0));
+    }
+    return null;
+  }
+
 @Nullable public static String getIdentifierIfIdentifier(Expr expr) { diff --git a/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java new file mode 100644 index
000000000000..0f02eaf6ca14 --- /dev/null +++ b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java @@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.druid.common.config.NullHandling;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for the lambda based apply functions (map, cartesian_map, filter, foldr, cartesian_foldr, any, all), which
+ * parses each expression with {@link Parser#parse} and evaluates it against a fixed set of scalar and array bindings.
+ */
+public class ApplyFunctionTest
+{
+  private Expr.ObjectBinding bindings;
+
+  /**
+   * Binds three scalars (x, y, z) and six arrays: a (strings), b (longs), c (doubles), d (a single null string),
+   * e (a null followed by two strings) and f (empty).
+   */
+  @Before
+  public void setup()
+  {
+    ImmutableMap.Builder builder = ImmutableMap.builder();
+    builder.put("x", "foo");
+    builder.put("y", 2);
+    builder.put("z", 3.1);
+    builder.put("a", new String[] {"foo", "bar", "baz", "foobar"});
+    builder.put("b", new Long[] {1L, 2L, 3L, 4L, 5L});
+    builder.put("c", new Double[] {3.1, 4.2, 5.3});
+    builder.put("d", new String[] {null});
+    builder.put("e", new String[] {null, "foo", "bar"});
+    builder.put("f", new String[0]);
+    bindings = Parser.withMap(builder.build());
+  }
+
+  @Test
+  public void testMap()
+  {
+    // every case is checked twice, once against an array literal and once against the equivalent binding
+    assertExpr("map((x) -> concat(x, 'foo'), ['foo', 'bar', 'baz', 'foobar'])", new String[] {"foofoo", "barfoo", "bazfoo", "foobarfoo"});
+    assertExpr("map((x) -> concat(x, 'foo'), a)", new String[] {"foofoo", "barfoo", "bazfoo", "foobarfoo"});
+
+    assertExpr("map((x) -> x + 1, [1, 2, 3, 4, 5])", new Long[] {2L, 3L, 4L, 5L, 6L});
+    assertExpr("map((x) -> x + 1, b)", new Long[] {2L, 3L, 4L, 5L, 6L});
+
+    assertExpr("map((c) -> c + z, [3.1, 4.2, 5.3])", new Double[]{6.2, 7.3, 8.4});
+    assertExpr("map((c) -> c + z, c)", new Double[]{6.2, 7.3, 8.4});
+
+    assertExpr("map((x) -> x + 1, map((x) -> x + 1, [1, 2, 3, 4, 5]))", new Long[] {3L, 4L, 5L, 6L, 7L});
+    assertExpr("map((x) -> x + 1, map((x) -> x + 1, b))", new Long[] {3L, 4L, 5L, 6L, 7L});
+  }
+
+  @Test
+  public void testCartesianMap()
+  {
+    assertExpr("cartesian_map((x, y) -> concat(x, y), ['foo', 'bar', 'baz', 'foobar'], ['bar', 'baz'])", new String[] {"foobar", "foobaz", "barbar", "barbaz", "bazbar", "bazbaz", "foobarbar", "foobarbaz"});
+    assertExpr("cartesian_map((x, y, z) -> concat(concat(x, y), z), ['foo', 'bar', 'baz', 'foobar'], ['bar', 'baz'], ['omg'])", new String[] {"foobaromg", "foobazomg", "barbaromg", "barbazomg", "bazbaromg", "bazbazomg", "foobarbaromg", "foobarbazomg"});
+
+    assertExpr("cartesian_map((x, y) -> concat(x, y), d, d)", new String[] {null});
+    assertExpr("cartesian_map((x, y) -> concat(x, y), d, f)", new String[0]);
+    // the expected result of concatenating with a null element depends on the configured null handling mode
+    if (NullHandling.replaceWithDefault()) {
+      assertExpr("cartesian_map((x, y) -> concat(x, y), d, e)", new String[]{null, "foo", "bar"});
+      assertExpr("cartesian_map((x, y) -> concat(x, y), e, e)", new String[] {null, "foo", "bar", "foo", "foofoo", "foobar", "bar", "barfoo", "barbar"});
+    } else {
+      assertExpr("cartesian_map((x, y) -> concat(x, y), d, e)", new String[]{null, null, null});
+      assertExpr("cartesian_map((x, y) -> concat(x, y), e, e)", new String[] {null, null, null, null, "foofoo", "foobar", null, "barfoo", "barbar"});
+    }
+  }
+
+  @Test
+  public void testFilter()
+  {
+    assertExpr("filter((x) -> strlen(x) > 3, ['foo', 'bar', 'baz', 'foobar'])", new String[] {"foobar"});
+    assertExpr("filter((x) -> strlen(x) > 3, a)", new String[] {"foobar"});
+
+    assertExpr("filter((x) -> x > 2, [1, 2, 3, 4, 5])", new Long[] {3L, 4L, 5L});
+    assertExpr("filter((x) -> x > 2, b)", new Long[] {3L, 4L, 5L});
+  }
+
+  @Test
+  public void testFoldr()
+  {
+    assertExpr("foldr((x, y) -> x + y, [1, 1, 1, 1, 1], 0)", 5L);
+    assertExpr("foldr((b, acc) -> b * acc, map((b) -> b * 2, filter(b -> b > 3, b)), 1)", 80L);
+  }
+
+  @Test
+  public void testCartesianFoldr()
+  {
+    assertExpr("cartesian_foldr((x, y, acc) -> x + y + acc, [1, 1, 1, 1, 1], [1, 1], 0)", 20L);
+  }
+
+  @Test
+  public void testAnyMatch()
+  {
+    assertExpr("any(x -> x > 3, [1, 2, 3, 4])", "true");
+    assertExpr("any(x -> x > 3, [1, 2, 3])", "false");
+    assertExpr("any(x -> x, map(x -> x > 3, [1, 2, 3, 4]))", "true");
+    assertExpr("any(x -> x, map(x -> x > 3, [1, 2, 3]))", "false");
+  }
+
+  @Test
+  public void testAllMatch()
+  {
+    // these array literals are written without commas between the elements
+    assertExpr("all(x -> x > 0, [1 2 3 4])", "true");
+    assertExpr("all(x -> x > 1, [1 2 3 4])", "false");
+    assertExpr("all(x -> x, map(x -> x > 0, [1 2 3 4]))", "true");
+    assertExpr("all(x -> x, map(x -> x > 1, [1 2 3 4]))", "false");
+  }
+
+  /**
+   * Parses and evaluates the expression and compares the evaluated value with the expected scalar result.
+   */
+  private void assertExpr(final String expression, final Object expectedResult)
+  {
+    final Expr expr = Parser.parse(expression, ExprMacroTable.nil());
+    Assert.assertEquals(expression, expectedResult, expr.eval(bindings).value());
+  }
+
+  /**
+   * Parses and evaluates the expression and compares its array view with the expected array.
+   */
+  private void assertExpr(final String expression, final Object[] expectedResult)
+  {
+    final Expr expr = Parser.parse(expression, ExprMacroTable.nil());
+    final Object[] result = expr.eval(bindings).asArray();
+    // the comparison is skipped only when an empty array is expected and the result is a non-null empty array
+    if (expectedResult.length != 0 || result == null || result.length != 0) {
+      Assert.assertArrayEquals(expression, expectedResult, result);
+    }
+  }
+
+  /**
+   * Parses and evaluates the expression, casts the value to a Double array and compares it element by element with
+   * the expected array using a tolerance of 0.00001.
+   */
+  private void assertExpr(final String expression, final Double[] expectedResult)
+  {
+    final Expr expr = Parser.parse(expression, ExprMacroTable.nil());
+    Double[] result = (Double[]) expr.eval(bindings).value();
+    Assert.assertEquals(expectedResult.length, result.length);
+    for (int i = 0; i < result.length; i++) {
+      Assert.assertEquals(expression, expectedResult[i], result[i], 0.00001); // something is lame somewhere..
+    }
+  }
+}
diff --git a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java index 2a5bfc95cc07..793028c718c8 100644 --- a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -22,17 +22,25 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.common.config.NullHandling; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; public class FunctionTest { - private final Expr.ObjectBinding bindings = Parser.withMap( - ImmutableMap.of( - "x", "foo", - "y", 2, - "z", 3.1 - ) - ); + private Expr.ObjectBinding bindings; + + @Before + public void setup() + { + ImmutableMap.Builder builder = ImmutableMap.builder(); + builder.put("x", "foo"); + builder.put("y", 2); + builder.put("z", 3.1); + builder.put("a", new String[] {"foo", "bar", "baz", "foobar"}); + builder.put("b", new Long[] {1L, 2L, 3L, 4L, 5L}); + builder.put("c", new Double[] {3.1, 4.2, 5.3}); + bindings = Parser.withMap(builder.build()); + } @Test public void testCaseSimple() @@ -115,12 +123,6 @@ public void testUpper() assertExpr("upper(x)", "FOO"); } - private void assertExpr(final String expression, final Object expectedResult) - { - final Expr expr = Parser.parse(expression, ExprMacroTable.nil()); - Assert.assertEquals(expression, expectedResult, expr.eval(bindings).value()); - } - @Test public void testIsNull() { @@ -156,4 +158,50 @@ public void testRpad() assertExpr("rpad(x, 5, null)", null); assertExpr("rpad(null, 5, x)", null); } + + @Test + public void testArrayLength() + { + assertExpr("array_length([1, 2, 3])", 3L); + assertExpr("array_length(a)", 4L); + } + + @Test + public void testArrayOffset() + { + assertExpr("array_offset([1, 2, 3], 2)", 3L); + assertExpr("array_offset([1, 2, 3], 3)", null); +
assertExpr("array_offset(a, 2)", "baz"); + } + + @Test + public void testArrayOrdinal() + { + assertExpr("array_ordinal([1, 2, 3], 3)", 3L); + assertExpr("array_ordinal([1, 2, 3], 4)", null); + assertExpr("array_ordinal(a, 3)", "baz"); + } + + @Test + public void testArrayContains() + { + assertExpr("array_contains([1 2 3], 2)", "true"); + assertExpr("array_contains([1 2 3], 4)", "false"); + assertExpr("array_contains([1 2 3], [2 3])", "true"); + assertExpr("array_contains([1 2 3], [3 4])", "false"); + assertExpr("array_contains(b, [3 4])", "true"); + } + + @Test + public void testArrayOverlap() + { + assertExpr("array_overlap([1 2 3], [2 4 6])", "true"); + assertExpr("array_overlap([1 2 3], [4 5 6])", "false"); + } + + private void assertExpr(final String expression, final Object expectedResult) + { + final Expr expr = Parser.parse(expression, ExprMacroTable.nil()); + Assert.assertEquals(expression, expectedResult, expr.eval(bindings).value()); + } } diff --git a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java index c108d8b1053d..20b35d7f181d 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java @@ -24,6 +24,7 @@ import org.junit.Assert; import org.junit.Test; +import javax.annotation.Nullable; import java.util.List; /** @@ -179,6 +180,34 @@ public void testLiterals() validateConstantExpression("\'f\\u000Ao \\'b\\\\\\\"ar\'", "f\no 'b\\\"ar"); } + @Test + public void testLiteralArrays() + { + validateConstantExpression("[1.0, 2.345]", new Double[] {1.0, 2.345}); + validateConstantExpression("[1.0 2.345]", new Double[] {1.0, 2.345}); + validateConstantExpression("[1, 3]", new Long[] {1L, 3L}); + validateConstantExpression("[1 3]", new Long[] {1L, 3L}); + validateConstantExpression("[\'hello\', \'world\']", new String[] {"hello", "world"}); + validateConstantExpression("[\'hello\' \'world\']", new 
String[] {"hello", "world"}); + } + + @Test + public void testApplyFunctions() + { + final Expr parsed = Parser.parse("map((x) -> x + 1, [1, 2, 3])", ExprMacroTable.nil()); + Expr.ObjectBinding binding = new Expr.ObjectBinding() + { + @Nullable + @Override + public Object get(String name) + { + return null; + } + }; + ExprEval eval = parsed.eval(binding); + Assert.assertArrayEquals(new Long[]{2L, 3L, 4L}, (Long[]) eval.value()); + } + @Test public void testFunctions() { @@ -207,4 +236,13 @@ private void validateConstantExpression(String expression, Object expected) Parser.parse(expression, ExprMacroTable.nil()).eval(Parser.withMap(ImmutableMap.of())).value() ); } + + private void validateConstantExpression(String expression, Object[] expected) + { + Assert.assertArrayEquals( + expression, + expected, + (Object[]) Parser.parse(expression, ExprMacroTable.nil()).eval(Parser.withMap(ImmutableMap.of())).value() + ); + } } diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java index ec6007a37e5c..c9f561a94251 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java @@ -69,6 +69,13 @@ public Expr apply(List args) class BloomExpr implements Expr { + private final Expr arg; + + private BloomExpr(Expr arg) + { + this.arg = arg; + } + @Nonnull @Override public ExprEval eval(final ObjectBinding bindings) @@ -117,8 +124,15 @@ public void visit(final Visitor visitor) arg.visit(visitor); visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new BloomExpr(newArg)); + } } - return new BloomExpr(); + return new BloomExpr(arg); } } diff --git 
a/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java index f6329100a1f2..54d9264d04bc 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java @@ -69,6 +69,13 @@ public Expr apply(final List args) class LikeExtractExpr implements Expr { + private final Expr arg; + + private LikeExtractExpr(Expr arg) + { + this.arg = arg; + } + @Nonnull @Override public ExprEval eval(final ObjectBinding bindings) @@ -82,8 +89,15 @@ public void visit(final Visitor visitor) arg.visit(visitor); visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new LikeExtractExpr(newArg)); + } } - return new LikeExtractExpr(); + return new LikeExtractExpr(arg); } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java index 3a5e40ee53a6..42990542972c 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java @@ -73,6 +73,13 @@ public Expr apply(final List args) class LookupExpr implements Expr { + private final Expr arg; + + private LookupExpr(Expr arg) + { + this.arg = arg; + } + @Nonnull @Override public ExprEval eval(final ObjectBinding bindings) @@ -86,8 +93,15 @@ public void visit(final Visitor visitor) arg.visit(visitor); visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new LookupExpr(newArg)); + } } - return new LookupExpr(); + return new LookupExpr(arg); } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java 
b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java index 305c03e4ed51..82190a4aab4b 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java @@ -59,6 +59,13 @@ public Expr apply(final List args) final int index = indexExpr == null ? 0 : ((Number) indexExpr.getLiteralValue()).intValue(); class RegexpExtractExpr implements Expr { + private final Expr arg; + + private RegexpExtractExpr(Expr arg) + { + this.arg = arg; + } + @Nonnull @Override public ExprEval eval(final ObjectBinding bindings) @@ -75,7 +82,14 @@ public void visit(final Visitor visitor) arg.visit(visitor); visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new RegexpExtractExpr(newArg)); + } } - return new RegexpExtractExpr(); + return new RegexpExtractExpr(arg); } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java index a775c69b7969..3dd9e7372eef 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java @@ -19,6 +19,7 @@ package org.apache.druid.query.expression; +import com.google.common.collect.ImmutableList; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.granularity.Granularity; @@ -29,6 +30,7 @@ import javax.annotation.Nonnull; import java.util.List; +import java.util.stream.Collectors; public class TimestampCeilExprMacro implements ExprMacroTable.ExprMacro { @@ -81,6 +83,13 @@ public void visit(final Visitor visitor) arg.visit(visitor); visitor.visit(this); } + + @Override + public 
Expr visit(Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new TimestampCeilExpr(ImmutableList.of(newArg))); + } } private static PeriodGranularity getGranularity(final List args, final Expr.ObjectBinding bindings) @@ -118,5 +127,12 @@ public void visit(final Visitor visitor) } visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); + return shuttle.visit(new TimestampCeilDynamicExpr(newArgs)); + } } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java index 3f1f6836bdc9..0b2991aa5d79 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java @@ -84,6 +84,13 @@ public Expr apply(final List args) class TimestampExtractExpr implements Expr { + private final Expr arg; + + private TimestampExtractExpr(Expr arg) + { + this.arg = arg; + } + @Nonnull @Override public ExprEval eval(final ObjectBinding bindings) @@ -128,8 +135,15 @@ public void visit(final Visitor visitor) arg.visit(visitor); visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new TimestampExtractExpr(newArg)); + } } - return new TimestampExtractExpr(); + return new TimestampExtractExpr(arg); } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java index ae0f16b2151a..a828d038b3c9 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java +++ 
b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java @@ -19,6 +19,7 @@ package org.apache.druid.query.expression; +import com.google.common.collect.ImmutableList; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.granularity.PeriodGranularity; @@ -28,6 +29,7 @@ import javax.annotation.Nonnull; import java.util.List; +import java.util.stream.Collectors; public class TimestampFloorExprMacro implements ExprMacroTable.ExprMacro { @@ -106,6 +108,14 @@ public void visit(final Visitor visitor) arg.visit(visitor); visitor.visit(this); } + + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new TimestampFloorExpr(ImmutableList.of(newArg))); + } } public static class TimestampFloorDynamicExpr implements Expr @@ -133,5 +143,12 @@ public void visit(final Visitor visitor) } visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); + return shuttle.visit(new TimestampFloorDynamicExpr(newArgs)); + } } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java index 196b61ea42c3..b2142a91192a 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java @@ -70,6 +70,13 @@ public Expr apply(final List args) class TimestampFormatExpr implements Expr { + private final Expr arg; + + private TimestampFormatExpr(Expr arg) + { + this.arg = arg; + } + @Nonnull @Override public ExprEval eval(final ObjectBinding bindings) @@ -88,8 +95,15 @@ public void visit(final Visitor visitor) arg.visit(visitor); visitor.visit(this); } + + 
@Override + public Expr visit(Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new TimestampFormatExpr(newArg)); + } } - return new TimestampFormatExpr(); + return new TimestampFormatExpr(arg); } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java index 2b65fb54653e..4bbfdbcaf433 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java @@ -66,6 +66,13 @@ public Expr apply(final List args) class TimestampParseExpr implements Expr { + private final Expr arg; + + private TimestampParseExpr(Expr arg) + { + this.arg = arg; + } + @Nonnull @Override public ExprEval eval(final ObjectBinding bindings) @@ -91,9 +98,16 @@ public void visit(final Visitor visitor) arg.visit(visitor); visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new TimestampParseExpr(newArg)); + } } - return new TimestampParseExpr(); + return new TimestampParseExpr(arg); } /** diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java index c76803809d7b..e9b70dd4d328 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java @@ -19,6 +19,7 @@ package org.apache.druid.query.expression; +import com.google.common.collect.ImmutableList; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.granularity.PeriodGranularity; import org.apache.druid.math.expr.Expr; @@ -30,6 +31,7 @@ import javax.annotation.Nonnull; import 
java.util.List; +import java.util.stream.Collectors; public class TimestampShiftExprMacro implements ExprMacroTable.ExprMacro { @@ -99,6 +101,14 @@ public void visit(final Visitor visitor) arg.visit(visitor); visitor.visit(this); } + + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new TimestampShiftExpr(ImmutableList.of(newArg))); + } } private static class TimestampShiftDynamicExpr implements Expr @@ -129,5 +139,12 @@ public void visit(final Visitor visitor) } visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); + return shuttle.visit(new TimestampShiftDynamicExpr(newArgs)); + } } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java index 777cb90b07a1..f2f6d3b9bc03 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java @@ -155,6 +155,13 @@ public void visit(final Visitor visitor) stringExpr.visit(visitor); visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newStringExpr = stringExpr.visit(shuttle); + return shuttle.visit(new TrimStaticCharsExpr(mode, newStringExpr, chars)); + } } private static class TrimDynamicCharsExpr implements Expr @@ -226,6 +233,14 @@ public void visit(final Visitor visitor) charsExpr.visit(visitor); visitor.visit(this); } + + @Override + public Expr visit(Shuttle shuttle) + { + Expr newStringExpr = stringExpr.visit(shuttle); + Expr newCharsExpr = charsExpr.visit(shuttle); + return shuttle.visit(new TrimDynamicCharsExpr(mode, newStringExpr, newCharsExpr)); + } } private static boolean arrayContains(char[] array, char c) diff --git 
a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java index 950eb5fcd0a2..de6284926fa5 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java @@ -712,11 +712,11 @@ private void mergeCapabilities( for (IndexableAdapter adapter : adapters) { for (String dimension : adapter.getDimensionNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(dimension); - capabilitiesMap.computeIfAbsent(dimension, d -> new ColumnCapabilitiesImpl()).merge(capabilities); + capabilitiesMap.computeIfAbsent(dimension, d -> new ColumnCapabilitiesImpl().setIsComplete(true)).merge(capabilities); } for (String metric : adapter.getMetricNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(metric); - capabilitiesMap.computeIfAbsent(metric, m -> new ColumnCapabilitiesImpl()).merge(capabilities); + capabilitiesMap.computeIfAbsent(metric, m -> new ColumnCapabilitiesImpl().setIsComplete(true)).merge(capabilities); metricsValueTypes.put(metric, capabilities.getType()); metricTypeNames.put(metric, adapter.getMetricType(metric)); } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java index 7ef78c674e55..ce081dffe9a6 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java @@ -109,6 +109,7 @@ public ColumnHolder build() .setHasBitmapIndexes(bitmapIndex != null) .setHasSpatialIndexes(spatialIndex != null) .setHasMultipleValues(hasMultipleValues) + .setIsComplete(true) .setFilterable(filterable), columnSupplier, bitmapIndex, diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java 
b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index f3bf54efac6a..4e1902d87f8d 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -31,4 +31,12 @@ public interface ColumnCapabilities boolean hasSpatialIndexes(); boolean hasMultipleValues(); boolean isFilterable(); + + /** + * This property indicates that this {@link ColumnCapabilities} is "complete" in that all properties can be expected + * to supply valid responses. Not all {@link ColumnCapabilities} are created equal. Some, such as those provided by + * {@link org.apache.druid.query.groupby.RowBasedColumnSelectorFactory} only have type information, if even that, and + * cannot supply information like {@link ColumnCapabilities#hasMultipleValues}, and will report as false. + */ + boolean isComplete(); } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 65c94ae091ab..5141dba7308a 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -38,6 +38,9 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities @JsonIgnore private boolean filterable; + @JsonIgnore + private boolean complete = false; + @Override @JsonProperty public ValueType getType() @@ -114,6 +117,12 @@ public boolean isFilterable() filterable; } + @Override + public boolean isComplete() + { + return complete; + } + public ColumnCapabilitiesImpl setFilterable(boolean filterable) { this.filterable = filterable; @@ -126,6 +135,12 @@ public ColumnCapabilitiesImpl setHasMultipleValues(boolean hasMultipleValues) return this; } + public ColumnCapabilitiesImpl setIsComplete(boolean complete) + { + this.complete = 
complete; + return this; + } + public void merge(ColumnCapabilities other) { if (other == null) { @@ -145,6 +160,7 @@ public void merge(ColumnCapabilities other) this.hasInvertedIndexes |= other.hasBitmapIndexes(); this.hasSpatialIndexes |= other.hasSpatialIndexes(); this.hasMultipleValues |= other.hasMultipleValues(); + this.complete &= other.isComplete(); // these should always be the same? this.filterable &= other.isFilterable(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index 208ea4d87895..3bb1005257fe 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -37,6 +37,7 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.virtual.ExpressionSelectors; +import java.util.Arrays; import java.util.Set; public class ExpressionFilter implements Filter @@ -62,7 +63,23 @@ public boolean matches() if (NullHandling.sqlCompatible() && selector.isNull()) { return false; } - return Evals.asBoolean(selector.getLong()); + ExprEval eval = selector.getObject(); + if (eval == null) { + return false; + } + switch (eval.type()) { + case LONG_ARRAY: + Long[] lResult = eval.asLongArray(); + return Arrays.stream(lResult).anyMatch(Evals::asBoolean); + case STRING_ARRAY: + String[] sResult = eval.asStringArray(); + return Arrays.stream(sResult).anyMatch(Evals::asBoolean); + case DOUBLE_ARRAY: + Double[] dResult = eval.asDoubleArray(); + return Arrays.stream(dResult).anyMatch(Evals::asBoolean); + default: + return Evals.asBoolean(selector.getLong()); + } } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index 39df02406ded..032327c39f29 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -317,7 +317,7 @@ protected IncrementalIndex( } //__time capabilities - ColumnCapabilitiesImpl timeCapabilities = new ColumnCapabilitiesImpl(); + ColumnCapabilitiesImpl timeCapabilities = new ColumnCapabilitiesImpl().setIsComplete(true); timeCapabilities.setType(ValueType.LONG); columnCapabilities.put(ColumnHolder.TIME_COLUMN_NAME, timeCapabilities); @@ -654,6 +654,7 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) capabilities.setType(ValueType.STRING); capabilities.setDictionaryEncoded(true); capabilities.setHasBitmapIndexes(true); + capabilities.setIsComplete(true); columnCapabilities.put(dimension, capabilities); } DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null); @@ -912,6 +913,7 @@ private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type) capabilities.setDictionaryEncoded(type == ValueType.STRING); capabilities.setHasBitmapIndexes(type == ValueType.STRING); capabilities.setType(type); + capabilities.setIsComplete(true); return capabilities; } @@ -1106,7 +1108,7 @@ public MetricDesc(int index, AggregatorFactory factory) this.name = factory.getName(); String typeInfo = factory.getTypeName(); - this.capabilities = new ColumnCapabilitiesImpl(); + this.capabilities = new ColumnCapabilitiesImpl().setIsComplete(true); if ("float".equalsIgnoreCase(typeInfo)) { capabilities.setType(ValueType.FLOAT); this.type = typeInfo; diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/BaseMultiValueExpressionDimensionSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/BaseMultiValueExpressionDimensionSelector.java new file mode 100644 index 000000000000..dfe486152f82 --- /dev/null +++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/BaseMultiValueExpressionDimensionSelector.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.virtual; + +import com.google.common.base.Predicate; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.data.RangeIndexedInts; +import org.apache.druid.segment.data.ZeroIndexedInts; + +import javax.annotation.Nullable; +import java.util.List; +import java.util.Objects; + +public abstract class BaseMultiValueExpressionDimensionSelector implements DimensionSelector +{ + private final ColumnValueSelector baseSelector; + + public BaseMultiValueExpressionDimensionSelector(ColumnValueSelector baseSelector) + { + this.baseSelector = baseSelector; + } + + ExprEval getEvaluated() + { + return 
baseSelector.getObject(); + } + + abstract String getValue(ExprEval evaluated); + + abstract List getArray(ExprEval evaluated); + + abstract String getArrayValue(ExprEval evaluated, int i); + + @Override + public IndexedInts getRow() + { + ExprEval evaluated = getEvaluated(); + if (evaluated.isArray()) { + RangeIndexedInts ints = new RangeIndexedInts(); + ints.setSize(evaluated.asArray() != null ? evaluated.asArray().length : 0); + return ints; + } + return ZeroIndexedInts.instance(); + } + + @Override + public int getValueCardinality() + { + return CARDINALITY_UNKNOWN; + } + + @Nullable + @Override + public String lookupName(int id) + { + ExprEval evaluated = getEvaluated(); + if (evaluated.isArray()) { + return getArrayValue(evaluated, id); + } + assert id == 0; + return NullHandling.emptyToNullIfNeeded(evaluated.asString()); + } + + @Override + public ValueMatcher makeValueMatcher(@Nullable String value) + { + return new ValueMatcher() + { + @Override + public boolean matches() + { + ExprEval evaluated = getEvaluated(); + if (evaluated.isArray()) { + List array = getArray(evaluated); + return array.stream().anyMatch(x -> Objects.equals(x, value)); + } + return Objects.equals(getValue(evaluated), value); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("selector", baseSelector); + } + }; + } + + @Override + public ValueMatcher makeValueMatcher(Predicate predicate) + { + return new ValueMatcher() + { + @Override + public boolean matches() + { + ExprEval evaluated = getEvaluated(); + if (evaluated.isArray()) { + List array = getArray(evaluated); + return array.stream().anyMatch(x -> predicate.apply(x)); + } + return predicate.apply(getValue(evaluated)); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("selector", baseSelector); + inspector.visit("predicate", predicate); + } + }; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector 
inspector) + { + inspector.visit("baseSelector", baseSelector); + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return false; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return null; + } + + @Nullable + @Override + public Object getObject() + { + ExprEval evaluated = getEvaluated(); + if (evaluated.isArray()) { + return getArray(evaluated); + } + return getValue(evaluated); + } + + @Override + public Class classOfObject() + { + return Object.class; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelector.java index caffc48b9bd1..bd60ee9b4191 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelector.java @@ -29,8 +29,8 @@ public class ExpressionColumnValueSelector implements ColumnValueSelector { - private final Expr.ObjectBinding bindings; - private final Expr expression; + final Expr.ObjectBinding bindings; + final Expr expression; public ExpressionColumnValueSelector(Expr expression, Expr.ObjectBinding bindings) { diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 3e452406b060..fc7ead0b8802 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -45,9 +45,13 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; public class ExpressionSelectors { @@ -132,6 +136,9 @@ public static 
ColumnValueSelector makeExprEvalSelector( ) { final List columns = Parser.findRequiredBindings(expression); + final Set expectedArrays = Parser.findArrayFnBindings(expression); + final Set actualArrays = Parser.findArrayFnBindings(expression); + final Set unknownIfArrays = new HashSet<>(); if (columns.size() == 1) { final String column = Iterables.getOnlyElement(columns); @@ -146,7 +153,10 @@ public static ColumnValueSelector makeExprEvalSelector( ); } else if (capabilities != null && capabilities.getType() == ValueType.STRING - && capabilities.isDictionaryEncoded()) { + && capabilities.isDictionaryEncoded() + && capabilities.isComplete() + && !capabilities.hasMultipleValues() + && !expectedArrays.contains(column)) { // Optimization for expressions that hit one string column and nothing else. return new SingleStringInputCachingExpressionColumnValueSelector( columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)), @@ -155,15 +165,40 @@ public static ColumnValueSelector makeExprEvalSelector( } } - final Expr.ObjectBinding bindings = createBindings(expression, columnSelectorFactory); + for (String column : columns) { + final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column); + if (capabilities != null) { + if (capabilities.hasMultipleValues()) { + actualArrays.add(column); + } else if (!capabilities.isComplete() && capabilities.getType().equals(ValueType.STRING) && (actualArrays.contains(column) || !expectedArrays.contains(column))) { + unknownIfArrays.add(column); + } + } else { + unknownIfArrays.add(column); + } + } + + final List needsApplied = columns.stream().filter(c -> actualArrays.contains(c) && !expectedArrays.contains(c)).collect(Collectors.toList()); + final Expr finalExpr; + if (needsApplied.size() > 0) { + finalExpr = Parser.applyUnappliedIdentifiers(expression, needsApplied); + } else { + finalExpr = expression; + } + + + final Expr.ObjectBinding bindings = 
createBindings(expression, columnSelectorFactory, unknownIfArrays); if (bindings.equals(ExprUtils.nilBindings())) { // Optimization for constant expressions. return new ConstantExprEvalSelector(expression.eval(bindings)); } + if (unknownIfArrays.size() > 0) { + return new OpportunisticMultiValueStringExpressionColumnValueSelector(finalExpr, bindings, unknownIfArrays); + } // No special optimization. - return new ExpressionColumnValueSelector(expression, bindings); + return new ExpressionColumnValueSelector(finalExpr, bindings); } public static DimensionSelector makeDimensionSelector( @@ -173,6 +208,9 @@ public static DimensionSelector makeDimensionSelector( ) { final List columns = Parser.findRequiredBindings(expression); + final Set expectedArrays = Parser.findArrayFnBindings(expression); + final Set actualArrays = Parser.findArrayFnBindings(expression); + final Set unknownIfArrays = new HashSet<>(); if (columns.size() == 1) { final String column = Iterables.getOnlyElement(columns); @@ -180,7 +218,11 @@ public static DimensionSelector makeDimensionSelector( if (capabilities != null && capabilities.getType() == ValueType.STRING - && capabilities.isDictionaryEncoded()) { + && capabilities.isDictionaryEncoded() + && capabilities.isComplete() + && !capabilities.hasMultipleValues() + && !expectedArrays.contains(column) + ) { // Optimization for dimension selectors that wrap a single underlying string column. 
return new SingleStringInputDimensionSelector( columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)), @@ -189,7 +231,21 @@ public static DimensionSelector makeDimensionSelector( } } + for (String column : columns) { + final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column); + if (capabilities != null) { + if (capabilities.hasMultipleValues()) { + actualArrays.add(column); + } else if (!capabilities.isComplete() && capabilities.getType().equals(ValueType.STRING) && (actualArrays.contains(column) || !expectedArrays.contains(column))) { + unknownIfArrays.add(column); + } + } else { + unknownIfArrays.add(column); + } + } + final ColumnValueSelector baseSelector = makeExprEvalSelector(columnSelectorFactory, expression); + final boolean multiVal = actualArrays.size() > 0 || expectedArrays.size() > 0 || unknownIfArrays.size() > 0; if (baseSelector instanceof ConstantExprEvalSelector) { // Optimization for dimension selectors on constants. @@ -198,42 +254,124 @@ public static DimensionSelector makeDimensionSelector( // Optimization for null dimension selector. 
return DimensionSelector.constant(null); } else if (extractionFn == null) { - class DefaultExpressionDimensionSelector extends BaseSingleValueDimensionSelector - { - @Override - protected String getValue() + + if (multiVal) { + class MultiValueDimensionSelector extends BaseMultiValueExpressionDimensionSelector { - return NullHandling.emptyToNullIfNeeded(baseSelector.getObject().asString()); + private MultiValueDimensionSelector() + { + super(baseSelector); + } + + @Override + String getValue(ExprEval evaluated) + { + assert !evaluated.isArray(); + return NullHandling.emptyToNullIfNeeded(evaluated.asString()); + } + + @Override + List getArray(ExprEval evaluated) + { + assert evaluated.isArray(); + return Arrays.stream(evaluated.asStringArray()) + .map(NullHandling::emptyToNullIfNeeded) + .collect(Collectors.toList()); + } + + @Override + String getArrayValue(ExprEval evaluated, int i) + { + assert evaluated.isArray(); + String[] stringArray = evaluated.asStringArray(); + assert i < stringArray.length; + return NullHandling.emptyToNullIfNeeded(stringArray[i]); + } } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) + return new MultiValueDimensionSelector(); + } else { + class DefaultExpressionDimensionSelector extends BaseSingleValueDimensionSelector { - inspector.visit("baseSelector", baseSelector); + @Override + protected String getValue() + { + + return NullHandling.emptyToNullIfNeeded(baseSelector.getObject().asString()); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("baseSelector", baseSelector); + } } + return new DefaultExpressionDimensionSelector(); } - return new DefaultExpressionDimensionSelector(); } else { - class ExtractionExpressionDimensionSelector extends BaseSingleValueDimensionSelector - { - @Override - protected String getValue() + if (multiVal) { + class ExtractionMultiValueDimensionSelector extends BaseMultiValueExpressionDimensionSelector { - return 
extractionFn.apply(NullHandling.emptyToNullIfNeeded(baseSelector.getObject().asString())); + ExtractionMultiValueDimensionSelector() + { + super(baseSelector); + } + + @Override + String getValue(ExprEval evaluated) + { + assert !evaluated.isArray(); + return extractionFn.apply(NullHandling.emptyToNullIfNeeded(evaluated.asString())); + } + + @Override + List getArray(ExprEval evaluated) + { + assert evaluated.isArray(); + return Arrays.stream(evaluated.asStringArray()) + .map(x -> extractionFn.apply(NullHandling.emptyToNullIfNeeded(x))) + .collect(Collectors.toList()); + } + + @Override + String getArrayValue(ExprEval evaluated, int i) + { + assert evaluated.isArray(); + String[] stringArray = evaluated.asStringArray(); + assert i < stringArray.length; + return extractionFn.apply(NullHandling.emptyToNullIfNeeded(stringArray[i])); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("baseSelector", baseSelector); + inspector.visit("extractionFn", extractionFn); + } } + return new ExtractionMultiValueDimensionSelector(); - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) + } else { + class ExtractionExpressionDimensionSelector extends BaseSingleValueDimensionSelector { - inspector.visit("baseSelector", baseSelector); - inspector.visit("extractionFn", extractionFn); + @Override + protected String getValue() + { + return extractionFn.apply(NullHandling.emptyToNullIfNeeded(baseSelector.getObject().asString())); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("baseSelector", baseSelector); + inspector.visit("extractionFn", extractionFn); + } } + return new ExtractionExpressionDimensionSelector(); } - return new ExtractionExpressionDimensionSelector(); } } - private static Expr.ObjectBinding createBindings(Expr expression, ColumnSelectorFactory columnSelectorFactory) + private static Expr.ObjectBinding createBindings(Expr expression, 
ColumnSelectorFactory columnSelectorFactory, Set unknownMultiValue) { final Map> suppliers = new HashMap<>(); final List columns = Parser.findRequiredBindings(expression); @@ -241,6 +379,9 @@ private static Expr.ObjectBinding createBindings(Expr expression, ColumnSelector final ColumnCapabilities columnCapabilities = columnSelectorFactory .getColumnCapabilities(columnName); final ValueType nativeType = columnCapabilities != null ? columnCapabilities.getType() : null; + // final boolean multiVal = unknownMultiValue.contains(columnName) || + // (columnCapabilities != null && columnCapabilities.hasMultipleValues()); + final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues(); final Supplier supplier; if (nativeType == ValueType.FLOAT) { @@ -257,8 +398,8 @@ private static Expr.ObjectBinding createBindings(Expr expression, ColumnSelector supplier = makeNullableSupplier(selector, selector::getDouble); } else if (nativeType == ValueType.STRING) { supplier = supplierFromDimensionSelector( - columnSelectorFactory - .makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)) + columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)), + multiVal ); } else if (nativeType == null) { // Unknown ValueType. Try making an Object selector and see if that gives us anything useful. @@ -310,18 +451,24 @@ private static Supplier makeNullableSupplier( @VisibleForTesting @Nonnull - static Supplier supplierFromDimensionSelector(final DimensionSelector selector) + static Supplier supplierFromDimensionSelector(final DimensionSelector selector, boolean multiValue) { Preconditions.checkNotNull(selector, "selector"); return () -> { final IndexedInts row = selector.getRow(); - if (row.size() == 1) { + if (row.size() == 1 && !multiValue) { return selector.lookupName(row.get(0)); } else { - // Can't handle non-singly-valued rows in expressions. - // Treat them as nulls until we think of something better to do. 
- return null; + // column selector factories hate you and use [] and [null] interchangeably for nullish data + if (row.size() == 0) { + return new String[]{null}; + } + final String[] strings = new String[row.size()]; + for (int i = 0; i < row.size(); i++) { + strings[i] = selector.lookupName(row.get(i)); + } + return strings; } }; } @@ -343,6 +490,15 @@ static Supplier supplierFromObjectSelector(final BaseObjectColumnValueSe final Object val = selector.getObject(); if (val instanceof Number || val instanceof String) { return val; + } else if (val instanceof List) { + // strings can be lists of strings!! + // this can happen from an "unknown" capabilities multi-value string dimension row, and we fall back to the + // object selector + Object[] arrayVal = ((List) val).stream().map(Object::toString).toArray(String[]::new); + if (arrayVal.length > 0) { + return arrayVal; + } + return new String[]{null}; } else { return null; } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ForcedMultiValueStringExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/ForcedMultiValueStringExpressionColumnValueSelector.java new file mode 100644 index 000000000000..bca9bf3fca8c --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ForcedMultiValueStringExpressionColumnValueSelector.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.virtual; + +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.Parser; + +import java.util.ArrayList; +import java.util.Set; + +public class ForcedMultiValueStringExpressionColumnValueSelector extends ExpressionColumnValueSelector +{ + public ForcedMultiValueStringExpressionColumnValueSelector( + Expr expression, + Expr.ObjectBinding bindings, + Set unknownColumnsSet + ) + { + super(Parser.applyUnappliedIdentifiers(expression, new ArrayList<>(unknownColumnsSet)), bindings); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java new file mode 100644 index 000000000000..0d723a2eaad0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.virtual; + +import net.thisptr.jackson.jq.internal.misc.Strings; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.Parser; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public class OpportunisticMultiValueStringExpressionColumnValueSelector extends ExpressionColumnValueSelector +{ + private final List unknownColumns; + private final Set arrayInputs; + private final Map transformedCache; + + public OpportunisticMultiValueStringExpressionColumnValueSelector( + Expr expression, + Expr.ObjectBinding bindings, + Set unknownColumnsSet + ) + { + super(expression, bindings); + this.unknownColumns = new ArrayList<>(unknownColumnsSet); + this.arrayInputs = Parser.findArrayFnBindings(expression); + this.transformedCache = new HashMap<>(); + } + + @Override + public ExprEval getObject() + { + List arrayBindings = + unknownColumns.stream().filter(x -> !arrayInputs.contains(x) && isBindingArray(x)).collect(Collectors.toList()); + + if (arrayBindings.size() > 0) { + final String key = Strings.join(",", arrayBindings); + if (transformedCache.containsKey(key)) { + return transformedCache.get(key).eval(bindings); + } + Expr transformed = Parser.applyUnappliedIdentifiers(expression, arrayBindings); + transformedCache.put(key, transformed); + return transformed.eval(bindings); + } + return expression.eval(bindings); + } + + private 
boolean isBindingArray(String x) + { + Object binding = bindings.get(x); + if (binding != null) { + if (binding instanceof String[] && ((String[]) binding).length > 1) { + // if (binding instanceof String[]) { + return true; + } + } + return false; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java index 87c5df19d1f6..f7b2aedbd7f0 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java @@ -62,7 +62,7 @@ public SingleStringInputCachingExpressionColumnValueSelector( this.selector = Preconditions.checkNotNull(selector, "selector"); this.expression = Preconditions.checkNotNull(expression, "expression"); - final Supplier inputSupplier = ExpressionSelectors.supplierFromDimensionSelector(selector); + final Supplier inputSupplier = ExpressionSelectors.supplierFromDimensionSelector(selector, false); this.bindings = name -> inputSupplier.get(); if (selector.getValueCardinality() == DimensionSelector.CARDINALITY_UNKNOWN) { diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index c4c4a0b99af6..c06b540bf867 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -28,6 +28,7 @@ import org.apache.druid.data.input.Row; import org.apache.druid.data.input.impl.CSVParseSpec; import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.JSONParseSpec; import org.apache.druid.data.input.impl.StringInputRowParser; import 
org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.java.util.common.DateTimes; @@ -38,11 +39,13 @@ import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.ListFilteredDimensionSpec; import org.apache.druid.query.dimension.RegexFilteredDimensionSpec; +import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.GroupByQueryRunnerTestHelper; +import org.apache.druid.query.groupby.strategy.GroupByStrategySelector; import org.apache.druid.query.spec.LegacySegmentSpec; import org.apache.druid.query.topn.TopNQuery; import org.apache.druid.query.topn.TopNQueryBuilder; @@ -55,14 +58,18 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.incremental.IncrementalIndex; +import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.timeline.SegmentId; import org.junit.After; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -98,9 +105,17 @@ public static Collection constructorFeeder() private IncrementalIndex incrementalIndex; private QueryableIndex queryableIndex; - private File persistedSegmentDir; + private IncrementalIndex incrementalIndexNullSampler; + private QueryableIndex 
queryableIndexNullSampler; + private File persistedSegmentDirNullSampler; + + private final GroupByQueryConfig config; + + @Rule + public ExpectedException expectedException = ExpectedException.none(); + public MultiValuedDimensionTest(final GroupByQueryConfig config, SegmentWriteOutMediumFactory segmentWriteOutMediumFactory) { helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( @@ -108,6 +123,7 @@ public MultiValuedDimensionTest(final GroupByQueryConfig config, SegmentWriteOut config, null ); + this.config = config; this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; } @@ -122,9 +138,9 @@ public void setup() throws Exception StringInputRowParser parser = new StringInputRowParser( new CSVParseSpec( new TimestampSpec("timestamp", "iso", null), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags")), null, null), + new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags", "othertags")), null, null), "\t", - ImmutableList.of("timestamp", "product", "tags"), + ImmutableList.of("timestamp", "product", "tags", "othertags"), false, 0 ), @@ -132,21 +148,55 @@ public void setup() throws Exception ); String[] rows = new String[]{ - "2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3", - "2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5", - "2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7", - "2011-01-14T00:00:00.000Z,product_4" + "2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3,u1\tu2", + "2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5,u3\tu4", + "2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7,u1\tu5", + "2011-01-14T00:00:00.000Z,product_4,,u2" }; for (String row : rows) { incrementalIndex.add(parser.parse(row)); } + persistedSegmentDir = Files.createTempDir(); TestHelper.getTestIndexMergerV9(segmentWriteOutMediumFactory) .persist(incrementalIndex, persistedSegmentDir, new IndexSpec(), null); - queryableIndex = TestHelper.getTestIndexIO().loadIndex(persistedSegmentDir); + + + 
StringInputRowParser parserNullSampler = new StringInputRowParser( + new JSONParseSpec( + new TimestampSpec("time", "iso", null), + new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags", "othertags")), null, null) + ), + "UTF-8" + ); + + incrementalIndexNullSampler = new IncrementalIndex.Builder() + .setSimpleTestingIndexSchema(new CountAggregatorFactory("count")) + .setMaxRowCount(5000) + .buildOnheap(); + + String[] rowsNullSampler = new String[]{ + "{\"time\":\"2011-01-13T00:00:00.000Z\",\"product\":\"product_1\",\"tags\":[],\"othertags\":[\"u1\", \"u2\"]}", + "{\"time\":\"2011-01-12T00:00:00.000Z\",\"product\":\"product_2\",\"othertags\":[\"u3\", \"u4\"]}", + "{\"time\":\"2011-01-14T00:00:00.000Z\",\"product\":\"product_3\",\"tags\":[\"\"],\"othertags\":[\"u1\", \"u5\"]}", + "{\"time\":\"2011-01-15T00:00:00.000Z\",\"product\":\"product_4\",\"tags\":[\"t1\", \"t2\", \"\"],\"othertags\":[\"u6\", \"u7\"]}", + "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_5\",\"tags\":[],\"othertags\":[]}", + "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_6\"}", + "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_7\",\"othertags\":[]}", + "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_8\",\"tags\":[\"\"],\"othertags\":[]}" + }; + + for (String row : rowsNullSampler) { + incrementalIndexNullSampler.add(parserNullSampler.parse(row)); + } + persistedSegmentDirNullSampler = Files.createTempDir(); + TestHelper.getTestIndexMergerV9(segmentWriteOutMediumFactory) + .persist(incrementalIndexNullSampler, persistedSegmentDirNullSampler, new IndexSpec(), null); + + queryableIndexNullSampler = TestHelper.getTestIndexIO().loadIndex(persistedSegmentDirNullSampler); } @After @@ -249,6 +299,231 @@ public void testGroupByWithDimFilterAndWithFilteredDimSpec() TestHelper.assertExpectedObjects(expectedResults, result.toList(), "filteredDim"); } + @Test + public void testGroupByExpression() + { + if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + GroupByQuery query = GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("texpr", "texpr")) + .setVirtualColumns( + new ExpressionVirtualColumn( + "texpr", + "map(x -> concat(x, 'foo'), tags)", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .build(); + + Sequence result = helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2")) + ), + query + ); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t1foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t2foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t3foo", "count", 4L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t4foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t5foo", "count", 4L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t6foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t7foo", "count", 2L) + ); + + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + } + + @Test + public void testGroupByExpressionMultiMulti() 
+ { + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + GroupByQuery query = GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("texpr", "texpr")) + .setVirtualColumns( + new ExpressionVirtualColumn( + "texpr", + "cartesian_map((x,y) -> concat(x, y), tags, othertags)", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setLimit(5) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .build(); + + Sequence result = helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2")) + ), + query + ); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t1u1", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t1u2", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t2u1", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t2u2", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t3u1", "count", 2L) + ); + + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + } + + @Test + public void testGroupByExpressionMultiMultiAuto() + { + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + GroupByQuery query = 
GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("texpr", "texpr")) + .setVirtualColumns( + new ExpressionVirtualColumn( + "texpr", + "map((x) -> concat(x, othertags), tags)", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setLimit(5) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .build(); + + Sequence result = helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2")) + ), + query + ); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t1u1", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t1u2", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t2u1", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t2u2", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t3u1", "count", 2L) + ); + + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + } + + @Test + public void testGroupByExpressionMultiMultiAutoAuto() + { + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + GroupByQuery query = GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("texpr", "texpr")) + .setVirtualColumns( + new ExpressionVirtualColumn( + "texpr", + "concat(tags, 
othertags)", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setLimit(5) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .build(); + + Sequence result = helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2")) + ), + query + ); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t1u1", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t1u2", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t2u1", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t2u2", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t3u1", "count", 2L) + ); + + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + } + + @Test + public void testGroupByExpressionAuto() + { + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + GroupByQuery query = GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("tt", "tt")) + .setVirtualColumns( + new ExpressionVirtualColumn( + "tt", + "concat(tags, 'foo')", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .build(); + + Sequence result = helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, 
SegmentId.dummy("sid2")) + ), + query + ); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t1foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t2foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t3foo", "count", 4L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t4foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t5foo", "count", 4L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t6foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t7foo", "count", 2L) + ); + + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + } + + @Test public void testTopNWithDimFilterAndWithFilteredDimSpec() { @@ -299,6 +574,129 @@ public void testTopNWithDimFilterAndWithFilteredDimSpec() } } + @Test + public void testTopNExpression() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource("xx") + .granularity(Granularities.ALL) + .dimension(new DefaultDimensionSpec("texpr", "texpr")) + .virtualColumns( + new ExpressionVirtualColumn( + "texpr", + "map(x -> concat(x, 'foo'), tags)", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .metric("count") + .intervals(QueryRunnerTestHelper.fullOnIntervalSpec) + .aggregators(Collections.singletonList(new CountAggregatorFactory("count"))) + .threshold(15) + .build(); + + try (CloseableStupidPool pool = TestQueryRunners.createDefaultNonBlockingPool()) { + QueryRunnerFactory factory = new TopNQueryRunnerFactory( + pool, + new TopNQueryQueryToolChest( + new TopNQueryConfig(), + QueryRunnerTestHelper.noopIntervalChunkingQueryRunnerDecorator() + ), 
+ QueryRunnerTestHelper.NOOP_QUERYWATCHER + ); + QueryRunner> runner = QueryRunnerTestHelper.makeQueryRunner( + factory, + new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + null + ); + Map context = new HashMap<>(); + Sequence> result = runner.run(QueryPlus.wrap(query), context); + List> expected = + ImmutableList.>builder() + .add(ImmutableMap.of("texpr", "t3foo", "count", 2L)) + .add(ImmutableMap.of("texpr", "t5foo", "count", 2L)) + .add(ImmutableMap.of("texpr", "foo", "count", 1L)) + .add(ImmutableMap.of("texpr", "t1foo", "count", 1L)) + .add(ImmutableMap.of("texpr", "t2foo", "count", 1L)) + .add(ImmutableMap.of("texpr", "t4foo", "count", 1L)) + .add(ImmutableMap.of("texpr", "t6foo", "count", 1L)) + .add(ImmutableMap.of("texpr", "t7foo", "count", 1L)) + .build(); + + List> expectedResults = Collections.singletonList( + new Result( + DateTimes.of("2011-01-12T00:00:00.000Z"), + new TopNResultValue( + expected + ) + ) + ); + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "filteredDim"); + } + } + + @Test + public void testTopNExpressionAutoTransform() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource("xx") + .granularity(Granularities.ALL) + .dimension(new DefaultDimensionSpec("texpr", "texpr")) + .virtualColumns( + new ExpressionVirtualColumn( + "texpr", + "concat(tags, 'foo')", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .metric("count") + .intervals(QueryRunnerTestHelper.fullOnIntervalSpec) + .aggregators(Collections.singletonList(new CountAggregatorFactory("count"))) + .threshold(15) + .build(); + + try (CloseableStupidPool pool = TestQueryRunners.createDefaultNonBlockingPool()) { + QueryRunnerFactory factory = new TopNQueryRunnerFactory( + pool, + new TopNQueryQueryToolChest( + new TopNQueryConfig(), + QueryRunnerTestHelper.noopIntervalChunkingQueryRunnerDecorator() + ), + QueryRunnerTestHelper.NOOP_QUERYWATCHER + ); + QueryRunner> runner = QueryRunnerTestHelper.makeQueryRunner( + factory, + 
new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + null + ); + Map context = new HashMap<>(); + Sequence> result = runner.run(QueryPlus.wrap(query), context); + + List> expected = + ImmutableList.>builder() + .add(ImmutableMap.of("texpr", "t3foo", "count", 2L)) + .add(ImmutableMap.of("texpr", "t5foo", "count", 2L)) + .add(ImmutableMap.of("texpr", "foo", "count", 1L)) + .add(ImmutableMap.of("texpr", "t1foo", "count", 1L)) + .add(ImmutableMap.of("texpr", "t2foo", "count", 1L)) + .add(ImmutableMap.of("texpr", "t4foo", "count", 1L)) + .add(ImmutableMap.of("texpr", "t6foo", "count", 1L)) + .add(ImmutableMap.of("texpr", "t7foo", "count", 1L)) + .build(); + + List> expectedResults = Collections.singletonList( + new Result( + DateTimes.of("2011-01-12T00:00:00.000Z"), + new TopNResultValue( + expected + ) + ) + ); + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "filteredDim"); + } + } + @After public void cleanup() throws Exception { diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java index f9d4a91d19ef..91b89652392e 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java @@ -138,16 +138,14 @@ public void testOneSingleValuedStringColumn() @Test public void testOneMultiValuedStringColumn() { - // Expressions currently treat multi-valued arrays as nulls. - // This test is just documenting the current behavior, not necessarily saying it makes sense. 
if (NullHandling.replaceWithDefault()) { - assertFilterMatches(edf("dim4 == ''"), ImmutableList.of("0", "1", "2", "4", "5", "6", "7", "8")); + assertFilterMatches(edf("dim4 == ''"), ImmutableList.of("1", "2", "6", "7", "8")); } else { assertFilterMatches(edf("dim4 == ''"), ImmutableList.of("2")); // AS per SQL standard null == null returns false. assertFilterMatches(edf("dim4 == null"), ImmutableList.of()); } - assertFilterMatches(edf("dim4 == '1'"), ImmutableList.of()); + assertFilterMatches(edf("dim4 == '1'"), ImmutableList.of("0")); assertFilterMatches(edf("dim4 == '3'"), ImmutableList.of("3")); } @@ -212,10 +210,7 @@ public void testCompareColumns() assertFilterMatches(edf("dim2 == dim3"), ImmutableList.of("2", "5", "8")); } - // String vs. multi-value string - // Expressions currently treat multi-valued arrays as nulls. - // This test is just documenting the current behavior, not necessarily saying it makes sense. - assertFilterMatches(edf("dim0 == dim4"), ImmutableList.of("3")); + assertFilterMatches(edf("dim0 == dim4"), ImmutableList.of("3", "4", "5")); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java index ae95dbe3b0e4..c65a6ab5a88b 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java @@ -38,7 +38,8 @@ public void testSupplierFromDimensionSelector() { final SettableSupplier settableSupplier = new SettableSupplier<>(); final Supplier supplier = ExpressionSelectors.supplierFromDimensionSelector( - dimensionSelectorFromSupplier(settableSupplier) + dimensionSelectorFromSupplier(settableSupplier), + false ); Assert.assertNotNull(supplier); diff --git 
a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java index b28fd5b8af9b..81a589240762 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java @@ -70,6 +70,18 @@ public class ExpressionVirtualColumnTest ImmutableMap.of("x", 2L, "y", 3L, "z", "foobar") ); + private static final InputRow ROWMULTI = new MapBasedInputRow( + DateTimes.of("2000-01-02T01:00:00").getMillis(), + ImmutableList.of(), + ImmutableMap.of( + "x", 2L, + "y", 3L, + "a", ImmutableList.of("a", "b", "c"), + "b", ImmutableList.of("1", "2", "3"), + "c", ImmutableList.of("4", "5", "6") + ) + ); + private static final ExpressionVirtualColumn X_PLUS_Y = new ExpressionVirtualColumn( "expr", "x + y", @@ -125,6 +137,13 @@ public class ExpressionVirtualColumnTest TestExprMacroTable.INSTANCE ); + private static final ExpressionVirtualColumn SCALE_LIST = new ExpressionVirtualColumn( + "expr", + "map(b -> b * 2, b)", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ); + private static final ThreadLocal CURRENT_ROW = new ThreadLocal<>(); private static final ColumnSelectorFactory COLUMN_SELECTOR_FACTORY = RowBasedColumnSelectorFactory.create( CURRENT_ROW, @@ -154,6 +173,16 @@ public void testObjectSelector() Assert.assertEquals(5L, selector.getObject()); } + @Test + public void testMultiObjectSelector() + { + CURRENT_ROW.set(ROWMULTI); + + final BaseObjectColumnValueSelector selector = SCALE_LIST.makeDimensionSelector(new DefaultDimensionSpec("expr", "expr"), COLUMN_SELECTOR_FACTORY); + + Assert.assertEquals(ImmutableList.of("2.0", "4.0", "6.0"), selector.getObject()); + } + @Test public void testLongSelector() { @@ -288,6 +317,22 @@ public void testDimensionSelector() Assert.assertEquals("5", 
selector.lookupName(selector.getRow().get(0))); } + @Test + public void testNullDimensionSelector() + { + final DimensionSelector selector = X_PLUS_Y.makeDimensionSelector( + new DefaultDimensionSpec("expr", "expr"), + COLUMN_SELECTOR_FACTORY + ); + + final ValueMatcher nonNullMatcher = selector.makeValueMatcher(Predicates.notNull()); + + CURRENT_ROW.set(ROW0); + Assert.assertEquals(false, nonNullMatcher.matches()); + + + } + @Test public void testDimensionSelectorUsingStringFunction() { @@ -374,7 +419,7 @@ public void testDimensionSelectorWithExtraction() Assert.assertEquals(false, nullMatcher.matches()); Assert.assertEquals(false, fiveMatcher.matches()); Assert.assertEquals(true, nonNullMatcher.matches()); - Assert.assertEquals("4", selector.lookupName(selector.getRow().get(0))); + Assert.assertEquals("4.0", selector.lookupName(selector.getRow().get(0))); } else { // y is null in row1 Assert.assertEquals(true, nullMatcher.matches()); @@ -387,7 +432,7 @@ public void testDimensionSelectorWithExtraction() Assert.assertEquals(false, nullMatcher.matches()); Assert.assertEquals(true, fiveMatcher.matches()); Assert.assertEquals(true, nonNullMatcher.matches()); - Assert.assertEquals("5", selector.lookupName(selector.getRow().get(0))); + Assert.assertEquals("5.1", selector.lookupName(selector.getRow().get(0))); CURRENT_ROW.set(ROW3); Assert.assertEquals(false, nullMatcher.matches()); From 907c55096b2e8ae08771441b5e8e21c3e13ea6b5 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 2 May 2019 12:08:05 -0700 Subject: [PATCH 02/48] fix tests? 
--- .../org/apache/druid/math/expr/ExprEval.java | 21 ++++------- .../apache/druid/math/expr/FunctionTest.java | 2 +- .../druid/query/MultiValuedDimensionTest.java | 35 +++++++++++++------ 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java index a2cbdc756c39..9b1b64d27d85 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -181,9 +181,9 @@ public boolean isArray() public abstract Expr toExpr(); - private abstract static class NumericExprEval extends ExprEval + private abstract static class NumericExprEval extends ExprEval { - private NumericExprEval(@Nullable T value) + private NumericExprEval(@Nullable Number value) { super(value); } @@ -234,11 +234,11 @@ public boolean isNumericNull() } } - private static class DoubleExprEval extends NumericExprEval + private static class DoubleExprEval extends NumericExprEval { private DoubleExprEval(@Nullable Number value) { - super(value == null ? NullHandling.defaultDoubleValue() : value.doubleValue()); + super(value == null ? NullHandling.defaultDoubleValue() : value); } @Override @@ -259,13 +259,6 @@ public Object[] asArray() return asDoubleArray(); } - @Nullable - @Override - public Double[] asDoubleArray() - { - return isNumericNull() ? null : new Double[]{value}; - } - @Override public final ExprEval castTo(ExprType castTo) { @@ -297,11 +290,11 @@ public Expr toExpr() } } - private static class LongExprEval extends NumericExprEval + private static class LongExprEval extends NumericExprEval { private LongExprEval(@Nullable Number value) { - super(value == null ? NullHandling.defaultLongValue() : value.longValue()); + super(value == null ? 
NullHandling.defaultLongValue() : value); } @Override @@ -326,7 +319,7 @@ public Object[] asArray() @Override public Long[] asLongArray() { - return isNumericNull() ? null : new Long[]{value}; + return isNumericNull() ? null : new Long[]{value.longValue()}; } @Override diff --git a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java index 793028c718c8..3a19ffb93ee4 100644 --- a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -163,7 +163,7 @@ public void testRpad() public void testArrayLength() { assertExpr("array_length([1, 2, 3])", 3L); - assertExpr("array_length(a)", 4L); + assertExpr("array_length(a)", 4); } @Test diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index c06b540bf867..c7ce5e3e79c4 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -25,6 +25,7 @@ import com.google.common.io.Files; import org.apache.commons.io.FileUtils; import org.apache.druid.collections.CloseableStupidPool; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.Row; import org.apache.druid.data.input.impl.CSVParseSpec; import org.apache.druid.data.input.impl.DimensionsSpec; @@ -225,16 +226,30 @@ public void testGroupByNoFilter() query ); - List expectedResults = Arrays.asList( - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", null, "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t1", "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t2", "count", 2L), - 
GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t4", "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t5", "count", 4L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t6", "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t7", "count", 2L) - ); + List expectedResults; + if (NullHandling.replaceWithDefault()) { + expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", null, "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t1", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t2", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t4", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t5", "count", 4L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t6", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t7", "count", 2L) + ); + } else { + expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t1", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t2", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L), + 
GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t4", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t5", "count", 4L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t6", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t7", "count", 2L) + ); + } TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); } From 5c997dbee49b59699c0da73c5195e5a2b8a87298 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 2 May 2019 16:29:04 -0700 Subject: [PATCH 03/48] fixes --- .../org/apache/druid/math/expr/Function.java | 26 ++++++++++--- .../apache/druid/math/expr/FunctionTest.java | 38 +++++++++++++++++++ .../overlord/sampler/FirehoseSamplerTest.java | 2 +- .../segment/virtual/ExpressionSelectors.java | 4 +- 4 files changed, 60 insertions(+), 10 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index 99fbcb17d4d3..95d52fe90787 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -30,9 +30,11 @@ import java.math.BigDecimal; import java.math.RoundingMode; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.Set; import java.util.stream.Stream; @@ -1628,8 +1630,14 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) case STRING: case LONG: case DOUBLE: - int index = Arrays.asList(array).indexOf(toCheck.value()); - return ExprEval.bestEffortOf(index < 0 ? null : index); + int index = -1; + for (int i = 0; i < array.length; i++) { + if (Objects.equals(array[i], toCheck.value())) { + index = i; + break; + } + } + return index < 0 ? 
ExprEval.of(null) : ExprEval.ofLong(index); default: throw new IAE("Function[%s] argument must be a a scalar type", name()); } @@ -1662,8 +1670,14 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) case STRING: case LONG: case DOUBLE: - int index = Arrays.asList(array).indexOf(toCheck.value()); - return ExprEval.bestEffortOf(index < 0 ? null : index + 1); + int index = -1; + for (int i = 0; i < array.length; i++) { + if (Objects.equals(array[i], toCheck.value())) { + index = i; + break; + } + } + return index < 0 ? ExprEval.of(null) : ExprEval.ofLong(index + 1); default: throw new IAE("Function[%s] argument must be a a scalar type", name()); } @@ -1710,7 +1724,7 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) private Stream append(T[] array, T val) { - List l = Arrays.asList(array); + List l = new ArrayList<>(Arrays.asList(array)); l.add(val); return l.stream(); } @@ -1766,7 +1780,7 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) private Stream cat(T[] array1, T[] array2) { - List l = Arrays.asList(array1); + List l = new ArrayList<>(Arrays.asList(array1)); l.addAll(Arrays.asList(array2)); return l.stream(); } diff --git a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java index 3a19ffb93ee4..43ca2ce16783 100644 --- a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -182,6 +182,22 @@ public void testArrayOrdinal() assertExpr("array_ordinal(a, 3)", "baz"); } + @Test + public void testArrayOffsetOf() + { + assertExpr("array_offset_of([1, 2, 3], 3)", 2L); + assertExpr("array_offset_of([1, 2, 3], 4)", null); + assertExpr("array_offset_of(a, 'baz')", 2); + } + + @Test + public void testArrayOrdinalOf() + { + assertExpr("array_ordinal_of([1, 2, 3], 3)", 3L); + assertExpr("array_ordinal_of([1, 2, 3], 4)", null); + assertExpr("array_ordinal_of(a, 'baz')", 3); + 
} + @Test public void testArrayContains() { @@ -199,9 +215,31 @@ public void testArrayOverlap() assertExpr("array_overlap([1 2 3], [4 5 6])", "false"); } + @Test + public void testArrayAppend() + { + assertExpr("array_append([1 2 3], 4)", new Long[]{1L,2L,3L,4L}); + assertExpr("array_append([1 2 3], 'bar')", new Long[]{1L,2L,3L,0L}); + } + + @Test + public void testArrayConcat() + { + assertExpr("array_concat([1 2 3], [2 4 6])", new Long[]{1L,2L,3L,2L,4L,6L}); + assertExpr("array_concat([1 2 3], 4)", new Long[]{1L,2L,3L,4L}); + assertExpr("array_concat(0, [1 2 3])", new Long[]{0L,1L,2L,3L}); + assertExpr("array_concat(0, 1)", new Long[]{0L,1L}); + } + private void assertExpr(final String expression, final Object expectedResult) { final Expr expr = Parser.parse(expression, ExprMacroTable.nil()); Assert.assertEquals(expression, expectedResult, expr.eval(bindings).value()); } + + private void assertExpr(final String expression, final Object[] expectedResult) + { + final Expr expr = Parser.parse(expression, ExprMacroTable.nil()); + Assert.assertArrayEquals(expression, expectedResult, expr.eval(bindings).asArray()); + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java index 9c3da5b2c46a..782247d9cd2f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java @@ -606,7 +606,7 @@ public void testWithTransformsAutoDimensions() GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null); TransformSpec transformSpec = new TransformSpec( null, - ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1 + 'bar')", TestExprMacroTable.INSTANCE)) + ImmutableList.of(new 
ExpressionTransform("dim1PlusBar", "concat(dim1, 'bar')", TestExprMacroTable.INSTANCE)) ); DataSchema dataSchema = new DataSchema( diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index fc7ead0b8802..7a7dea962cfd 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -371,7 +371,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } } - private static Expr.ObjectBinding createBindings(Expr expression, ColumnSelectorFactory columnSelectorFactory, Set unknownMultiValue) + private static Expr.ObjectBinding createBindings(Expr expression, ColumnSelectorFactory columnSelectorFactory) { final Map> suppliers = new HashMap<>(); final List columns = Parser.findRequiredBindings(expression); @@ -379,8 +379,6 @@ private static Expr.ObjectBinding createBindings(Expr expression, ColumnSelector final ColumnCapabilities columnCapabilities = columnSelectorFactory .getColumnCapabilities(columnName); final ValueType nativeType = columnCapabilities != null ? 
columnCapabilities.getType() : null; - // final boolean multiVal = unknownMultiValue.contains(columnName) || - // (columnCapabilities != null && columnCapabilities.hasMultipleValues()); final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues(); final Supplier supplier; From cefe3e77297c3ea5a9dc1f2ed53bb6404ad6d23d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 2 May 2019 17:12:36 -0700 Subject: [PATCH 04/48] more tests --- .../org/apache/druid/math/expr/Function.java | 54 +++++++++++++++++++ .../apache/druid/math/expr/FunctionTest.java | 16 ++++++ 2 files changed, 70 insertions(+) diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index 95d52fe90787..bf6dafcc4f06 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -20,6 +20,7 @@ package org.apache.druid.math.expr; import com.google.common.collect.ImmutableSet; +import net.thisptr.jackson.jq.internal.misc.Strings; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; @@ -36,6 +37,7 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Stream; /** @@ -1791,4 +1793,56 @@ public Set getArrayInputs(List args) return ImmutableSet.copyOf(args); } } + + class ArrayToStringFunction extends ArrayFunction + { + @Override + public String name() + { + return "array_to_string"; + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + + final ExprEval expr = args.get(0).eval(bindings); + final Object[] array = expr.asArray(); + if (array == null) { + return ExprEval.of(null); + } + + final String join = 
args.get(1).eval(bindings).asString(); + return ExprEval.of(Arrays.stream(array).map(String::valueOf).collect(Collectors.joining(join != null ? join : ""))); + } + } + + class StringToArrayFunction extends ArrayFunction + { + @Override + public String name() + { + return "string_to_array"; + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + + final ExprEval expr = args.get(0).eval(bindings); + final String arrayString = expr.asString(); + if (arrayString == null) { + return ExprEval.of(null); + } + + final String split = args.get(1).eval(bindings).asString(); + return ExprEval.ofStringArray(Strings.splitToArray(arrayString, split != null ? split : "")); + } + } } diff --git a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java index 43ca2ce16783..787c4af59a01 100644 --- a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -231,6 +231,22 @@ public void testArrayConcat() assertExpr("array_concat(0, 1)", new Long[]{0L,1L}); } + @Test + public void testArrayToString() + { + assertExpr("array_to_string([1, 2, 3], ',')", "1,2,3"); + assertExpr("array_to_string([1], '|')", "1"); + assertExpr("array_to_string(a, '|')", "foo|bar|baz|foobar"); + } + + @Test + public void testStringToArray() + { + assertExpr("string_to_array('1,2,3', ',')", new String[]{"1", "2", "3"}); + assertExpr("string_to_array('1', ',')", new String[]{"1"}); + assertExpr("string_to_array(array_to_string(a, ','), ',')", new String[]{"foo", "bar", "baz", "foobar"}); + } + private void assertExpr(final String expression, final Object expectedResult) { final Expr expr = Parser.parse(expression, ExprMacroTable.nil()); From a46a9a2cdb764a0fecd22b3e2c77b793be64d2c3 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 3 May 
2019 17:36:32 -0700 Subject: [PATCH 05/48] fixes --- .../org/apache/druid/math/expr/Function.java | 18 +++++++++++++++--- .../apache/druid/math/expr/FunctionTest.java | 12 ++++++------ .../segment/virtual/ExpressionSelectors.java | 2 +- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index bf6dafcc4f06..a721c6c3e0f0 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -1716,10 +1716,20 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) return ExprEval.ofStringArray(this.append(lhs.asStringArray(), rhs.asString()).toArray(String[]::new)); case LONG: case LONG_ARRAY: - return ExprEval.ofLongArray(this.append(lhs.asLongArray(), rhs.asLong()).toArray(Long[]::new)); + return ExprEval.ofLongArray( + this.append( + lhs.asLongArray(), + rhs.isNumericNull() ? null : rhs.asLong()).toArray(Long[]::new + ) + ); case DOUBLE: case DOUBLE_ARRAY: - return ExprEval.ofDoubleArray(this.append(lhs.asDoubleArray(), rhs.asDouble()).toArray(Double[]::new)); + return ExprEval.ofDoubleArray( + this.append( + lhs.asDoubleArray(), + rhs.isNumericNull() ? null : rhs.asDouble()).toArray(Double[]::new + ) + ); } throw new RuntimeException("impossible"); } @@ -1816,7 +1826,9 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } final String join = args.get(1).eval(bindings).asString(); - return ExprEval.of(Arrays.stream(array).map(String::valueOf).collect(Collectors.joining(join != null ? join : ""))); + return ExprEval.of( + Arrays.stream(array).map(String::valueOf).collect(Collectors.joining(join != null ? 
join : "")) + ); } } diff --git a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java index 787c4af59a01..bb538c61edfb 100644 --- a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -218,17 +218,17 @@ public void testArrayOverlap() @Test public void testArrayAppend() { - assertExpr("array_append([1 2 3], 4)", new Long[]{1L,2L,3L,4L}); - assertExpr("array_append([1 2 3], 'bar')", new Long[]{1L,2L,3L,0L}); + assertExpr("array_append([1 2 3], 4)", new Long[]{1L, 2L, 3L, 4L}); + assertExpr("array_append([1 2 3], 'bar')", new Long[]{1L, 2L, 3L, null}); } @Test public void testArrayConcat() { - assertExpr("array_concat([1 2 3], [2 4 6])", new Long[]{1L,2L,3L,2L,4L,6L}); - assertExpr("array_concat([1 2 3], 4)", new Long[]{1L,2L,3L,4L}); - assertExpr("array_concat(0, [1 2 3])", new Long[]{0L,1L,2L,3L}); - assertExpr("array_concat(0, 1)", new Long[]{0L,1L}); + assertExpr("array_concat([1 2 3], [2 4 6])", new Long[]{1L, 2L, 3L, 2L, 4L, 6L}); + assertExpr("array_concat([1 2 3], 4)", new Long[]{1L, 2L, 3L, 4L}); + assertExpr("array_concat(0, [1 2 3])", new Long[]{0L, 1L, 2L, 3L}); + assertExpr("array_concat(0, 1)", new Long[]{0L, 1L}); } @Test diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 7a7dea962cfd..bd315c1a7ecb 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -187,7 +187,7 @@ public static ColumnValueSelector makeExprEvalSelector( } - final Expr.ObjectBinding bindings = createBindings(expression, columnSelectorFactory, unknownIfArrays); + final Expr.ObjectBinding bindings = createBindings(expression, columnSelectorFactory); if 
(bindings.equals(ExprUtils.nilBindings())) { // Optimization for constant expressions. From 81aadce17e58314ef898f2bad8fb124663b212a2 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 8 May 2019 13:01:08 -0700 Subject: [PATCH 06/48] cleanup --- .../java/org/apache/druid/math/expr/Expr.java | 2 +- .../segment/virtual/ExpressionSelectors.java | 6 +-- ...ueStringExpressionColumnValueSelector.java | 38 ------------------- ...ueStringExpressionColumnValueSelector.java | 21 +++++++--- 4 files changed, 19 insertions(+), 48 deletions(-) delete mode 100644 processing/src/main/java/org/apache/druid/segment/virtual/ForcedMultiValueStringExpressionColumnValueSelector.java diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index fb309b0e8ad6..750497148a22 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -381,7 +381,7 @@ public LambdaExpr(List args, Expr expr) @Override public String toString() { - return "(" + args + " " + expr + ")"; + return "(" + args + " -> " + expr + ")"; } public String getIdentifier() diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index bd315c1a7ecb..1cec8af3b724 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -137,7 +137,7 @@ public static ColumnValueSelector makeExprEvalSelector( { final List columns = Parser.findRequiredBindings(expression); final Set expectedArrays = Parser.findArrayFnBindings(expression); - final Set actualArrays = Parser.findArrayFnBindings(expression); + final Set actualArrays = new HashSet<>(); final Set unknownIfArrays = new HashSet<>(); if (columns.size() == 1) { @@ -449,13 +449,13 @@ private 
static Supplier makeNullableSupplier( @VisibleForTesting @Nonnull - static Supplier supplierFromDimensionSelector(final DimensionSelector selector, boolean multiValue) + static Supplier supplierFromDimensionSelector(final DimensionSelector selector, boolean coerceArray) { Preconditions.checkNotNull(selector, "selector"); return () -> { final IndexedInts row = selector.getRow(); - if (row.size() == 1 && !multiValue) { + if (row.size() == 1 && !coerceArray) { return selector.lookupName(row.get(0)); } else { // column selector factories hate you and use [] and [null] interchangeably for nullish data diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ForcedMultiValueStringExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/ForcedMultiValueStringExpressionColumnValueSelector.java deleted file mode 100644 index bca9bf3fca8c..000000000000 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ForcedMultiValueStringExpressionColumnValueSelector.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.virtual; - -import org.apache.druid.math.expr.Expr; -import org.apache.druid.math.expr.Parser; - -import java.util.ArrayList; -import java.util.Set; - -public class ForcedMultiValueStringExpressionColumnValueSelector extends ExpressionColumnValueSelector -{ - public ForcedMultiValueStringExpressionColumnValueSelector( - Expr expression, - Expr.ObjectBinding bindings, - Set unknownColumnsSet - ) - { - super(Parser.applyUnappliedIdentifiers(expression, new ArrayList<>(unknownColumnsSet)), bindings); - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java index 0d723a2eaad0..7e94a5d34a5d 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java @@ -19,15 +19,15 @@ package org.apache.druid.segment.virtual; -import net.thisptr.jackson.jq.internal.misc.Strings; +import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.Parser; import java.util.ArrayList; -import java.util.HashMap; +import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -35,7 +35,8 @@ public class OpportunisticMultiValueStringExpressionColumnValueSelector extends { private final List unknownColumns; private final Set arrayInputs; - private final Map transformedCache; + private final Set ignoredColumns; + private final Int2ObjectMap transformedCache; public OpportunisticMultiValueStringExpressionColumnValueSelector( Expr 
expression, @@ -46,7 +47,8 @@ public OpportunisticMultiValueStringExpressionColumnValueSelector( super(expression, bindings); this.unknownColumns = new ArrayList<>(unknownColumnsSet); this.arrayInputs = Parser.findArrayFnBindings(expression); - this.transformedCache = new HashMap<>(); + this.ignoredColumns = new HashSet<>(); + this.transformedCache = new Int2ObjectArrayMap(unknownColumns.size()); } @Override @@ -55,8 +57,13 @@ public ExprEval getObject() List arrayBindings = unknownColumns.stream().filter(x -> !arrayInputs.contains(x) && isBindingArray(x)).collect(Collectors.toList()); + if (ignoredColumns.size() > 0) { + unknownColumns.removeAll(ignoredColumns); + ignoredColumns.clear(); + } + if (arrayBindings.size() > 0) { - final String key = Strings.join(",", arrayBindings); + final int key = arrayBindings.hashCode(); if (transformedCache.containsKey(key)) { return transformedCache.get(key).eval(bindings); } @@ -74,6 +81,8 @@ private boolean isBindingArray(String x) if (binding instanceof String[] && ((String[]) binding).length > 1) { // if (binding instanceof String[]) { return true; + } else if (binding instanceof Number) { + ignoredColumns.add(x); } } return false; From b9410e9c47d87f7f8ccd1e532371bacc678f782b Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 8 May 2019 17:11:18 -0700 Subject: [PATCH 07/48] more better, more test --- .../segment/virtual/ExpressionSelectors.java | 27 +++++++++----- .../ExpressionColumnValueSelectorTest.java | 9 +++-- .../virtual/ExpressionVirtualColumnTest.java | 35 ++++++++++++++++--- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 1cec8af3b724..316f0d8da3c4 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -489,21 +489,32 @@ static Supplier supplierFromObjectSelector(final BaseObjectColumnValueSe if (val instanceof Number || val instanceof String) { return val; } else if (val instanceof List) { - // strings can be lists of strings!! - // this can happen from an "unknown" capabilites multi-value string dimension row, and we fallback to the - // object selector - Object[] arrayVal = ((List) val).stream().map(Object::toString).toArray(String[]::new); - if (arrayVal.length > 0) { - return arrayVal; - } - return new String[]{null}; + return coerceListDimToStringArray((List) val); } else { return null; } }; + } else if (clazz.isAssignableFrom(List.class)) { + return () -> { + final Object val = selector.getObject(); + if (val != null) { + return coerceListDimToStringArray((List) val); + } + return null; + }; } else { // No numbers or strings. return null; } } + + @Nonnull + private static Object coerceListDimToStringArray(List val) + { + Object[] arrayVal = val.stream().map(Object::toString).toArray(String[]::new); + if (arrayVal.length > 0) { + return arrayVal; + } + return new String[]{null}; + } } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java index c65a6ab5a88b..079f692fc653 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.virtual; import com.google.common.base.Supplier; +import com.google.common.collect.ImmutableList; import org.apache.druid.common.guava.SettableSupplier; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseSingleValueDimensionSelector; 
@@ -121,8 +122,12 @@ public void testSupplierFromObjectSelectorList() objectSelectorFromSupplier(settableSupplier, List.class) ); - // List can't be a number, so supplierFromObjectSelector should return null. - Assert.assertNull(supplier); + Assert.assertNotNull(supplier); + Assert.assertEquals(null, supplier.get()); + + settableSupplier.set(ImmutableList.of("1", "2", "3")); + Assert.assertArrayEquals(new String[]{"1", "2", "3"}, (Object[]) supplier.get()); + } private static DimensionSelector dimensionSelectorFromSupplier( diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java index 81a589240762..66223bed66e8 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java @@ -30,6 +30,7 @@ import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.Parser; import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.dimension.ExtractionDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.extraction.BucketExtractionFn; @@ -81,6 +82,17 @@ public class ExpressionVirtualColumnTest "c", ImmutableList.of("4", "5", "6") ) ); + private static final InputRow ROWMULTI2 = new MapBasedInputRow( + DateTimes.of("2000-01-02T01:00:00").getMillis(), + ImmutableList.of(), + ImmutableMap.of( + "x", 3L, + "y", 4L, + "a", ImmutableList.of("d", "e", "f"), + "b", ImmutableList.of("3", "4", "5"), + "c", ImmutableList.of("7", "8", "9") + ) + ); private static final ExpressionVirtualColumn X_PLUS_Y = new ExpressionVirtualColumn( "expr", @@ -137,7 +149,14 @@ public class ExpressionVirtualColumnTest TestExprMacroTable.INSTANCE ); - private static final 
ExpressionVirtualColumn SCALE_LIST = new ExpressionVirtualColumn( + private static final ExpressionVirtualColumn SCALE_LIST_IMPLICIT = new ExpressionVirtualColumn( + "expr", + "b * 2", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ); + + private static final ExpressionVirtualColumn SCALE_LIST_EXPLICIT = new ExpressionVirtualColumn( "expr", "map(b -> b * 2, b)", ValueType.STRING, @@ -176,11 +195,19 @@ public void testObjectSelector() @Test public void testMultiObjectSelector() { - CURRENT_ROW.set(ROWMULTI); + DimensionSpec spec = new DefaultDimensionSpec("expr", "expr"); - final BaseObjectColumnValueSelector selector = SCALE_LIST.makeDimensionSelector(new DefaultDimensionSpec("expr", "expr"), COLUMN_SELECTOR_FACTORY); + final BaseObjectColumnValueSelector selectorImplicit = SCALE_LIST_IMPLICIT.makeDimensionSelector(spec, COLUMN_SELECTOR_FACTORY); + CURRENT_ROW.set(ROWMULTI); + Assert.assertEquals(ImmutableList.of("2.0", "4.0", "6.0"), selectorImplicit.getObject()); + CURRENT_ROW.set(ROWMULTI2); + Assert.assertEquals(ImmutableList.of("6.0", "8.0", "10.0"), selectorImplicit.getObject()); - Assert.assertEquals(ImmutableList.of("2.0", "4.0", "6.0"), selector.getObject()); + final BaseObjectColumnValueSelector selectorExplicit = SCALE_LIST_EXPLICIT.makeDimensionSelector(spec, COLUMN_SELECTOR_FACTORY); + CURRENT_ROW.set(ROWMULTI); + Assert.assertEquals(ImmutableList.of("2.0", "4.0", "6.0"), selectorExplicit.getObject()); + CURRENT_ROW.set(ROWMULTI2); + Assert.assertEquals(ImmutableList.of("6.0", "8.0", "10.0"), selectorExplicit.getObject()); } @Test From 97c38d4a1c82d322bd7b1d1c00db9c9c12f1c603 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 14 May 2019 13:03:20 -0700 Subject: [PATCH 08/48] ignore inspection --- .../org/apache/druid/segment/virtual/ExpressionSelectors.java | 1 + 1 file changed, 1 insertion(+) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 316f0d8da3c4..4b99417138b3 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -463,6 +463,7 @@ static Supplier supplierFromDimensionSelector(final DimensionSelector se return new String[]{null}; } final String[] strings = new String[row.size()]; + // noinspection SSBasedInspection for (int i = 0; i < row.size(); i++) { strings[i] = selector.lookupName(row.get(i)); } From f3fff9860667843962dd00245d754e1c8601d3c7 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 14 May 2019 13:24:02 -0700 Subject: [PATCH 09/48] license --- LICENSE | 2 ++ 1 file changed, 2 insertions(+) diff --git a/LICENSE b/LICENSE index 107d1fe78560..fe2884f08371 100644 --- a/LICENSE +++ b/LICENSE @@ -257,6 +257,8 @@ SOURCE/JAVA-CORE * core/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java DirectExecutorService class: * core/src/main/java/org/apache/druid/java/util/common/concurrent/DirectExecutorService.java + CartesianList class: + * core/src/main/java/org/apache/druid/math/expr/CartesianList This product contains modified versions of the Dockerfile and related configuration files from SequenceIQ's Hadoop Docker image, copyright SequenceIQ, Inc. 
(https://github.com/sequenceiq/hadoop-docker/) From 81055b33942af2c6cebc01f6e2afc901a89623ce Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 14 May 2019 16:29:05 -0700 Subject: [PATCH 10/48] license fix --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index fe2884f08371..d8e119643d8c 100644 --- a/LICENSE +++ b/LICENSE @@ -258,7 +258,7 @@ SOURCE/JAVA-CORE DirectExecutorService class: * core/src/main/java/org/apache/druid/java/util/common/concurrent/DirectExecutorService.java CartesianList class: - * core/src/main/java/org/apache/druid/math/expr/CartesianList + * core/src/main/java/org/apache/druid/math/expr/CartesianList.java This product contains modified versions of the Dockerfile and related configuration files from SequenceIQ's Hadoop Docker image, copyright SequenceIQ, Inc. (https://github.com/sequenceiq/hadoop-docker/) From 54e5e676aab2b46ed49ee696f494b535949a5128 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 14 May 2019 17:03:37 -0700 Subject: [PATCH 11/48] inspection --- .../main/java/org/apache/druid/math/expr/ExprListenerImpl.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java b/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java index 067f685c9a8f..152f6f174e91 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java @@ -373,8 +373,7 @@ public void exitLambda(ExprParser.LambdaContext ctx) public void exitFunctionArgs(ExprParser.FunctionArgsContext ctx) { List args = new ArrayList<>(); - - for (ExprParser.ExprContext exprCtx : ctx.expr()) { + for (ParseTree exprCtx : ctx.expr()) { args.add((Expr) nodes.get(exprCtx)); } From fd67f0eeccebf017d9f42585e4c077b8a28313da Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 15 May 2019 01:19:58 -0700 Subject: [PATCH 12/48] remove dumb import --- 
core/src/main/java/org/apache/druid/math/expr/Function.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index a721c6c3e0f0..013ce7931f1d 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -20,7 +20,6 @@ package org.apache.druid.math.expr; import com.google.common.collect.ImmutableSet; -import net.thisptr.jackson.jq.internal.misc.Strings; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; @@ -1854,7 +1853,7 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } final String split = args.get(1).eval(bindings).asString(); - return ExprEval.ofStringArray(Strings.splitToArray(arrayString, split != null ? split : "")); + return ExprEval.ofStringArray(arrayString.split(split != null ? 
split : "")); } } } From 94efb2ca4ce8e7ef6d84b9ceb0e6343f67a9c0a8 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 22 May 2019 00:51:58 -0700 Subject: [PATCH 13/48] more better --- .../java/org/apache/druid/math/expr/Expr.java | 61 ++- .../org/apache/druid/math/expr/Function.java | 364 +++++++++--------- .../org/apache/druid/math/expr/Parser.java | 98 +++++ .../apache/druid/math/expr/FunctionTest.java | 2 +- .../apache/druid/math/expr/ParserTest.java | 13 +- docs/content/misc/math-expr.md | 52 ++- .../segment/virtual/ExpressionSelectors.java | 42 +- .../druid/query/MultiValuedDimensionTest.java | 68 ++++ 8 files changed, 451 insertions(+), 249 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index 750497148a22..8a3c127a52f3 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -72,18 +72,6 @@ default Object getLiteralValue() @Nonnull ExprEval eval(ObjectBinding bindings); - /** - * Mechanism to supply values to back {@link IdentifierExpr} during expression evaluation - */ - interface ObjectBinding - { - /** - * Get value binding for string identifier of {@link IdentifierExpr} - */ - @Nullable - Object get(String name); - } - /** * Programmatically inspect the {@link Expr} tree with a {@link Visitor}. Each {@link Expr} is responsible for * ensuring the {@link Visitor} can reach all of it's {@link Expr} children. 
@@ -97,6 +85,18 @@ interface ObjectBinding */ Expr visit(Shuttle shuttle); + /** + * Mechanism to supply values to back {@link IdentifierExpr} during expression evaluation + */ + interface ObjectBinding + { + /** + * Get value binding for string identifier of {@link IdentifierExpr} + */ + @Nullable + Object get(String name); + } + /** * Mechanism to inspect an {@link Expr}, implementing a {@link Visitor} allows visiting all children of an * {@link Expr} @@ -156,7 +156,7 @@ class LongExpr extends ConstantExpr { private final Long value; - public LongExpr(Long value) + LongExpr(Long value) { this.value = Preconditions.checkNotNull(value, "value"); } @@ -186,7 +186,7 @@ class LongArrayExpr extends ConstantArrayExpr { private final Long[] value; - public LongArrayExpr(Long[] value) + LongArrayExpr(Long[] value) { this.value = Preconditions.checkNotNull(value, "value"); } @@ -216,7 +216,7 @@ class StringExpr extends ConstantExpr { private final String value; - public StringExpr(String value) + StringExpr(String value) { this.value = NullHandling.emptyToNullIfNeeded(value); } @@ -246,7 +246,7 @@ class StringArrayExpr extends ConstantArrayExpr { private final String[] value; - public StringArrayExpr(String[] value) + StringArrayExpr(String[] value) { this.value = Preconditions.checkNotNull(value, "value"); } @@ -276,7 +276,7 @@ class DoubleExpr extends ConstantExpr { private final Double value; - public DoubleExpr(Double value) + DoubleExpr(Double value) { this.value = Preconditions.checkNotNull(value, "value"); } @@ -306,7 +306,7 @@ class DoubleArrayExpr extends ConstantArrayExpr { private final Double[] value; - public DoubleArrayExpr(Double[] value) + DoubleArrayExpr(Double[] value) { this.value = Preconditions.checkNotNull(value, "value"); } @@ -336,7 +336,7 @@ class IdentifierExpr implements Expr { private final String value; - public IdentifierExpr(String value) + IdentifierExpr(String value) { this.value = value; } @@ -372,7 +372,7 @@ class LambdaExpr implements 
Expr private final List args; private final Expr expr; - public LambdaExpr(List args, Expr expr) + LambdaExpr(List args, Expr expr) { this.args = args; this.expr = expr; @@ -415,16 +415,7 @@ public ExprEval eval(ObjectBinding bindings) @Override public void visit(Visitor visitor) { - // return free variables only - expr.visit( - _expr -> { - if (_expr instanceof IdentifierExpr) { - if (args.stream().noneMatch(x -> _expr.toString().equals(x.toString()))) { - visitor.visit(_expr); - } - } - } - ); + expr.visit(visitor); visitor.visit(this); } @@ -444,7 +435,7 @@ class FunctionExpr implements Expr final String name; final List args; - public FunctionExpr(Function function, String name, List args) + FunctionExpr(Function function, String name, List args) { this.function = function; this.name = name; @@ -488,7 +479,7 @@ class ApplyFunctionExpr implements Expr final LambdaExpr lambdaExpr; final List argsExpr; - public ApplyFunctionExpr(ApplyFunction function, String name, LambdaExpr expr, List args) + ApplyFunctionExpr(ApplyFunction function, String name, LambdaExpr expr, List args) { this.function = function; this.name = name; @@ -499,7 +490,7 @@ public ApplyFunctionExpr(ApplyFunction function, String name, LambdaExpr expr, L @Override public String toString() { - return "(" + name + " " + lambdaExpr + " " + argsExpr + ")"; + return "(" + name + " " + lambdaExpr + ", " + argsExpr + ")"; } @Nonnull @@ -635,7 +626,7 @@ abstract class BinaryOpExprBase implements Expr protected Expr left; protected Expr right; - public BinaryOpExprBase(String op, Expr left, Expr right) + BinaryOpExprBase(String op, Expr left, Expr right) { this.op = op; this.left = left; @@ -673,7 +664,7 @@ public String toString() abstract class BinaryEvalOpExprBase extends BinaryOpExprBase { - public BinaryEvalOpExprBase(String op, Expr left, Expr right) + BinaryEvalOpExprBase(String op, Expr left, Expr right) { super(op, left, right); } diff --git 
a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index e7f7eb7e9b39..56a4be55b6e3 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -1467,14 +1467,62 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } - abstract class ArrayFunction implements Function + class StringToArrayFunction implements Function { + @Override + public String name() + { + return "string_to_array"; + } + + void validateArguments(List args) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + validateArguments(args); + + final ExprEval expr = args.get(0).eval(bindings); + final String arrayString = expr.asString(); + if (arrayString == null) { + return ExprEval.of(null); + } + + final String split = args.get(1).eval(bindings).asString(); + return ExprEval.ofStringArray(arrayString.split(split != null ? 
split : "")); + } + public Set getArrayInputs(List args) + { + validateArguments(args); + return Collections.emptySet(); + } + + @Override + public Set getScalarInputs(List args) + { + validateArguments(args); + return ImmutableSet.copyOf(args); + } + } + + abstract class ArrayFunction implements Function + { + void validateArguments(List args) { if (args.size() != 1) { throw new IAE("Function[%s] needs 1 argument", name()); } + } + public Set getArrayInputs(List args) + { + validateArguments(args); return ImmutableSet.of(args.get(0)); } @@ -1485,53 +1533,114 @@ public Set getScalarInputs(List args) } } - class ArrayLengthFunction extends ArrayFunction + abstract class ArrayScalarFunction extends ArrayFunction { @Override - public String name() + void validateArguments(List args) { - return "array_length"; + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + } + + @Override + public Set getScalarInputs(List args) + { + validateArguments(args); + return ImmutableSet.of(args.get(1)); } @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() != 1) { - throw new IAE("Function[%s] needs 1 argument", name()); + validateArguments(args); + final ExprEval arrayExpr = args.get(0).eval(bindings); + final ExprEval scalarExpr = args.get(1).eval(bindings); + if (arrayExpr.asArray() == null) { + return ExprEval.of(null); } + return doApply(arrayExpr, scalarExpr); + } - final ExprEval expr = args.get(0).eval(bindings); - final Object[] array = expr.asArray(); - if (array == null) { + abstract ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr); + + + @Override + public Set getArrayInputs(List args) + { + return ImmutableSet.copyOf(args); + } + } + + abstract class ArraysFunction extends ArrayFunction + { + @Override + void validateArguments(List args) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + } + + @Override + public Set getArrayInputs(List args) + { + 
validateArguments(args); + return ImmutableSet.copyOf(args); + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + validateArguments(args); + final ExprEval arrayExpr1 = args.get(0).eval(bindings); + final ExprEval arrayExpr2 = args.get(1).eval(bindings); + + if (arrayExpr1.asArray() == null || arrayExpr2.asArray() == null) { return ExprEval.of(null); } - return ExprEval.ofLong(array.length); + return doApply(arrayExpr1, arrayExpr2); } + + abstract ExprEval doApply(ExprEval lhsExpr, ExprEval rhsExpr); } - class ArrayOffsetFunction extends ArrayFunction + class ArrayLengthFunction extends ArrayFunction { @Override public String name() { - return "array_offset"; + return "array_length"; } @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() != 2) { - throw new IAE("Function[%s] needs 2 argument", name()); - } - + validateArguments(args); final ExprEval expr = args.get(0).eval(bindings); final Object[] array = expr.asArray(); if (array == null) { return ExprEval.of(null); } - final int position = args.get(1).eval(bindings).asInt(); + return ExprEval.ofLong(array.length); + } + } + + class ArrayOffsetFunction extends ArrayScalarFunction + { + @Override + public String name() + { + return "array_offset"; + } + + @Override + ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr) + { + final Object[] array = arrayExpr.asArray(); + final int position = scalarExpr.asInt(); if (array.length > position) { return ExprEval.bestEffortOf(array[position]); @@ -1540,7 +1649,7 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } - class ArrayOrdinalFunction extends ArrayFunction + class ArrayOrdinalFunction extends ArrayScalarFunction { @Override public String name() @@ -1549,19 +1658,10 @@ public String name() } @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr) { - if (args.size() != 2) { - throw new 
IAE("Function[%s] needs 2 argument", name()); - } - - final ExprEval expr = args.get(0).eval(bindings); - final Object[] array = expr.asArray(); - if (array == null) { - return ExprEval.of(null); - } - - final int position = args.get(1).eval(bindings).asInt() - 1; + final Object[] array = arrayExpr.asArray(); + final int position = scalarExpr.asInt() - 1; if (array.length > position) { return ExprEval.bestEffortOf(array[position]); @@ -1570,7 +1670,7 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } - class ArrayContainsFunction extends ArrayFunction + class ArrayContainsFunction extends ArraysFunction { @Override public String name() @@ -1579,27 +1679,15 @@ public String name() } @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + ExprEval doApply(ExprEval lhsExpr, ExprEval rhsExpr) { - if (args.size() != 2) { - throw new IAE("Function[%s] needs 2 argument", name()); - } - - final ExprEval expr = args.get(0).eval(bindings); - final ExprEval toCheck = args.get(1).eval(bindings); - - final Object[] array1 = expr.asArray(); - final Object[] array2 = toCheck.asArray(); - - if (array1 == null || array2 == null) { - return ExprEval.of(null); - } - + final Object[] array1 = lhsExpr.asArray(); + final Object[] array2 = rhsExpr.asArray(); return ExprEval.bestEffortOf(Arrays.asList(array1).containsAll(Arrays.asList(array2))); } } - class ArrayOverlapFunction extends ArrayFunction + class ArrayOverlapFunction extends ArraysFunction { @Override public String name() @@ -1608,33 +1696,19 @@ public String name() } @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + ExprEval doApply(ExprEval lhsExpr, ExprEval rhsExpr) { - if (args.size() != 2) { - throw new IAE("Function[%s] needs 2 argument", name()); - } - - final ExprEval expr = args.get(0).eval(bindings); - final ExprEval toCheck = args.get(1).eval(bindings); - - final Object[] array1 = expr.asArray(); - final Object[] array2 = toCheck.asArray(); - - if 
(array1 == null || array2 == null) { - return ExprEval.of(null); - } - - List olst = Arrays.asList(array1); - List o2lst = Arrays.asList(array2); + final Object[] array1 = lhsExpr.asArray(); + final List array2 = Arrays.asList(rhsExpr.asArray()); boolean any = false; - for (Object check : olst) { - any |= o2lst.contains(check); + for (Object check : array1) { + any |= array2.contains(check); } return ExprEval.bestEffortOf(any); } } - class ArrayOffsetOfFunction extends ArrayFunction + class ArrayOffsetOfFunction extends ArrayScalarFunction { @Override public String name() @@ -1643,38 +1717,29 @@ public String name() } @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr) { - if (args.size() != 2) { - throw new IAE("Function[%s] needs 2 argument", name()); - } + final Object[] array = arrayExpr.asArray(); - final ExprEval expr = args.get(0).eval(bindings); - final ExprEval toCheck = args.get(1).eval(bindings); - - final Object[] array = expr.asArray(); - if (array == null) { - return ExprEval.of(null); - } - switch (toCheck.type()) { + switch (scalarExpr.type()) { case STRING: case LONG: case DOUBLE: int index = -1; for (int i = 0; i < array.length; i++) { - if (Objects.equals(array[i], toCheck.value())) { + if (Objects.equals(array[i], scalarExpr.value())) { index = i; break; } } return index < 0 ? 
ExprEval.of(null) : ExprEval.ofLong(index); default: - throw new IAE("Function[%s] argument must be a a scalar type", name()); + throw new IAE("Function[%s] 2nd argument must be a a scalar type", name()); } } } - class ArrayOrdinalOfFunction extends ArrayFunction + class ArrayOrdinalOfFunction extends ArrayScalarFunction { @Override public String name() @@ -1683,38 +1748,28 @@ public String name() } @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr) { - if (args.size() != 2) { - throw new IAE("Function[%s] needs 2 argument", name()); - } - - final ExprEval expr = args.get(0).eval(bindings); - final ExprEval toCheck = args.get(1).eval(bindings); - final Object[] array = expr.asArray(); - if (array == null) { - return ExprEval.of(null); - } - - switch (toCheck.type()) { + final Object[] array = arrayExpr.asArray(); + switch (scalarExpr.type()) { case STRING: case LONG: case DOUBLE: int index = -1; for (int i = 0; i < array.length; i++) { - if (Objects.equals(array[i], toCheck.value())) { + if (Objects.equals(array[i], scalarExpr.value())) { index = i; break; } } return index < 0 ? 
ExprEval.of(null) : ExprEval.ofLong(index + 1); default: - throw new IAE("Function[%s] argument must be a a scalar type", name()); + throw new IAE("Function[%s] 2nd argument must be a a scalar type", name()); } } } - class ArrayAppendFunction extends ArrayFunction + class ArrayAppendFunction extends ArrayScalarFunction { @Override public String name() @@ -1723,43 +1778,31 @@ public String name() } @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr) { - if (args.size() != 2) { - throw new IAE("Function[%s] needs 2 arguments", name()); - } - - final ExprEval lhs = args.get(0).eval(bindings); - final ExprEval rhs = args.get(1).eval(bindings); - - final Object[] array = lhs.asArray(); - - if (array == null) { - return ExprEval.of(null); - } - - switch (lhs.type()) { + switch (arrayExpr.type()) { case STRING: case STRING_ARRAY: - return ExprEval.ofStringArray(this.append(lhs.asStringArray(), rhs.asString()).toArray(String[]::new)); + return ExprEval.ofStringArray(this.append(arrayExpr.asStringArray(), scalarExpr.asString()).toArray(String[]::new)); case LONG: case LONG_ARRAY: return ExprEval.ofLongArray( this.append( - lhs.asLongArray(), - rhs.isNumericNull() ? null : rhs.asLong()).toArray(Long[]::new + arrayExpr.asLongArray(), + scalarExpr.isNumericNull() ? null : scalarExpr.asLong()).toArray(Long[]::new ) ); case DOUBLE: case DOUBLE_ARRAY: return ExprEval.ofDoubleArray( this.append( - lhs.asDoubleArray(), - rhs.isNumericNull() ? null : rhs.asDouble()).toArray(Double[]::new + arrayExpr.asDoubleArray(), + scalarExpr.isNumericNull() ? 
null : scalarExpr.asDouble()).toArray(Double[]::new ) ); } - throw new RuntimeException("impossible"); + + throw new RE("Unable to append to unknown type %s", arrayExpr.type()); } private Stream append(T[] array, T val) @@ -1768,15 +1811,9 @@ private Stream append(T[] array, T val) l.add(val); return l.stream(); } - - @Override - public Set getArrayInputs(List args) - { - return ImmutableSet.copyOf(args); - } } - class ArrayConcatFunction extends ArrayFunction + class ArrayConcatFunction extends ArraysFunction { @Override public String name() @@ -1785,37 +1822,36 @@ public String name() } @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + ExprEval doApply(ExprEval lhsExpr, ExprEval rhsExpr) { - if (args.size() != 2) { - throw new IAE("Function[%s] needs 2 arguments", name()); - } - - final ExprEval lhs = args.get(0).eval(bindings); - final ExprEval rhs = args.get(1).eval(bindings); - - final Object[] array1 = lhs.asArray(); - final Object[] array2 = rhs.asArray(); + final Object[] array1 = lhsExpr.asArray(); + final Object[] array2 = rhsExpr.asArray(); if (array1 == null) { return ExprEval.of(null); } if (array2 == null) { - return lhs; + return lhsExpr; } - switch (lhs.type()) { + switch (lhsExpr.type()) { case STRING: case STRING_ARRAY: - return ExprEval.ofStringArray(this.cat(lhs.asStringArray(), rhs.asStringArray()).toArray(String[]::new)); + return ExprEval.ofStringArray( + cat(lhsExpr.asStringArray(), rhsExpr.asStringArray()).toArray(String[]::new) + ); case LONG: case LONG_ARRAY: - return ExprEval.ofLongArray(this.cat(lhs.asLongArray(), rhs.asLongArray()).toArray(Long[]::new)); + return ExprEval.ofLongArray( + cat(lhsExpr.asLongArray(), rhsExpr.asLongArray()).toArray(Long[]::new) + ); case DOUBLE: case DOUBLE_ARRAY: - return ExprEval.ofDoubleArray(this.cat(lhs.asDoubleArray(), rhs.asDoubleArray()).toArray(Double[]::new)); + return ExprEval.ofDoubleArray( + cat(lhsExpr.asDoubleArray(), rhsExpr.asDoubleArray()).toArray(Double[]::new) + 
); } - throw new RE("Unable to concatenate unknown type %s", lhs.type()); + throw new RE("Unable to concatenate to unknown type %s", lhsExpr.type()); } private Stream cat(T[] array1, T[] array2) @@ -1832,7 +1868,7 @@ public Set getArrayInputs(List args) } } - class ArrayToStringFunction extends ArrayFunction + class ArrayToStringFunction extends ArrayScalarFunction { @Override public String name() @@ -1841,48 +1877,12 @@ public String name() } @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr) { - if (args.size() != 2) { - throw new IAE("Function[%s] needs 2 argument", name()); - } - - final ExprEval expr = args.get(0).eval(bindings); - final Object[] array = expr.asArray(); - if (array == null) { - return ExprEval.of(null); - } - - final String join = args.get(1).eval(bindings).asString(); + final String join = scalarExpr.asString(); return ExprEval.of( - Arrays.stream(array).map(String::valueOf).collect(Collectors.joining(join != null ? join : "")) + Arrays.stream(arrayExpr.asArray()).map(String::valueOf).collect(Collectors.joining(join != null ? join : "")) ); } } - - class StringToArrayFunction extends ArrayFunction - { - @Override - public String name() - { - return "string_to_array"; - } - - @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) - { - if (args.size() != 2) { - throw new IAE("Function[%s] needs 2 argument", name()); - } - - final ExprEval expr = args.get(0).eval(bindings); - final String arrayString = expr.asString(); - if (arrayString == null) { - return ExprEval.of(null); - } - - final String split = args.get(1).eval(bindings).asString(); - return ExprEval.ofStringArray(arrayString.split(split != null ? 
split : "")); - } - } } diff --git a/core/src/main/java/org/apache/druid/math/expr/Parser.java b/core/src/main/java/org/apache/druid/math/expr/Parser.java index cc9d9e2a9aa1..2e5df3ec5faf 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/core/src/main/java/org/apache/druid/math/expr/Parser.java @@ -41,6 +41,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -312,6 +313,11 @@ public void visit(Expr expr) { if (expr instanceof IdentifierExpr) { found.add(expr.toString()); + } else if (expr instanceof LambdaExpr) { + LambdaExpr lambda = (LambdaExpr) expr; + for (String identifier : lambda.getIdentifiers()) { + found.remove(identifier); + } } } } @@ -349,6 +355,64 @@ public void visit(Expr expr) return arrayFnBindings; } + public static BindingDetails examineBindings(Expr expr) + { + final Set freeVariables = new HashSet<>(); + final Set scalarVariables = new HashSet<>(); + final Set arrayVariables = new HashSet<>(); + expr.visit(childExpr -> { + if (childExpr instanceof IdentifierExpr) { + freeVariables.add(childExpr.toString()); + } else if (childExpr instanceof LambdaExpr) { + LambdaExpr lambda = (LambdaExpr) childExpr; + for (String identifier : lambda.getIdentifiers()) { + freeVariables.remove(identifier); + scalarVariables.remove(identifier); + arrayVariables.remove(identifier); + } + } else { + final Set scalarArgs; + final Set arrayArgs; + if (childExpr instanceof FunctionExpr) { + FunctionExpr fnExpr = (FunctionExpr) childExpr; + scalarArgs = fnExpr.function.getScalarInputs(fnExpr.args); + + if (fnExpr.function instanceof Function.ArraysFunction) { + Function.ArrayFunction fn = (Function.ArrayFunction) fnExpr.function; + arrayArgs = fn.getArrayInputs(fnExpr.args); + } else { + arrayArgs = Collections.emptySet(); + } + } else if (childExpr instanceof ApplyFunctionExpr) { + ApplyFunctionExpr 
applyExpr = (ApplyFunctionExpr) childExpr; + scalarArgs = Collections.emptySet(); + arrayArgs = applyExpr.function.getArrayInputs(applyExpr.argsExpr); + } else { + scalarArgs = Collections.emptySet(); + arrayArgs = Collections.emptySet(); + } + for (Expr arg : scalarArgs) { + String s = getIdentifierIfIdentifier(arg); + if (s != null) { + scalarVariables.add(s); + } + } + for (Expr arg : arrayArgs) { + String s = getIdentifierOrCastIdentifier(arg); + if (s != null) { + arrayVariables.add(s); + } + } + } + }); + for (String identifier : scalarVariables) { + if (arrayVariables.contains(identifier)) { + throw new RE("Invalid expression: %s; identifier [%s] used as both scalar and array", expr, identifier); + } + } + return new BindingDetails(freeVariables, scalarVariables, arrayVariables); + } + @Nullable public static String getIdentifierOrCastIdentifier(Expr expr) { @@ -383,4 +447,38 @@ public static Expr.ObjectBinding withSuppliers(final Map freeVariables; + private final Set scalarVariables; + private final Set arrayVariables; + + BindingDetails(Set freeVariables, Set scalarVariables, Set arrayVariables) + { + this.freeVariables = freeVariables; + this.scalarVariables = scalarVariables; + this.arrayVariables = arrayVariables; + } + + public List getRequiredColumns() + { + return new ArrayList<>(freeVariables); + } + + public Set getFreeVariables() + { + return freeVariables; + } + + public Set getScalarVariables() + { + return scalarVariables; + } + + public Set getArrayVariables() + { + return arrayVariables; + } + } } diff --git a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java index bb538c61edfb..211b598be4c5 100644 --- a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -162,7 +162,7 @@ public void testRpad() @Test public void testArrayLength() { - assertExpr("array_length([1, 2, 3])", 3L); + 
assertExpr("array_length([1,2,3])", 3L); assertExpr("array_length(a)", 4); } diff --git a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java index 20b35d7f181d..ddf621426bc5 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java @@ -24,7 +24,6 @@ import org.junit.Assert; import org.junit.Test; -import javax.annotation.Nullable; import java.util.List; /** @@ -195,16 +194,8 @@ public void testLiteralArrays() public void testApplyFunctions() { final Expr parsed = Parser.parse("map((x) -> x + 1, [1, 2, 3])", ExprMacroTable.nil()); - Expr.ObjectBinding binding = new Expr.ObjectBinding() - { - @Nullable - @Override - public Object get(String name) - { - return null; - } - }; - ExprEval eval = parsed.eval(binding); + Assert.assertEquals("(map ([x] -> (+ x 1)), [1, 2, 3])", parsed.toString()); + ExprEval eval = parsed.eval(Parser.withMap(ImmutableMap.of())); Assert.assertArrayEquals(new Long[]{2L, 3L, 4L}, (Long[]) eval.value()); } diff --git a/docs/content/misc/math-expr.md b/docs/content/misc/math-expr.md index c207f01ed231..9c16f3b23749 100644 --- a/docs/content/misc/math-expr.md +++ b/docs/content/misc/math-expr.md @@ -25,7 +25,8 @@ title: "Apache Druid (incubating) Expressions" # Apache Druid (incubating) Expressions
-This feature is still experimental. It has not been optimized for performance yet, and its implementation is known to have significant inefficiencies. +This feature is still experimental. It has not been optimized for performance yet, and its implementation is known to + have significant inefficiencies.
This expression language supports the following operators (listed in decreasing order of precedence). @@ -39,14 +40,23 @@ This expression language supports the following operators (listed in decreasing |<, <=, >, >=, ==, !=|Binary Comparison| |&&, ||Binary Logical AND, OR| -Long, double, and string data types are supported. If a number contains a dot, it is interpreted as a double, otherwise it is interpreted as a long. That means, always add a '.' to your number if you want it interpreted as a double value. String literals should be quoted by single quotation marks. +Long, double, and string data types are supported. If a number contains a dot, it is interpreted as a double, otherwise +it is interpreted as a long. That means, always add a '.' to your number if you want it interpreted as a double value. +String literals should be quoted by single quotation marks. -Multi-value types are not fully supported yet. Expressions may behave inconsistently on multi-value types, and you -should not rely on the behavior in this case to stay the same in future releases. +Additionally, the expression language supports long, double, and string arrays. Array literals are created by wrapping +square brackets around a list of scalar literals values delimited by a comma or space character. All values in an array +literal must be the same type. -Expressions can contain variables. Variable names may contain letters, digits, '\_' and '$'. Variable names must not begin with a digit. To escape other special characters, you can quote it with double quotation marks. +Expressions can contain variables. Variable names may contain letters, digits, '\_' and '$'. Variable names must not +begin with a digit. To escape other special characters, you can quote it with double quotation marks. -For logical operators, a number is true if and only if it is positive (0 or negative value means false). For string type, it's the evaluation result of 'Boolean.valueOf(string)'. 
+For logical operators, a number is true if and only if it is positive (0 or negative value means false). For string +type, it's the evaluation result of 'Boolean.valueOf(string)'. + +Multi-value string dimensions are supported and may be treated as either scalar or array typed values. When treated as +a scalar type, an expression will automatically be transformed to apply the scalar operation across all values of the +multi-valued type, to mimic Druid's native behavior. The following built-in functions are available. @@ -146,3 +156,33 @@ See javadoc of java.lang.Math for detailed explanation for each function. |todegrees|todegrees(x) converts an angle measured in radians to an approximately equivalent angle measured in degrees| |toradians|toradians(x) converts an angle measured in degrees to an approximately equivalent angle measured in radians| |ulp|ulp(x) would return the size of an ulp of the argument x| + + +## Array Functions + +| function | description | +| --- | --- | +| `array_length(arr)` | returns length of array expression | +| `array_offset(arr,long)` | returns the array element at the 0 based index supplied, or null for an out of range index| +| `array_ordinal(arr,long)` | returns the array element at the 1 based index supplied, or null for an out of range index | +| `array_contains(arr,expr)` | returns true if the array contains the element specified by expr, or contains all elements specified by expr if expr is an array | +| `array_overlap(arr1,arr2)` | returns true if arr1 and arr2 have any elements in common | +| `array_offset_of(arr,expr)` | returns the 0 based index of the first occurrence of expr in the array, or `null` if no matching elements exist in the array. | +| `array_ordinal_of(arr,expr)` | returns the 1 based index of the first occurrence of expr in the array, or `null` if no matching elements exist in the array. 
| +| `array_append(arr,expr)` | appends expr to arr | +| `array_concat(arr1,arr2)` | concatenates 2 arrays | +| `array_to_string(arr,str)` | joins all elements of arr by the delimiter specified by str | +| `string_to_array(str1,str2)` | splits str1 into an array on the delimiter specified by str2 | + + +## Apply Functions + +| function | description | +| --- | --- | +| `map(lambda,arr)` | applies a transform specified by a single argument lambda expression to all elements of arr, returning a new array | +| `cartesian_map(lambda,arr1,arr2,...)` | applies a transform specified by a multi argument lambda expression to all elements of the cartesian product of all input arrays, returning a new array; the number of lambda arguments and array inputs must be the same | +| `filter(lambda,arr)` | filters arr by a single argument lambda, returning a new array with all matching elements, or null if no elements match | +| `foldr(lambda,arr)` | right folds a 2 argument lambda across arr. The first argument of the lambda is the array element and the second the accumulator, returning a single accumulated value. | +| `cartesian_foldr(lambda,arr1,arr2,...)` | right folds a multi argument lambda across the cartesian product of all input arrays. The first arguments of the lambda are the array elements and the last is the accumulator, returning a single accumulated value. 
| +| `any(lambda,arr)` | returns true if any element in the array matches the lambda expression | +| `all(lambda,arr)` | returns true if all elements in the array matches the lambda expression | diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 4b99417138b3..e110e9981d13 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -135,10 +135,8 @@ public static ColumnValueSelector makeExprEvalSelector( Expr expression ) { - final List columns = Parser.findRequiredBindings(expression); - final Set expectedArrays = Parser.findArrayFnBindings(expression); - final Set actualArrays = new HashSet<>(); - final Set unknownIfArrays = new HashSet<>(); + final Parser.BindingDetails exprDetails = Parser.examineBindings(expression); + final List columns = exprDetails.getRequiredColumns(); if (columns.size() == 1) { final String column = Iterables.getOnlyElement(columns); @@ -156,7 +154,7 @@ public static ColumnValueSelector makeExprEvalSelector( && capabilities.isDictionaryEncoded() && capabilities.isComplete() && !capabilities.hasMultipleValues() - && !expectedArrays.contains(column)) { + && !exprDetails.getArrayVariables().contains(column)) { // Optimization for expressions that hit one string column and nothing else. 
return new SingleStringInputCachingExpressionColumnValueSelector( columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)), @@ -165,12 +163,18 @@ public static ColumnValueSelector makeExprEvalSelector( } } + final Set actualArrays = new HashSet<>(); + final Set unknownIfArrays = new HashSet<>(); for (String column : columns) { final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column); if (capabilities != null) { if (capabilities.hasMultipleValues()) { actualArrays.add(column); - } else if (!capabilities.isComplete() && capabilities.getType().equals(ValueType.STRING) && (actualArrays.contains(column) || !expectedArrays.contains(column))) { + } else if ( + !capabilities.isComplete() && + capabilities.getType().equals(ValueType.STRING) && + !exprDetails.getArrayVariables().contains(column) + ) { unknownIfArrays.add(column); } } else { @@ -178,7 +182,10 @@ public static ColumnValueSelector makeExprEvalSelector( } } - final List needsApplied = columns.stream().filter(c -> actualArrays.contains(c) && !expectedArrays.contains(c)).collect(Collectors.toList()); + final List needsApplied = + columns.stream() + .filter(c -> actualArrays.contains(c) && !exprDetails.getArrayVariables().contains(c)) + .collect(Collectors.toList()); final Expr finalExpr; if (needsApplied.size() > 0) { finalExpr = Parser.applyUnappliedIdentifiers(expression, needsApplied); @@ -207,10 +214,9 @@ public static DimensionSelector makeDimensionSelector( final ExtractionFn extractionFn ) { - final List columns = Parser.findRequiredBindings(expression); - final Set expectedArrays = Parser.findArrayFnBindings(expression); - final Set actualArrays = Parser.findArrayFnBindings(expression); - final Set unknownIfArrays = new HashSet<>(); + final Parser.BindingDetails exprDetails = Parser.examineBindings(expression); + final List columns = exprDetails.getRequiredColumns(); + if (columns.size() == 1) { final String column = 
Iterables.getOnlyElement(columns); @@ -221,7 +227,7 @@ public static DimensionSelector makeDimensionSelector( && capabilities.isDictionaryEncoded() && capabilities.isComplete() && !capabilities.hasMultipleValues() - && !expectedArrays.contains(column) + && !exprDetails.getArrayVariables().contains(column) ) { // Optimization for dimension selectors that wrap a single underlying string column. return new SingleStringInputDimensionSelector( @@ -231,12 +237,18 @@ public static DimensionSelector makeDimensionSelector( } } + final Set actualArrays = new HashSet<>(); + final Set unknownIfArrays = new HashSet<>(); for (String column : columns) { final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column); if (capabilities != null) { if (capabilities.hasMultipleValues()) { actualArrays.add(column); - } else if (!capabilities.isComplete() && capabilities.getType().equals(ValueType.STRING) && (actualArrays.contains(column) || !expectedArrays.contains(column))) { + } else if ( + !capabilities.isComplete() && + capabilities.getType().equals(ValueType.STRING) && + !exprDetails.getArrayVariables().contains(column) + ) { unknownIfArrays.add(column); } } else { @@ -245,7 +257,9 @@ public static DimensionSelector makeDimensionSelector( } final ColumnValueSelector baseSelector = makeExprEvalSelector(columnSelectorFactory, expression); - final boolean multiVal = actualArrays.size() > 0 || expectedArrays.size() > 0 || unknownIfArrays.size() > 0; + final boolean multiVal = actualArrays.size() > 0 || + exprDetails.getArrayVariables().size() > 0 || + unknownIfArrays.size() > 0; if (baseSelector instanceof ConstantExprEvalSelector) { // Optimization for dimension selectors on constants. 
diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index c7ce5e3e79c4..b1dfece307e7 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -539,6 +539,74 @@ public void testGroupByExpressionAuto() } + @Test + public void testGroupByExpressionMultiConflicting() + { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage( + "Invalid expression: (concat [(map ([x] -> (concat [x, othertags])), [tags]), tags]); identifier [tags] used as both scalar and array" + ); + GroupByQuery query = GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("texpr", "texpr")) + .setVirtualColumns( + new ExpressionVirtualColumn( + "texpr", + "concat(map((x) -> concat(x, othertags), tags), tags)", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setLimit(5) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .build(); + + helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2")) + ), + query + ).toList(); + } + + @Test + public void testGroupByExpressionMultiConflictingAlso() + { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage( + "Invalid expression: (array_concat [tags, (array_append [othertags, tags])]); identifier [tags] used as both scalar and array" + ); + GroupByQuery query = GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("texpr", "texpr")) + 
.setVirtualColumns( + new ExpressionVirtualColumn( + "texpr", + "array_concat(tags, (array_append(othertags, tags)))", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setLimit(5) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .build(); + + helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2")) + ), + query + ).toList(); + } + @Test public void testTopNWithDimFilterAndWithFilteredDimSpec() { From ec5c420fd3c8db809e245f58402783f195ffa0c6 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 22 May 2019 02:01:01 -0700 Subject: [PATCH 14/48] some comments --- .../apache/druid/math/expr/ApplyFunction.java | 10 +- .../org/apache/druid/math/expr/Function.java | 110 ++++++++++-------- .../org/apache/druid/math/expr/Parser.java | 22 +++- 3 files changed, 87 insertions(+), 55 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java index 8cc345d62a28..fbe27c129fae 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java +++ b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java @@ -113,6 +113,9 @@ public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBin if (array == null) { return ExprEval.of(null); } + if (array.length == 0) { + return arrayEval; + } MapLambdaBinding lambdaBinding = new MapLambdaBinding(array, lambdaExpr, bindings); return applyMap(lambdaExpr, array.length, lambdaBinding); @@ -179,7 +182,7 @@ public Set getArrayInputs(List args) abstract class BaseFoldrFunction implements ApplyFunction { - public ExprEval applyFoldr(LambdaExpr lambdaExpr, Object accumulator, int length, IndexableFoldLambdaBinding bindings) + ExprEval applyFoldr(LambdaExpr lambdaExpr, Object accumulator, int length, IndexableFoldLambdaBinding bindings) { for (int i = 0; i < 
length; i++) { ExprEval evaluated = lambdaExpr.eval(bindings.accumulateWithIndex(i, accumulator)); @@ -222,7 +225,7 @@ public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBin @Override public Set getArrayInputs(List args) { - // accumulator argument cannot be inferred, so ignore it until think of something better to do + // accumulator argument cannot be inferred, so ignore it until we think of something better to do return ImmutableSet.of(args.get(0)); } } @@ -279,7 +282,8 @@ public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBin @Override public Set getArrayInputs(List args) { - return ImmutableSet.copyOf(args); + // accumulator argument cannot be inferred, so ignore it until we think of something better to do + return ImmutableSet.copyOf(args.subList(0, args.size() - 1)); } } diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index 56a4be55b6e3..421b1fe4a952 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -50,6 +50,9 @@ interface Function ExprEval apply(List args, Expr.ObjectBinding bindings); + /** + * Given a list of arguments to this {@link Function}, get the set of arguments that must evaluate to a scalar value + */ default Set getScalarInputs(List args) { return ImmutableSet.copyOf(args); @@ -1497,12 +1500,6 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) return ExprEval.ofStringArray(arrayString.split(split != null ? 
split : "")); } - public Set getArrayInputs(List args) - { - validateArguments(args); - return Collections.emptySet(); - } - @Override public Set getScalarInputs(List args) { @@ -1511,6 +1508,9 @@ public Set getScalarInputs(List args) } } + /** + * Function that operates on array typed operands + */ abstract class ArrayFunction implements Function { void validateArguments(List args) @@ -1520,6 +1520,10 @@ void validateArguments(List args) } } + /** + * Given a list of arguments to this {@link ArrayFunction}, get the set of arguments that must evaluate to an array + * value + */ public Set getArrayInputs(List args) { validateArguments(args); @@ -1533,6 +1537,9 @@ public Set getScalarInputs(List args) } } + /** + * {@link ArraysFunction} that takes 1 array operand and 1 scalar operand + */ abstract class ArrayScalarFunction extends ArrayFunction { @Override @@ -1572,6 +1579,9 @@ public Set getArrayInputs(List args) } } + /** + * {@link ArraysFunction} that takes 2 array operands + */ abstract class ArraysFunction extends ArrayFunction { @Override @@ -1628,40 +1638,37 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } - class ArrayOffsetFunction extends ArrayScalarFunction + class ArrayToStringFunction extends ArrayScalarFunction { @Override public String name() { - return "array_offset"; + return "array_to_string"; } @Override ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr) { - final Object[] array = arrayExpr.asArray(); - final int position = scalarExpr.asInt(); - - if (array.length > position) { - return ExprEval.bestEffortOf(array[position]); - } - return ExprEval.of(null); + final String join = scalarExpr.asString(); + return ExprEval.of( + Arrays.stream(arrayExpr.asArray()).map(String::valueOf).collect(Collectors.joining(join != null ? 
join : "")) + ); } } - class ArrayOrdinalFunction extends ArrayScalarFunction + class ArrayOffsetFunction extends ArrayScalarFunction { @Override public String name() { - return "array_ordinal"; + return "array_offset"; } @Override ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr) { final Object[] array = arrayExpr.asArray(); - final int position = scalarExpr.asInt() - 1; + final int position = scalarExpr.asInt(); if (array.length > position) { return ExprEval.bestEffortOf(array[position]); @@ -1670,41 +1677,24 @@ ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr) } } - class ArrayContainsFunction extends ArraysFunction + class ArrayOrdinalFunction extends ArrayScalarFunction { @Override public String name() { - return "array_contains"; - } - - @Override - ExprEval doApply(ExprEval lhsExpr, ExprEval rhsExpr) - { - final Object[] array1 = lhsExpr.asArray(); - final Object[] array2 = rhsExpr.asArray(); - return ExprEval.bestEffortOf(Arrays.asList(array1).containsAll(Arrays.asList(array2))); + return "array_ordinal"; } - } - class ArrayOverlapFunction extends ArraysFunction - { @Override - public String name() + ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr) { - return "array_overlap"; - } + final Object[] array = arrayExpr.asArray(); + final int position = scalarExpr.asInt() - 1; - @Override - ExprEval doApply(ExprEval lhsExpr, ExprEval rhsExpr) - { - final Object[] array1 = lhsExpr.asArray(); - final List array2 = Arrays.asList(rhsExpr.asArray()); - boolean any = false; - for (Object check : array1) { - any |= array2.contains(check); + if (array.length > position) { + return ExprEval.bestEffortOf(array[position]); } - return ExprEval.bestEffortOf(any); + return ExprEval.of(null); } } @@ -1868,21 +1858,41 @@ public Set getArrayInputs(List args) } } - class ArrayToStringFunction extends ArrayScalarFunction + class ArrayContainsFunction extends ArraysFunction { @Override public String name() { - return "array_to_string"; + return 
"array_contains"; } @Override - ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr) + ExprEval doApply(ExprEval lhsExpr, ExprEval rhsExpr) { - final String join = scalarExpr.asString(); - return ExprEval.of( - Arrays.stream(arrayExpr.asArray()).map(String::valueOf).collect(Collectors.joining(join != null ? join : "")) - ); + final Object[] array1 = lhsExpr.asArray(); + final Object[] array2 = rhsExpr.asArray(); + return ExprEval.bestEffortOf(Arrays.asList(array1).containsAll(Arrays.asList(array2))); + } + } + + class ArrayOverlapFunction extends ArraysFunction + { + @Override + public String name() + { + return "array_overlap"; + } + + @Override + ExprEval doApply(ExprEval lhsExpr, ExprEval rhsExpr) + { + final Object[] array1 = lhsExpr.asArray(); + final List array2 = Arrays.asList(rhsExpr.asArray()); + boolean any = false; + for (Object check : array1) { + any |= array2.contains(check); + } + return ExprEval.bestEffortOf(any); } } } diff --git a/core/src/main/java/org/apache/druid/math/expr/Parser.java b/core/src/main/java/org/apache/druid/math/expr/Parser.java index 2e5df3ec5faf..248e65c19ae5 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/core/src/main/java/org/apache/druid/math/expr/Parser.java @@ -25,6 +25,7 @@ import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.CommonTokenStream; @@ -355,6 +356,9 @@ public void visit(Expr expr) return arrayFnBindings; } + /** + * Visits all nodes of an {@link Expr}, collecting information about how {@link IdentifierExpr} are used + */ public static BindingDetails examineBindings(Expr expr) { final Set freeVariables = new HashSet<>(); @@ -362,8 +366,11 @@ public static BindingDetails examineBindings(Expr expr) final Set arrayVariables = new 
HashSet<>(); expr.visit(childExpr -> { if (childExpr instanceof IdentifierExpr) { + // all identifiers are free variables ... freeVariables.add(childExpr.toString()); } else if (childExpr instanceof LambdaExpr) { + // ... unless they are erased by appearing in a lambda expression's arguments because they will be bound by + // the apply expression that wraps the lambda LambdaExpr lambda = (LambdaExpr) childExpr; for (String identifier : lambda.getIdentifiers()) { freeVariables.remove(identifier); @@ -371,6 +378,9 @@ public static BindingDetails examineBindings(Expr expr) arrayVariables.remove(identifier); } } else { + // shallowly examining function expressions and apply function expressions can give us some context about if + // identifiers are used as scalar or array arguments to these functions. all identifiers should be encountered + // at some point, so we can use this to validate that identifiers are not used in inconsistent ways final Set scalarArgs; final Set arrayArgs; if (childExpr instanceof FunctionExpr) { @@ -387,9 +397,17 @@ public static BindingDetails examineBindings(Expr expr) ApplyFunctionExpr applyExpr = (ApplyFunctionExpr) childExpr; scalarArgs = Collections.emptySet(); arrayArgs = applyExpr.function.getArrayInputs(applyExpr.argsExpr); - } else { - scalarArgs = Collections.emptySet(); + } else if (childExpr instanceof BinaryOpExprBase) { + BinaryOpExprBase binExpr = (BinaryOpExprBase) childExpr; + scalarArgs = ImmutableSet.of(binExpr.left, binExpr.right); + arrayArgs = Collections.emptySet(); + } else if (childExpr instanceof UnaryExpr) { + UnaryExpr unaryExpr = (UnaryExpr) childExpr; + scalarArgs = ImmutableSet.of(unaryExpr.expr); arrayArgs = Collections.emptySet(); + } else { + // bail, child expression is not a function, apply function, or operator, nothing for us here + return; } for (Expr arg : scalarArgs) { String s = getIdentifierIfIdentifier(arg); From 3371505d79ae8b0797928d6aa1c333d2d4c8a7b3 Mon Sep 17 00:00:00 2001 From: Clint Wylie 
Date: Thu, 23 May 2019 17:11:57 -0700 Subject: [PATCH 15/48] add expr rewrite for arrayfn args for more magic, tests --- .../org/apache/druid/math/expr/Function.java | 13 ++- .../org/apache/druid/math/expr/Parser.java | 18 ++++ .../druid/query/MultiValuedDimensionTest.java | 84 +++++++++++++++++++ 3 files changed, 108 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index 421b1fe4a952..9fae0e6acb7e 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -1557,6 +1557,12 @@ public Set getScalarInputs(List args) return ImmutableSet.of(args.get(1)); } + @Override + public Set getArrayInputs(List args) + { + return ImmutableSet.of(args.get(0)); + } + @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { @@ -1570,13 +1576,6 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } abstract ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr); - - - @Override - public Set getArrayInputs(List args) - { - return ImmutableSet.copyOf(args); - } } /** diff --git a/core/src/main/java/org/apache/druid/math/expr/Parser.java b/core/src/main/java/org/apache/druid/math/expr/Parser.java index 248e65c19ae5..b358f4a53e83 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/core/src/main/java/org/apache/druid/math/expr/Parser.java @@ -180,7 +180,25 @@ public static Expr applyUnappliedIdentifiers(Expr expr, List unapplied) public Expr visit(Expr expr) { if (expr instanceof ApplyFunctionExpr) { + // try to lift unapplied arguments into the apply function lambda return liftApplyLambda((ApplyFunctionExpr) expr, unapplied); + } else if (expr instanceof FunctionExpr && ((FunctionExpr) expr).function instanceof Function.ArrayFunction) { + // check array function arguments for unapplied identifiers to transform if necessary + FunctionExpr 
fnExpr = (FunctionExpr) expr; + Function.ArrayFunction arrayFn = (Function.ArrayFunction) fnExpr.function; + Set arrayInputs = arrayFn.getArrayInputs(fnExpr.args); + List newArgs = new ArrayList<>(); + for (Expr arg : fnExpr.args) { + if (Parser.getIdentifierOrCastIdentifier(arg) == null && arrayInputs.contains(arg)) { + Expr newArg = applyUnappliedIdentifiers(arg, unapplied); + newArgs.add(newArg); + } else { + newArgs.add(arg); + } + } + + FunctionExpr newFnExpr = new FunctionExpr(arrayFn, arrayFn.name(), newArgs); + return newFnExpr; } return expr; } diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index b1dfece307e7..206d24608013 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -538,6 +538,90 @@ public void testGroupByExpressionAuto() TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); } + @Test + public void testGroupByExpressionArrayFnArg() + { + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + GroupByQuery query = GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("tt", "tt")) + .setVirtualColumns( + new ExpressionVirtualColumn( + "tt", + "array_to_string(map(tags -> concat('foo', tags), tags), ', ')", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .build(); + + Sequence result = helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, 
SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2")) + ), + query + ); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foot1, foot2, foot3", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foot3, foot4, foot5", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foot5, foot6, foot7", "count", 2L) + ); + + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + } + + @Test + public void testGroupByExpressionAutoArrayFnArg() + { + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + GroupByQuery query = GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("tt", "tt")) + .setVirtualColumns( + new ExpressionVirtualColumn( + "tt", + "array_to_string(concat('foo', tags), ', ')", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .build(); + + Sequence result = helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2")) + ), + query + ); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foot1, foot2, foot3", "count", 2L), + 
GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foot3, foot4, foot5", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foot5, foot6, foot7", "count", 2L) + ); + + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + } + @Test public void testGroupByExpressionMultiConflicting() From e6bd16b0dfb982e5c59e8a613e08f46be54b5a6d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 23 May 2019 17:16:30 -0700 Subject: [PATCH 16/48] test stuff --- .../druid/query/MultiValuedDimensionTest.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index 206d24608013..e285dc1c49fe 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -357,7 +357,7 @@ public void testGroupByExpression() GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t7foo", "count", 2L) ); - TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr"); } @Test @@ -401,7 +401,7 @@ public void testGroupByExpressionMultiMulti() GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t3u1", "count", 2L) ); - TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi"); } @Test @@ -445,7 +445,7 @@ public void testGroupByExpressionMultiMultiAuto() GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t3u1", "count", 2L) ); - TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + 
TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi-auto"); } @Test @@ -489,7 +489,7 @@ public void testGroupByExpressionMultiMultiAutoAuto() GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t3u1", "count", 2L) ); - TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi-auto-auto"); } @Test @@ -535,7 +535,7 @@ public void testGroupByExpressionAuto() GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t7foo", "count", 2L) ); - TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-auto"); } @Test @@ -577,7 +577,7 @@ public void testGroupByExpressionArrayFnArg() GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foot5, foot6, foot7", "count", 2L) ); - TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-array-fn"); } @Test @@ -619,7 +619,7 @@ public void testGroupByExpressionAutoArrayFnArg() GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foot5, foot6, foot7", "count", 2L) ); - TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-arrayfn-auto"); } From f210ba614f91e95a7f32919b7e919f37e9b6f739 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 24 May 2019 12:00:35 -0700 Subject: [PATCH 17/48] more tests --- .../druid/math/expr/ApplyFunctionTest.java | 1 + .../druid/query/MultiValuedDimensionTest.java | 84 +++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java 
b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java index 0f02eaf6ca14..0625f6c24cdc 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java @@ -93,6 +93,7 @@ public void testFoldr() { assertExpr("foldr((x, y) -> x + y, [1, 1, 1, 1, 1], 0)", 5L); assertExpr("foldr((b, acc) -> b * acc, map((b) -> b * 2, filter(b -> b > 3, b)), 1)", 80L); + assertExpr("foldr((a, acc) -> concat(a, acc), a, '')", "foobarbazbarfoo"); } @Test diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index e285dc1c49fe..fa58bcfbfb63 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -622,6 +622,90 @@ public void testGroupByExpressionAutoArrayFnArg() TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-arrayfn-auto"); } + @Test + public void testGroupByExpressionFoldArrayToString() + { + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + GroupByQuery query = GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("tt", "tt")) + .setVirtualColumns( + new ExpressionVirtualColumn( + "tt", + "foldr((tag, acc) -> concat(acc, tag), tags, '')", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .build(); + + Sequence result = helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, 
SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2")) + ), + query + ); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", null, "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t1t2t3", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t3t4t5", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t5t6t7", "count", 2L) + ); + + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-arrayfn-auto"); + } + + @Test + public void testGroupByExpressionFoldArrayToStringWithConcats() + { + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + GroupByQuery query = GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("tt", "tt")) + .setVirtualColumns( + new ExpressionVirtualColumn( + "tt", + "foldr((tag, acc) -> concat('foo', concat(concat(acc, case_searched(acc == '', '', ', '), tag))), tags, '')", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .build(); + + Sequence result = helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2")) + ), + query + ); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", 
"tt", "foot1, foot2, foot3", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foot3, foot4, foot5", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "foot5, foot6, foot7", "count", 2L) + ); + + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-arrayfn-auto"); + } + @Test public void testGroupByExpressionMultiConflicting() From e9d223e5bd8624d8a384253a15105a6ae72386d3 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 24 May 2019 14:29:07 -0700 Subject: [PATCH 18/48] fix test --- .../java/org/apache/druid/query/MultiValuedDimensionTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index fa58bcfbfb63..c38dbc7228f6 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -680,7 +680,7 @@ public void testGroupByExpressionFoldArrayToStringWithConcats() .setVirtualColumns( new ExpressionVirtualColumn( "tt", - "foldr((tag, acc) -> concat('foo', concat(concat(acc, case_searched(acc == '', '', ', '), tag))), tags, '')", + "foldr((tag, acc) -> concat(concat(acc, case_searched(acc == '', '', ', '), concat('foo', tag)))), tags, '')", ValueType.STRING, TestExprMacroTable.INSTANCE ) From 2387597b9ce912b676f7c8c40512ccc2edd5df16 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 24 May 2019 23:01:02 -0700 Subject: [PATCH 19/48] fix test --- .../druid/query/MultiValuedDimensionTest.java | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index 
c38dbc7228f6..98f01a68e79b 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -226,30 +226,22 @@ public void testGroupByNoFilter() query ); - List expectedResults; - if (NullHandling.replaceWithDefault()) { - expectedResults = Arrays.asList( - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", null, "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t1", "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t2", "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t4", "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t5", "count", 4L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t6", "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t7", "count", 2L) - ); - } else { - expectedResults = Arrays.asList( - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "", "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t1", "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t2", "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t4", "count", 2L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t5", "count", 4L), - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t6", "count", 2L), - 
GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t7", "count", 2L) - ); - } + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow( + "1970-01-01T00:00:00.000Z", + "tags", + NullHandling.replaceWithDefault() ? null : "", + "count", + 2L + ), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t1", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t2", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t4", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t5", "count", 4L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t6", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t7", "count", 2L) + ); TestHelper.assertExpectedObjects(expectedResults, result.toList(), "noFilter"); } @@ -654,8 +646,15 @@ public void testGroupByExpressionFoldArrayToString() query ); + List expectedResults = Arrays.asList( - GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", null, "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow( + "1970-01-01T00:00:00.000Z", + "tt", + NullHandling.replaceWithDefault() ? 
null : "", + "count", + 2L + ), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t1t2t3", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t3t4t5", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tt", "t5t6t7", "count", 2L) From e4aa0f54295a476ad49e60dfe9758c586fdf6fd4 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 30 May 2019 16:11:19 -0700 Subject: [PATCH 20/48] castfunc can deal with arrays --- .../org/apache/druid/math/expr/Function.java | 62 ++++++++++++++++--- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index 9fae0e6acb7e..d36ad38fa09b 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -933,7 +933,7 @@ public ExprEval apply(final List args, final Expr.ObjectBinding bindings) } } - class CastFunc extends DoubleParam + class CastFunc extends DoubleParam implements ArrayFunction { @Override public String name() @@ -956,6 +956,48 @@ protected ExprEval eval(ExprEval x, ExprEval y) } return x.castTo(castTo); } + + @Override + public void validateArguments(List args) + { + // side effect of array function interface, nothing to do + } + + @Override + public Set getScalarInputs(List args) + { + if (args.get(1).isLiteral()) { + ExprType castTo = ExprType.valueOf(StringUtils.toUpperCase(args.get(1).getLiteralValue().toString())); + switch (castTo) { + case LONG_ARRAY: + case DOUBLE_ARRAY: + case STRING_ARRAY: + return Collections.emptySet(); + default: + return ImmutableSet.of(args.get(0)); + } + } + // unknown cast, can't safely assume either way + return Collections.emptySet(); + } + + @Override + public Set getArrayInputs(List args) + { + if (args.get(1).isLiteral()) { + ExprType castTo = 
ExprType.valueOf(StringUtils.toUpperCase(args.get(1).getLiteralValue().toString())); + switch (castTo) { + case LONG: + case DOUBLE: + case STRING: + return Collections.emptySet(); + default: + return ImmutableSet.of(args.get(0)); + } + } + // unknown cast, can't safely assume either way + return Collections.emptySet(); + } } class TimestampFromEpochFunc implements Function @@ -1511,9 +1553,9 @@ public Set getScalarInputs(List args) /** * Function that operates on array typed operands */ - abstract class ArrayFunction implements Function + interface ArrayFunction extends Function { - void validateArguments(List args) + default void validateArguments(List args) { if (args.size() != 1) { throw new IAE("Function[%s] needs 1 argument", name()); @@ -1524,14 +1566,14 @@ void validateArguments(List args) * Given a list of arguments to this {@link ArrayFunction}, get the set of arguments that must evaluate to an array * value */ - public Set getArrayInputs(List args) + default Set getArrayInputs(List args) { validateArguments(args); return ImmutableSet.of(args.get(0)); } @Override - public Set getScalarInputs(List args) + default Set getScalarInputs(List args) { return Collections.emptySet(); } @@ -1540,10 +1582,10 @@ public Set getScalarInputs(List args) /** * {@link ArraysFunction} that takes 1 array operand and 1 scalar operand */ - abstract class ArrayScalarFunction extends ArrayFunction + abstract class ArrayScalarFunction implements ArrayFunction { @Override - void validateArguments(List args) + public void validateArguments(List args) { if (args.size() != 2) { throw new IAE("Function[%s] needs 2 argument", name()); @@ -1581,10 +1623,10 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) /** * {@link ArraysFunction} that takes 2 array operands */ - abstract class ArraysFunction extends ArrayFunction + abstract class ArraysFunction implements ArrayFunction { @Override - void validateArguments(List args) + public void validateArguments(List args) { if 
(args.size() != 2) { throw new IAE("Function[%s] needs 2 argument", name()); @@ -1615,7 +1657,7 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) abstract ExprEval doApply(ExprEval lhsExpr, ExprEval rhsExpr); } - class ArrayLengthFunction extends ArrayFunction + class ArrayLengthFunction implements ArrayFunction { @Override public String name() From db6c470302b92fdd66f9643cc671a67bc0297171 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 30 May 2019 16:31:14 -0700 Subject: [PATCH 21/48] needs more empty array --- .../main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 | 1 + .../java/org/apache/druid/math/expr/ExprListenerImpl.java | 6 ++++++ .../java/org/apache/druid/math/expr/ApplyFunctionTest.java | 2 ++ 3 files changed, 9 insertions(+) diff --git a/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 b/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 index dd72a2a2f0d5..85c301f8e8c6 100644 --- a/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 +++ b/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 @@ -32,6 +32,7 @@ expr : 'null' # null | '[' DOUBLE (','? DOUBLE)* ']' # doubleArray | '[' LONG (','? LONG)* ']' # longArray | '[' STRING (','? STRING)* ']' # stringArray + | '[]' # emptyArray ; lambda : (IDENTIFIER | '(' IDENTIFIER (','? 
IDENTIFIER)* ')') '->' expr diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java b/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java index 152f6f174e91..9b1b0b89c065 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java @@ -396,6 +396,12 @@ public void exitStringArray(ExprParser.StringArrayContext ctx) nodes.put(ctx, new StringArrayExpr(values)); } + @Override + public void exitEmptyArray(ExprParser.EmptyArrayContext ctx) + { + nodes.put(ctx, new StringArrayExpr(new String[0])); + } + private static String escapeStringLiteral(String text) { String unquoted = text.substring(1, text.length() - 1); diff --git a/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java index 0625f6c24cdc..f1175bbe634d 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java @@ -94,6 +94,8 @@ public void testFoldr() assertExpr("foldr((x, y) -> x + y, [1, 1, 1, 1, 1], 0)", 5L); assertExpr("foldr((b, acc) -> b * acc, map((b) -> b * 2, filter(b -> b > 3, b)), 1)", 80L); assertExpr("foldr((a, acc) -> concat(a, acc), a, '')", "foobarbazbarfoo"); + assertExpr("foldr((a, acc) -> array_append(acc, a), a, [])", new String[]{"foo", "bar", "baz", "foobar"}); + assertExpr("foldr((a, acc) -> array_append(acc, a), b, cast([], 'LONG_ARRAY'))", new Long[]{1L, 2L, 3L, 4L, 5L}); } @Test From 364ddb13ab463104e07fc77d34d5d146b4523bea Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 30 May 2019 18:57:03 -0700 Subject: [PATCH 22/48] more tests, make cast to long array more forgiving --- .../java/org/apache/druid/math/expr/ExprEval.java | 5 ++++- .../java/org/apache/druid/math/expr/FunctionTest.java | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git 
a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java index 9b1b64d27d85..54e766b90769 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -803,7 +803,10 @@ private Long[] computeLongs() if (value == null) { return null; } - return Arrays.stream(value).map(GuavaUtils::tryParseLong).toArray(Long[]::new); + return Arrays.stream(value).map(value -> { + Long lv = GuavaUtils.tryParseLong(value); + return lv != null ? lv : Doubles.tryParse(value).longValue(); + }).toArray(Long[]::new); } @Nullable diff --git a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java index 211b598be4c5..a2825bc5d53d 100644 --- a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -228,6 +228,7 @@ public void testArrayConcat() assertExpr("array_concat([1 2 3], [2 4 6])", new Long[]{1L, 2L, 3L, 2L, 4L, 6L}); assertExpr("array_concat([1 2 3], 4)", new Long[]{1L, 2L, 3L, 4L}); assertExpr("array_concat(0, [1 2 3])", new Long[]{0L, 1L, 2L, 3L}); + assertExpr("array_concat(map(y -> y * 3, b), [1 2 3])", new Double[]{1.0, 2.0, 3.3, 1.0, 2.0, 3.0}); assertExpr("array_concat(0, 1)", new Long[]{0L, 1L}); } @@ -247,6 +248,16 @@ public void testStringToArray() assertExpr("string_to_array(array_to_string(a, ','), ',')", new String[]{"foo", "bar", "baz", "foobar"}); } + @Test + public void testArrayCast() + { + assertExpr("cast([1, 2, 3], 'STRING_ARRAY')", new String[]{"1", "2", "3"}); + assertExpr("cast([1, 2, 3], 'DOUBLE_ARRAY')", new Double[]{1.0, 2.0, 3.0}); + assertExpr("cast(c, 'LONG_ARRAY')", new Long[]{3L, 4L, 5L}); + assertExpr("cast(string_to_array(array_to_string(b, ','), ','), 'LONG_ARRAY')", new Long[]{1L, 2L, 3L, 4L, 5L}); + assertExpr("cast(['1.0' '2.0', '3.0'], 
'LONG_ARRAY')", new Long[]{1L, 2L, 3L}); + } + private void assertExpr(final String expression, final Object expectedResult) { final Expr expr = Parser.parse(expression, ExprMacroTable.nil()); From 1e242961cedeee650919c98a2a3e30390e132a4f Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sat, 1 Jun 2019 02:16:05 -0700 Subject: [PATCH 23/48] refactor --- .../org/apache/druid/math/expr/antlr/Expr.g4 | 8 +- .../apache/druid/math/expr/ApplyFunction.java | 16 +- .../java/org/apache/druid/math/expr/Expr.java | 230 +++++++++++++++++- .../org/apache/druid/math/expr/Function.java | 22 +- .../org/apache/druid/math/expr/Parser.java | 216 ++-------------- .../druid/math/expr/ApplyFunctionTest.java | 24 +- .../apache/druid/math/expr/FunctionTest.java | 28 +-- .../apache/druid/math/expr/ParserTest.java | 141 +++++++++-- docs/content/misc/math-expr.md | 4 +- .../expressions/BloomFilterExprMacro.java | 11 + .../SimpleDoubleAggregatorFactory.java | 2 +- .../SimpleFloatAggregatorFactory.java | 2 +- .../SimpleLongAggregatorFactory.java | 2 +- .../post/ExpressionPostAggregator.java | 3 +- .../druid/query/expression/LikeExprMacro.java | 11 + .../query/expression/LookupExprMacro.java | 11 + .../expression/RegexpExtractExprMacro.java | 11 + .../expression/TimestampCeilExprMacro.java | 23 ++ .../expression/TimestampExtractExprMacro.java | 11 + .../expression/TimestampFloorExprMacro.java | 28 +++ .../expression/TimestampFormatExprMacro.java | 6 + .../expression/TimestampParseExprMacro.java | 11 + .../expression/TimestampShiftExprMacro.java | 28 +++ .../druid/query/expression/TrimExprMacro.java | 28 +++ .../query/filter/ExpressionDimFilter.java | 2 +- .../segment/filter/ExpressionFilter.java | 4 +- .../segment/virtual/ExpressionSelectors.java | 22 +- .../virtual/ExpressionVirtualColumn.java | 2 +- ...ueStringExpressionColumnValueSelector.java | 10 +- ...tCachingExpressionColumnValueSelector.java | 3 +- ...tCachingExpressionColumnValueSelector.java | 3 +- 
.../SingleStringInputDimensionSelector.java | 3 +- .../druid/query/MultiValuedDimensionTest.java | 8 +- .../sql/calcite/expression/Expressions.java | 2 +- 34 files changed, 628 insertions(+), 308 deletions(-) diff --git a/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 b/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 index 85c301f8e8c6..b2f98c23b296 100644 --- a/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 +++ b/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 @@ -29,13 +29,13 @@ expr : 'null' # null | DOUBLE # doubleExpr | LONG # longExpr | STRING # string - | '[' DOUBLE (','? DOUBLE)* ']' # doubleArray - | '[' LONG (','? LONG)* ']' # longArray - | '[' STRING (','? STRING)* ']' # stringArray + | '[' DOUBLE (',' DOUBLE)* ']' # doubleArray + | '[' LONG (',' LONG)* ']' # longArray + | '[' STRING (',' STRING)* ']' # stringArray | '[]' # emptyArray ; -lambda : (IDENTIFIER | '(' IDENTIFIER (','? IDENTIFIER)* ')') '->' expr +lambda : (IDENTIFIER | '(' (IDENTIFIER (',' IDENTIFIER)*)? 
')') '->' expr ; fnArgs : expr (',' expr)* # functionArgs diff --git a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java index fbe27c129fae..47833a999f49 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java +++ b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java @@ -180,9 +180,9 @@ public Set getArrayInputs(List args) } } - abstract class BaseFoldrFunction implements ApplyFunction + abstract class BaseFoldFunction implements ApplyFunction { - ExprEval applyFoldr(LambdaExpr lambdaExpr, Object accumulator, int length, IndexableFoldLambdaBinding bindings) + ExprEval applyFold(LambdaExpr lambdaExpr, Object accumulator, int length, IndexableFoldLambdaBinding bindings) { for (int i = 0; i < length; i++) { ExprEval evaluated = lambdaExpr.eval(bindings.accumulateWithIndex(i, accumulator)); @@ -192,9 +192,9 @@ ExprEval applyFoldr(LambdaExpr lambdaExpr, Object accumulator, int length, Index } } - class FoldrFunction extends BaseFoldrFunction + class FoldFunction extends BaseFoldFunction { - static final String NAME = "foldr"; + static final String NAME = "fold"; @Override public String name() @@ -219,7 +219,7 @@ public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBin Object accumlator = accEval.value(); FoldLambdaBinding lambdaBinding = new FoldLambdaBinding(array, accumlator, lambdaExpr, bindings); - return applyFoldr(lambdaExpr, accumlator, array.length, lambdaBinding); + return applyFold(lambdaExpr, accumlator, array.length, lambdaBinding); } @Override @@ -230,9 +230,9 @@ public Set getArrayInputs(List args) } } - class CartesianFoldrFunction extends BaseFoldrFunction + class CartesianFoldFunction extends BaseFoldFunction { - static final String NAME = "cartesian_foldr"; + static final String NAME = "cartesian_fold"; @Override public String name() @@ -276,7 +276,7 @@ public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, 
Expr.ObjectBin CartesianFoldLambdaBinding lambdaBindings = new CartesianFoldLambdaBinding(product, accumlator, lambdaExpr, bindings); - return applyFoldr(lambdaExpr, accumlator, product.size(), lambdaBindings); + return applyFold(lambdaExpr, accumlator, product.size(), lambdaBindings); } @Override diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index 8a3c127a52f3..46b09e895c66 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -20,18 +20,25 @@ package org.apache.druid.math.expr; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; import com.google.common.math.LongMath; import com.google.common.primitives.Ints; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.guava.Comparators; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Objects; +import java.util.Set; import java.util.stream.Collectors; /** @@ -65,6 +72,16 @@ default Object getLiteralValue() throw new ISE("Not a literal"); } + /** + * Returns the string identifier of an {@link IdentifierExpr}, else null + */ + @Nullable + default String getIdentifierIfIdentifier() + { + // overridden by things that are identifiers + return null; + } + /** * Evaluate the {@link Expr} with the bindings which supply {@link IdentifierExpr} with their values, producing an * {@link ExprEval} with the result. 
@@ -85,6 +102,8 @@ default Object getLiteralValue() */ Expr visit(Shuttle shuttle); + BindingDetails analyzeInputs(); + /** * Mechanism to supply values to back {@link IdentifierExpr} during expression evaluation */ @@ -120,6 +139,86 @@ interface Shuttle */ Expr visit(Expr expr); } + + class BindingDetails + { + private final Set freeVariables; + private final Set scalarVariables; + private final Set arrayVariables; + + public BindingDetails() + { + this(Collections.emptySet(), Collections.emptySet(), Collections.emptySet()); + } + + public BindingDetails(String identifier) + { + this(ImmutableSet.of(identifier), Collections.emptySet(), Collections.emptySet()); + } + + public BindingDetails(Set freeVariables, Set scalarVariables, Set arrayVariables) + { + this.freeVariables = freeVariables; + this.scalarVariables = scalarVariables; + this.arrayVariables = arrayVariables; + } + + public List getRequiredColumns() + { + return new ArrayList<>(freeVariables); + } + + public Set getFreeVariables() + { + return freeVariables; + } + + public Set getScalarVariables() + { + return scalarVariables; + } + + public Set getArrayVariables() + { + return arrayVariables; + } + + public BindingDetails merge(BindingDetails other) + { + return new BindingDetails( + Sets.union(freeVariables, other.freeVariables), + Sets.union(scalarVariables, other.scalarVariables), + Sets.union(arrayVariables, other.arrayVariables) + ); + } + + public BindingDetails mergeWith(Set moreScalars, Set moreArrays) + { + return new BindingDetails( + Sets.union(freeVariables, Sets.union(moreScalars, moreArrays)), + Sets.union(scalarVariables, moreScalars), + Sets.union(arrayVariables, moreArrays) + ); + } + + public BindingDetails mergeWithScalars(Set moreScalars) + { + return new BindingDetails( + Sets.union(freeVariables, moreScalars), + Sets.union(scalarVariables, moreScalars), + arrayVariables + ); + } + + public BindingDetails mergeWithArrays(Set moreArrays) + { + return new BindingDetails( + 
Sets.union(freeVariables, moreArrays), + scalarVariables, + Sets.union(arrayVariables, moreArrays) + ); + } + } } abstract class ConstantExpr implements Expr @@ -141,6 +240,12 @@ public Expr visit(Shuttle shuttle) { return shuttle.visit(this); } + + @Override + public BindingDetails analyzeInputs() + { + return new BindingDetails(); + } } abstract class ConstantArrayExpr extends ConstantExpr @@ -201,7 +306,7 @@ public Object getLiteralValue() @Override public String toString() { - return Arrays.stream(value).map(String::valueOf).collect(Collectors.joining(", ")); + return Arrays.toString(value); } @Nonnull @@ -261,7 +366,7 @@ public Object getLiteralValue() @Override public String toString() { - return String.join(", ", value); + return Arrays.toString(value); } @Nonnull @@ -321,7 +426,7 @@ public Object getLiteralValue() @Override public String toString() { - return Arrays.stream(value).map(String::valueOf).collect(Collectors.joining(", ")); + return Arrays.toString(value); } @Nonnull @@ -347,6 +452,19 @@ public String toString() return value; } + @Nullable + @Override + public String getIdentifierIfIdentifier() + { + return value; + } + + @Override + public BindingDetails analyzeInputs() + { + return new BindingDetails(value); + } + @Nonnull @Override public ExprEval eval(ObjectBinding bindings) @@ -381,7 +499,7 @@ class LambdaExpr implements Expr @Override public String toString() { - return "(" + args + " -> " + expr + ")"; + return StringUtils.format("(%s -> %s)", args, expr); } public String getIdentifier() @@ -427,6 +545,18 @@ public Expr visit(Shuttle shuttle) Expr newBody = expr.visit(shuttle); return shuttle.visit(new LambdaExpr(newArgs, newBody)); } + + @Override + public BindingDetails analyzeInputs() + { + final Set lambdaArgs = args.stream().map(IdentifierExpr::toString).collect(Collectors.toSet()); + BindingDetails bodyDetails = expr.analyzeInputs(); + return new BindingDetails( + Sets.difference(bodyDetails.getFreeVariables(), lambdaArgs), + 
Sets.difference(bodyDetails.getScalarVariables(), lambdaArgs), + Sets.difference(bodyDetails.getArrayVariables(), lambdaArgs) + ); + } } class FunctionExpr implements Expr @@ -445,7 +575,7 @@ class FunctionExpr implements Expr @Override public String toString() { - return "(" + name + " " + args + ")"; + return StringUtils.format("(%s %s)", name, args); } @Nonnull @@ -470,6 +600,33 @@ public Expr visit(Shuttle shuttle) List newArgs = args.stream().map(shuttle::visit).collect(Collectors.toList()); return shuttle.visit(new FunctionExpr(function, name, newArgs)); } + + @Override + public BindingDetails analyzeInputs() + { + final Set scalarVariables = new HashSet<>(); + final Set arrayVariables = new HashSet<>(); + final Set scalarArgs = function.getScalarInputs(args); + final Set arrayArgs = function.getArrayInputs(args); + BindingDetails accumulator = new BindingDetails(); + + for (Expr arg : args) { + accumulator = accumulator.merge(arg.analyzeInputs()); + } + for (Expr arg : scalarArgs) { + String s = arg.getIdentifierIfIdentifier(); + if (s != null) { + scalarVariables.add(s); + } + } + for (Expr arg : arrayArgs) { + String s = arg.getIdentifierIfIdentifier(); + if (s != null) { + arrayVariables.add(s); + } + } + return accumulator.mergeWith(scalarVariables, arrayVariables); + } } class ApplyFunctionExpr implements Expr @@ -490,7 +647,7 @@ class ApplyFunctionExpr implements Expr @Override public String toString() { - return "(" + name + " " + lambdaExpr + ", " + argsExpr + ")"; + return StringUtils.format("(%s %s, %s)", name, lambdaExpr, argsExpr); } @Nonnull @@ -517,6 +674,26 @@ public Expr visit(Shuttle shuttle) List newArgs = argsExpr.stream().map(shuttle::visit).collect(Collectors.toList()); return shuttle.visit(new ApplyFunctionExpr(function, name, newLambda, newArgs)); } + + @Override + public BindingDetails analyzeInputs() + { + BindingDetails accumulator = new BindingDetails(); + for (Expr arg : argsExpr) { + accumulator = 
accumulator.merge(arg.analyzeInputs()); + } + + final Set arrayVariables = new HashSet<>(); + Set arrayArgs = function.getArrayInputs(argsExpr); + + for (Expr arg : arrayArgs) { + String s = arg.getIdentifierIfIdentifier(); + if (s != null) { + arrayVariables.add(s); + } + } + return accumulator.merge(lambdaExpr.analyzeInputs()).mergeWithArrays(arrayVariables); + } } abstract class UnaryExpr implements Expr @@ -546,6 +723,20 @@ public Expr visit(Shuttle shuttle) } return shuttle.visit(this); } + + @Override + public BindingDetails analyzeInputs() + { + // currently all unary operators only operate on scalar inputs + final Set scalars; + final String identifierMaybe = expr.getIdentifierIfIdentifier(); + if (identifierMaybe != null) { + scalars = ImmutableSet.of(identifierMaybe); + } else { + scalars = Collections.emptySet(); + } + return expr.analyzeInputs().mergeWithScalars(scalars); + } } class UnaryMinusExpr extends UnaryExpr @@ -581,7 +772,7 @@ public ExprEval eval(ObjectBinding bindings) @Override public String toString() { - return "-" + expr; + return StringUtils.format("-%s", expr); } } @@ -614,7 +805,7 @@ public ExprEval eval(ObjectBinding bindings) @Override public String toString() { - return "!" 
+ expr; + return StringUtils.format("!%s", expr); } } @@ -623,8 +814,8 @@ public String toString() abstract class BinaryOpExprBase implements Expr { protected final String op; - protected Expr left; - protected Expr right; + protected final Expr left; + protected final Expr right; BinaryOpExprBase(String op, Expr left, Expr right) { @@ -655,11 +846,28 @@ public Expr visit(Shuttle shuttle) @Override public String toString() { - return "(" + op + " " + left + " " + right + ")"; + return StringUtils.format("(%s %s %s)", op, left, right); } protected abstract BinaryOpExprBase copy(Expr left, Expr right); + @Override + public BindingDetails analyzeInputs() + { + // currently all binary operators operate on scalar inputs + final Set scalars = new HashSet<>(); + final String leftIdentifer = left.getIdentifierIfIdentifier(); + final String rightIdentifier = right.getIdentifierIfIdentifier(); + if (leftIdentifer != null) { + scalars.add(leftIdentifer); + } + if (rightIdentifier != null) { + scalars.add(rightIdentifier); + } + return left.analyzeInputs() + .merge(right.analyzeInputs()) + .mergeWithScalars(scalars); + } } abstract class BinaryEvalOpExprBase extends BinaryOpExprBase diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index d36ad38fa09b..cda1989bfeb7 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -58,6 +58,15 @@ default Set getScalarInputs(List args) return ImmutableSet.copyOf(args); } + /** + * Given a list of arguments to this {@link Function}, get the set of arguments that must evaluate to an array + * value + */ + default Set getArrayInputs(List args) + { + return Collections.emptySet(); + } + abstract class SingleParam implements Function { @Override @@ -933,7 +942,7 @@ public ExprEval apply(final List args, final Expr.ObjectBinding bindings) } } - class CastFunc extends 
DoubleParam implements ArrayFunction + class CastFunc extends DoubleParam { @Override public String name() @@ -957,12 +966,6 @@ protected ExprEval eval(ExprEval x, ExprEval y) return x.castTo(castTo); } - @Override - public void validateArguments(List args) - { - // side effect of array function interface, nothing to do - } - @Override public Set getScalarInputs(List args) { @@ -1562,10 +1565,7 @@ default void validateArguments(List args) } } - /** - * Given a list of arguments to this {@link ArrayFunction}, get the set of arguments that must evaluate to an array - * value - */ + @Override default Set getArrayInputs(List args) { validateArguments(args); diff --git a/core/src/main/java/org/apache/druid/math/expr/Parser.java b/core/src/main/java/org/apache/druid/math/expr/Parser.java index b358f4a53e83..3a47e62ceaeb 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/core/src/main/java/org/apache/druid/math/expr/Parser.java @@ -25,8 +25,8 @@ import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.tree.ParseTree; @@ -37,13 +37,9 @@ import org.apache.druid.math.expr.antlr.ExprLexer; import org.apache.druid.math.expr.antlr.ExprParser; -import javax.annotation.Nullable; import java.lang.reflect.Modifier; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -162,12 +158,6 @@ public static Expr applyUnappliedIdentifiers(Expr expr, List unapplied) { Preconditions.checkArgument(unapplied.size() > 0); - // special handle if expr is just array identifier or array is being 
directly cast, that doesn't count - String s = Parser.getIdentifierOrCastIdentifier(expr); - if (s != null) { - return expr; - } - ApplyFunction fn; final LambdaExpr lambdaExpr; final List args; @@ -189,7 +179,7 @@ public Expr visit(Expr expr) Set arrayInputs = arrayFn.getArrayInputs(fnExpr.args); List newArgs = new ArrayList<>(); for (Expr arg : fnExpr.args) { - if (Parser.getIdentifierOrCastIdentifier(arg) == null && arrayInputs.contains(arg)) { + if (arg.getIdentifierIfIdentifier() == null && arrayInputs.contains(arg)) { Expr newArg = applyUnappliedIdentifiers(arg, unapplied); newArgs.add(newArg); } else { @@ -204,7 +194,9 @@ public Expr visit(Expr expr) } } ); - final Set expectedArrays = Parser.findArrayFnBindings(newExpr); + + Expr.BindingDetails newExprBindings = newExpr.analyzeInputs(); + final Set expectedArrays = newExprBindings.getArrayVariables(); List remainingUnappliedArgs = unapplied.stream().filter(x -> !expectedArrays.contains(x)).collect(Collectors.toList()); @@ -247,8 +239,9 @@ public Expr visit(Expr expr) */ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List unappliedArgs) { + Expr.BindingDetails lambdaBinding = expr.lambdaExpr.analyzeInputs(); // this will _not_ include the lambda identifiers.. 
anything in this list needs to be applied - List unappliedLambdaBindings = Parser.findRequiredBindings(expr.lambdaExpr) + List unappliedLambdaBindings = lambdaBinding.getFreeVariables() .stream() .filter(unappliedArgs::contains) .map(IdentifierExpr::new) @@ -291,10 +284,10 @@ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List acc + x + y, x, acc) => cartesian_foldr((x, y, acc) -> acc + x + y, x, y, acc) - // cartesian_foldr((x, y, acc) -> acc + x + y + z, x, y, acc) => cartesian_foldr((x, y, z, acc) -> acc + x + y + z, x, y, z, acc) + case ApplyFunction.FoldFunction.NAME: + case ApplyFunction.CartesianFoldFunction.NAME: + // fold((x, acc) -> acc + x + y, x, acc) => cartesian_fold((x, y, acc) -> acc + x + y, x, y, acc) + // cartesian_fold((x, y, acc) -> acc + x + y + z, x, y, acc) => cartesian_fold((x, y, z, acc) -> acc + x + y + z, x, y, z, acc) final List newFoldArgs = new ArrayList<>(expr.argsExpr.size() + unappliedArgs.size()); final List newFoldLambdaIdentifiers = new ArrayList<>(expr.lambdaExpr.getIdentifiers().size() + unappliedArgs.size()); @@ -311,7 +304,7 @@ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List findRequiredBindings(Expr expr) - { - final Set found = new LinkedHashSet<>(); - expr.visit( - new Expr.Visitor() - { - @Override - public void visit(Expr expr) - { - if (expr instanceof IdentifierExpr) { - found.add(expr.toString()); - } else if (expr instanceof LambdaExpr) { - LambdaExpr lambda = (LambdaExpr) expr; - for (String identifier : lambda.getIdentifiers()) { - found.remove(identifier); - } - } - } - } - ); - return Lists.newArrayList(found); - } - - public static Set findArrayFnBindings(Expr expr) - { - final Set arrayFnBindings = new LinkedHashSet<>(); - expr.visit(new Expr.Visitor() - { - @Override - public void visit(Expr expr) - { - final Set arrayArgs; - if (expr instanceof FunctionExpr && ((FunctionExpr) expr).function instanceof Function.ArrayFunction) { - FunctionExpr fnExpr = 
(FunctionExpr) expr; - Function.ArrayFunction fn = (Function.ArrayFunction) fnExpr.function; - arrayArgs = fn.getArrayInputs(fnExpr.args); - } else if (expr instanceof ApplyFunctionExpr) { - ApplyFunctionExpr applyExpr = (ApplyFunctionExpr) expr; - arrayArgs = applyExpr.function.getArrayInputs(applyExpr.argsExpr); - } else { - arrayArgs = Collections.emptySet(); - } - for (Expr arg : arrayArgs) { - String s = getIdentifierOrCastIdentifier(arg); - if (s != null) { - arrayFnBindings.add(s); - } - } - } - }); - return arrayFnBindings; - } - - /** - * Visits all nodes of an {@link Expr}, collecting information about how {@link IdentifierExpr} are used - */ - public static BindingDetails examineBindings(Expr expr) - { - final Set freeVariables = new HashSet<>(); - final Set scalarVariables = new HashSet<>(); - final Set arrayVariables = new HashSet<>(); - expr.visit(childExpr -> { - if (childExpr instanceof IdentifierExpr) { - // all identifiers are free variables ... - freeVariables.add(childExpr.toString()); - } else if (childExpr instanceof LambdaExpr) { - // ... unless they are erased by appearing in a lambda expression's arguments because they will be bound by - // the apply expression that wraps the lambda - LambdaExpr lambda = (LambdaExpr) childExpr; - for (String identifier : lambda.getIdentifiers()) { - freeVariables.remove(identifier); - scalarVariables.remove(identifier); - arrayVariables.remove(identifier); - } - } else { - // shallowly examining function expressions and apply function expressions can give us some context about if - // identifiers are used as scalar or array arguments to these functions. 
all identifiers should be encountered - // at some point, so we can use this to validate that identifiers are not used in inconsistent ways - final Set scalarArgs; - final Set arrayArgs; - if (childExpr instanceof FunctionExpr) { - FunctionExpr fnExpr = (FunctionExpr) childExpr; - scalarArgs = fnExpr.function.getScalarInputs(fnExpr.args); - - if (fnExpr.function instanceof Function.ArraysFunction) { - Function.ArrayFunction fn = (Function.ArrayFunction) fnExpr.function; - arrayArgs = fn.getArrayInputs(fnExpr.args); - } else { - arrayArgs = Collections.emptySet(); - } - } else if (childExpr instanceof ApplyFunctionExpr) { - ApplyFunctionExpr applyExpr = (ApplyFunctionExpr) childExpr; - scalarArgs = Collections.emptySet(); - arrayArgs = applyExpr.function.getArrayInputs(applyExpr.argsExpr); - } else if (childExpr instanceof BinaryOpExprBase) { - BinaryOpExprBase binExpr = (BinaryOpExprBase) childExpr; - scalarArgs = ImmutableSet.of(binExpr.left, binExpr.right); - arrayArgs = Collections.emptySet(); - } else if (childExpr instanceof UnaryExpr) { - UnaryExpr unaryExpr = (UnaryExpr) childExpr; - scalarArgs = ImmutableSet.of(unaryExpr.expr); - arrayArgs = Collections.emptySet(); - } else { - // bail, child expression is not a function, apply function, or operator, nothing for us here - return; - } - for (Expr arg : scalarArgs) { - String s = getIdentifierIfIdentifier(arg); - if (s != null) { - scalarVariables.add(s); - } - } - for (Expr arg : arrayArgs) { - String s = getIdentifierOrCastIdentifier(arg); - if (s != null) { - arrayVariables.add(s); - } - } - } - }); - for (String identifier : scalarVariables) { - if (arrayVariables.contains(identifier)) { - throw new RE("Invalid expression: %s; identifier [%s] used as both scalar and array", expr, identifier); - } - } - return new BindingDetails(freeVariables, scalarVariables, arrayVariables); - } - - @Nullable - public static String getIdentifierOrCastIdentifier(Expr expr) + public static void validateExpr(Expr 
expression, Expr.BindingDetails bindingDetails) { - if (expr instanceof IdentifierExpr) { - return expr.toString(); - } else if (expr instanceof FunctionExpr && ((FunctionExpr) expr).function instanceof Function.CastFunc) { - FunctionExpr fn = (FunctionExpr) expr; - return getIdentifierOrCastIdentifier(fn.args.get(0)); - } - return null; - } - - @Nullable - public static String getIdentifierIfIdentifier(Expr expr) - { - if (expr instanceof IdentifierExpr) { - return expr.toString(); - } else { - return null; + final Set inconsistentIdentifierUsage = + Sets.intersection(bindingDetails.getScalarVariables(), bindingDetails.getArrayVariables()); + if (inconsistentIdentifierUsage.size() != 0) { + throw new RE("Invalid expression: %s; %s used as both scalar and array variables", expression, inconsistentIdentifierUsage); } } @@ -483,38 +335,4 @@ public static Expr.ObjectBinding withSuppliers(final Map freeVariables; - private final Set scalarVariables; - private final Set arrayVariables; - - BindingDetails(Set freeVariables, Set scalarVariables, Set arrayVariables) - { - this.freeVariables = freeVariables; - this.scalarVariables = scalarVariables; - this.arrayVariables = arrayVariables; - } - - public List getRequiredColumns() - { - return new ArrayList<>(freeVariables); - } - - public Set getFreeVariables() - { - return freeVariables; - } - - public Set getScalarVariables() - { - return scalarVariables; - } - - public Set getArrayVariables() - { - return arrayVariables; - } - } } diff --git a/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java index f1175bbe634d..30dcbe41293e 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java @@ -89,19 +89,19 @@ public void testFilter() } @Test - public void testFoldr() + public void testFold() { - assertExpr("foldr((x, y) -> x + y, [1, 1, 1, 1, 
1], 0)", 5L); - assertExpr("foldr((b, acc) -> b * acc, map((b) -> b * 2, filter(b -> b > 3, b)), 1)", 80L); - assertExpr("foldr((a, acc) -> concat(a, acc), a, '')", "foobarbazbarfoo"); - assertExpr("foldr((a, acc) -> array_append(acc, a), a, [])", new String[]{"foo", "bar", "baz", "foobar"}); - assertExpr("foldr((a, acc) -> array_append(acc, a), b, cast([], 'LONG_ARRAY'))", new Long[]{1L, 2L, 3L, 4L, 5L}); + assertExpr("fold((x, y) -> x + y, [1, 1, 1, 1, 1], 0)", 5L); + assertExpr("fold((b, acc) -> b * acc, map((b) -> b * 2, filter(b -> b > 3, b)), 1)", 80L); + assertExpr("fold((a, acc) -> concat(a, acc), a, '')", "foobarbazbarfoo"); + assertExpr("fold((a, acc) -> array_append(acc, a), a, [])", new String[]{"foo", "bar", "baz", "foobar"}); + assertExpr("fold((a, acc) -> array_append(acc, a), b, cast([], 'LONG_ARRAY'))", new Long[]{1L, 2L, 3L, 4L, 5L}); } @Test - public void testCartesianFoldr() + public void testCartesianFold() { - assertExpr("cartesian_foldr((x, y, acc) -> x + y + acc, [1, 1, 1, 1, 1], [1, 1], 0)", 20L); + assertExpr("cartesian_fold((x, y, acc) -> x + y + acc, [1, 1, 1, 1, 1], [1, 1], 0)", 20L); } @Test @@ -116,10 +116,10 @@ public void testAnyMatch() @Test public void testAllMatch() { - assertExpr("all(x -> x > 0, [1 2 3 4])", "true"); - assertExpr("all(x -> x > 1, [1 2 3 4])", "false"); - assertExpr("all(x -> x, map(x -> x > 0, [1 2 3 4]))", "true"); - assertExpr("all(x -> x, map(x -> x > 1, [1 2 3 4]))", "false"); + assertExpr("all(x -> x > 0, [1, 2, 3, 4])", "true"); + assertExpr("all(x -> x > 1, [1, 2, 3, 4])", "false"); + assertExpr("all(x -> x, map(x -> x > 0, [1, 2, 3, 4]))", "true"); + assertExpr("all(x -> x, map(x -> x > 1, [1, 2, 3, 4]))", "false"); } private void assertExpr(final String expression, final Object expectedResult) diff --git a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java index a2825bc5d53d..1e7a2c870b4e 100644 --- 
a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -201,34 +201,34 @@ public void testArrayOrdinalOf() @Test public void testArrayContains() { - assertExpr("array_contains([1 2 3], 2)", "true"); - assertExpr("array_contains([1 2 3], 4)", "false"); - assertExpr("array_contains([1 2 3], [2 3])", "true"); - assertExpr("array_contains([1 2 3], [3 4])", "false"); - assertExpr("array_contains(b, [3 4])", "true"); + assertExpr("array_contains([1, 2, 3], 2)", "true"); + assertExpr("array_contains([1, 2, 3], 4)", "false"); + assertExpr("array_contains([1, 2, 3], [2, 3])", "true"); + assertExpr("array_contains([1, 2, 3], [3, 4])", "false"); + assertExpr("array_contains(b, [3, 4])", "true"); } @Test public void testArrayOverlap() { - assertExpr("array_overlap([1 2 3], [2 4 6])", "true"); - assertExpr("array_overlap([1 2 3], [4 5 6])", "false"); + assertExpr("array_overlap([1, 2, 3], [2, 4, 6])", "true"); + assertExpr("array_overlap([1, 2, 3], [4, 5, 6])", "false"); } @Test public void testArrayAppend() { - assertExpr("array_append([1 2 3], 4)", new Long[]{1L, 2L, 3L, 4L}); - assertExpr("array_append([1 2 3], 'bar')", new Long[]{1L, 2L, 3L, null}); + assertExpr("array_append([1, 2, 3], 4)", new Long[]{1L, 2L, 3L, 4L}); + assertExpr("array_append([1, 2, 3], 'bar')", new Long[]{1L, 2L, 3L, null}); } @Test public void testArrayConcat() { - assertExpr("array_concat([1 2 3], [2 4 6])", new Long[]{1L, 2L, 3L, 2L, 4L, 6L}); - assertExpr("array_concat([1 2 3], 4)", new Long[]{1L, 2L, 3L, 4L}); - assertExpr("array_concat(0, [1 2 3])", new Long[]{0L, 1L, 2L, 3L}); - assertExpr("array_concat(map(y -> y * 3, b), [1 2 3])", new Double[]{1.0, 2.0, 3.3, 1.0, 2.0, 3.0}); + assertExpr("array_concat([1, 2, 3], [2, 4, 6])", new Long[]{1L, 2L, 3L, 2L, 4L, 6L}); + assertExpr("array_concat([1, 2, 3], 4)", new Long[]{1L, 2L, 3L, 4L}); + assertExpr("array_concat(0, [1, 2, 3])", new Long[]{0L, 1L, 2L, 3L}); + 
assertExpr("array_concat(map(y -> y * 3, b), [1, 2, 3])", new Long[]{3L, 6L, 9L, 12L, 15L, 1L, 2L, 3L}); assertExpr("array_concat(0, 1)", new Long[]{0L, 1L}); } @@ -255,7 +255,7 @@ public void testArrayCast() assertExpr("cast([1, 2, 3], 'DOUBLE_ARRAY')", new Double[]{1.0, 2.0, 3.0}); assertExpr("cast(c, 'LONG_ARRAY')", new Long[]{3L, 4L, 5L}); assertExpr("cast(string_to_array(array_to_string(b, ','), ','), 'LONG_ARRAY')", new Long[]{1L, 2L, 3L, 4L, 5L}); - assertExpr("cast(['1.0' '2.0', '3.0'], 'LONG_ARRAY')", new Long[]{1L, 2L, 3L}); + assertExpr("cast(['1.0', '2.0', '3.0'], 'LONG_ARRAY')", new Long[]{1L, 2L, 3L}); } private void assertExpr(final String expression, final Object expectedResult) diff --git a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java index ddf621426bc5..a69c9d0a779e 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java @@ -21,10 +21,13 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import org.junit.Assert; import org.junit.Test; +import java.util.Collections; import java.util.List; +import java.util.Set; /** */ @@ -163,10 +166,10 @@ public void testMixed() @Test public void testIdentifiers() { - validateParser("foo", "foo", ImmutableList.of("foo")); - validateParser("\"foo\"", "foo", ImmutableList.of("foo")); - validateParser("\"foo bar\"", "foo bar", ImmutableList.of("foo bar")); - validateParser("\"foo\\\"bar\"", "foo\"bar", ImmutableList.of("foo\"bar")); + validateParser("foo", "foo", ImmutableList.of("foo"), ImmutableSet.of()); + validateParser("\"foo\"", "foo", ImmutableList.of("foo"), ImmutableSet.of()); + validateParser("\"foo bar\"", "foo bar", ImmutableList.of("foo bar"), ImmutableSet.of()); + validateParser("\"foo\\\"bar\"", "foo\"bar", ImmutableList.of("foo\"bar"), 
ImmutableSet.of()); } @Test @@ -183,29 +186,116 @@ public void testLiterals() public void testLiteralArrays() { validateConstantExpression("[1.0, 2.345]", new Double[] {1.0, 2.345}); - validateConstantExpression("[1.0 2.345]", new Double[] {1.0, 2.345}); validateConstantExpression("[1, 3]", new Long[] {1L, 3L}); - validateConstantExpression("[1 3]", new Long[] {1L, 3L}); validateConstantExpression("[\'hello\', \'world\']", new String[] {"hello", "world"}); - validateConstantExpression("[\'hello\' \'world\']", new String[] {"hello", "world"}); } @Test - public void testApplyFunctions() + public void testFunctions() { - final Expr parsed = Parser.parse("map((x) -> x + 1, [1, 2, 3])", ExprMacroTable.nil()); - Assert.assertEquals("(map ([x] -> (+ x 1)), [1, 2, 3])", parsed.toString()); - ExprEval eval = parsed.eval(Parser.withMap(ImmutableMap.of())); - Assert.assertArrayEquals(new Long[]{2L, 3L, 4L}, (Long[]) eval.value()); + validateParser("sqrt(x)", "(sqrt [x])", ImmutableList.of("x")); + validateParser("if(cond,then,else)", "(if [cond, then, else])", ImmutableList.of("cond", "then", "else")); + validateParser("cast(x, 'STRING')", "(cast [x, STRING])", ImmutableList.of("x")); + validateParser("cast(x, 'LONG')", "(cast [x, LONG])", ImmutableList.of("x")); + validateParser("cast(x, 'DOUBLE')", "(cast [x, DOUBLE])", ImmutableList.of("x")); + validateParser("cast(x, 'STRING_ARRAY')", "(cast [x, STRING_ARRAY])", ImmutableList.of("x"), ImmutableSet.of(), ImmutableSet.of("x")); + validateParser("cast(x, 'LONG_ARRAY')", "(cast [x, LONG_ARRAY])", ImmutableList.of("x"), ImmutableSet.of(), ImmutableSet.of("x")); + validateParser("cast(x, 'DOUBLE_ARRAY')", "(cast [x, DOUBLE_ARRAY])", ImmutableList.of("x"), ImmutableSet.of(), ImmutableSet.of("x")); + validateParser( + "array_length(x)", + "(array_length [x])", + ImmutableList.of("x"), + ImmutableSet.of(), + ImmutableSet.of("x") + ); + validateParser( + "array_concat(x, y)", + "(array_concat [x, y])", + ImmutableList.of("x", "y"), 
+ ImmutableSet.of(), + ImmutableSet.of("x", "y") + ); + validateParser( + "array_append(x, y)", + "(array_append [x, y])", + ImmutableList.of("x", "y"), + ImmutableSet.of("y"), + ImmutableSet.of("x") + ); } @Test - public void testFunctions() + public void testApplyFunctions() { - validateParser("sqrt(x)", "(sqrt [x])", ImmutableList.of("x")); - validateParser("if(cond,then,else)", "(if [cond, then, else])", ImmutableList.of("cond", "then", "else")); + validateParser( + "map((x) -> x + 1, x)", + "(map ([x] -> (+ x 1)), [x])", + ImmutableList.of("x"), + ImmutableSet.of(), + ImmutableSet.of("x") + ); + validateParser( + "x + map((x) -> x + 1, y)", + "(+ x (map ([x] -> (+ x 1)), [y]))", + ImmutableList.of("x", "y"), + ImmutableSet.of("x"), + ImmutableSet.of("y") + ); + validateParser( + "x + map((x) -> x + 1, x)", + "(+ x (map ([x] -> (+ x 1)), [x]))", + ImmutableList.of("x"), + ImmutableSet.of("x"), + ImmutableSet.of("x") + ); + validateParser( + "map((x) -> concat(x, y), z)", + "(map ([x] -> (concat [x, y])), [z])", + ImmutableList.of("z", "y"), + ImmutableSet.of("y"), + ImmutableSet.of("z") + ); + // 'y' is accumulator, and currently unknown + validateParser( + "fold((x, acc) -> acc + x, x, y)", + "(fold ([x, acc] -> (+ acc x)), [x, y])", + ImmutableList.of("x", "y"), + ImmutableSet.of(), + ImmutableSet.of("x") + ); + + validateParser( + "fold((x, acc) -> acc + x, map((x) -> x + 1, x), y)", + "(fold ([x, acc] -> (+ acc x)), [(map ([x] -> (+ x 1)), [x]), y])", + ImmutableList.of("x", "y"), + ImmutableSet.of(), + ImmutableSet.of("x") + ); + validateParser( + "array_append(z, fold((x, acc) -> acc + x, map((x) -> x + 1, x), y))", + "(array_append [z, (fold ([x, acc] -> (+ acc x)), [(map ([x] -> (+ x 1)), [x]), y])])", + ImmutableList.of("z", "x", "y"), + ImmutableSet.of(), + ImmutableSet.of("x", "z") + ); + validateParser( + "map(z -> z + 1, array_append(z, fold((x, acc) -> acc + x, map((x) -> x + 1, x), y)))", + "(map ([z] -> (+ z 1)), [(array_append [z, (fold ([x, 
acc] -> (+ acc x)), [(map ([x] -> (+ x 1)), [x]), y])])])", + ImmutableList.of("z", "x", "y"), + ImmutableSet.of(), + ImmutableSet.of("x", "z") + ); + + validateParser( + "array_append(map(z -> z + 1, array_append(z, fold((x, acc) -> acc + x, map((x) -> x + 1, x), y))), a)", + "(array_append [(map ([z] -> (+ z 1)), [(array_append [z, (fold ([x, acc] -> (+ acc x)), [(map ([x] -> (+ x 1)), [x]), y])])]), a])", + ImmutableList.of("z", "x", "y", "a"), + ImmutableSet.of("a"), + ImmutableSet.of("x", "z") + ); } + private void validateFlatten(String expression, String withoutFlatten, String withFlatten) { Assert.assertEquals(expression, withoutFlatten, Parser.parse(expression, ExprMacroTable.nil(), false).toString()); @@ -213,10 +303,29 @@ private void validateFlatten(String expression, String withoutFlatten, String wi } private void validateParser(String expression, String expected, List identifiers) + { + validateParser(expression, expected, identifiers, ImmutableSet.copyOf(identifiers), Collections.emptySet()); + } + + private void validateParser(String expression, String expected, List identifiers, Set scalars) + { + validateParser(expression, expected, identifiers, scalars, Collections.emptySet()); + } + + private void validateParser( + String expression, + String expected, + List identifiers, + Set scalars, + Set arrays + ) { final Expr parsed = Parser.parse(expression, ExprMacroTable.nil()); + final Expr.BindingDetails deets = parsed.analyzeInputs(); Assert.assertEquals(expression, expected, parsed.toString()); - Assert.assertEquals(expression, identifiers, Parser.findRequiredBindings(parsed)); + Assert.assertEquals(expression, identifiers, deets.getRequiredColumns()); + Assert.assertEquals(expression, scalars, deets.getScalarVariables()); + Assert.assertEquals(expression, arrays, deets.getArrayVariables()); } private void validateConstantExpression(String expression, Object expected) diff --git a/docs/content/misc/math-expr.md b/docs/content/misc/math-expr.md 
index 9c16f3b23749..bfdb1108b353 100644 --- a/docs/content/misc/math-expr.md +++ b/docs/content/misc/math-expr.md @@ -182,7 +182,7 @@ See javadoc of java.lang.Math for detailed explanation for each function. | `map(lambda,arr)` | applies a transform specified by a single argument lambda expression to all elements of arr, returning a new array | | `cartesian_map(lambda,arr1,arr2,...)` | applies a transform specified by a multi argument lambda expression to all elements of the cartesian product of all input arrays, returning a new array; the number of lambda arguments and array inputs must be the same | | `filter(lambda,arr)` | filters arr by a single argument lambda, returning a new array with all matching elements, or null if no elements match | -| `foldr(lambda,arr)` | right folds a 2 argument lambda across arr. The first argument of the lambda is the array element and the second the accumulator, returning a single accumulated value. | -| `cartesian_foldr(lambda,arr1,arr2,...)` | right folds a multi argument lambda across the cartesian product of all input arrays. The first arguments of the lambda is the array element and the last is the accumulator, returning a single accumulated value. | +| `fold(lambda,arr,acc)` | folds a 2 argument lambda across arr, using acc as the initial accumulator value. The first argument of the lambda is the array element and the second the accumulator, returning a single accumulated value. | +| `cartesian_fold(lambda,arr1,arr2,...,acc)` | folds a multi argument lambda across the cartesian product of all input arrays, using acc as the initial accumulator value. The first arguments of the lambda are the array elements and the last is the accumulator, returning a single accumulated value. 
| | `any(lambda,arr)` | returns true if any element in the array matches the lambda expression | | `all(lambda,arr)` | returns true if all elements in the array matches the lambda expression | diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java index c9f561a94251..22380e66074b 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java @@ -19,6 +19,7 @@ package org.apache.druid.query.expressions; +import com.google.common.collect.ImmutableSet; import org.apache.druid.guice.BloomFilterSerializersModule; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; @@ -131,6 +132,16 @@ public Expr visit(Shuttle shuttle) Expr newArg = arg.visit(shuttle); return shuttle.visit(new BloomExpr(newArg)); } + + @Override + public BindingDetails analyzeInputs() + { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier == null) { + return arg.analyzeInputs(); + } + return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); + } } return new BloomExpr(arg); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java index bb3cfa7ac386..d20b07ac139b 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java @@ -121,7 +121,7 @@ public List requiredFields() { return fieldName != null ? 
Collections.singletonList(fieldName) - : Parser.findRequiredBindings(fieldExpression.get()); + : fieldExpression.get().analyzeInputs().getRequiredColumns(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java index 6b43113c5eba..92dbc972f77c 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java @@ -115,7 +115,7 @@ public List requiredFields() { return fieldName != null ? Collections.singletonList(fieldName) - : Parser.findRequiredBindings(fieldExpression.get()); + : fieldExpression.get().analyzeInputs().getRequiredColumns(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java index f53a57df0f09..3a77e3ce7e29 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java @@ -111,7 +111,7 @@ public List requiredFields() { return fieldName != null ? 
Collections.singletonList(fieldName) - : Parser.findRequiredBindings(fieldExpression.get()); + : fieldExpression.get().analyzeInputs().getRequiredColumns(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/post/ExpressionPostAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/post/ExpressionPostAggregator.java index 8b35a0344f1f..84421953b867 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/post/ExpressionPostAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/post/ExpressionPostAggregator.java @@ -27,7 +27,6 @@ import com.google.common.base.Supplier; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.math.expr.Expr; @@ -119,7 +118,7 @@ private ExpressionPostAggregator( macroTable, finalizers, parsed, - Suppliers.memoize(() -> ImmutableSet.copyOf(Parser.findRequiredBindings(parsed.get())))); + Suppliers.memoize(() -> parsed.get().analyzeInputs().getFreeVariables())); } private ExpressionPostAggregator( diff --git a/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java index 54d9264d04bc..947859833d76 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java @@ -19,6 +19,7 @@ package org.apache.druid.query.expression; +import com.google.common.collect.ImmutableSet; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.Expr; @@ -96,6 +97,16 @@ public Expr visit(Shuttle shuttle) Expr newArg = arg.visit(shuttle); return shuttle.visit(new 
LikeExtractExpr(newArg)); } + + @Override + public BindingDetails analyzeInputs() + { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier == null) { + return arg.analyzeInputs(); + } + return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); + } } return new LikeExtractExpr(arg); } diff --git a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java index 42990542972c..9d7cdd299da3 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java @@ -19,6 +19,7 @@ package org.apache.druid.query.expression; +import com.google.common.collect.ImmutableSet; import com.google.inject.Inject; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.IAE; @@ -100,6 +101,16 @@ public Expr visit(Shuttle shuttle) Expr newArg = arg.visit(shuttle); return shuttle.visit(new LookupExpr(newArg)); } + + @Override + public BindingDetails analyzeInputs() + { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier == null) { + return arg.analyzeInputs(); + } + return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); + } } return new LookupExpr(arg); diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java index 82190a4aab4b..dd7ce42b2e31 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java @@ -19,6 +19,7 @@ package org.apache.druid.query.expression; +import com.google.common.collect.ImmutableSet; import org.apache.druid.common.config.NullHandling; import 
org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.Expr; @@ -89,6 +90,16 @@ public Expr visit(Shuttle shuttle) Expr newArg = arg.visit(shuttle); return shuttle.visit(new RegexpExtractExpr(newArg)); } + + @Override + public BindingDetails analyzeInputs() + { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier == null) { + return arg.analyzeInputs(); + } + return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); + } } return new RegexpExtractExpr(arg); } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java index 3dd9e7372eef..938ca55d8167 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java @@ -29,7 +29,9 @@ import org.apache.druid.math.expr.ExprMacroTable; import javax.annotation.Nonnull; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; public class TimestampCeilExprMacro implements ExprMacroTable.ExprMacro @@ -90,6 +92,12 @@ public Expr visit(Shuttle shuttle) Expr newArg = arg.visit(shuttle); return shuttle.visit(new TimestampCeilExpr(ImmutableList.of(newArg))); } + + @Override + public BindingDetails analyzeInputs() + { + return arg.analyzeInputs(); + } } private static PeriodGranularity getGranularity(final List args, final Expr.ObjectBinding bindings) @@ -134,5 +142,20 @@ public Expr visit(Shuttle shuttle) List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); return shuttle.visit(new TimestampCeilDynamicExpr(newArgs)); } + + @Override + public BindingDetails analyzeInputs() + { + Set scalars = new HashSet<>(); + BindingDetails accumulator = new BindingDetails(); + for (Expr arg : args) { + final String identifier = 
arg.getIdentifierIfIdentifier(); + if (identifier != null) { + scalars.add(identifier); + } + accumulator = accumulator.merge(arg.analyzeInputs()); + } + return accumulator.mergeWithScalars(scalars); + } } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java index 0b2991aa5d79..e0a88ab15a13 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java @@ -19,6 +19,7 @@ package org.apache.druid.query.expression; +import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; @@ -142,6 +143,16 @@ public Expr visit(Shuttle shuttle) Expr newArg = arg.visit(shuttle); return shuttle.visit(new TimestampExtractExpr(newArg)); } + + @Override + public BindingDetails analyzeInputs() + { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier == null) { + return arg.analyzeInputs(); + } + return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); + } } return new TimestampExtractExpr(arg); diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java index a828d038b3c9..5555cb3f10e4 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java @@ -20,6 +20,7 @@ package org.apache.druid.query.expression; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.DateTimes; import 
org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.granularity.PeriodGranularity; @@ -28,7 +29,9 @@ import org.apache.druid.math.expr.ExprMacroTable; import javax.annotation.Nonnull; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; public class TimestampFloorExprMacro implements ExprMacroTable.ExprMacro @@ -116,6 +119,16 @@ public Expr visit(Shuttle shuttle) Expr newArg = arg.visit(shuttle); return shuttle.visit(new TimestampFloorExpr(ImmutableList.of(newArg))); } + + @Override + public BindingDetails analyzeInputs() + { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier == null) { + return arg.analyzeInputs(); + } + return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); + } } public static class TimestampFloorDynamicExpr implements Expr @@ -150,5 +163,20 @@ public Expr visit(Shuttle shuttle) List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); return shuttle.visit(new TimestampFloorDynamicExpr(newArgs)); } + + @Override + public BindingDetails analyzeInputs() + { + Set scalars = new HashSet<>(); + BindingDetails accumulator = new BindingDetails(); + for (Expr arg : args) { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier != null) { + scalars.add(identifier); + } + accumulator = accumulator.merge(arg.analyzeInputs()); + } + return accumulator.mergeWithScalars(scalars); + } } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java index b2142a91192a..a960ad9691ac 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java @@ -102,6 +102,12 @@ public Expr visit(Shuttle shuttle) Expr newArg = 
arg.visit(shuttle); return shuttle.visit(new TimestampFormatExpr(newArg)); } + + @Override + public BindingDetails analyzeInputs() + { + return arg.analyzeInputs(); + } } return new TimestampFormatExpr(arg); diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java index 4bbfdbcaf433..bd40743327f9 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java @@ -19,6 +19,7 @@ package org.apache.druid.query.expression; +import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.Expr; @@ -105,6 +106,16 @@ public Expr visit(Shuttle shuttle) Expr newArg = arg.visit(shuttle); return shuttle.visit(new TimestampParseExpr(newArg)); } + + @Override + public BindingDetails analyzeInputs() + { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier == null) { + return arg.analyzeInputs(); + } + return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); + } } return new TimestampParseExpr(arg); diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java index e9b70dd4d328..15f2279b7156 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java @@ -20,6 +20,7 @@ package org.apache.druid.query.expression; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.IAE; import 
org.apache.druid.java.util.common.granularity.PeriodGranularity; import org.apache.druid.math.expr.Expr; @@ -30,7 +31,9 @@ import org.joda.time.chrono.ISOChronology; import javax.annotation.Nonnull; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; public class TimestampShiftExprMacro implements ExprMacroTable.ExprMacro @@ -109,6 +112,16 @@ public Expr visit(Shuttle shuttle) Expr newArg = arg.visit(shuttle); return shuttle.visit(new TimestampShiftExpr(ImmutableList.of(newArg))); } + + @Override + public BindingDetails analyzeInputs() + { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier == null) { + return arg.analyzeInputs(); + } + return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); + } } private static class TimestampShiftDynamicExpr implements Expr @@ -146,5 +159,20 @@ public Expr visit(Shuttle shuttle) List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); return shuttle.visit(new TimestampShiftDynamicExpr(newArgs)); } + + @Override + public BindingDetails analyzeInputs() + { + Set scalars = new HashSet<>(); + BindingDetails accumulator = new BindingDetails(); + for (Expr arg : args) { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier != null) { + scalars.add(identifier); + } + accumulator = accumulator.merge(arg.analyzeInputs()); + } + return accumulator.mergeWithScalars(scalars); + } } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java index f2f6d3b9bc03..fd4dcea32d69 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java @@ -19,13 +19,16 @@ package org.apache.druid.query.expression; +import com.google.common.collect.ImmutableSet; import 
org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExprMacroTable; import javax.annotation.Nonnull; +import java.util.HashSet; import java.util.List; +import java.util.Set; public abstract class TrimExprMacro implements ExprMacroTable.ExprMacro { @@ -162,6 +165,16 @@ public Expr visit(Shuttle shuttle) Expr newStringExpr = stringExpr.visit(shuttle); return shuttle.visit(new TrimStaticCharsExpr(mode, newStringExpr, chars)); } + + @Override + public BindingDetails analyzeInputs() + { + final String identifier = stringExpr.getIdentifierIfIdentifier(); + if (identifier == null) { + return stringExpr.analyzeInputs(); + } + return stringExpr.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); + } } private static class TrimDynamicCharsExpr implements Expr @@ -241,6 +254,21 @@ public Expr visit(Shuttle shuttle) Expr newCharsExpr = charsExpr.visit(shuttle); return shuttle.visit(new TrimDynamicCharsExpr(mode, newStringExpr, newCharsExpr)); } + + @Override + public BindingDetails analyzeInputs() + { + final String stringIdentifier = stringExpr.getIdentifierIfIdentifier(); + final Set scalars = new HashSet<>(); + if (stringIdentifier != null) { + scalars.add(stringIdentifier); + } + final String charsIdentifier = charsExpr.getIdentifierIfIdentifier(); + if (charsIdentifier != null) { + scalars.add(charsIdentifier); + } + return stringExpr.analyzeInputs().merge(charsExpr.analyzeInputs()).mergeWithScalars(scalars); + } } private static boolean arrayContains(char[] array, char c) diff --git a/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java index 9a348006a9e9..4e731e0c4b5e 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java @@ 
-77,7 +77,7 @@ public RangeSet getDimensionRangeSet(final String dimension) @Override public HashSet getRequiredColumns() { - return Sets.newHashSet(Parser.findRequiredBindings(parsed.get())); + return Sets.newHashSet(parsed.get().analyzeInputs().getFreeVariables()); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index 652215a80ad9..5816c0ce0d61 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -21,13 +21,11 @@ import com.google.common.base.Supplier; import com.google.common.base.Suppliers; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import org.apache.druid.common.config.NullHandling; import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.Parser; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.expression.ExprUtils; import org.apache.druid.query.filter.BitmapIndexSelector; @@ -50,7 +48,7 @@ public class ExpressionFilter implements Filter public ExpressionFilter(final Supplier expr) { this.expr = expr; - this.requiredBindings = Suppliers.memoize(() -> ImmutableSet.copyOf(Parser.findRequiredBindings(expr.get()))); + this.requiredBindings = Suppliers.memoize(() -> expr.get().analyzeInputs().getFreeVariables()); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index e110e9981d13..eb3c5bfd1891 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -135,7 
+135,8 @@ public static ColumnValueSelector makeExprEvalSelector( Expr expression ) { - final Parser.BindingDetails exprDetails = Parser.examineBindings(expression); + final Expr.BindingDetails exprDetails = expression.analyzeInputs(); + Parser.validateExpr(expression, exprDetails); final List columns = exprDetails.getRequiredColumns(); if (columns.size() == 1) { @@ -194,7 +195,7 @@ public static ColumnValueSelector makeExprEvalSelector( } - final Expr.ObjectBinding bindings = createBindings(expression, columnSelectorFactory); + final Expr.ObjectBinding bindings = createBindings(exprDetails, columnSelectorFactory); if (bindings.equals(ExprUtils.nilBindings())) { // Optimization for constant expressions. @@ -202,7 +203,12 @@ public static ColumnValueSelector makeExprEvalSelector( } if (unknownIfArrays.size() > 0) { - return new OpportunisticMultiValueStringExpressionColumnValueSelector(finalExpr, bindings, unknownIfArrays); + return new OpportunisticMultiValueStringExpressionColumnValueSelector( + finalExpr, + exprDetails, + bindings, + unknownIfArrays + ); } // No special optimization. 
return new ExpressionColumnValueSelector(finalExpr, bindings); @@ -214,7 +220,8 @@ public static DimensionSelector makeDimensionSelector( final ExtractionFn extractionFn ) { - final Parser.BindingDetails exprDetails = Parser.examineBindings(expression); + final Expr.BindingDetails exprDetails = expression.analyzeInputs(); + Parser.validateExpr(expression, exprDetails); final List columns = exprDetails.getRequiredColumns(); @@ -385,10 +392,13 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } } - private static Expr.ObjectBinding createBindings(Expr expression, ColumnSelectorFactory columnSelectorFactory) + private static Expr.ObjectBinding createBindings( + Expr.BindingDetails bindingDetails, + ColumnSelectorFactory columnSelectorFactory + ) { final Map> suppliers = new HashMap<>(); - final List columns = Parser.findRequiredBindings(expression); + final List columns = bindingDetails.getRequiredColumns(); for (String columnName : columns) { final ColumnCapabilities columnCapabilities = columnSelectorFactory .getColumnCapabilities(columnName); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java index d029f7f8311c..d71ac28f7db2 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java @@ -111,7 +111,7 @@ public ColumnCapabilities capabilities(String columnName) @Override public List requiredColumns() { - return Parser.findRequiredBindings(parsedExpression.get()); + return parsedExpression.get().analyzeInputs().getRequiredColumns(); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java 
b/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java index 7e94a5d34a5d..30d0e063e05e 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java @@ -34,19 +34,20 @@ public class OpportunisticMultiValueStringExpressionColumnValueSelector extends ExpressionColumnValueSelector { private final List unknownColumns; - private final Set arrayInputs; + private final Expr.BindingDetails baseExprBindingDetails; private final Set ignoredColumns; private final Int2ObjectMap transformedCache; public OpportunisticMultiValueStringExpressionColumnValueSelector( Expr expression, + Expr.BindingDetails baseExprBindingDetails, Expr.ObjectBinding bindings, Set unknownColumnsSet ) { super(expression, bindings); this.unknownColumns = new ArrayList<>(unknownColumnsSet); - this.arrayInputs = Parser.findArrayFnBindings(expression); + this.baseExprBindingDetails = baseExprBindingDetails; this.ignoredColumns = new HashSet<>(); this.transformedCache = new Int2ObjectArrayMap(unknownColumns.size()); } @@ -55,7 +56,9 @@ public OpportunisticMultiValueStringExpressionColumnValueSelector( public ExprEval getObject() { List arrayBindings = - unknownColumns.stream().filter(x -> !arrayInputs.contains(x) && isBindingArray(x)).collect(Collectors.toList()); + unknownColumns.stream() + .filter(x -> !baseExprBindingDetails.getArrayVariables().contains(x) && isBindingArray(x)) + .collect(Collectors.toList()); if (ignoredColumns.size() > 0) { unknownColumns.removeAll(ignoredColumns); @@ -79,7 +82,6 @@ private boolean isBindingArray(String x) Object binding = bindings.get(x); if (binding != null) { if (binding instanceof String[] && ((String[]) binding).length > 1) { - // if (binding instanceof String[]) { return true; } else if (binding 
instanceof Number) { ignoredColumns.add(x); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java index f05329bd367b..af71a9979b1e 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java @@ -24,7 +24,6 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.Parser; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; @@ -60,7 +59,7 @@ public SingleLongInputCachingExpressionColumnValueSelector( ) { // Verify expression has just one binding. - if (Parser.findRequiredBindings(expression).size() != 1) { + if (expression.analyzeInputs().getFreeVariables().size() != 1) { throw new ISE("WTF?! 
Expected expression with just one binding"); } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java index f7b2aedbd7f0..4d358e08a90b 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java @@ -25,7 +25,6 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.Parser; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; @@ -55,7 +54,7 @@ public SingleStringInputCachingExpressionColumnValueSelector( ) { // Verify expression has just one binding. - if (Parser.findRequiredBindings(expression).size() != 1) { + if (expression.analyzeInputs().getFreeVariables().size() != 1) { throw new ISE("WTF?! 
Expected expression with just one binding"); } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java index ce49901553b3..275869a7b636 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java @@ -23,7 +23,6 @@ import com.google.common.base.Predicate; import org.apache.druid.java.util.common.ISE; import org.apache.druid.math.expr.Expr; -import org.apache.druid.math.expr.Parser; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.DimensionSelector; @@ -56,7 +55,7 @@ public SingleStringInputDimensionSelector( ) { // Verify expression has just one binding. - if (Parser.findRequiredBindings(expression).size() != 1) { + if (expression.analyzeInputs().getFreeVariables().size() != 1) { throw new ISE("WTF?! 
Expected expression with just one binding"); } diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index d08ad5bd0e30..f0282c6ed7b0 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -720,7 +720,7 @@ public void testGroupByExpressionFoldArrayToString() .setVirtualColumns( new ExpressionVirtualColumn( "tt", - "foldr((tag, acc) -> concat(acc, tag), tags, '')", + "fold((tag, acc) -> concat(acc, tag), tags, '')", ValueType.STRING, TestExprMacroTable.INSTANCE ) @@ -770,7 +770,7 @@ public void testGroupByExpressionFoldArrayToStringWithConcats() .setVirtualColumns( new ExpressionVirtualColumn( "tt", - "foldr((tag, acc) -> concat(concat(acc, case_searched(acc == '', '', ', '), concat('foo', tag)))), tags, '')", + "fold((tag, acc) -> concat(concat(acc, case_searched(acc == '', '', ', '), concat('foo', tag)))), tags, '')", ValueType.STRING, TestExprMacroTable.INSTANCE ) @@ -803,7 +803,7 @@ public void testGroupByExpressionMultiConflicting() { expectedException.expect(RuntimeException.class); expectedException.expectMessage( - "Invalid expression: (concat [(map ([x] -> (concat [x, othertags])), [tags]), tags]); identifier [tags] used as both scalar and array" + "Invalid expression: (concat [(map ([x] -> (concat [x, othertags])), [tags]), tags]); [tags] used as both scalar and array variables" ); GroupByQuery query = GroupByQuery .builder() @@ -838,7 +838,7 @@ public void testGroupByExpressionMultiConflictingAlso() { expectedException.expect(RuntimeException.class); expectedException.expectMessage( - "Invalid expression: (array_concat [tags, (array_append [othertags, tags])]); identifier [tags] used as both scalar and array" + "Invalid expression: (array_concat [tags, (array_append [othertags, tags])]); [tags] used as both scalar and 
array variables" ); GroupByQuery query = GroupByQuery .builder() diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java index 2413cdb9ec5e..ae31f86d4d39 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java @@ -595,7 +595,7 @@ public static Granularity toQueryGranularity(final DruidExpression expression, f final Expr arg = expr.getArg(); final Granularity granularity = expr.getGranularity(); - if (ColumnHolder.TIME_COLUMN_NAME.equals(Parser.getIdentifierIfIdentifier(arg))) { + if (ColumnHolder.TIME_COLUMN_NAME.equals(arg.getIdentifierIfIdentifier())) { return granularity; } else { return null; From 74e3aaaf416331121b8bdb3902e0b60241140a5b Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sat, 1 Jun 2019 18:03:24 -0700 Subject: [PATCH 24/48] simplify ExprMacro Expr implementations with base classes in core --- .../druid/math/expr/ExprMacroTable.java | 65 +++++++++++++++++++ .../expressions/BloomFilterExprMacro.java | 23 +------ .../druid/query/expression/LikeExprMacro.java | 24 +------ .../query/expression/LookupExprMacro.java | 24 +------ .../expression/RegexpExtractExprMacro.java | 25 +------ .../expression/TimestampCeilExprMacro.java | 54 ++------------- .../expression/TimestampExtractExprMacro.java | 24 +------ .../expression/TimestampFloorExprMacro.java | 60 ++--------------- .../expression/TimestampFormatExprMacro.java | 19 +----- .../expression/TimestampParseExprMacro.java | 24 +------ .../expression/TimestampShiftExprMacro.java | 60 ++--------------- .../druid/query/expression/TrimExprMacro.java | 27 ++------ 12 files changed, 102 insertions(+), 327 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java b/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java index c4b475832aaf..b597890825d6 
100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java @@ -20,12 +20,15 @@ package org.apache.druid.math.expr; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.StringUtils; import javax.annotation.Nullable; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; public class ExprMacroTable @@ -80,4 +83,66 @@ public interface ExprMacro Expr apply(List args); } + + public static abstract class BaseSingleScalarArgumentExprMacroFunctionExpr implements Expr + { + protected final Expr arg; + + public BaseSingleScalarArgumentExprMacroFunctionExpr(Expr arg) + { + this.arg = arg; + } + + @Override + public void visit(final Visitor visitor) + { + arg.visit(visitor); + visitor.visit(this); + } + + @Override + public BindingDetails analyzeInputs() + { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier == null) { + return arg.analyzeInputs(); + } + return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); + } + } + + public static abstract class BaseScalarExprMacroFunctionExpr implements Expr + { + protected final List args; + + public BaseScalarExprMacroFunctionExpr(final List args) + { + this.args = args; + } + + + @Override + public void visit(final Visitor visitor) + { + for (Expr arg : args) { + arg.visit(visitor); + } + visitor.visit(this); + } + + @Override + public BindingDetails analyzeInputs() + { + Set scalars = new HashSet<>(); + BindingDetails accumulator = new BindingDetails(); + for (Expr arg : args) { + final String identifier = arg.getIdentifierIfIdentifier(); + if (identifier != null) { + scalars.add(identifier); + } + accumulator = accumulator.merge(arg.analyzeInputs()); + } + return accumulator.mergeWithScalars(scalars); + } + } } 
diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java index 22380e66074b..352224a60a38 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java @@ -19,7 +19,6 @@ package org.apache.druid.query.expressions; -import com.google.common.collect.ImmutableSet; import org.apache.druid.guice.BloomFilterSerializersModule; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; @@ -68,13 +67,11 @@ public Expr apply(List args) throw new RuntimeException("Failed to deserialize bloom filter", ioe); } - class BloomExpr implements Expr + class BloomExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr { - private final Expr arg; - private BloomExpr(Expr arg) { - this.arg = arg; + super(arg); } @Nonnull @@ -119,12 +116,6 @@ private boolean nullMatch() return filter.testBytes(null, 0, 0); } - @Override - public void visit(final Visitor visitor) - { - arg.visit(visitor); - visitor.visit(this); - } @Override public Expr visit(Shuttle shuttle) @@ -132,16 +123,6 @@ public Expr visit(Shuttle shuttle) Expr newArg = arg.visit(shuttle); return shuttle.visit(new BloomExpr(newArg)); } - - @Override - public BindingDetails analyzeInputs() - { - final String identifier = arg.getIdentifierIfIdentifier(); - if (identifier == null) { - return arg.analyzeInputs(); - } - return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); - } } return new BloomExpr(arg); diff --git a/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java index 947859833d76..cca8749bac71 
100644 --- a/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java @@ -19,7 +19,6 @@ package org.apache.druid.query.expression; -import com.google.common.collect.ImmutableSet; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.Expr; @@ -68,13 +67,11 @@ public Expr apply(final List args) escapeChar ); - class LikeExtractExpr implements Expr + class LikeExtractExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr { - private final Expr arg; - private LikeExtractExpr(Expr arg) { - this.arg = arg; + super(arg); } @Nonnull @@ -84,29 +81,12 @@ public ExprEval eval(final ObjectBinding bindings) return ExprEval.of(likeMatcher.matches(arg.eval(bindings).asString()), ExprType.LONG); } - @Override - public void visit(final Visitor visitor) - { - arg.visit(visitor); - visitor.visit(this); - } - @Override public Expr visit(Shuttle shuttle) { Expr newArg = arg.visit(shuttle); return shuttle.visit(new LikeExtractExpr(newArg)); } - - @Override - public BindingDetails analyzeInputs() - { - final String identifier = arg.getIdentifierIfIdentifier(); - if (identifier == null) { - return arg.analyzeInputs(); - } - return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); - } } return new LikeExtractExpr(arg); } diff --git a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java index 9d7cdd299da3..39fa14b9e187 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java @@ -19,7 +19,6 @@ package org.apache.druid.query.expression; -import com.google.common.collect.ImmutableSet; import com.google.inject.Inject; import 
org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.IAE; @@ -72,13 +71,11 @@ public Expr apply(final List args) null ); - class LookupExpr implements Expr + class LookupExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr { - private final Expr arg; - private LookupExpr(Expr arg) { - this.arg = arg; + super(arg); } @Nonnull @@ -88,29 +85,12 @@ public ExprEval eval(final ObjectBinding bindings) return ExprEval.of(extractionFn.apply(NullHandling.emptyToNullIfNeeded(arg.eval(bindings).asString()))); } - @Override - public void visit(final Visitor visitor) - { - arg.visit(visitor); - visitor.visit(this); - } - @Override public Expr visit(Shuttle shuttle) { Expr newArg = arg.visit(shuttle); return shuttle.visit(new LookupExpr(newArg)); } - - @Override - public BindingDetails analyzeInputs() - { - final String identifier = arg.getIdentifierIfIdentifier(); - if (identifier == null) { - return arg.analyzeInputs(); - } - return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); - } } return new LookupExpr(arg); diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java index dd7ce42b2e31..a5dd90727968 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java @@ -19,7 +19,6 @@ package org.apache.druid.query.expression; -import com.google.common.collect.ImmutableSet; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.Expr; @@ -58,13 +57,12 @@ public Expr apply(final List args) final Pattern pattern = Pattern.compile(String.valueOf(patternExpr.getLiteralValue())); final int index = indexExpr == null ? 
0 : ((Number) indexExpr.getLiteralValue()).intValue(); - class RegexpExtractExpr implements Expr - { - private final Expr arg; + class RegexpExtractExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + { private RegexpExtractExpr(Expr arg) { - this.arg = arg; + super(arg); } @Nonnull @@ -77,29 +75,12 @@ public ExprEval eval(final ObjectBinding bindings) return ExprEval.of(NullHandling.emptyToNullIfNeeded(retVal)); } - @Override - public void visit(final Visitor visitor) - { - arg.visit(visitor); - visitor.visit(this); - } - @Override public Expr visit(Shuttle shuttle) { Expr newArg = arg.visit(shuttle); return shuttle.visit(new RegexpExtractExpr(newArg)); } - - @Override - public BindingDetails analyzeInputs() - { - final String identifier = arg.getIdentifierIfIdentifier(); - if (identifier == null) { - return arg.analyzeInputs(); - } - return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); - } } return new RegexpExtractExpr(arg); } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java index 938ca55d8167..a1044ce2ee28 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java @@ -29,9 +29,7 @@ import org.apache.druid.math.expr.ExprMacroTable; import javax.annotation.Nonnull; -import java.util.HashSet; import java.util.List; -import java.util.Set; import java.util.stream.Collectors; public class TimestampCeilExprMacro implements ExprMacroTable.ExprMacro @@ -56,14 +54,13 @@ public Expr apply(final List args) } } - private static class TimestampCeilExpr implements Expr + private static class TimestampCeilExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr { - private final Expr arg; private final Granularity granularity; - public 
TimestampCeilExpr(final List args) + TimestampCeilExpr(final List args) { - this.arg = args.get(0); + super(args.get(0)); this.granularity = getGranularity(args, ExprUtils.nilBindings()); } @@ -79,25 +76,12 @@ public ExprEval eval(final ObjectBinding bindings) return ExprEval.of(granularity.bucketEnd(DateTimes.utc(arg.eval(bindings).asLong())).getMillis()); } - @Override - public void visit(final Visitor visitor) - { - arg.visit(visitor); - visitor.visit(this); - } - @Override public Expr visit(Shuttle shuttle) { Expr newArg = arg.visit(shuttle); return shuttle.visit(new TimestampCeilExpr(ImmutableList.of(newArg))); } - - @Override - public BindingDetails analyzeInputs() - { - return arg.analyzeInputs(); - } } private static PeriodGranularity getGranularity(final List args, final Expr.ObjectBinding bindings) @@ -110,13 +94,11 @@ private static PeriodGranularity getGranularity(final List args, final Exp ); } - private static class TimestampCeilDynamicExpr implements Expr + private static class TimestampCeilDynamicExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr { - private final List args; - - public TimestampCeilDynamicExpr(final List args) + TimestampCeilDynamicExpr(final List args) { - this.args = args; + super(args); } @Nonnull @@ -127,35 +109,11 @@ public ExprEval eval(final ObjectBinding bindings) return ExprEval.of(granularity.bucketEnd(DateTimes.utc(args.get(0).eval(bindings).asLong())).getMillis()); } - @Override - public void visit(final Visitor visitor) - { - for (Expr arg : args) { - arg.visit(visitor); - } - visitor.visit(this); - } - @Override public Expr visit(Shuttle shuttle) { List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); return shuttle.visit(new TimestampCeilDynamicExpr(newArgs)); } - - @Override - public BindingDetails analyzeInputs() - { - Set scalars = new HashSet<>(); - BindingDetails accumulator = new BindingDetails(); - for (Expr arg : args) { - final String identifier = 
arg.getIdentifierIfIdentifier(); - if (identifier != null) { - scalars.add(identifier); - } - accumulator = accumulator.merge(arg.analyzeInputs()); - } - return accumulator.mergeWithScalars(scalars); - } } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java index e0a88ab15a13..fd7021dbb21e 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java @@ -19,7 +19,6 @@ package org.apache.druid.query.expression; -import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; @@ -83,13 +82,11 @@ public Expr apply(final List args) final ISOChronology chronology = ISOChronology.getInstance(timeZone); - class TimestampExtractExpr implements Expr + class TimestampExtractExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr { - private final Expr arg; - private TimestampExtractExpr(Expr arg) { - this.arg = arg; + super(arg); } @Nonnull @@ -130,29 +127,12 @@ public ExprEval eval(final ObjectBinding bindings) } } - @Override - public void visit(final Visitor visitor) - { - arg.visit(visitor); - visitor.visit(this); - } - @Override public Expr visit(Shuttle shuttle) { Expr newArg = arg.visit(shuttle); return shuttle.visit(new TimestampExtractExpr(newArg)); } - - @Override - public BindingDetails analyzeInputs() - { - final String identifier = arg.getIdentifierIfIdentifier(); - if (identifier == null) { - return arg.analyzeInputs(); - } - return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); - } } return new TimestampExtractExpr(arg); diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java 
b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java index 5555cb3f10e4..216dafc526a6 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java @@ -20,7 +20,6 @@ package org.apache.druid.query.expression; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.granularity.PeriodGranularity; @@ -29,9 +28,7 @@ import org.apache.druid.math.expr.ExprMacroTable; import javax.annotation.Nonnull; -import java.util.HashSet; import java.util.List; -import java.util.Set; import java.util.stream.Collectors; public class TimestampFloorExprMacro implements ExprMacroTable.ExprMacro @@ -66,14 +63,13 @@ private static PeriodGranularity computeGranularity(final List args, final ); } - public static class TimestampFloorExpr implements Expr + public static class TimestampFloorExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr { - private final Expr arg; private final PeriodGranularity granularity; - public TimestampFloorExpr(final List args) + TimestampFloorExpr(final List args) { - this.arg = args.get(0); + super(args.get(0)); this.granularity = computeGranularity(args, ExprUtils.nilBindings()); } @@ -105,39 +101,19 @@ public ExprEval eval(final ObjectBinding bindings) return ExprEval.of(granularity.bucketStart(DateTimes.utc(eval.asLong())).getMillis()); } - @Override - public void visit(final Visitor visitor) - { - arg.visit(visitor); - visitor.visit(this); - } - - @Override public Expr visit(Shuttle shuttle) { Expr newArg = arg.visit(shuttle); return shuttle.visit(new TimestampFloorExpr(ImmutableList.of(newArg))); } - - @Override - public BindingDetails analyzeInputs() - { - final String identifier = 
arg.getIdentifierIfIdentifier(); - if (identifier == null) { - return arg.analyzeInputs(); - } - return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); - } } - public static class TimestampFloorDynamicExpr implements Expr + public static class TimestampFloorDynamicExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr { - private final List args; - - public TimestampFloorDynamicExpr(final List args) + TimestampFloorDynamicExpr(final List args) { - this.args = args; + super(args); } @Nonnull @@ -148,35 +124,11 @@ public ExprEval eval(final ObjectBinding bindings) return ExprEval.of(granularity.bucketStart(DateTimes.utc(args.get(0).eval(bindings).asLong())).getMillis()); } - @Override - public void visit(final Visitor visitor) - { - for (Expr arg : args) { - arg.visit(visitor); - } - visitor.visit(this); - } - @Override public Expr visit(Shuttle shuttle) { List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); return shuttle.visit(new TimestampFloorDynamicExpr(newArgs)); } - - @Override - public BindingDetails analyzeInputs() - { - Set scalars = new HashSet<>(); - BindingDetails accumulator = new BindingDetails(); - for (Expr arg : args) { - final String identifier = arg.getIdentifierIfIdentifier(); - if (identifier != null) { - scalars.add(identifier); - } - accumulator = accumulator.merge(arg.analyzeInputs()); - } - return accumulator.mergeWithScalars(scalars); - } } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java index a960ad9691ac..2786522500cb 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java @@ -68,13 +68,11 @@ public Expr apply(final List args) ? 
ISODateTimeFormat.dateTime() : DateTimeFormat.forPattern(formatString).withZone(timeZone); - class TimestampFormatExpr implements Expr + class TimestampFormatExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr { - private final Expr arg; - private TimestampFormatExpr(Expr arg) { - this.arg = arg; + super(arg); } @Nonnull @@ -89,25 +87,12 @@ public ExprEval eval(final ObjectBinding bindings) return ExprEval.of(formatter.print(arg.eval(bindings).asLong())); } - @Override - public void visit(final Visitor visitor) - { - arg.visit(visitor); - visitor.visit(this); - } - @Override public Expr visit(Shuttle shuttle) { Expr newArg = arg.visit(shuttle); return shuttle.visit(new TimestampFormatExpr(newArg)); } - - @Override - public BindingDetails analyzeInputs() - { - return arg.analyzeInputs(); - } } return new TimestampFormatExpr(arg); diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java index bd40743327f9..ac88aafa8c76 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java @@ -19,7 +19,6 @@ package org.apache.druid.query.expression; -import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.Expr; @@ -65,13 +64,11 @@ public Expr apply(final List args) ? 
createDefaultParser(timeZone) : DateTimes.wrapFormatter(DateTimeFormat.forPattern(formatString).withZone(timeZone)); - class TimestampParseExpr implements Expr + class TimestampParseExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr { - private final Expr arg; - private TimestampParseExpr(Expr arg) { - this.arg = arg; + super(arg); } @Nonnull @@ -93,29 +90,12 @@ public ExprEval eval(final ObjectBinding bindings) } } - @Override - public void visit(final Visitor visitor) - { - arg.visit(visitor); - visitor.visit(this); - } - @Override public Expr visit(Shuttle shuttle) { Expr newArg = arg.visit(shuttle); return shuttle.visit(new TimestampParseExpr(newArg)); } - - @Override - public BindingDetails analyzeInputs() - { - final String identifier = arg.getIdentifierIfIdentifier(); - if (identifier == null) { - return arg.analyzeInputs(); - } - return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); - } } return new TimestampParseExpr(arg); diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java index 15f2279b7156..178bc98923db 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java @@ -20,7 +20,6 @@ package org.apache.druid.query.expression; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.granularity.PeriodGranularity; import org.apache.druid.math.expr.Expr; @@ -31,9 +30,7 @@ import org.joda.time.chrono.ISOChronology; import javax.annotation.Nonnull; -import java.util.HashSet; import java.util.List; -import java.util.Set; import java.util.stream.Collectors; public class TimestampShiftExprMacro implements ExprMacroTable.ExprMacro 
@@ -75,17 +72,16 @@ private static int getStep(final List args, final Expr.ObjectBinding bindi return args.get(2).eval(bindings).asInt(); } - private static class TimestampShiftExpr implements Expr + private static class TimestampShiftExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr { - private final Expr arg; private final Chronology chronology; private final Period period; private final int step; - public TimestampShiftExpr(final List args) + TimestampShiftExpr(final List args) { + super(args.get(0)); final PeriodGranularity granularity = getGranularity(args, ExprUtils.nilBindings()); - arg = args.get(0); period = granularity.getPeriod(); chronology = ISOChronology.getInstance(granularity.getTimeZone()); step = getStep(args, ExprUtils.nilBindings()); @@ -98,39 +94,19 @@ public ExprEval eval(final ObjectBinding bindings) return ExprEval.of(chronology.add(period, arg.eval(bindings).asLong(), step)); } - @Override - public void visit(final Visitor visitor) - { - arg.visit(visitor); - visitor.visit(this); - } - - @Override public Expr visit(Shuttle shuttle) { Expr newArg = arg.visit(shuttle); return shuttle.visit(new TimestampShiftExpr(ImmutableList.of(newArg))); } - - @Override - public BindingDetails analyzeInputs() - { - final String identifier = arg.getIdentifierIfIdentifier(); - if (identifier == null) { - return arg.analyzeInputs(); - } - return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); - } } - private static class TimestampShiftDynamicExpr implements Expr + private static class TimestampShiftDynamicExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr { - private final List args; - - public TimestampShiftDynamicExpr(final List args) + TimestampShiftDynamicExpr(final List args) { - this.args = args; + super(args); } @Nonnull @@ -144,35 +120,11 @@ public ExprEval eval(final ObjectBinding bindings) return ExprEval.of(chronology.add(period, args.get(0).eval(bindings).asLong(), step)); } - @Override - public 
void visit(final Visitor visitor) - { - for (Expr arg : args) { - arg.visit(visitor); - } - visitor.visit(this); - } - @Override public Expr visit(Shuttle shuttle) { List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); return shuttle.visit(new TimestampShiftDynamicExpr(newArgs)); } - - @Override - public BindingDetails analyzeInputs() - { - Set scalars = new HashSet<>(); - BindingDetails accumulator = new BindingDetails(); - for (Expr arg : args) { - final String identifier = arg.getIdentifierIfIdentifier(); - if (identifier != null) { - scalars.add(identifier); - } - accumulator = accumulator.merge(arg.analyzeInputs()); - } - return accumulator.mergeWithScalars(scalars); - } } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java index fd4dcea32d69..fdafeda54163 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java @@ -19,7 +19,6 @@ package org.apache.druid.query.expression; -import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; @@ -97,16 +96,15 @@ public Expr apply(final List args) } } - private static class TrimStaticCharsExpr implements Expr + private static class TrimStaticCharsExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr { private final TrimMode mode; - private final Expr stringExpr; private final char[] chars; public TrimStaticCharsExpr(final TrimMode mode, final Expr stringExpr, final char[] chars) { + super(stringExpr); this.mode = mode; - this.stringExpr = stringExpr; this.chars = chars; } @@ -114,7 +112,7 @@ public TrimStaticCharsExpr(final TrimMode mode, final Expr stringExpr, final cha @Override public ExprEval eval(final ObjectBinding 
bindings) { - final ExprEval stringEval = stringExpr.eval(bindings); + final ExprEval stringEval = arg.eval(bindings); if (chars.length == 0 || stringEval.value() == null) { return stringEval; @@ -152,29 +150,12 @@ public ExprEval eval(final ObjectBinding bindings) } } - @Override - public void visit(final Visitor visitor) - { - stringExpr.visit(visitor); - visitor.visit(this); - } - @Override public Expr visit(Shuttle shuttle) { - Expr newStringExpr = stringExpr.visit(shuttle); + Expr newStringExpr = arg.visit(shuttle); return shuttle.visit(new TrimStaticCharsExpr(mode, newStringExpr, chars)); } - - @Override - public BindingDetails analyzeInputs() - { - final String identifier = stringExpr.getIdentifierIfIdentifier(); - if (identifier == null) { - return stringExpr.analyzeInputs(); - } - return stringExpr.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier)); - } } private static class TrimDynamicCharsExpr implements Expr From c0cc6e11d04d8399d39ff9842ab4c1a20d0fea66 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sat, 1 Jun 2019 18:05:57 -0700 Subject: [PATCH 25/48] oops --- .../main/java/org/apache/druid/math/expr/ExprMacroTable.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java b/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java index b597890825d6..1971caf7609d 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java @@ -84,7 +84,7 @@ public interface ExprMacro Expr apply(List args); } - public static abstract class BaseSingleScalarArgumentExprMacroFunctionExpr implements Expr + public abstract static class BaseSingleScalarArgumentExprMacroFunctionExpr implements Expr { protected final Expr arg; @@ -111,7 +111,7 @@ public BindingDetails analyzeInputs() } } - public static abstract class BaseScalarExprMacroFunctionExpr implements Expr + public abstract 
static class BaseScalarExprMacroFunctionExpr implements Expr { protected final List args; From 6d6186bc2dda7946a46025f93af719834bf16d10 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 3 Jun 2019 14:27:47 -0700 Subject: [PATCH 26/48] more test --- .../org/apache/druid/math/expr/Parser.java | 33 ++++++++++++------- .../druid/math/expr/ApplyFunctionTest.java | 10 ++++++ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Parser.java b/core/src/main/java/org/apache/druid/math/expr/Parser.java index 3a47e62ceaeb..cd616f9262ef 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/core/src/main/java/org/apache/druid/math/expr/Parser.java @@ -260,9 +260,12 @@ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List x + y, x) => cartesian_map((x, y) -> x + y, x, y) - // cartesian_map((x, y) -> x + y + z, x, y) => cartesian_map((x, y, z) -> x + y + z, x, y, z) - final List lambdaIds = new ArrayList<>(expr.lambdaExpr.getIdentifiers().size() + unappliedArgs.size()); + // map(x -> x + y, x) => + // cartesian_map((x, y) -> x + y, x, y) + // cartesian_map((x, y) -> x + y + z, x, y) => + // cartesian_map((x, y, z) -> x + y + z, x, y, z) + final List lambdaIds = + new ArrayList<>(expr.lambdaExpr.getIdentifiers().size() + unappliedArgs.size()); lambdaIds.addAll(expr.lambdaExpr.getIdentifierExprs()); lambdaIds.addAll(unappliedLambdaBindings); final LambdaExpr newLambda = new LambdaExpr(lambdaIds, expr.lambdaExpr.getExpr()); @@ -275,9 +278,12 @@ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List x > y, x) => filter(x -> x, cartesian_map((x,y) -> x > y, x, y)) - // any(x -> x > y, x) => any(x -> x, cartesian_map((x, y) -> x > y, x, y)) - // all(x -> x > y, x) => all(x -> x, cartesian_map((x, y) -> x > y, x, y)) + // filter(x -> x > y, x) => + // filter(x -> x, cartesian_map((x,y) -> x > y, x, y)) + // any(x -> x > y, x) => + // any(x -> x, 
cartesian_map((x, y) -> x > y, x, y)) + // all(x -> x > y, x) => + // all(x -> x, cartesian_map((x, y) -> x > y, x, y)) ApplyFunction newArrayFn = new ApplyFunction.CartesianMapFunction(); IdentifierExpr identityExprIdentifier = new IdentifierExpr("_"); LambdaExpr identityExpr = new LambdaExpr(ImmutableList.of(identityExprIdentifier), identityExprIdentifier); @@ -286,11 +292,14 @@ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List acc + x + y, x, acc) => cartesian_fold((x, y, acc) -> acc + x + y, x, y, acc) - // cartesian_fold((x, y, acc) -> acc + x + y + z, x, y, acc) => cartesian_fold((x, y, z, acc) -> acc + x + y + z, x, y, z, acc) + // fold((x, acc) -> acc + x + y, x, acc) => + // cartesian_fold((x, y, acc) -> acc + x + y, x, y, acc) + // cartesian_fold((x, y, acc) -> acc + x + y + z, x, y, acc) => + // cartesian_fold((x, y, z, acc) -> acc + x + y + z, x, y, z, acc) final List newFoldArgs = new ArrayList<>(expr.argsExpr.size() + unappliedArgs.size()); - final List newFoldLambdaIdentifiers = new ArrayList<>(expr.lambdaExpr.getIdentifiers().size() + unappliedArgs.size()); + final List newFoldLambdaIdentifiers = + new ArrayList<>(expr.lambdaExpr.getIdentifiers().size() + unappliedArgs.size()); final List existingFoldLambdaIdentifiers = expr.lambdaExpr.getIdentifierExprs(); // accumulator argument is last argument, slice it off when constructing new arg list and lambda args identifiers for (int i = 0; i < expr.argsExpr.size() - 1; i++) { @@ -316,10 +325,10 @@ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List inconsistentIdentifierUsage = + final Set conflicted = Sets.intersection(bindingDetails.getScalarVariables(), bindingDetails.getArrayVariables()); - if (inconsistentIdentifierUsage.size() != 0) { - throw new RE("Invalid expression: %s; %s used as both scalar and array variables", expression, inconsistentIdentifierUsage); + if (conflicted.size() != 0) { + throw new RE("Invalid expression: %s; %s used as both 
scalar and array variables", expression, conflicted); } } diff --git a/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java index 30dcbe41293e..06f1520b0ec4 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java @@ -122,6 +122,16 @@ public void testAllMatch() assertExpr("all(x -> x, map(x -> x > 1, [1, 2, 3, 4]))", "false"); } + @Test + public void testScoping() + { + assertExpr("map(b -> b + 1, b)", new Long[]{2L, 3L, 4L, 5L, 6L}); + assertExpr("fold((b, acc) -> acc + b, map(b -> b + 1, b), 0)", 20L); + assertExpr("fold((b, acc) -> acc + b, map(b -> b + 1, b), fold((b, acc) -> acc + b, map(b -> b + 1, b), 0))", 40L); + assertExpr("fold((b, acc) -> acc + b, map(b -> b + 1, b), 0) + fold((b, acc) -> acc + b, map(b -> b + 1, b), 0)", 40L); + assertExpr("fold((b, acc) -> acc + b, map(b -> b + 1, b), fold((b, acc) -> acc + b, map(b -> b + 1, b), 0) + fold((b, acc) -> acc + b, map(b -> b + 1, b), 0))", 60L); + } + private void assertExpr(final String expression, final Object expectedResult) { final Expr expr = Parser.parse(expression, ExprMacroTable.nil()); From 47e4231a94ce54c0a2e731a0db309098e65a3fdc Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 3 Jun 2019 15:13:04 -0700 Subject: [PATCH 27/48] use Shuttle for Parser.flatten, javadoc, cleanup --- .../org/apache/druid/math/expr/Parser.java | 146 ++++++++++-------- 1 file changed, 81 insertions(+), 65 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Parser.java b/core/src/main/java/org/apache/druid/math/expr/Parser.java index cd616f9262ef..1aa58049a269 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/core/src/main/java/org/apache/druid/math/expr/Parser.java @@ -60,7 +60,7 @@ public class Parser functionMap.put(StringUtils.toLowerCase(function.name()), function); } catch 
(Exception e) { - log.info("failed to instantiate " + clazz.getName() + ".. ignoring", e); + log.error(e, "failed to instantiate %s.. ignoring", clazz.getName()); } } } @@ -74,23 +74,36 @@ public class Parser applyFunctionMap.put(StringUtils.toLowerCase(function.name()), function); } catch (Exception e) { - log.info("failed to instantiate " + clazz.getName() + ".. ignoring", e); + log.error(e, "failed to instantiate %s.. ignoring", clazz.getName()); } } } APPLY_FUNCTIONS = ImmutableMap.copyOf(applyFunctionMap); } + /** + * Get {@link Function} by {@link Function#name()} + */ public static Function getFunction(String name) { return FUNCTIONS.get(StringUtils.toLowerCase(name)); } + /** + * Get {@link ApplyFunction} by {@link ApplyFunction#name()} + */ public static ApplyFunction getApplyFunction(String name) { return APPLY_FUNCTIONS.get(StringUtils.toLowerCase(name)); } + /** + * Parse a string into a flattened {@link Expr}. There is some overhead to this, and these objects are all immutable, + * so re-use instead of re-creating whenever possible. + * @param in expression to parse + * @param macroTable additional extensions to expression language + * @return + */ public static Expr parse(String in, ExprMacroTable macroTable) { return parse(in, macroTable, true); @@ -110,50 +123,48 @@ static Expr parse(String in, ExprMacroTable macroTable, boolean withFlatten) return withFlatten ? flatten(listener.getAST()) : listener.getAST(); } + /** + * Flatten an {@link Expr}, evaluating expressions on constants where possible to simplify the {@link Expr}. 
+ */ public static Expr flatten(Expr expr) { - if (expr instanceof BinaryOpExprBase) { - BinaryOpExprBase binary = (BinaryOpExprBase) expr; - Expr left = flatten(binary.left); - Expr right = flatten(binary.right); - if (Evals.isAllConstants(left, right)) { - expr = expr.eval(null).toExpr(); - } else if (left != binary.left || right != binary.right) { - return Evals.binaryOp(binary, left, right); - } - } else if (expr instanceof UnaryExpr) { - UnaryExpr unary = (UnaryExpr) expr; - Expr eval = flatten(unary.expr); - if (eval instanceof ConstantExpr) { - expr = expr.eval(null).toExpr(); - } else if (eval != unary.expr) { - if (expr instanceof UnaryMinusExpr) { - expr = new UnaryMinusExpr(eval); - } else if (expr instanceof UnaryNotExpr) { - expr = new UnaryNotExpr(eval); - } else { - expr = unary; // unknown type.. + return expr.visit(childExpr -> { + if (childExpr instanceof BinaryOpExprBase) { + BinaryOpExprBase binary = (BinaryOpExprBase) childExpr; + if (Evals.isAllConstants(binary.left, binary.right)) { + return childExpr.eval(null).toExpr(); + } + } else if (childExpr instanceof UnaryExpr) { + UnaryExpr unary = (UnaryExpr) childExpr; + + if (unary.expr instanceof ConstantExpr) { + return childExpr.eval(null).toExpr(); + } + } else if (childExpr instanceof FunctionExpr) { + FunctionExpr functionExpr = (FunctionExpr) childExpr; + List args = functionExpr.args; + if (Evals.isAllConstants(args)) { + return childExpr.eval(null).toExpr(); + } + } else if (childExpr instanceof ApplyFunctionExpr) { + ApplyFunctionExpr applyFunctionExpr = (ApplyFunctionExpr) childExpr; + List args = applyFunctionExpr.argsExpr; + if (Evals.isAllConstants(args)) { + return childExpr.eval(null).toExpr(); } } - } else if (expr instanceof FunctionExpr) { - FunctionExpr functionExpr = (FunctionExpr) expr; - List args = functionExpr.args; - boolean flattened = false; - List flattening = Lists.newArrayListWithCapacity(args.size()); - for (Expr arg : args) { - Expr flatten = flatten(arg); - 
flattened |= flatten != arg; - flattening.add(flatten); - } - if (Evals.isAllConstants(flattening)) { - expr = expr.eval(null).toExpr(); - } else if (flattened) { - expr = new FunctionExpr(functionExpr.function, functionExpr.name, flattening); - } - } - return expr; + return childExpr; + }); } + /** + * Applies a transformation to an {@link Expr} given a list of known (or uknown) multi-value input columns that are + * used in a scalar manner, walking the {@link Expr} tree and lifting array variables into the {@link LambdaExpr} of + * {@link ApplyFunctionExpr} and transforming the arguments of {@link FunctionExpr} {@link Function.ArrayFunction} + * @param expr expression to visit and rewrite + * @param unapplied + * @return + */ public static Expr applyUnappliedIdentifiers(Expr expr, List unapplied) { Preconditions.checkArgument(unapplied.size() > 0); @@ -164,34 +175,29 @@ public static Expr applyUnappliedIdentifiers(Expr expr, List unapplied) // any unapplied identifiers that are inside a lambda expression need that lambda expression to be rewritten Expr newExpr = expr.visit( - new Expr.Shuttle() - { - @Override - public Expr visit(Expr expr) - { - if (expr instanceof ApplyFunctionExpr) { - // try to lift unapplied arguments into the apply function lambda - return liftApplyLambda((ApplyFunctionExpr) expr, unapplied); - } else if (expr instanceof FunctionExpr && ((FunctionExpr) expr).function instanceof Function.ArrayFunction) { - // check array function arguments for unapplied identifiers to transform if necessary - FunctionExpr fnExpr = (FunctionExpr) expr; - Function.ArrayFunction arrayFn = (Function.ArrayFunction) fnExpr.function; - Set arrayInputs = arrayFn.getArrayInputs(fnExpr.args); - List newArgs = new ArrayList<>(); - for (Expr arg : fnExpr.args) { - if (arg.getIdentifierIfIdentifier() == null && arrayInputs.contains(arg)) { - Expr newArg = applyUnappliedIdentifiers(arg, unapplied); - newArgs.add(newArg); - } else { - newArgs.add(arg); - } + childExpr -> { 
+ if (childExpr instanceof ApplyFunctionExpr) { + // try to lift unapplied arguments into the apply function lambda + return liftApplyLambda((ApplyFunctionExpr) childExpr, unapplied); + } else if (childExpr instanceof FunctionExpr && ((FunctionExpr) childExpr).function instanceof Function.ArrayFunction) { + // check array function arguments for unapplied identifiers to transform if necessary + FunctionExpr fnExpr = (FunctionExpr) childExpr; + Function.ArrayFunction arrayFn = (Function.ArrayFunction) fnExpr.function; + Set arrayInputs = arrayFn.getArrayInputs(fnExpr.args); + List newArgs = new ArrayList<>(); + for (Expr arg : fnExpr.args) { + if (arg.getIdentifierIfIdentifier() == null && arrayInputs.contains(arg)) { + Expr newArg = applyUnappliedIdentifiers(arg, unapplied); + newArgs.add(newArg); + } else { + newArgs.add(arg); } - - FunctionExpr newFnExpr = new FunctionExpr(arrayFn, arrayFn.name(), newArgs); - return newFnExpr; } - return expr; + + FunctionExpr newFnExpr = new FunctionExpr(arrayFn, arrayFn.name(), newArgs); + return newFnExpr; } + return childExpr; } ); @@ -323,6 +329,9 @@ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List conflicted = @@ -332,11 +341,18 @@ public static void validateExpr(Expr expression, Expr.BindingDetails bindingDeta } } + /** + * Create {@link Expr.ObjectBinding} backed by {@link Map} to provide values for identifiers to evaluate {@link Expr} + */ public static Expr.ObjectBinding withMap(final Map bindings) { return bindings::get; } + /** + * Create {@link Expr.ObjectBinding} backed by map of {@link Supplier} to provide values for identifiers to evaluate + * {@link Expr} + */ public static Expr.ObjectBinding withSuppliers(final Map> bindings) { return (String name) -> { From 7b062ee54412464ea603a55586085572fb151d1a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 3 Jun 2019 17:42:37 -0700 Subject: [PATCH 28/48] fixes and more tests --- .../apache/druid/math/expr/ApplyFunction.java | 39 +++--- 
.../java/org/apache/druid/math/expr/Expr.java | 20 +++ .../org/apache/druid/math/expr/ExprEval.java | 8 +- .../org/apache/druid/math/expr/ExprType.java | 8 +- .../org/apache/druid/math/expr/Parser.java | 35 +++--- .../apache/druid/math/expr/ParserTest.java | 116 +++++++++++++++++- .../segment/virtual/ExpressionSelectors.java | 2 +- ...ueStringExpressionColumnValueSelector.java | 2 +- 8 files changed, 189 insertions(+), 41 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java index 47833a999f49..dd5d15a554d7 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java +++ b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java @@ -29,6 +29,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -51,32 +52,28 @@ ExprEval applyMap(LambdaExpr expr, int length, IndexableMapLambdaObjectBinding b Long[] longsOut = null; Double[] doublesOut = null; - ExprType outputType = null; - Object out = null; + ExprType elementType = null; for (int i = 0; i < length; i++) { ExprEval evaluated = expr.eval(bindings.withIndex(i)); - if (outputType == null) { - outputType = evaluated.type(); - switch (outputType) { + if (elementType == null) { + elementType = evaluated.type(); + switch (elementType) { case STRING: stringsOut = new String[length]; - out = stringsOut; break; case LONG: longsOut = new Long[length]; - out = longsOut; break; case DOUBLE: doublesOut = new Double[length]; - out = doublesOut; break; default: - throw new RE("Unhandled map function output type [%s]", outputType); + throw new RE("Unhandled map function output type [%s]", elementType); } } - switch (outputType) { + switch (elementType) { case STRING: stringsOut[i] = evaluated.asString(); break; @@ -88,7 +85,17 @@ ExprEval applyMap(LambdaExpr expr, 
int length, IndexableMapLambdaObjectBinding b break; } } - return ExprEval.bestEffortOf(out); + + switch (elementType) { + case STRING: + return ExprEval.ofStringArray(stringsOut); + case LONG: + return ExprEval.ofLongArray(longsOut); + case DOUBLE: + return ExprEval.ofDoubleArray(doublesOut); + default: + throw new RE("Unhandled map function output type [%s]", elementType); + } } } @@ -427,7 +434,7 @@ class SettableLambdaBinding implements Expr.ObjectBinding for (String lambdaIdentifier : expr.getIdentifiers()) { lambdaBindings.put(lambdaIdentifier, null); } - this.bindings = bindings; + this.bindings = bindings != null ? bindings : Collections.emptyMap()::get; } @Nullable @@ -463,7 +470,7 @@ class MapLambdaBinding implements IndexableMapLambdaObjectBinding { this.lambdaIdentifier = expr.getIdentifier(); this.arrayValues = arrayValues; - this.bindings = bindings; + this.bindings = bindings != null ? bindings : Collections.emptyMap()::get; } @Nullable @@ -500,7 +507,7 @@ class CartesianMapLambdaBinding implements IndexableMapLambdaObjectBinding lambdaIdentifiers.put(ids.get(i), i); } - this.bindings = bindings; + this.bindings = bindings != null ? bindings : Collections.emptyMap()::get; } @Nullable @@ -542,7 +549,7 @@ class FoldLambdaBinding implements IndexableFoldLambdaBinding this.accumulatorIdentifier = ids.get(1); this.arrayValues = arrayValues; this.accumulatorValue = initialAccumulator; - this.bindings = bindings; + this.bindings = bindings != null ? bindings : Collections.emptyMap()::get; } @Nullable @@ -584,7 +591,7 @@ class CartesianFoldLambdaBinding implements IndexableFoldLambdaBinding lambdaIdentifiers.put(ids.get(i), i); } this.accumulatorIdentifier = ids.get(ids.size() - 1); - this.bindings = bindings; + this.bindings = bindings != null ? 
bindings : Collections.emptyMap()::get; this.accumulatorValue = accumulatorValue; } diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index 46b09e895c66..4c6da80fe977 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -102,6 +102,9 @@ default String getIdentifierIfIdentifier() */ Expr visit(Shuttle shuttle); + /** + * Examing the usage of {@link IdentifierExpr} children of an {@link Expr}, constructing a {@link BindingDetails} + */ BindingDetails analyzeInputs(); /** @@ -140,6 +143,11 @@ interface Shuttle Expr visit(Expr expr); } + /** + * Information about the context in which {@link IdentifierExpr} are used in a greater {@link Expr}, listing + * the 'free variables' (total set of required input columns or values) and distinguishing between which identifiers + * are used as scalar values and which are used as array values. + */ class BindingDetails { private final Set freeVariables; @@ -163,21 +171,33 @@ public BindingDetails(Set freeVariables, Set scalarVariables, Se this.arrayVariables = arrayVariables; } + /** + * Get the list of required column inputs to evaluate an expression + */ public List getRequiredColumns() { return new ArrayList<>(freeVariables); } + /** + * Total set of 'free' identifiers of an {@link Expr}, that are not supplied by a {@link LambdaExpr} binding + */ public Set getFreeVariables() { return freeVariables; } + /** + * Set of identifiers which are used with scalar operators and functions + */ public Set getScalarVariables() { return scalarVariables; } + /** + * Set of identifiers which are used with array typed functions and apply functions. 
+ */ public Set getArrayVariables() { return arrayVariables; diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java index 54e766b90769..1aa855738808 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -805,7 +805,13 @@ private Long[] computeLongs() } return Arrays.stream(value).map(value -> { Long lv = GuavaUtils.tryParseLong(value); - return lv != null ? lv : Doubles.tryParse(value).longValue(); + if (lv == null) { + Double d = Doubles.tryParse(value); + if (d != null) { + lv = d.longValue(); + } + } + return lv; }).toArray(Long[]::new); } diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprType.java b/core/src/main/java/org/apache/druid/math/expr/ExprType.java index a648e07156c1..0bc1573bef56 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprType.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprType.java @@ -20,8 +20,14 @@ package org.apache.druid.math.expr; /** + * Base 'value' types of Druid expression language, all {@link Expr} must evaluate to one of these types. 
*/ public enum ExprType { - DOUBLE, LONG, STRING, DOUBLE_ARRAY, LONG_ARRAY, STRING_ARRAY + DOUBLE, + LONG, + STRING, + DOUBLE_ARRAY, + LONG_ARRAY, + STRING_ARRAY } diff --git a/core/src/main/java/org/apache/druid/math/expr/Parser.java b/core/src/main/java/org/apache/druid/math/expr/Parser.java index 1aa58049a269..348791d485f6 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/core/src/main/java/org/apache/druid/math/expr/Parser.java @@ -25,7 +25,6 @@ import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.CommonTokenStream; @@ -150,7 +149,9 @@ public static Expr flatten(Expr expr) ApplyFunctionExpr applyFunctionExpr = (ApplyFunctionExpr) childExpr; List args = applyFunctionExpr.argsExpr; if (Evals.isAllConstants(args)) { - return childExpr.eval(null).toExpr(); + if (applyFunctionExpr.analyzeInputs().getFreeVariables().size() == 0) { + return childExpr.eval(null).toExpr(); + } } } return childExpr; @@ -162,12 +163,17 @@ public static Expr flatten(Expr expr) * used in a scalar manner, walking the {@link Expr} tree and lifting array variables into the {@link LambdaExpr} of * {@link ApplyFunctionExpr} and transforming the arguments of {@link FunctionExpr} {@link Function.ArrayFunction} * @param expr expression to visit and rewrite - * @param unapplied + * @param toApply * @return */ - public static Expr applyUnappliedIdentifiers(Expr expr, List unapplied) + public static Expr applyUnappliedIdentifiers(Expr expr, Expr.BindingDetails bindingDetails, List toApply) { - Preconditions.checkArgument(unapplied.size() > 0); + if (toApply.size() == 0) { + return expr; + } + List unapplied = toApply.stream() + .filter(x -> bindingDetails.getFreeVariables().contains(x)) + .collect(Collectors.toList()); ApplyFunction fn; final 
LambdaExpr lambdaExpr; @@ -179,22 +185,21 @@ public static Expr applyUnappliedIdentifiers(Expr expr, List unapplied) if (childExpr instanceof ApplyFunctionExpr) { // try to lift unapplied arguments into the apply function lambda return liftApplyLambda((ApplyFunctionExpr) childExpr, unapplied); - } else if (childExpr instanceof FunctionExpr && ((FunctionExpr) childExpr).function instanceof Function.ArrayFunction) { + } else if (childExpr instanceof FunctionExpr) { // check array function arguments for unapplied identifiers to transform if necessary FunctionExpr fnExpr = (FunctionExpr) childExpr; - Function.ArrayFunction arrayFn = (Function.ArrayFunction) fnExpr.function; - Set arrayInputs = arrayFn.getArrayInputs(fnExpr.args); + Set arrayInputs = fnExpr.function.getArrayInputs(fnExpr.args); List newArgs = new ArrayList<>(); for (Expr arg : fnExpr.args) { if (arg.getIdentifierIfIdentifier() == null && arrayInputs.contains(arg)) { - Expr newArg = applyUnappliedIdentifiers(arg, unapplied); + Expr newArg = applyUnappliedIdentifiers(arg, bindingDetails, unapplied); newArgs.add(newArg); } else { newArgs.add(arg); } } - FunctionExpr newFnExpr = new FunctionExpr(arrayFn, arrayFn.name(), newArgs); + FunctionExpr newFnExpr = new FunctionExpr(fnExpr.function, fnExpr.function.name(), newArgs); return newFnExpr; } return childExpr; @@ -216,7 +221,7 @@ public static Expr applyUnappliedIdentifiers(Expr expr, List unapplied) if (remainingUnappliedArgs.size() == 1) { fn = new ApplyFunction.MapFunction(); IdentifierExpr lambdaArg = new IdentifierExpr(remainingUnappliedArgs.iterator().next()); - lambdaExpr = new LambdaExpr(ImmutableList.of(lambdaArg), expr); + lambdaExpr = new LambdaExpr(ImmutableList.of(lambdaArg), newExpr); args = ImmutableList.of(lambdaArg); } else { fn = new ApplyFunction.CartesianMapFunction(); @@ -227,7 +232,7 @@ public static Expr applyUnappliedIdentifiers(Expr expr, List unapplied) identifiers.add(arg); args.add(arg); } - lambdaExpr = new 
LambdaExpr(identifiers, expr); + lambdaExpr = new LambdaExpr(identifiers, newExpr); } Expr magic = new ApplyFunctionExpr(fn, fn.name(), lambdaExpr, args); @@ -303,9 +308,9 @@ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List acc + x + y + z, x, y, acc) => // cartesian_fold((x, y, z, acc) -> acc + x + y + z, x, y, z, acc) - final List newFoldArgs = new ArrayList<>(expr.argsExpr.size() + unappliedArgs.size()); + final List newFoldArgs = new ArrayList<>(expr.argsExpr.size() + unappliedLambdaBindings.size()); final List newFoldLambdaIdentifiers = - new ArrayList<>(expr.lambdaExpr.getIdentifiers().size() + unappliedArgs.size()); + new ArrayList<>(expr.lambdaExpr.getIdentifiers().size() + unappliedLambdaBindings.size()); final List existingFoldLambdaIdentifiers = expr.lambdaExpr.getIdentifierExprs(); // accumulator argument is last argument, slice it off when constructing new arg list and lambda args identifiers for (int i = 0; i < expr.argsExpr.size() - 1; i++) { @@ -316,7 +321,7 @@ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List x + 1, [1, 2, 3, 4])", "(map ([x] -> (+ x 1)), [[1, 2, 3, 4]])", "[2, 3, 4, 5]"); + validateFlatten( + "map((x) -> x + z, [1, 2, 3, 4])", + "(map ([x] -> (+ x z)), [[1, 2, 3, 4]])", + "(map ([x] -> (+ x z)), [[1, 2, 3, 4]])" + ); + } + + @Test + public void testApplyUnapplied() + { + validateApplyUnapplied("x + 1", "(+ x 1)", "(+ x 1)", ImmutableList.of()); + validateApplyUnapplied("x + y", "(+ x y)", "(map ([x] -> (+ x y)), [x])", ImmutableList.of("x")); + validateApplyUnapplied( + "x + y", + "(+ x y)", + "(cartesian_map ([x, y] -> (+ x y)), [x, y])", + ImmutableList.of("x", "y") + ); + + validateApplyUnapplied( + "map(x -> x + y, x)", + "(map ([x] -> (+ x y)), [x])", + "(cartesian_map ([x, y] -> (+ x y)), [x, y])", + ImmutableList.of("y") + ); + validateApplyUnapplied( + "fold((x, acc) -> acc + x + y, x, 0)", + "(fold ([x, acc] -> (+ (+ acc x) y)), [x, 0])", + "(cartesian_fold ([x, y, 
acc] -> (+ (+ acc x) y)), [x, y, 0])", + ImmutableList.of("y") + ); + validateApplyUnapplied( + "z + fold((x, acc) -> acc + x + y, x, 0)", + "(+ z (fold ([x, acc] -> (+ (+ acc x) y)), [x, 0]))", + "(+ z (cartesian_fold ([x, y, acc] -> (+ (+ acc x) y)), [x, y, 0]))", + ImmutableList.of("y") + ); + validateApplyUnapplied( + "z + fold((x, acc) -> acc + x + y, x, 0)", + "(+ z (fold ([x, acc] -> (+ (+ acc x) y)), [x, 0]))", + "(map ([z] -> (+ z (cartesian_fold ([x, y, acc] -> (+ (+ acc x) y)), [x, y, 0]))), [z])", + ImmutableList.of("y", "z") + ); + validateApplyUnapplied( + "array_to_string(concat(x, 'hello'), ',')", + "(array_to_string [(concat [x, hello]), ,])", + "(array_to_string [(map ([x] -> (concat [x, hello])), [x]), ,])", + ImmutableList.of("x", "y") + ); + validateApplyUnapplied( + "cast(x, 'LONG')", + "(cast [x, LONG])", + "(map ([x] -> (cast [x, LONG])), [x])", + ImmutableList.of("x") + ); + validateApplyUnapplied( + "cartesian_map((x,y) -> x + y, x, y)", + "(cartesian_map ([x, y] -> (+ x y)), [x, y])", + "(cartesian_map ([x, y] -> (+ x y)), [x, y])", + ImmutableList.of("y") + ); + validateApplyUnapplied( + "cast(x, 'LONG_ARRAY')", + "(cast [x, LONG_ARRAY])", + "(cast [x, LONG_ARRAY])", + ImmutableList.of("x") + ); } @@ -328,6 +419,19 @@ private void validateParser( Assert.assertEquals(expression, arrays, deets.getArrayVariables()); } + private void validateApplyUnapplied( + String expression, + String unapplied, + String applied, + List identifiers + ) + { + final Expr parsed = Parser.parse(expression, ExprMacroTable.nil()); + final Expr transformed = Parser.applyUnappliedIdentifiers(parsed, parsed.analyzeInputs(), identifiers); + Assert.assertEquals(expression, unapplied, parsed.toString()); + Assert.assertEquals(applied, applied, transformed.toString()); + } + private void validateConstantExpression(String expression, Object expected) { Assert.assertEquals( diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index eb3c5bfd1891..70a8edf783c2 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -189,7 +189,7 @@ public static ColumnValueSelector makeExprEvalSelector( .collect(Collectors.toList()); final Expr finalExpr; if (needsApplied.size() > 0) { - finalExpr = Parser.applyUnappliedIdentifiers(expression, needsApplied); + finalExpr = Parser.applyUnappliedIdentifiers(expression, exprDetails, needsApplied); } else { finalExpr = expression; } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java index 30d0e063e05e..e462bb4cd071 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java @@ -70,7 +70,7 @@ public ExprEval getObject() if (transformedCache.containsKey(key)) { return transformedCache.get(key).eval(bindings); } - Expr transformed = Parser.applyUnappliedIdentifiers(expression, arrayBindings); + Expr transformed = Parser.applyUnappliedIdentifiers(expression, baseExprBindingDetails, arrayBindings); transformedCache.put(key, transformed); return transformed.eval(bindings); } From b832cf9eea8b7987cc4a95c66320131c2bceaac0 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 3 Jun 2019 18:19:44 -0700 Subject: [PATCH 29/48] unused import --- core/src/main/java/org/apache/druid/math/expr/Parser.java | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Parser.java 
b/core/src/main/java/org/apache/druid/math/expr/Parser.java index 348791d485f6..be03395db08b 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/core/src/main/java/org/apache/druid/math/expr/Parser.java @@ -21,7 +21,6 @@ import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; From d046e3d4f9820013063e2ab6390c2ddc0fa8c85b Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 4 Jun 2019 01:25:37 -0700 Subject: [PATCH 30/48] fixes --- .../query/expression/TimestampCeilExprMacro.java | 10 +++++----- .../query/expression/TimestampFloorExprMacro.java | 13 +++++++------ .../query/expression/TimestampShiftExprMacro.java | 10 +++++----- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java index a1044ce2ee28..6b32dd365dcf 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java @@ -54,13 +54,13 @@ public Expr apply(final List args) } } - private static class TimestampCeilExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + private static class TimestampCeilExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr { private final Granularity granularity; TimestampCeilExpr(final List args) { - super(args.get(0)); + super(args); this.granularity = getGranularity(args, ExprUtils.nilBindings()); } @@ -73,14 +73,14 @@ public ExprEval eval(final ObjectBinding bindings) // Return null if the argument if null. 
return ExprEval.of(null); } - return ExprEval.of(granularity.bucketEnd(DateTimes.utc(arg.eval(bindings).asLong())).getMillis()); + return ExprEval.of(granularity.bucketEnd(DateTimes.utc(args.get(0).eval(bindings).asLong())).getMillis()); } @Override public Expr visit(Shuttle shuttle) { - Expr newArg = arg.visit(shuttle); - return shuttle.visit(new TimestampCeilExpr(ImmutableList.of(newArg))); + List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); + return shuttle.visit(new TimestampCeilExpr(newArgs)); } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java index 216dafc526a6..3d6918b6cfde 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java @@ -63,13 +63,13 @@ private static PeriodGranularity computeGranularity(final List args, final ); } - public static class TimestampFloorExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + public static class TimestampFloorExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr { private final PeriodGranularity granularity; TimestampFloorExpr(final List args) { - super(args.get(0)); + super(args); this.granularity = computeGranularity(args, ExprUtils.nilBindings()); } @@ -78,7 +78,7 @@ public static class TimestampFloorExpr extends ExprMacroTable.BaseSingleScalarAr */ public Expr getArg() { - return arg; + return args.get(0); } /** @@ -93,7 +93,7 @@ public PeriodGranularity getGranularity() @Override public ExprEval eval(final ObjectBinding bindings) { - ExprEval eval = arg.eval(bindings); + ExprEval eval = args.get(0).eval(bindings); if (eval.isNumericNull()) { // Return null if the argument if null. 
return ExprEval.of(null); @@ -104,8 +104,9 @@ public ExprEval eval(final ObjectBinding bindings) @Override public Expr visit(Shuttle shuttle) { - Expr newArg = arg.visit(shuttle); - return shuttle.visit(new TimestampFloorExpr(ImmutableList.of(newArg))); + List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); + + return shuttle.visit(new TimestampFloorExpr(newArgs)); } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java index 178bc98923db..2716334c69ce 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java @@ -72,7 +72,7 @@ private static int getStep(final List args, final Expr.ObjectBinding bindi return args.get(2).eval(bindings).asInt(); } - private static class TimestampShiftExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + private static class TimestampShiftExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr { private final Chronology chronology; private final Period period; @@ -80,7 +80,7 @@ private static class TimestampShiftExpr extends ExprMacroTable.BaseSingleScalarA TimestampShiftExpr(final List args) { - super(args.get(0)); + super(args); final PeriodGranularity granularity = getGranularity(args, ExprUtils.nilBindings()); period = granularity.getPeriod(); chronology = ISOChronology.getInstance(granularity.getTimeZone()); @@ -91,14 +91,14 @@ private static class TimestampShiftExpr extends ExprMacroTable.BaseSingleScalarA @Override public ExprEval eval(final ObjectBinding bindings) { - return ExprEval.of(chronology.add(period, arg.eval(bindings).asLong(), step)); + return ExprEval.of(chronology.add(period, args.get(0).eval(bindings).asLong(), step)); } @Override public Expr visit(Shuttle shuttle) { - Expr newArg = 
arg.visit(shuttle); - return shuttle.visit(new TimestampShiftExpr(ImmutableList.of(newArg))); + List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); + return shuttle.visit(new TimestampShiftExpr(newArgs)); } } From 0ea0f71ec326d5ceea9c1b6e008e2be2f8a62839 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 4 Jun 2019 19:25:13 -0700 Subject: [PATCH 31/48] javadocs, cleanup, refactors --- .../org/apache/druid/math/expr/antlr/Expr.g4 | 6 +- .../java/org/apache/druid/math/expr/Expr.java | 70 ++-- .../org/apache/druid/math/expr/ExprEval.java | 3 +- .../org/apache/druid/math/expr/Function.java | 336 ++++++++++-------- .../org/apache/druid/math/expr/Parser.java | 24 +- .../apache/druid/math/expr/ParserTest.java | 11 +- 6 files changed, 250 insertions(+), 200 deletions(-) diff --git a/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 b/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 index b2f98c23b296..d193e81d7906 100644 --- a/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 +++ b/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 @@ -29,9 +29,9 @@ expr : 'null' # null | DOUBLE # doubleExpr | LONG # longExpr | STRING # string - | '[' DOUBLE (',' DOUBLE)* ']' # doubleArray - | '[' LONG (',' LONG)* ']' # longArray - | '[' STRING (',' STRING)* ']' # stringArray + | '[' DOUBLE (',' DOUBLE)* ']' # doubleArray + | '[' LONG (',' LONG)* ']' # longArray + | '[' STRING (',' STRING)* ']' # stringArray | '[]' # emptyArray ; diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index 4c6da80fe977..3f2acbffca0e 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -20,6 +20,7 @@ package org.apache.druid.math.expr; import com.google.common.base.Preconditions; +import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableSet; import 
com.google.common.collect.Sets; import com.google.common.math.LongMath; @@ -29,6 +30,7 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.guava.Comparators; +import org.skife.jdbi.v2.sqlobject.Bind; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -39,6 +41,7 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.function.Supplier; import java.util.stream.Collectors; /** @@ -46,22 +49,20 @@ */ public interface Expr { + /** + * Indicates expression is a constant whose literal value can be extracted by {@link Expr#getLiteralValue()}, + * making evaluating with arguments and bindings unecessary + */ default boolean isLiteral() { // Overridden by things that are literals. return false; } - default boolean isArray() - { - // Overridden by things that are arrays. - return false; - } - /** * Returns the value of expr if expr is a literal, or throws an exception otherwise. 
* - * @return expr's literal value + * @return {@link ConstantExpr}'s literal value * * @throws IllegalStateException if expr is not a literal */ @@ -268,15 +269,6 @@ public BindingDetails analyzeInputs() } } -abstract class ConstantArrayExpr extends ConstantExpr -{ - @Override - public boolean isArray() - { - return true; - } -} - class LongExpr extends ConstantExpr { private final Long value; @@ -307,7 +299,7 @@ public ExprEval eval(ObjectBinding bindings) } } -class LongArrayExpr extends ConstantArrayExpr +class LongArrayExpr extends ConstantExpr { private final Long[] value; @@ -367,7 +359,7 @@ public ExprEval eval(ObjectBinding bindings) } } -class StringArrayExpr extends ConstantArrayExpr +class StringArrayExpr extends ConstantExpr { private final String[] value; @@ -427,7 +419,7 @@ public ExprEval eval(ObjectBinding bindings) } } -class DoubleArrayExpr extends ConstantArrayExpr +class DoubleArrayExpr extends ConstantExpr { private final Double[] value; @@ -655,6 +647,9 @@ class ApplyFunctionExpr implements Expr final String name; final LambdaExpr lambdaExpr; final List argsExpr; + final BindingDetails bindingDetails; + final BindingDetails lambdaBindingDetails; + final List argsBindingDetails; ApplyFunctionExpr(ApplyFunction function, String name, LambdaExpr expr, List args) { @@ -662,6 +657,27 @@ class ApplyFunctionExpr implements Expr this.name = name; this.argsExpr = args; this.lambdaExpr = expr; + + argsBindingDetails = new ArrayList<>(); + BindingDetails accumulator = new BindingDetails(); + for (Expr arg : argsExpr) { + BindingDetails argDetails = arg.analyzeInputs(); + argsBindingDetails.add(argDetails); + accumulator = accumulator.merge(argDetails); + } + + final Set arrayVariables = new HashSet<>(); + Set arrayArgs = function.getArrayInputs(argsExpr); + + for (Expr arg : arrayArgs) { + String s = arg.getIdentifierIfIdentifier(); + if (s != null) { + arrayVariables.add(s); + } + } + + lambdaBindingDetails = lambdaExpr.analyzeInputs(); + 
bindingDetails = accumulator.merge(lambdaBindingDetails).mergeWithArrays(arrayVariables); } @Override @@ -698,21 +714,7 @@ public Expr visit(Shuttle shuttle) @Override public BindingDetails analyzeInputs() { - BindingDetails accumulator = new BindingDetails(); - for (Expr arg : argsExpr) { - accumulator = accumulator.merge(arg.analyzeInputs()); - } - - final Set arrayVariables = new HashSet<>(); - Set arrayArgs = function.getArrayInputs(argsExpr); - - for (Expr arg : arrayArgs) { - String s = arg.getIdentifierIfIdentifier(); - if (s != null) { - arrayVariables.add(s); - } - } - return accumulator.merge(lambdaExpr.analyzeInputs()).mergeWithArrays(arrayVariables); + return bindingDetails; } } diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java index 1aa855738808..3f243729db6b 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -29,6 +29,7 @@ import java.util.stream.Collectors; /** + * Generic result holder for evaluated {@link Expr} containing the value and {@link ExprType} of the value to allow */ public abstract class ExprEval { @@ -116,7 +117,7 @@ public static ExprEval bestEffortOf(@Nullable Object val) return new StringExprEval(val == null ? null : String.valueOf(val)); } - // Cached String values. Protected so they can be used by subclasses. 
+ // Cached String values private boolean stringValueValid = false; private String stringValue; diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index cda1989bfeb7..8dd1614a7dc2 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -41,13 +41,21 @@ import java.util.stream.Stream; /** + * Base interface describing the mechanism used to evaluate a {@link FunctionExpr} + * * Do NOT remove "unused" members in this class. They are used by generated Antlr */ @SuppressWarnings("unused") interface Function { + /** + * Name of the function. + */ String name(); + /** + * Evaluate the function, given a list of arguments and a set of bindings to provide values for {@link IdentifierExpr}. + */ ExprEval apply(List args, Expr.ObjectBinding bindings); /** @@ -67,7 +75,10 @@ default Set getArrayInputs(List args) return Collections.emptySet(); } - abstract class SingleParam implements Function + /** + * Base class for a single variable input {@link Function} implementation + */ + abstract class UnivariateFunction implements Function { @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) @@ -82,7 +93,10 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) protected abstract ExprEval eval(ExprEval param); } - abstract class DoubleParam implements Function + /** + * Base class for a 2 variable input {@link Function} implementation + */ + abstract class BivariateFunction implements Function { @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) @@ -98,7 +112,11 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) protected abstract ExprEval eval(ExprEval x, ExprEval y); } - abstract class SingleParamMath extends SingleParam + /** + * Base class for a single variable input mathematical {@link Function}, with specialized 'eval' implementations that + * that 
operate on primitive number types + */ + abstract class UnivariateMathFunction extends UnivariateFunction { @Override protected final ExprEval eval(ExprEval param) @@ -125,7 +143,11 @@ protected ExprEval eval(double param) } } - abstract class DoubleParamMath extends DoubleParam + /** + * Base class for a 2 variable input mathematical {@link Function}, with specialized 'eval' implementations that + * operate on primitive number types + */ + abstract class BivariateMathFunction extends BivariateFunction { @Override protected final ExprEval eval(ExprEval x, ExprEval y) @@ -151,7 +173,11 @@ protected ExprEval eval(double x, double y) } } - abstract class DoubleParamString extends DoubleParam + /** + * Base class for a 2 variable input {@link Function} whose first argument is a {@link ExprType#STRING} and second + * argument is {@link ExprType#LONG} + */ + abstract class StringLongFunction extends BivariateFunction { @Override protected final ExprEval eval(ExprEval x, ExprEval y) @@ -168,6 +194,90 @@ protected final ExprEval eval(ExprEval x, ExprEval y) protected abstract ExprEval eval(String x, int y); } + /** + * {@link Function} that takes 1 array operand and 1 scalar operand + */ + abstract class ArrayScalarFunction implements Function + { + void validateArguments(List args) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + } + + @Override + public Set getScalarInputs(List args) + { + validateArguments(args); + return ImmutableSet.of(args.get(1)); + } + + @Override + public Set getArrayInputs(List args) + { + return ImmutableSet.of(args.get(0)); + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + validateArguments(args); + final ExprEval arrayExpr = args.get(0).eval(bindings); + final ExprEval scalarExpr = args.get(1).eval(bindings); + if (arrayExpr.asArray() == null) { + return ExprEval.of(null); + } + return doApply(arrayExpr, scalarExpr); + } + + abstract ExprEval doApply(ExprEval 
arrayExpr, ExprEval scalarExpr); + } + + /** + * {@link Function} that takes 2 array operands + */ + abstract class ArraysFunction implements Function + { + void validateArguments(List args) + { + if (args.size() != 2) { + throw new IAE("Function[%s] needs 2 argument", name()); + } + } + + @Override + public Set getScalarInputs(List args) + { + return Collections.emptySet(); + } + + @Override + public Set getArrayInputs(List args) + { + validateArguments(args); + return ImmutableSet.copyOf(args); + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + validateArguments(args); + final ExprEval arrayExpr1 = args.get(0).eval(bindings); + final ExprEval arrayExpr2 = args.get(1).eval(bindings); + + if (arrayExpr1.asArray() == null || arrayExpr2.asArray() == null) { + return ExprEval.of(null); + } + + return doApply(arrayExpr1, arrayExpr2); + } + + abstract ExprEval doApply(ExprEval lhsExpr, ExprEval rhsExpr); + } + + // ------------------------------ implementations ------------------------------ + class ParseLong implements Function { @Override @@ -223,7 +333,7 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() >= 1) { + if (args.size() > 0) { throw new IAE("Function[%s] needs 0 argument", name()); } @@ -231,7 +341,7 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } - class Abs extends SingleParamMath + class Abs extends UnivariateMathFunction { @Override public String name() @@ -252,7 +362,7 @@ protected ExprEval eval(double param) } } - class Acos extends SingleParamMath + class Acos extends UnivariateMathFunction { @Override public String name() @@ -267,7 +377,7 @@ protected ExprEval eval(double param) } } - class Asin extends SingleParamMath + class Asin extends UnivariateMathFunction { @Override public String name() @@ -282,7 +392,7 @@ protected ExprEval eval(double param) } } - class Atan extends SingleParamMath + class Atan extends 
UnivariateMathFunction { @Override public String name() @@ -297,7 +407,7 @@ protected ExprEval eval(double param) } } - class Cbrt extends SingleParamMath + class Cbrt extends UnivariateMathFunction { @Override public String name() @@ -312,7 +422,7 @@ protected ExprEval eval(double param) } } - class Ceil extends SingleParamMath + class Ceil extends UnivariateMathFunction { @Override public String name() @@ -327,7 +437,7 @@ protected ExprEval eval(double param) } } - class Cos extends SingleParamMath + class Cos extends UnivariateMathFunction { @Override public String name() @@ -342,7 +452,7 @@ protected ExprEval eval(double param) } } - class Cosh extends SingleParamMath + class Cosh extends UnivariateMathFunction { @Override public String name() @@ -357,7 +467,7 @@ protected ExprEval eval(double param) } } - class Cot extends SingleParamMath + class Cot extends UnivariateMathFunction { @Override public String name() @@ -372,7 +482,7 @@ protected ExprEval eval(double param) } } - class Div extends DoubleParamMath + class Div extends BivariateMathFunction { @Override public String name() @@ -393,7 +503,7 @@ protected ExprEval eval(final double x, final double y) } } - class Exp extends SingleParamMath + class Exp extends UnivariateMathFunction { @Override public String name() @@ -408,7 +518,7 @@ protected ExprEval eval(double param) } } - class Expm1 extends SingleParamMath + class Expm1 extends UnivariateMathFunction { @Override public String name() @@ -423,7 +533,7 @@ protected ExprEval eval(double param) } } - class Floor extends SingleParamMath + class Floor extends UnivariateMathFunction { @Override public String name() @@ -438,7 +548,7 @@ protected ExprEval eval(double param) } } - class GetExponent extends SingleParamMath + class GetExponent extends UnivariateMathFunction { @Override public String name() @@ -453,7 +563,7 @@ protected ExprEval eval(double param) } } - class Log extends SingleParamMath + class Log extends UnivariateMathFunction { @Override 
public String name() @@ -468,7 +578,7 @@ protected ExprEval eval(double param) } } - class Log10 extends SingleParamMath + class Log10 extends UnivariateMathFunction { @Override public String name() @@ -483,7 +593,7 @@ protected ExprEval eval(double param) } } - class Log1p extends SingleParamMath + class Log1p extends UnivariateMathFunction { @Override public String name() @@ -498,7 +608,7 @@ protected ExprEval eval(double param) } } - class NextUp extends SingleParamMath + class NextUp extends UnivariateMathFunction { @Override public String name() @@ -513,7 +623,7 @@ protected ExprEval eval(double param) } } - class Rint extends SingleParamMath + class Rint extends UnivariateMathFunction { @Override public String name() @@ -576,7 +686,7 @@ private ExprEval eval(ExprEval param, int scale) } } - class Signum extends SingleParamMath + class Signum extends UnivariateMathFunction { @Override public String name() @@ -591,7 +701,7 @@ protected ExprEval eval(double param) } } - class Sin extends SingleParamMath + class Sin extends UnivariateMathFunction { @Override public String name() @@ -606,7 +716,7 @@ protected ExprEval eval(double param) } } - class Sinh extends SingleParamMath + class Sinh extends UnivariateMathFunction { @Override public String name() @@ -621,7 +731,7 @@ protected ExprEval eval(double param) } } - class Sqrt extends SingleParamMath + class Sqrt extends UnivariateMathFunction { @Override public String name() @@ -636,7 +746,7 @@ protected ExprEval eval(double param) } } - class Tan extends SingleParamMath + class Tan extends UnivariateMathFunction { @Override public String name() @@ -651,7 +761,7 @@ protected ExprEval eval(double param) } } - class Tanh extends SingleParamMath + class Tanh extends UnivariateMathFunction { @Override public String name() @@ -666,7 +776,7 @@ protected ExprEval eval(double param) } } - class ToDegrees extends SingleParamMath + class ToDegrees extends UnivariateMathFunction { @Override public String name() @@ -681,7 
+791,7 @@ protected ExprEval eval(double param) } } - class ToRadians extends SingleParamMath + class ToRadians extends UnivariateMathFunction { @Override public String name() @@ -696,7 +806,7 @@ protected ExprEval eval(double param) } } - class Ulp extends SingleParamMath + class Ulp extends UnivariateMathFunction { @Override public String name() @@ -711,7 +821,7 @@ protected ExprEval eval(double param) } } - class Atan2 extends DoubleParamMath + class Atan2 extends BivariateMathFunction { @Override public String name() @@ -726,7 +836,7 @@ protected ExprEval eval(double y, double x) } } - class CopySign extends DoubleParamMath + class CopySign extends BivariateMathFunction { @Override public String name() @@ -741,7 +851,7 @@ protected ExprEval eval(double x, double y) } } - class Hypot extends DoubleParamMath + class Hypot extends BivariateMathFunction { @Override public String name() @@ -756,7 +866,7 @@ protected ExprEval eval(double x, double y) } } - class Remainder extends DoubleParamMath + class Remainder extends BivariateMathFunction { @Override public String name() @@ -771,7 +881,7 @@ protected ExprEval eval(double x, double y) } } - class Max extends DoubleParamMath + class Max extends BivariateMathFunction { @Override public String name() @@ -792,7 +902,7 @@ protected ExprEval eval(double x, double y) } } - class Min extends DoubleParamMath + class Min extends BivariateMathFunction { @Override public String name() @@ -813,7 +923,7 @@ protected ExprEval eval(double x, double y) } } - class NextAfter extends DoubleParamMath + class NextAfter extends BivariateMathFunction { @Override public String name() @@ -828,7 +938,7 @@ protected ExprEval eval(double x, double y) } } - class Pow extends DoubleParamMath + class Pow extends BivariateMathFunction { @Override public String name() @@ -843,7 +953,7 @@ protected ExprEval eval(double x, double y) } } - class Scalb extends DoubleParam + class Scalb extends BivariateFunction { @Override public String name() @@ 
-942,7 +1052,7 @@ public ExprEval apply(final List args, final Expr.ObjectBinding bindings) } } - class CastFunc extends DoubleParam + class CastFunc extends BivariateFunction { @Override public String name() @@ -1240,7 +1350,7 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } - class RightFunc extends DoubleParamString + class RightFunc extends StringLongFunction { @Override public String name() @@ -1262,7 +1372,7 @@ protected ExprEval eval(String x, int y) } } - class LeftFunc extends DoubleParamString + class LeftFunc extends StringLongFunction { @Override public String name() @@ -1354,7 +1464,7 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } - class ReverseFunc extends SingleParam + class ReverseFunc extends UnivariateFunction { @Override public String name() @@ -1376,7 +1486,7 @@ protected ExprEval eval(ExprEval param) } } - class RepeatFunc extends DoubleParamString + class RepeatFunc extends StringLongFunction { @Override public String name() @@ -1515,167 +1625,81 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } - class StringToArrayFunction implements Function + class ArrayLengthFunction implements Function { @Override public String name() { - return "string_to_array"; - } - - void validateArguments(List args) - { - if (args.size() != 2) { - throw new IAE("Function[%s] needs 2 argument", name()); - } + return "array_length"; } @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - validateArguments(args); - + if (args.size() != 1) { + throw new IAE("Function[%s] needs 1 argument", name()); + } final ExprEval expr = args.get(0).eval(bindings); - final String arrayString = expr.asString(); - if (arrayString == null) { + final Object[] array = expr.asArray(); + if (array == null) { return ExprEval.of(null); } - final String split = args.get(1).eval(bindings).asString(); - return ExprEval.ofStringArray(arrayString.split(split != null ? 
split : "")); + return ExprEval.ofLong(array.length); } - @Override - public Set getScalarInputs(List args) - { - validateArguments(args); - return ImmutableSet.copyOf(args); - } - } - /** - * Function that operates on array typed operands - */ - interface ArrayFunction extends Function - { - default void validateArguments(List args) + @Override + public Set getArrayInputs(List args) { if (args.size() != 1) { throw new IAE("Function[%s] needs 1 argument", name()); } - } - - @Override - default Set getArrayInputs(List args) - { - validateArguments(args); return ImmutableSet.of(args.get(0)); } @Override - default Set getScalarInputs(List args) + public Set getScalarInputs(List args) { return Collections.emptySet(); } } - /** - * {@link ArraysFunction} that takes 1 array operand and 1 scalar operand - */ - abstract class ArrayScalarFunction implements ArrayFunction + class StringToArrayFunction implements Function { @Override - public void validateArguments(List args) - { - if (args.size() != 2) { - throw new IAE("Function[%s] needs 2 argument", name()); - } - } - - @Override - public Set getScalarInputs(List args) - { - validateArguments(args); - return ImmutableSet.of(args.get(1)); - } - - @Override - public Set getArrayInputs(List args) - { - return ImmutableSet.of(args.get(0)); - } - - @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + public String name() { - validateArguments(args); - final ExprEval arrayExpr = args.get(0).eval(bindings); - final ExprEval scalarExpr = args.get(1).eval(bindings); - if (arrayExpr.asArray() == null) { - return ExprEval.of(null); - } - return doApply(arrayExpr, scalarExpr); + return "string_to_array"; } - abstract ExprEval doApply(ExprEval arrayExpr, ExprEval scalarExpr); - } - - /** - * {@link ArraysFunction} that takes 2 array operands - */ - abstract class ArraysFunction implements ArrayFunction - { - @Override - public void validateArguments(List args) + void validateArguments(List args) { if 
(args.size() != 2) { throw new IAE("Function[%s] needs 2 argument", name()); } } - @Override - public Set getArrayInputs(List args) - { - validateArguments(args); - return ImmutableSet.copyOf(args); - } - @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { validateArguments(args); - final ExprEval arrayExpr1 = args.get(0).eval(bindings); - final ExprEval arrayExpr2 = args.get(1).eval(bindings); - if (arrayExpr1.asArray() == null || arrayExpr2.asArray() == null) { + final ExprEval expr = args.get(0).eval(bindings); + final String arrayString = expr.asString(); + if (arrayString == null) { return ExprEval.of(null); } - return doApply(arrayExpr1, arrayExpr2); - } - - abstract ExprEval doApply(ExprEval lhsExpr, ExprEval rhsExpr); - } - - class ArrayLengthFunction implements ArrayFunction - { - @Override - public String name() - { - return "array_length"; + final String split = args.get(1).eval(bindings).asString(); + return ExprEval.ofStringArray(arrayString.split(split != null ? 
split : "")); } @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + public Set getScalarInputs(List args) { validateArguments(args); - final ExprEval expr = args.get(0).eval(bindings); - final Object[] array = expr.asArray(); - if (array == null) { - return ExprEval.of(null); - } - - return ExprEval.ofLong(array.length); + return ImmutableSet.copyOf(args); } } diff --git a/core/src/main/java/org/apache/druid/math/expr/Parser.java b/core/src/main/java/org/apache/druid/math/expr/Parser.java index be03395db08b..59ed0dcbe8ab 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/core/src/main/java/org/apache/druid/math/expr/Parser.java @@ -160,7 +160,7 @@ public static Expr flatten(Expr expr) /** * Applies a transformation to an {@link Expr} given a list of known (or uknown) multi-value input columns that are * used in a scalar manner, walking the {@link Expr} tree and lifting array variables into the {@link LambdaExpr} of - * {@link ApplyFunctionExpr} and transforming the arguments of {@link FunctionExpr} {@link Function.ArrayFunction} + * {@link ApplyFunctionExpr} and transforming the arguments of {@link FunctionExpr} * @param expr expression to visit and rewrite * @param toApply * @return @@ -249,22 +249,36 @@ public static Expr applyUnappliedIdentifiers(Expr expr, Expr.BindingDetails bind */ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List unappliedArgs) { - Expr.BindingDetails lambdaBinding = expr.lambdaExpr.analyzeInputs(); + + // recursively evaluate arguments to ensure they are properly transformed into arrays as necessary + List unappliedInThisApply = + unappliedArgs.stream() + .filter(u -> !expr.bindingDetails.getArrayVariables().contains(u)) + .collect(Collectors.toList()); + + List newArgs = new ArrayList<>(); + for (int i = 0; i < expr.argsExpr.size(); i++) { + newArgs.add(applyUnappliedIdentifiers( + expr.argsExpr.get(i), + expr.argsBindingDetails.get(i), + unappliedInThisApply) + 
); + } + // this will _not_ include the lambda identifiers.. anything in this list needs to be applied - List unappliedLambdaBindings = lambdaBinding.getFreeVariables() + List unappliedLambdaBindings = expr.lambdaBindingDetails.getFreeVariables() .stream() .filter(unappliedArgs::contains) .map(IdentifierExpr::new) .collect(Collectors.toList()); if (unappliedLambdaBindings.size() == 0) { - return expr; + return new ApplyFunctionExpr(expr.function, expr.name, expr.lambdaExpr, newArgs); } final ApplyFunction newFn; final ApplyFunctionExpr newExpr; - final List newArgs = new ArrayList<>(expr.argsExpr); newArgs.addAll(unappliedLambdaBindings); switch (expr.function.name()) { diff --git a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java index aae548400753..772b1c3090f0 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java @@ -328,6 +328,7 @@ public void testApplyFunctions() public void testApplyUnapplied() { validateApplyUnapplied("x + 1", "(+ x 1)", "(+ x 1)", ImmutableList.of()); + validateApplyUnapplied("x + 1", "(+ x 1)", "(+ x 1)", ImmutableList.of("z")); validateApplyUnapplied("x + y", "(+ x y)", "(map ([x] -> (+ x y)), [x])", ImmutableList.of("x")); validateApplyUnapplied( "x + y", @@ -342,6 +343,12 @@ public void testApplyUnapplied() "(cartesian_map ([x, y] -> (+ x y)), [x, y])", ImmutableList.of("y") ); + validateApplyUnapplied( + "map(x -> x + 1, x + 1)", + "(map ([x] -> (+ x 1)), [(+ x 1)])", + "(map ([x] -> (+ x 1)), [(map ([x] -> (+ x 1)), [x])])", + ImmutableList.of("x") + ); validateApplyUnapplied( "fold((x, acc) -> acc + x + y, x, 0)", "(fold ([x, acc] -> (+ (+ acc x) y)), [x, 0])", @@ -427,7 +434,9 @@ private void validateApplyUnapplied( ) { final Expr parsed = Parser.parse(expression, ExprMacroTable.nil()); - final Expr transformed = Parser.applyUnappliedIdentifiers(parsed, 
parsed.analyzeInputs(), identifiers); + Expr.BindingDetails deets = parsed.analyzeInputs(); + Parser.validateExpr(parsed, deets); + final Expr transformed = Parser.applyUnappliedIdentifiers(parsed, deets, identifiers); Assert.assertEquals(expression, unapplied, parsed.toString()); Assert.assertEquals(applied, applied, transformed.toString()); } From 7dabcd1b555bf1869d6bb2de86a745f88e2473c8 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 4 Jun 2019 22:14:44 -0700 Subject: [PATCH 32/48] fix imports --- core/src/main/java/org/apache/druid/math/expr/Expr.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index 3f2acbffca0e..3e650a66ec7e 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -20,7 +20,6 @@ package org.apache.druid.math.expr; import com.google.common.base.Preconditions; -import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import com.google.common.math.LongMath; @@ -30,7 +29,6 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.guava.Comparators; -import org.skife.jdbi.v2.sqlobject.Bind; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -41,7 +39,6 @@ import java.util.List; import java.util.Objects; import java.util.Set; -import java.util.function.Supplier; import java.util.stream.Collectors; /** From 61f03b46efd230350911e3ef6691006e4d3528fc Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 4 Jun 2019 23:45:30 -0700 Subject: [PATCH 33/48] more javadoc --- .../apache/druid/math/expr/ApplyFunction.java | 152 +++++++++++++++++- 1 file changed, 144 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java 
b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java index dd5d15a554d7..3853150e69a6 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java +++ b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java @@ -36,18 +36,42 @@ import java.util.Set; import java.util.stream.Stream; +/** + * Base interface describing the mechanism used to evaluate an {@link ApplyFunctionExpr}, which 'applies' a + * {@link LambdaExpr} to one or more array {@link Expr} + */ public interface ApplyFunction { + /** + * Name of the function + */ String name(); + /** + * Apply {@link LambdaExpr} to argument list of {@link Expr} given a set of outer {@link Expr.ObjectBinding}. These + * outer bindings will be used to form the scope for the bindings used to evaluate the {@link LambdaExpr}, which use + * the array inputs to supply scalar values to use as bindings for {@link IdentifierExpr} in the lambda body. + */ ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings); + /** + * Get list of input arguments which must evaluate to an array {@link ExprType} + */ Set getArrayInputs(List args); + /** + * Base class for "map" functions, which are a class of {@link ApplyFunction} which take a lambda function that is + * mapped to the values of an {@link IndexableMapLambdaObjectBinding} which is created from the outer + * {@link Expr.ObjectBinding} and the values of the array {@link Expr} argument(s) + */ abstract class BaseMapFunction implements ApplyFunction { - ExprEval applyMap(LambdaExpr expr, int length, IndexableMapLambdaObjectBinding bindings) + /** + * Evaluate {@link LambdaExpr} against every index position of an {@link IndexableMapLambdaObjectBinding} + */ + ExprEval applyMap(LambdaExpr expr, IndexableMapLambdaObjectBinding bindings) { + final int length = bindings.getLength(); String[] stringsOut = null; Long[] longsOut = null; Double[] doublesOut = null; @@ -99,6 +123,9 @@ ExprEval applyMap(LambdaExpr expr, int length, 
IndexableMapLambdaObjectBinding b } } + /** + * Map the scalar values of a single array input {@link Expr} to a single argument {@link LambdaExpr} + */ class MapFunction extends BaseMapFunction { static final String NAME = "map"; @@ -125,7 +152,7 @@ public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBin } MapLambdaBinding lambdaBinding = new MapLambdaBinding(array, lambdaExpr, bindings); - return applyMap(lambdaExpr, array.length, lambdaBinding); + return applyMap(lambdaExpr, lambdaBinding); } @Override @@ -139,6 +166,9 @@ public Set getArrayInputs(List args) } } + /** + * Map the cartesian product of 'n' array input arguments to an 'n' argument {@link LambdaExpr} + */ class CartesianMapFunction extends BaseMapFunction { static final String NAME = "cartesian_map"; @@ -177,7 +207,7 @@ public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBin List> product = CartesianList.create(arrayInputs); CartesianMapLambdaBinding lambdaBinding = new CartesianMapLambdaBinding(product, lambdaExpr, bindings); - return applyMap(lambdaExpr, product.size(), lambdaBinding); + return applyMap(lambdaExpr, lambdaBinding); } @Override @@ -187,11 +217,19 @@ public Set getArrayInputs(List args) } } + /** + * Base class for family of {@link ApplyFunction} which aggregate a scalar or array value given one or more array + * input {@link Expr} arguments and an array or scalar "accumulator" argument with an initial value + */ abstract class BaseFoldFunction implements ApplyFunction { - ExprEval applyFold(LambdaExpr lambdaExpr, Object accumulator, int length, IndexableFoldLambdaBinding bindings) + /** + * Accumulate a value by evaluating a {@link LambdaExpr} for each index position of an + * {@link IndexableFoldLambdaBinding} + */ + ExprEval applyFold(LambdaExpr lambdaExpr, Object accumulator, IndexableFoldLambdaBinding bindings) { - for (int i = 0; i < length; i++) { + for (int i = 0; i < bindings.getLength(); i++) { ExprEval evaluated = 
lambdaExpr.eval(bindings.accumulateWithIndex(i, accumulator)); accumulator = evaluated.value(); } @@ -199,6 +237,10 @@ ExprEval applyFold(LambdaExpr lambdaExpr, Object accumulator, int length, Indexa } } + /** + * Accumulate a value for a single array input with a 2 argument {@link LambdaExpr}. The 'array' input expression is + * the first argument, the initial value for the accumlator expression is the 2nd argument. + */ class FoldFunction extends BaseFoldFunction { static final String NAME = "fold"; @@ -226,17 +268,22 @@ public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBin Object accumlator = accEval.value(); FoldLambdaBinding lambdaBinding = new FoldLambdaBinding(array, accumlator, lambdaExpr, bindings); - return applyFold(lambdaExpr, accumlator, array.length, lambdaBinding); + return applyFold(lambdaExpr, accumlator, lambdaBinding); } @Override public Set getArrayInputs(List args) { - // accumulator argument cannot be inferred, so ignore it until we think of something better to do + // accumulator argument cannot currently be inferred, so ignore it until we think of something better to do return ImmutableSet.of(args.get(0)); } } + /** + * Accumulate a value for the cartesian product of 'n' array inputs arguments with an 'n + 1' argument + * {@link LambdaExpr}. The 'array' input expressions are the first 'n' arguments, the initial value for the accumlator + * expression is the final argument. 
+ */ class CartesianFoldFunction extends BaseFoldFunction { static final String NAME = "cartesian_fold"; @@ -283,7 +330,7 @@ public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBin CartesianFoldLambdaBinding lambdaBindings = new CartesianFoldLambdaBinding(product, accumlator, lambdaExpr, bindings); - return applyFold(lambdaExpr, accumlator, product.size(), lambdaBindings); + return applyFold(lambdaExpr, accumlator, lambdaBindings); } @Override @@ -294,6 +341,9 @@ public Set getArrayInputs(List args) } } + /** + * Filter an array to all elements that evaluate to a 'truthy' value for a {@link LambdaExpr} + */ class FilterFunction implements ApplyFunction { static final String NAME = "filter"; @@ -354,6 +404,10 @@ private Stream filter(T[] array, LambdaExpr expr, SettableLambdaBinding b } } + /** + * Base class for family of {@link ApplyFunction} which evaluate elements elements of a single array input against + * a {@link LambdaExpr} to evaluate to a final 'truthy' value + */ abstract class MatchFunction implements ApplyFunction { @Override @@ -385,6 +439,10 @@ public Set getArrayInputs(List args) public abstract ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bindings); } + /** + * Evaluates to true if any element of the array input {@link Expr} causes the {@link LambdaExpr} to evaluate to a + * 'truthy' value + */ class AnyMatchFunction extends MatchFunction { static final String NAME = "any"; @@ -404,6 +462,10 @@ public ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bi } } + /** + * Evaluates to true if all element of the array input {@link Expr} causes the {@link LambdaExpr} to evaluate to a + * 'truthy' value + */ class AllMatchFunction extends MatchFunction { static final String NAME = "all"; @@ -423,6 +485,11 @@ public ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bi } } + /** + * Simple, mutable, {@link Expr.ObjectBinding} for a {@link LambdaExpr} which provides a 
{@link Map} for storing + * arbitrary values to use as values for {@link IdentifierExpr} in the body of the lambda that are arguments to the + * lambda + */ class SettableLambdaBinding implements Expr.ObjectBinding { private final Expr.ObjectBinding bindings; @@ -454,11 +521,29 @@ SettableLambdaBinding withBinding(String key, Object value) } } + /** + * {@link Expr.ObjectBinding} which can be iterated by an integer index position for {@link BaseMapFunction}. + * Evaluating an {@link IdentifierExpr} against these bindings will return the value(s) of the array at the current + * index for any lambda identifiers, and fall through to the base {@link Expr.ObjectBinding} for all bindings provided + * by an outer scope. + */ interface IndexableMapLambdaObjectBinding extends Expr.ObjectBinding { + /** + * Total number of bindings in this binding + */ + int getLength(); + + /** + * Update index position + */ IndexableMapLambdaObjectBinding withIndex(int index); } + /** + * {@link IndexableMapLambdaObjectBinding} for a {@link MapFunction}. Lambda argument binding is stored in an object + * array, retrieving binding values for the lambda identifier returns the value at the current index. + */ class MapLambdaBinding implements IndexableMapLambdaObjectBinding { private final Expr.ObjectBinding bindings; @@ -483,6 +568,12 @@ public Object get(String name) return bindings.get(name); } + @Override + public int getLength() + { + return arrayValues.length; + } + @Override public MapLambdaBinding withIndex(int index) { @@ -491,6 +582,11 @@ public MapLambdaBinding withIndex(int index) } } + /** + * {@link IndexableMapLambdaObjectBinding} for a {@link CartesianMapFunction}. 
Lambda argument bindings stored as a + * cartesian product in the form of a list of lists of objects, where the inner list is the in order list of values + * for each {@link LambdaExpr} argument + */ class CartesianMapLambdaBinding implements IndexableMapLambdaObjectBinding { private final Expr.ObjectBinding bindings; @@ -520,6 +616,12 @@ public Object get(String name) return bindings.get(name); } + @Override + public int getLength() + { + return lambdaInputs.size(); + } + @Override public CartesianMapLambdaBinding withIndex(int index) { @@ -528,11 +630,29 @@ public CartesianMapLambdaBinding withIndex(int index) } } + /** + * {@link Expr.ObjectBinding} which can be iterated by an integer index position for {@link BaseFoldFunction}. + * Evaluating an {@link IdentifierExpr} against these bindings will return the value(s) of the array at the current + * index for any lambda array identifiers, the value of the 'accumulator' for the lambda accumulator identifier, + * and fall through to the base {@link Expr.ObjectBinding} for all bindings provided by an outer scope. + */ interface IndexableFoldLambdaBinding extends Expr.ObjectBinding { + /** + * Total number of bindings in this binding + */ + int getLength(); + + /** + * Update the index and accumulator value + */ IndexableFoldLambdaBinding accumulateWithIndex(int index, Object accumulator); } + /** + * {@link IndexableFoldLambdaBinding} for a {@link FoldFunction}. Like {@link MapLambdaBinding} + * but with additional information to track and provide binding values for an accumulator. 
+ */ class FoldLambdaBinding implements IndexableFoldLambdaBinding { private final Expr.ObjectBinding bindings; @@ -564,6 +684,12 @@ public Object get(String name) return bindings.get(name); } + @Override + public int getLength() + { + return arrayValues.length; + } + @Override public FoldLambdaBinding accumulateWithIndex(int index, Object acc) { @@ -573,6 +699,10 @@ public FoldLambdaBinding accumulateWithIndex(int index, Object acc) } } + /** + * {@link IndexableFoldLambdaBinding} for a {@link CartesianFoldFunction}. Like {@link CartesianMapLambdaBinding} + * but with additional information to track and provide binding values for an accumulator. + */ class CartesianFoldLambdaBinding implements IndexableFoldLambdaBinding { private final Expr.ObjectBinding bindings; @@ -607,6 +737,12 @@ public Object get(String name) return bindings.get(name); } + @Override + public int getLength() + { + return lambdaInputs.size(); + } + @Override public CartesianFoldLambdaBinding accumulateWithIndex(int index, Object acc) { From bfce0402ec0c90484cf5672bc5259996d30b4947 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 5 Jun 2019 14:49:05 -0700 Subject: [PATCH 34/48] more javadoc --- .../java/org/apache/druid/math/expr/Expr.java | 56 +++++++++++++++---- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index 3e650a66ec7e..cb2b3af518a5 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -42,7 +42,7 @@ import java.util.stream.Collectors; /** - * Base interface of Druid expression language abstract syntax tree + * Base interface of Druid expression language abstract syntax tree nodes */ public interface Expr { @@ -89,14 +89,14 @@ default String getIdentifierIfIdentifier() /** * Programmatically inspect the {@link Expr} tree with a {@link Visitor}. 
Each {@link Expr} is responsible for - * ensuring the {@link Visitor} can reach all of it's {@link Expr} children. + * ensuring the {@link Visitor} can visit all of it's {@link Expr} children before visiting itself. */ void visit(Visitor visitor); /** * Programatically rewrite the {@link Expr} tree with a {@link Shuttle}.Each {@link Expr} is responsible for - * ensuring the {@link Shuttle} can reach all of it's {@link Expr} children, as well as updating it's children - * {@link Expr} with the results from the {@link Shuttle}. + * ensuring the {@link Shuttle} can visit all of it's {@link Expr} children, as well as updating it's children + * {@link Expr} with the results from the {@link Shuttle}, before finally visiting an updated form of itself. */ Expr visit(Shuttle shuttle); @@ -239,6 +239,11 @@ public BindingDetails mergeWithArrays(Set moreArrays) } } +/** + * Base type for all constant expressions. {@link ConstantExpr} allow for direct value extraction without evaluating + * {@link Expr.ObjectBinding}. {@link ConstantExpr} are terminal nodes of an expression tree, and have no children + * {@link Expr}. + */ abstract class ConstantExpr implements Expr { @Override @@ -446,39 +451,44 @@ public ExprEval eval(ObjectBinding bindings) } } +/** + * This {@link Expr} node is used to represent a variable in the expression language. At evaluation time, the string + * identifier will be used to retrieve the runtime value for the variable from {@link Expr.ObjectBinding}. + * {@link IdentifierExpr} are terminal nodes of an expression tree, and have no children {@link Expr}. 
+ */ class IdentifierExpr implements Expr { - private final String value; + private final String identifier; IdentifierExpr(String value) { - this.value = value; + this.identifier = value; } @Override public String toString() { - return value; + return identifier; } @Nullable @Override public String getIdentifierIfIdentifier() { - return value; + return identifier; } @Override public BindingDetails analyzeInputs() { - return new BindingDetails(value); + return new BindingDetails(identifier); } @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { - return ExprEval.bestEffortOf(bindings.get(value)); + return ExprEval.bestEffortOf(bindings.get(identifier)); } @Override @@ -568,6 +578,11 @@ public BindingDetails analyzeInputs() } } +/** + * {@link Expr} node for a {@link Function} call. {@link FunctionExpr} has children {@link Expr} in the form of the + * list of arguments that are passed to the {@link Function} along with the {@link Expr.ObjectBinding} when it is + * evaluated. + */ class FunctionExpr implements Expr { final Function function; @@ -638,6 +653,11 @@ public BindingDetails analyzeInputs() } } +/** + * This {@link Expr} node is representative of an {@link ApplyFunction}, and has children in the form of a + * {@link LambdaExpr} and the list of {@link Expr} arguments that are combined with {@link Expr.ObjectBinding} to + * evaluate the {@link LambdaExpr}. + */ class ApplyFunctionExpr implements Expr { final ApplyFunction function; @@ -715,6 +735,9 @@ public BindingDetails analyzeInputs() } } +/** + * Base type for all single argument operators, with a single {@link Expr} child for the operand. 
+ */ abstract class UnaryExpr implements Expr { final Expr expr; @@ -828,8 +851,13 @@ public String toString() } } -// all concrete subclass of this should have constructor with the form of (String, Expr, Expr) -// if it's not possible, just be sure Evals.binaryOp() can handle that +/** + * Base type for all binary operators, this {@link Expr} has two children {@link Expr} for the left and right side + * operands. + * + * Note: all concrete subclass of this should have constructor with the form of (String, Expr, Expr) + * if it's not possible, just be sure Evals.binaryOp() can handle that + */ abstract class BinaryOpExprBase implements Expr { protected final String op; @@ -889,6 +917,10 @@ public BindingDetails analyzeInputs() } } +/** + * Base class for numerical binary operators, with additional methods defined to evaluate primitive values directly + * instead of wrapped with {@link ExprEval} + */ abstract class BinaryEvalOpExprBase extends BinaryOpExprBase { BinaryEvalOpExprBase(String op, Expr left, Expr right) From e591cae71e910261ada8f721417b160dea6a4545 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 5 Jun 2019 16:18:49 -0700 Subject: [PATCH 35/48] more --- .../druid/math/expr/ExprListenerImpl.java | 3 +++ .../druid/math/expr/ExprMacroTable.java | 20 +++++++++++++++---- .../expressions/BloomFilterExprMacro.java | 2 +- .../druid/query/expression/LikeExprMacro.java | 2 +- .../query/expression/LookupExprMacro.java | 2 +- .../expression/RegexpExtractExprMacro.java | 2 +- .../expression/TimestampCeilExprMacro.java | 4 ++-- .../expression/TimestampExtractExprMacro.java | 2 +- .../expression/TimestampFloorExprMacro.java | 4 ++-- .../expression/TimestampFormatExprMacro.java | 2 +- .../expression/TimestampParseExprMacro.java | 2 +- .../expression/TimestampShiftExprMacro.java | 4 ++-- .../druid/query/expression/TrimExprMacro.java | 2 +- 13 files changed, 33 insertions(+), 18 deletions(-) diff --git 
a/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java b/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java index 9b1b0b89c065..b4dc961e61f9 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java @@ -34,6 +34,9 @@ import java.util.Map; /** + * Implementation of antlr parse tree listener, transforms {@link ParseTree} to {@link Expr}, based on the grammar + * defined in Expr.g4. All + * {@link Expr} are created on 'exit' so that children {@link Expr} are already constructed. */ public class ExprListenerImpl extends ExprBaseListener { diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java b/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java index 1971caf7609d..370c5a1633ce 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java @@ -31,6 +31,12 @@ import java.util.Set; import java.util.stream.Collectors; +/** + * Mechanism by which Druid expressions can define new functions for the Druid expression language. When + * {@link ExprListenerImpl} is creating a {@link FunctionExpr}, {@link ExprMacroTable} will first be checked to find + * the function by name, falling back to {@link Parser#getFunction(String)} to map to a built-in {@link Function} if + * none is defined in the macro table. 
+ */ public class ExprMacroTable { private static final ExprMacroTable NIL = new ExprMacroTable(Collections.emptyList()); @@ -84,11 +90,14 @@ public interface ExprMacro Expr apply(List args); } - public abstract static class BaseSingleScalarArgumentExprMacroFunctionExpr implements Expr + /** + * Base class for single argument {@link ExprMacro} function {@link Expr} + */ + public abstract static class BaseScalarUnivariateMacroFunctionExpr implements Expr { protected final Expr arg; - public BaseSingleScalarArgumentExprMacroFunctionExpr(Expr arg) + public BaseScalarUnivariateMacroFunctionExpr(Expr arg) { this.arg = arg; } @@ -111,11 +120,14 @@ public BindingDetails analyzeInputs() } } - public abstract static class BaseScalarExprMacroFunctionExpr implements Expr + /** + * Base class for multi-argument {@link ExprMacro} function {@link Expr} + */ + public abstract static class BaseScalarMacroFunctionExpr implements Expr { protected final List args; - public BaseScalarExprMacroFunctionExpr(final List args) + public BaseScalarMacroFunctionExpr(final List args) { this.args = args; } diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java index 352224a60a38..4328a5835e92 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/expressions/BloomFilterExprMacro.java @@ -67,7 +67,7 @@ public Expr apply(List args) throw new RuntimeException("Failed to deserialize bloom filter", ioe); } - class BloomExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + class BloomExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr { private BloomExpr(Expr arg) { diff --git 
a/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java index cca8749bac71..a1c980465eff 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/LikeExprMacro.java @@ -67,7 +67,7 @@ public Expr apply(final List args) escapeChar ); - class LikeExtractExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + class LikeExtractExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr { private LikeExtractExpr(Expr arg) { diff --git a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java index 39fa14b9e187..88e3ce44e578 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java @@ -71,7 +71,7 @@ public Expr apply(final List args) null ); - class LookupExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + class LookupExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr { private LookupExpr(Expr arg) { diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java index a5dd90727968..df8f1f955a4a 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java @@ -58,7 +58,7 @@ public Expr apply(final List args) final int index = indexExpr == null ? 
0 : ((Number) indexExpr.getLiteralValue()).intValue(); - class RegexpExtractExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + class RegexpExtractExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr { private RegexpExtractExpr(Expr arg) { diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java index e9d8430e1a98..bb2f5af5e1e9 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java @@ -54,7 +54,7 @@ public Expr apply(final List args) } } - private static class TimestampCeilExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr + private static class TimestampCeilExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { private final Granularity granularity; @@ -99,7 +99,7 @@ private static PeriodGranularity getGranularity(final List args, final Exp ); } - private static class TimestampCeilDynamicExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr + private static class TimestampCeilDynamicExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { TimestampCeilDynamicExpr(final List args) { diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java index fd7021dbb21e..48ae86caadf8 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampExtractExprMacro.java @@ -82,7 +82,7 @@ public Expr apply(final List args) final ISOChronology chronology = ISOChronology.getInstance(timeZone); - class TimestampExtractExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + class 
TimestampExtractExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr { private TimestampExtractExpr(Expr arg) { diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java index 7e7480aaae5e..00cbb547ac64 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java @@ -62,7 +62,7 @@ private static PeriodGranularity computeGranularity(final List args, final ); } - public static class TimestampFloorExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr + public static class TimestampFloorExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { private final PeriodGranularity granularity; @@ -109,7 +109,7 @@ public Expr visit(Shuttle shuttle) } } - public static class TimestampFloorDynamicExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr + public static class TimestampFloorDynamicExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { TimestampFloorDynamicExpr(final List args) { diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java index 2786522500cb..f82b8b83f71f 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampFormatExprMacro.java @@ -68,7 +68,7 @@ public Expr apply(final List args) ? 
ISODateTimeFormat.dateTime() : DateTimeFormat.forPattern(formatString).withZone(timeZone); - class TimestampFormatExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + class TimestampFormatExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr { private TimestampFormatExpr(Expr arg) { diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java index ac88aafa8c76..b2079f4ac194 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampParseExprMacro.java @@ -64,7 +64,7 @@ public Expr apply(final List args) ? createDefaultParser(timeZone) : DateTimes.wrapFormatter(DateTimeFormat.forPattern(formatString).withZone(timeZone)); - class TimestampParseExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + class TimestampParseExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr { private TimestampParseExpr(Expr arg) { diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java index 84d344c19b1e..872c89fcc00b 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampShiftExprMacro.java @@ -71,7 +71,7 @@ private static int getStep(final List args, final Expr.ObjectBinding bindi return args.get(2).eval(bindings).asInt(); } - private static class TimestampShiftExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr + private static class TimestampShiftExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { private final Chronology chronology; private final Period period; @@ -101,7 +101,7 @@ public Expr visit(Shuttle 
shuttle) } } - private static class TimestampShiftDynamicExpr extends ExprMacroTable.BaseScalarExprMacroFunctionExpr + private static class TimestampShiftDynamicExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { TimestampShiftDynamicExpr(final List args) { diff --git a/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java index fdafeda54163..e3b49db8ca1a 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java @@ -96,7 +96,7 @@ public Expr apply(final List args) } } - private static class TrimStaticCharsExpr extends ExprMacroTable.BaseSingleScalarArgumentExprMacroFunctionExpr + private static class TrimStaticCharsExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr { private final TrimMode mode; private final char[] chars; From 5e393996a80ca395c38366f6de38e25b57e97abb Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 5 Jun 2019 17:51:47 -0700 Subject: [PATCH 36/48] more javadocs, nonnullbydefault, minor refactor --- .../java/org/apache/druid/math/expr/Expr.java | 32 ++---- .../org/apache/druid/math/expr/ExprEval.java | 10 +- .../apache/druid/math/expr/package-info.java | 23 +++++ .../segment/virtual/ExpressionSelectors.java | 97 +++++++++++-------- ...owBasedExpressionColumnValueSelector.java} | 20 +++- 5 files changed, 112 insertions(+), 70 deletions(-) create mode 100644 core/src/main/java/org/apache/druid/math/expr/package-info.java rename processing/src/main/java/org/apache/druid/segment/virtual/{OpportunisticMultiValueStringExpressionColumnValueSelector.java => RowBasedExpressionColumnValueSelector.java} (73%) diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index cb2b3af518a5..4becc13c933f 100644 --- 
a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -30,7 +30,6 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.guava.Comparators; -import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Arrays; @@ -84,24 +83,23 @@ default String getIdentifierIfIdentifier() * Evaluate the {@link Expr} with the bindings which supply {@link IdentifierExpr} with their values, producing an * {@link ExprEval} with the result. */ - @Nonnull ExprEval eval(ObjectBinding bindings); /** * Programmatically inspect the {@link Expr} tree with a {@link Visitor}. Each {@link Expr} is responsible for - * ensuring the {@link Visitor} can visit all of it's {@link Expr} children before visiting itself. + * ensuring the {@link Visitor} can visit all of its {@link Expr} children before visiting itself. */ void visit(Visitor visitor); /** * Programatically rewrite the {@link Expr} tree with a {@link Shuttle}.Each {@link Expr} is responsible for - * ensuring the {@link Shuttle} can visit all of it's {@link Expr} children, as well as updating it's children + * ensuring the {@link Shuttle} can visit all of its {@link Expr} children, as well as updating its children * {@link Expr} with the results from the {@link Shuttle}, before finally visiting an updated form of itself. 
*/ Expr visit(Shuttle shuttle); /** - * Examing the usage of {@link IdentifierExpr} children of an {@link Expr}, constructing a {@link BindingDetails} + * Examine the usage of {@link IdentifierExpr} children of an {@link Expr}, constructing a {@link BindingDetails} */ BindingDetails analyzeInputs(); @@ -280,7 +278,6 @@ class LongExpr extends ConstantExpr this.value = Preconditions.checkNotNull(value, "value"); } - @Nonnull @Override public Object getLiteralValue() { @@ -293,7 +290,6 @@ public String toString() return String.valueOf(value); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -310,7 +306,6 @@ class LongArrayExpr extends ConstantExpr this.value = Preconditions.checkNotNull(value, "value"); } - @Nonnull @Override public Object getLiteralValue() { @@ -323,7 +318,6 @@ public String toString() return Arrays.toString(value); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -333,9 +327,10 @@ public ExprEval eval(ObjectBinding bindings) class StringExpr extends ConstantExpr { + @Nullable private final String value; - StringExpr(String value) + StringExpr(@Nullable String value) { this.value = NullHandling.emptyToNullIfNeeded(value); } @@ -347,13 +342,13 @@ public Object getLiteralValue() return value; } + @Nullable @Override public String toString() { return value; } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -370,7 +365,6 @@ class StringArrayExpr extends ConstantExpr this.value = Preconditions.checkNotNull(value, "value"); } - @Nonnull @Override public Object getLiteralValue() { @@ -383,7 +377,6 @@ public String toString() return Arrays.toString(value); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -400,7 +393,6 @@ class DoubleExpr extends ConstantExpr this.value = Preconditions.checkNotNull(value, "value"); } - @Nonnull @Override public Object getLiteralValue() { @@ -413,7 +405,6 @@ public String toString() return String.valueOf(value); } - @Nonnull 
@Override public ExprEval eval(ObjectBinding bindings) { @@ -430,7 +421,6 @@ class DoubleArrayExpr extends ConstantExpr this.value = Preconditions.checkNotNull(value, "value"); } - @Nonnull @Override public Object getLiteralValue() { @@ -443,7 +433,6 @@ public String toString() return Arrays.toString(value); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -484,7 +473,6 @@ public BindingDetails analyzeInputs() return new BindingDetails(identifier); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -542,7 +530,6 @@ public Expr getExpr() return expr; } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -602,7 +589,6 @@ public String toString() return StringUtils.format("(%s %s)", name, args); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -703,7 +689,6 @@ public String toString() return StringUtils.format("(%s %s, %s)", name, lambdaExpr, argsExpr); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -794,7 +779,6 @@ UnaryExpr copy(Expr expr) return new UnaryMinusExpr(expr); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -831,7 +815,6 @@ UnaryExpr copy(Expr expr) return new UnaryNotExpr(expr); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -928,7 +911,6 @@ abstract class BinaryEvalOpExprBase extends BinaryOpExprBase super(op, left, right); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -1339,7 +1321,6 @@ protected BinaryOpExprBase copy(Expr left, Expr right) return new BinAndExpr(op, left, right); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { @@ -1361,7 +1342,6 @@ protected BinaryOpExprBase copy(Expr left, Expr right) return new BinOrExpr(op, left, right); } - @Nonnull @Override public ExprEval eval(ObjectBinding bindings) { diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java 
b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java index 3f243729db6b..1cafb17a750c 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -108,7 +108,7 @@ public static ExprEval bestEffortOf(@Nullable Object val) return new DoubleArrayExprEval((Double[]) val); } if (val instanceof Float[]) { - return new DoubleArrayExprEval(Arrays.stream((Float[]) val).map(x -> x.doubleValue()).toArray(Double[]::new)); + return new DoubleArrayExprEval(Arrays.stream((Float[]) val).map(Float::doubleValue).toArray(Double[]::new)); } if (val instanceof String[]) { return new StringArrayExprEval((String[]) val); @@ -119,6 +119,7 @@ public static ExprEval bestEffortOf(@Nullable Object val) // Cached String values private boolean stringValueValid = false; + @Nullable private String stringValue; @Nullable @@ -131,6 +132,7 @@ private ExprEval(@Nullable T value) public abstract ExprType type(); + @Nullable public T value() { return value; @@ -170,12 +172,16 @@ public boolean isArray() public abstract boolean asBoolean(); + @Nullable public abstract Object[] asArray(); + @Nullable public abstract String[] asStringArray(); + @Nullable public abstract Long[] asLongArray(); + @Nullable public abstract Double[] asDoubleArray(); public abstract ExprEval castTo(ExprType castTo); @@ -254,6 +260,7 @@ public final boolean asBoolean() return Evals.asBoolean(asDouble()); } + @Nullable @Override public Object[] asArray() { @@ -310,6 +317,7 @@ public final boolean asBoolean() return Evals.asBoolean(asLong()); } + @Nullable @Override public Object[] asArray() { diff --git a/core/src/main/java/org/apache/druid/math/expr/package-info.java b/core/src/main/java/org/apache/druid/math/expr/package-info.java new file mode 100644 index 000000000000..d7c92f963400 --- /dev/null +++ b/core/src/main/java/org/apache/druid/math/expr/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +@EverythingIsNonnullByDefault +package org.apache.druid.math.expr; + +import org.apache.druid.annotations.EverythingIsNonnullByDefault; diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 70a8edf783c2..3721523c3dbb 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -24,6 +24,7 @@ import com.google.common.base.Supplier; import com.google.common.collect.Iterables; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.Parser; @@ -43,7 +44,6 @@ import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; -import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.util.Arrays; import java.util.HashMap; @@ -164,24 +164,10 @@ public static ColumnValueSelector makeExprEvalSelector( } } - final Set actualArrays = new HashSet<>(); - final 
Set unknownIfArrays = new HashSet<>(); - for (String column : columns) { - final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column); - if (capabilities != null) { - if (capabilities.hasMultipleValues()) { - actualArrays.add(column); - } else if ( - !capabilities.isComplete() && - capabilities.getType().equals(ValueType.STRING) && - !exprDetails.getArrayVariables().contains(column) - ) { - unknownIfArrays.add(column); - } - } else { - unknownIfArrays.add(column); - } - } + final Pair, Set> arrayUsage = + examineColumnSelectorFactoryArrays(columnSelectorFactory, exprDetails, columns); + final Set actualArrays = arrayUsage.lhs; + final Set unknownIfArrays = arrayUsage.rhs; final List needsApplied = columns.stream() @@ -202,18 +188,25 @@ public static ColumnValueSelector makeExprEvalSelector( return new ConstantExprEvalSelector(expression.eval(bindings)); } + // if any unknown column input types, fall back to an expression selector that examines input bindings on a + // per row basis if (unknownIfArrays.size() > 0) { - return new OpportunisticMultiValueStringExpressionColumnValueSelector( + return new RowBasedExpressionColumnValueSelector( finalExpr, exprDetails, bindings, unknownIfArrays ); } - // No special optimization. 
+ + // generic expression value selector for fully known input types return new ExpressionColumnValueSelector(finalExpr, bindings); } + /** + * Makes a single or multi-value {@link DimensionSelector} wrapper around a {@link ColumnValueSelector} created by + * {@link ExpressionSelectors#makeExprEvalSelector(ColumnSelectorFactory, Expr)} as appropriate + */ public static DimensionSelector makeDimensionSelector( final ColumnSelectorFactory columnSelectorFactory, final Expr expression, @@ -244,24 +237,11 @@ public static DimensionSelector makeDimensionSelector( } } - final Set actualArrays = new HashSet<>(); - final Set unknownIfArrays = new HashSet<>(); - for (String column : columns) { - final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column); - if (capabilities != null) { - if (capabilities.hasMultipleValues()) { - actualArrays.add(column); - } else if ( - !capabilities.isComplete() && - capabilities.getType().equals(ValueType.STRING) && - !exprDetails.getArrayVariables().contains(column) - ) { - unknownIfArrays.add(column); - } - } else { - unknownIfArrays.add(column); - } - } + final Pair, Set> arrayUsage = + examineColumnSelectorFactoryArrays(columnSelectorFactory, exprDetails, columns); + final Set actualArrays = arrayUsage.lhs; + final Set unknownIfArrays = arrayUsage.rhs; + final ColumnValueSelector baseSelector = makeExprEvalSelector(columnSelectorFactory, expression); final boolean multiVal = actualArrays.size() > 0 || @@ -392,6 +372,11 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } } + /** + * Create {@link Expr.ObjectBinding} given a {@link ColumnSelectorFactory} and {@link Expr.BindingDetails} which + * provides the set of identifiers which need a binding (list of required columns), and context of whether or not they + * are used as array or scalar inputs + */ private static Expr.ObjectBinding createBindings( Expr.BindingDetails bindingDetails, ColumnSelectorFactory columnSelectorFactory @@ -472,7 
+457,6 @@ private static Supplier makeNullableSupplier( } @VisibleForTesting - @Nonnull static Supplier supplierFromDimensionSelector(final DimensionSelector selector, boolean coerceArray) { Preconditions.checkNotNull(selector, "selector"); @@ -533,7 +517,6 @@ static Supplier supplierFromObjectSelector(final BaseObjectColumnValueSe } } - @Nonnull private static Object coerceListDimToStringArray(List val) { Object[] arrayVal = val.stream().map(Object::toString).toArray(String[]::new); @@ -542,4 +525,36 @@ private static Object coerceListDimToStringArray(List val) } return new String[]{null}; } + + /** + * Returns pair of columns which are definitely multi-valued, or 'actual' arrays, and those which we are unable to + * discern from the {@link ColumnSelectorFactory#getColumnCapabilities(String)}, or 'unknown' arrays. + */ + private static Pair, Set> examineColumnSelectorFactoryArrays( + ColumnSelectorFactory columnSelectorFactory, + Expr.BindingDetails exprDetails, + List columns + ) + { + final Set actualArrays = new HashSet<>(); + final Set unknownIfArrays = new HashSet<>(); + for (String column : columns) { + final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column); + if (capabilities != null) { + if (capabilities.hasMultipleValues()) { + actualArrays.add(column); + } else if ( + !capabilities.isComplete() && + capabilities.getType().equals(ValueType.STRING) && + !exprDetails.getArrayVariables().contains(column) + ) { + unknownIfArrays.add(column); + } + } else { + unknownIfArrays.add(column); + } + } + + return new Pair<>(actualArrays, unknownIfArrays); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java similarity index 73% rename from 
processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java rename to processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java index e462bb4cd071..2237ceb68ad1 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/OpportunisticMultiValueStringExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java @@ -31,14 +31,21 @@ import java.util.Set; import java.util.stream.Collectors; -public class OpportunisticMultiValueStringExpressionColumnValueSelector extends ExpressionColumnValueSelector +/** + * Expression column value selector that examines a set of 'unknown' type input bindings on a row by row basis, + * transforming the expression to handle multi-value list typed inputs as they are encountered. + * + * Currently, string dimensions are the only bindings which might appear as a {@link String} or a {@link String[]}, so + * numbers are eliminated from the set of 'unknown' bindings to check as they are encountered. 
+ */ +public class RowBasedExpressionColumnValueSelector extends ExpressionColumnValueSelector { private final List unknownColumns; private final Expr.BindingDetails baseExprBindingDetails; private final Set ignoredColumns; private final Int2ObjectMap transformedCache; - public OpportunisticMultiValueStringExpressionColumnValueSelector( + public RowBasedExpressionColumnValueSelector( Expr expression, Expr.BindingDetails baseExprBindingDetails, Expr.ObjectBinding bindings, @@ -55,16 +62,20 @@ public OpportunisticMultiValueStringExpressionColumnValueSelector( @Override public ExprEval getObject() { + // check to find any arrays for this row List arrayBindings = unknownColumns.stream() .filter(x -> !baseExprBindingDetails.getArrayVariables().contains(x) && isBindingArray(x)) .collect(Collectors.toList()); + // eliminate anything that will never be an array if (ignoredColumns.size() > 0) { unknownColumns.removeAll(ignoredColumns); ignoredColumns.clear(); } + // if there are arrays, we need to transform the expression to one that applies each value of the array to the + // base expression, we keep a cache of transformed expressions to minimize extra work if (arrayBindings.size() > 0) { final int key = arrayBindings.hashCode(); if (transformedCache.containsKey(key)) { @@ -74,9 +85,14 @@ public ExprEval getObject() transformedCache.put(key, transformed); return transformed.eval(bindings); } + // no arrays for this row, evaluate base expression return expression.eval(bindings); } + /** + * Check if row value binding for identifier is an array, adding identifiers that retrieve {@link Number} to a set + * of 'unknowns' to eliminate by side effect + */ private boolean isBindingArray(String x) { Object binding = bindings.get(x); From e880a84451decaafbcfdd538ae93504a77ecefb4 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 5 Jun 2019 18:15:07 -0700 Subject: [PATCH 37/48] markdown fix --- docs/content/misc/math-expr.md | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/docs/content/misc/math-expr.md b/docs/content/misc/math-expr.md index bfdb1108b353..48573603f892 100644 --- a/docs/content/misc/math-expr.md +++ b/docs/content/misc/math-expr.md @@ -64,7 +64,7 @@ The following built-in functions are available. |name|description| |----|-----------| -|cast|cast(expr,'LONG' or 'DOUBLE' or 'STRING') returns expr with specified type. exception can be thrown | +|cast|cast(expr,'LONG' or 'DOUBLE' or 'STRING' or 'LONG_ARRAY', or 'DOUBLE_ARRAY' or 'STRING_ARRAY') returns expr with specified type. exception can be thrown. Scalar types may be cast to array types and will take the form of a single element list (null will still be null). | |if|if(predicate,then,else) returns 'then' if 'predicate' evaluates to a positive number, otherwise it returns 'else' | |nvl|nvl(expr,expr-for-null) returns 'expr-for-null' if 'expr' is null (or empty string for string type) | |like|like(expr, pattern[, escape]) is equivalent to SQL `expr LIKE pattern`| @@ -169,8 +169,8 @@ See javadoc of java.lang.Math for detailed explanation for each function. | `array_overlap(arr1,arr2)` | returns true if arr1 and arr2 have any elements in common | | `array_offset_of(expr)` | returns the 0 based index of the first occurrence of expr in the array, or `null` if no matching elements exist in the array. | | `array_ordinal_of(expr)` | returns the 1 based index of the first occurrence of expr in the array, or `null` if no matching elements exist in the array. 
| -| `array_append(arr1,expr)` | appends expr to arr -| `array_concat(arr1,arr2)` | concatenates 2 arrays | +| `array_append(arr1,expr)` | appends expr to arr, the resulting array type determined by the type of the first array | +| `array_concat(arr1,arr2)` | concatenates 2 arrays, the resulting array type determined by the type of the first array | | `array_to_string(arr,str)` | joins all elements of arr by the delimiter specified by str | | `string_to_array(str1,str2)` | splits str1 into an array on the delimiter specified by str2 | From ebfb2ce69595aa21b77563e70c77636780a954ba Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 5 Jun 2019 18:27:11 -0700 Subject: [PATCH 38/48] adjustments --- .../ExpressionColumnValueSelector.java | 4 ++ .../segment/virtual/ExpressionSelectors.java | 37 +------------------ ...ultiValueExpressionDimensionSelector.java} | 32 +++++++++++++--- 3 files changed, 33 insertions(+), 40 deletions(-) rename processing/src/main/java/org/apache/druid/segment/virtual/{BaseMultiValueExpressionDimensionSelector.java => MultiValueExpressionDimensionSelector.java} (80%) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelector.java index bd60ee9b4191..c0c3eafa1cac 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelector.java @@ -27,6 +27,10 @@ import javax.annotation.Nonnull; +/** + * Basic expression {@link ColumnValueSelector}. 
Evaluates {@link Expr} into {@link ExprEval} against + * {@link Expr.ObjectBinding} which are backed by the underlying expression input {@link ColumnValueSelector}s + */ public class ExpressionColumnValueSelector implements ColumnValueSelector { final Expr.ObjectBinding bindings; diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 3721523c3dbb..e340d36be891 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -180,7 +180,6 @@ public static ColumnValueSelector makeExprEvalSelector( finalExpr = expression; } - final Expr.ObjectBinding bindings = createBindings(exprDetails, columnSelectorFactory); if (bindings.equals(ExprUtils.nilBindings())) { @@ -257,39 +256,7 @@ public static DimensionSelector makeDimensionSelector( } else if (extractionFn == null) { if (multiVal) { - class MultiValueDimensionSelector extends BaseMultiValueExpressionDimensionSelector - { - private MultiValueDimensionSelector() - { - super(baseSelector); - } - - @Override - String getValue(ExprEval evaluated) - { - assert !evaluated.isArray(); - return NullHandling.emptyToNullIfNeeded(evaluated.asString()); - } - - @Override - List getArray(ExprEval evaluated) - { - assert evaluated.isArray(); - return Arrays.stream(evaluated.asStringArray()) - .map(NullHandling::emptyToNullIfNeeded) - .collect(Collectors.toList()); - } - - @Override - String getArrayValue(ExprEval evaluated, int i) - { - assert evaluated.isArray(); - String[] stringArray = evaluated.asStringArray(); - assert i < stringArray.length; - return NullHandling.emptyToNullIfNeeded(stringArray[i]); - } - } - return new MultiValueDimensionSelector(); + return new MultiValueExpressionDimensionSelector(baseSelector); } else { class DefaultExpressionDimensionSelector extends 
BaseSingleValueDimensionSelector { @@ -310,7 +277,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } } else { if (multiVal) { - class ExtractionMultiValueDimensionSelector extends BaseMultiValueExpressionDimensionSelector + class ExtractionMultiValueDimensionSelector extends MultiValueExpressionDimensionSelector { ExtractionMultiValueDimensionSelector() { diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/BaseMultiValueExpressionDimensionSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/MultiValueExpressionDimensionSelector.java similarity index 80% rename from processing/src/main/java/org/apache/druid/segment/virtual/BaseMultiValueExpressionDimensionSelector.java rename to processing/src/main/java/org/apache/druid/segment/virtual/MultiValueExpressionDimensionSelector.java index dfe486152f82..e3b5734ee62f 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/BaseMultiValueExpressionDimensionSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/MultiValueExpressionDimensionSelector.java @@ -32,14 +32,20 @@ import org.apache.druid.segment.data.ZeroIndexedInts; import javax.annotation.Nullable; +import java.util.Arrays; import java.util.List; import java.util.Objects; +import java.util.stream.Collectors; -public abstract class BaseMultiValueExpressionDimensionSelector implements DimensionSelector +/** + * Basic multi-value dimension selector for an {@link org.apache.druid.math.expr.Expr} evaluating + * {@link ColumnValueSelector}. 
+ */ +public class MultiValueExpressionDimensionSelector implements DimensionSelector { private final ColumnValueSelector baseSelector; - public BaseMultiValueExpressionDimensionSelector(ColumnValueSelector baseSelector) + public MultiValueExpressionDimensionSelector(ColumnValueSelector baseSelector) { this.baseSelector = baseSelector; } @@ -49,11 +55,27 @@ ExprEval getEvaluated() return baseSelector.getObject(); } - abstract String getValue(ExprEval evaluated); + String getValue(ExprEval evaluated) + { + assert !evaluated.isArray(); + return NullHandling.emptyToNullIfNeeded(evaluated.asString()); + } - abstract List getArray(ExprEval evaluated); + List getArray(ExprEval evaluated) + { + assert evaluated.isArray(); + return Arrays.stream(evaluated.asStringArray()) + .map(NullHandling::emptyToNullIfNeeded) + .collect(Collectors.toList()); + } - abstract String getArrayValue(ExprEval evaluated, int i); + String getArrayValue(ExprEval evaluated, int i) + { + assert evaluated.isArray(); + String[] stringArray = evaluated.asStringArray(); + assert i < stringArray.length; + return NullHandling.emptyToNullIfNeeded(stringArray[i]); + } @Override public IndexedInts getRow() From 4bfc7217839ce52ac0d7eda1c079f7a695900aa2 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 5 Jun 2019 18:33:49 -0700 Subject: [PATCH 39/48] more doc --- .../druid/segment/virtual/ExpressionSelectors.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index e340d36be891..c948bd0d183b 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -279,7 +279,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) if (multiVal) { class 
ExtractionMultiValueDimensionSelector extends MultiValueExpressionDimensionSelector { - ExtractionMultiValueDimensionSelector() + private ExtractionMultiValueDimensionSelector() { super(baseSelector); } @@ -423,6 +423,10 @@ private static Supplier makeNullableSupplier( } } + /** + * Create a supplier to feed {@link Expr.ObjectBinding} for a dimension selector, coercing values to always appear as + * arrays if specified. + */ @VisibleForTesting static Supplier supplierFromDimensionSelector(final DimensionSelector selector, boolean coerceArray) { @@ -447,6 +451,11 @@ static Supplier supplierFromDimensionSelector(final DimensionSelector se }; } + + /** + * Create a fallback supplier to feed {@link Expr.ObjectBinding} for a selector, used if column cannot be reliably + * detected as a primitive type + */ @Nullable static Supplier supplierFromObjectSelector(final BaseObjectColumnValueSelector selector) { @@ -484,6 +493,9 @@ static Supplier supplierFromObjectSelector(final BaseObjectColumnValueSe } } + /** + * Selectors are not consistent in treatment of null, [], and [null], so coerce [] to [null] + */ private static Object coerceListDimToStringArray(List val) { Object[] arrayVal = val.stream().map(Object::toString).toArray(String[]::new); From 9ed867c6c2828e0bef40e84b3d2dedb33987e6bf Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Jun 2019 16:29:01 -0700 Subject: [PATCH 40/48] move initial filter out --- .../RowBasedExpressionColumnValueSelector.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java index 2237ceb68ad1..c0cdf4bcc9f1 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java +++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java @@ -53,20 +53,19 @@ public RowBasedExpressionColumnValueSelector( ) { super(expression, bindings); - this.unknownColumns = new ArrayList<>(unknownColumnsSet); + this.unknownColumns = unknownColumnsSet.stream() + .filter(x -> !baseExprBindingDetails.getArrayVariables().contains(x)) + .collect(Collectors.toList()); this.baseExprBindingDetails = baseExprBindingDetails; this.ignoredColumns = new HashSet<>(); - this.transformedCache = new Int2ObjectArrayMap(unknownColumns.size()); + this.transformedCache = new Int2ObjectArrayMap<>(unknownColumns.size()); } @Override public ExprEval getObject() { // check to find any arrays for this row - List arrayBindings = - unknownColumns.stream() - .filter(x -> !baseExprBindingDetails.getArrayVariables().contains(x) && isBindingArray(x)) - .collect(Collectors.toList()); + List arrayBindings = unknownColumns.stream().filter(this::isBindingArray).collect(Collectors.toList()); // eliminate anything that will never be an array if (ignoredColumns.size() > 0) { From ee304ae0c7967497eaaae222623e8f5db7fdf6d3 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Jun 2019 23:22:38 -0700 Subject: [PATCH 41/48] docs --- docs/content/misc/math-expr.md | 10 ++++++++-- .../virtual/RowBasedExpressionColumnValueSelector.java | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/content/misc/math-expr.md b/docs/content/misc/math-expr.md index 48573603f892..f42bbd4fe73c 100644 --- a/docs/content/misc/math-expr.md +++ b/docs/content/misc/math-expr.md @@ -55,8 +55,14 @@ For logical operators, a number is true if and only if it is positive (0 or nega type, it's the evaluation result of 'Boolean.valueOf(string)'. Multi-value string dimensions are supported and may be treated as either scalar or array typed values. 
When treated as -a scalar type, an expression will automatically be transformed to apply the scalar operation across all values of the -multi-valued type, to mimic Druid's native behavior. +a scalar type, an expression will automatically be transformed to apply the scalar operation across all values of the +multi-valued type, to mimic Druid's native behavior. Values that result in arrays will be coerced back into the native +Druid string type for aggregation. Druid aggregations on multi-value string dimensions operate on the individual values, _not_ +the 'array', behaving similarly to the `unnest` operator available in many SQL dialects. However, by using the +`array_to_string` function, aggregations may be done on a stringified version of the complete array, allowing the +complete row to be preserved. Using `string_to_array` in an expression post-aggregator allows transforming the +stringified dimension back into the true native array type. + The following built-in functions are available. diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java index c0cdf4bcc9f1..e34f26a606e7 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java @@ -25,7 +25,6 @@ import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.Parser; -import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; From 520cac93daafc955e72496b07274eeda7f4cce4c Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 7 Jun 2019 04:07:14 -0700 Subject: [PATCH 42/48] map empty arg lambda, apply function argument validation --- .../apache/druid/math/expr/ApplyFunction.java | 84 +++++++++++++++++-- .../java/org/apache/druid/math/expr/Expr.java 
| 17 +++- .../druid/math/expr/ApplyFunctionTest.java | 26 +++++- 3 files changed, 115 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java index 3853150e69a6..63b5daf44720 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java +++ b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java @@ -25,6 +25,7 @@ import it.unimi.dsi.fastutil.objects.Object2IntMap; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.RE; +import org.apache.druid.java.util.common.StringUtils; import javax.annotation.Nullable; import java.util.ArrayList; @@ -59,6 +60,8 @@ public interface ApplyFunction */ Set getArrayInputs(List args); + void validateArguments(LambdaExpr lambdaExpr, List args); + /** * Base class for "map" functions, which are a class of {@link ApplyFunction} which take a lambda function that is * mapped to the values of an {@link IndexableMapLambdaObjectBinding} which is created from the outer @@ -139,7 +142,6 @@ public String name() @Override public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) { - Preconditions.checkArgument(argsExpr.size() == 1); Expr arrayExpr = argsExpr.get(0); ExprEval arrayEval = arrayExpr.eval(bindings); @@ -158,11 +160,22 @@ public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBin @Override public Set getArrayInputs(List args) { - if (args.size() != 1) { - throw new IAE("ApplyFunction[%s] needs 1 argument", name()); + if (args.size() == 1) { + return ImmutableSet.of(args.get(0)); } + return Collections.emptySet(); + } - return ImmutableSet.of(args.get(0)); + @Override + public void validateArguments(LambdaExpr lambdaExpr, List args) + { + Preconditions.checkArgument(args.size() == 1); + if (lambdaExpr.identifierCount() > 0) { + Preconditions.checkArgument( + args.size() == lambdaExpr.identifierCount(), + 
StringUtils.format("lambda expression argument count does not match %s argument count", name()) + ); + } } } @@ -215,6 +228,18 @@ public Set getArrayInputs(List args) { return ImmutableSet.copyOf(args); } + + @Override + public void validateArguments(LambdaExpr lambdaExpr, List args) + { + Preconditions.checkArgument(args.size() > 0); + if (lambdaExpr.identifierCount() > 0) { + Preconditions.checkArgument( + args.size() == lambdaExpr.identifierCount(), + StringUtils.format("lambda expression argument count does not match %s argument count", name()) + ); + } + } } /** @@ -254,7 +279,6 @@ public String name() @Override public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) { - Preconditions.checkArgument(argsExpr.size() == 2); Expr arrayExpr = argsExpr.get(0); Expr accExpr = argsExpr.get(1); @@ -277,6 +301,16 @@ public Set getArrayInputs(List args) // accumulator argument cannot currently be inferred, so ignore it until we think of something better to do return ImmutableSet.of(args.get(0)); } + + @Override + public void validateArguments(LambdaExpr lambdaExpr, List args) + { + Preconditions.checkArgument(args.size() == 2); + Preconditions.checkArgument( + args.size() == lambdaExpr.identifierCount(), + StringUtils.format("lambda expression argument count does not match %s argument count", name()) + ); + } } /** @@ -339,6 +373,15 @@ public Set getArrayInputs(List args) // accumulator argument cannot be inferred, so ignore it until we think of something better to do return ImmutableSet.copyOf(args.subList(0, args.size() - 1)); } + + @Override + public void validateArguments(LambdaExpr lambdaExpr, List args) + { + Preconditions.checkArgument( + args.size() == lambdaExpr.identifierCount(), + StringUtils.format("lambda expression argument count does not match %s argument count", name()) + ); + } } /** @@ -357,7 +400,6 @@ public String name() @Override public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) 
{ - Preconditions.checkArgument(argsExpr.size() == 1); Expr arrayExpr = argsExpr.get(0); ExprEval arrayEval = arrayExpr.eval(bindings); @@ -398,6 +440,16 @@ public Set getArrayInputs(List args) return ImmutableSet.of(args.get(0)); } + @Override + public void validateArguments(LambdaExpr lambdaExpr, List args) + { + Preconditions.checkArgument(args.size() == 1); + Preconditions.checkArgument( + args.size() == lambdaExpr.identifierCount(), + StringUtils.format("lambda expression argument count does not match %s argument count", name()) + ); + } + private Stream filter(T[] array, LambdaExpr expr, SettableLambdaBinding binding) { return Arrays.stream(array).filter(s -> expr.eval(binding.withBinding(expr.getIdentifier(), s)).asBoolean()); @@ -413,7 +465,6 @@ abstract class MatchFunction implements ApplyFunction @Override public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) { - Preconditions.checkArgument(argsExpr.size() == 1); Expr arrayExpr = argsExpr.get(0); ExprEval arrayEval = arrayExpr.eval(bindings); @@ -436,6 +487,16 @@ public Set getArrayInputs(List args) return ImmutableSet.of(args.get(0)); } + @Override + public void validateArguments(LambdaExpr lambdaExpr, List args) + { + Preconditions.checkArgument(args.size() == 1); + Preconditions.checkArgument( + args.size() == lambdaExpr.identifierCount(), + StringUtils.format("lambda expression argument count does not match %s argument count", name()) + ); + } + public abstract ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bindings); } @@ -547,22 +608,25 @@ interface IndexableMapLambdaObjectBinding extends Expr.ObjectBinding class MapLambdaBinding implements IndexableMapLambdaObjectBinding { private final Expr.ObjectBinding bindings; + @Nullable private final String lambdaIdentifier; private final Object[] arrayValues; private int index = 0; + private final boolean scoped; MapLambdaBinding(Object[] arrayValues, LambdaExpr expr, Expr.ObjectBinding bindings) { 
this.lambdaIdentifier = expr.getIdentifier(); this.arrayValues = arrayValues; this.bindings = bindings != null ? bindings : Collections.emptyMap()::get; + this.scoped = lambdaIdentifier != null; } @Nullable @Override public Object get(String name) { - if (name.equals(lambdaIdentifier)) { + if (scoped && name.equals(lambdaIdentifier)) { return arrayValues[index]; } return bindings.get(name); @@ -592,12 +656,14 @@ class CartesianMapLambdaBinding implements IndexableMapLambdaObjectBinding private final Expr.ObjectBinding bindings; private final Object2IntMap lambdaIdentifiers; private final List> lambdaInputs; + private final boolean scoped; private int index = 0; CartesianMapLambdaBinding(List> inputs, LambdaExpr expr, Expr.ObjectBinding bindings) { this.lambdaInputs = inputs; List ids = expr.getIdentifiers(); + this.scoped = ids.size() > 0; this.lambdaIdentifiers = new Object2IntArrayMap<>(ids.size()); for (int i = 0; i < ids.size(); i++) { lambdaIdentifiers.put(ids.get(i), i); @@ -610,7 +676,7 @@ class CartesianMapLambdaBinding implements IndexableMapLambdaObjectBinding @Override public Object get(String name) { - if (lambdaIdentifiers.containsKey(name)) { + if (scoped && lambdaIdentifiers.containsKey(name)) { return lambdaInputs.get(index).get(lambdaIdentifiers.getInt(name)); } return bindings.get(name); diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index 4becc13c933f..7d1e64489359 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -509,10 +509,19 @@ public String toString() return StringUtils.format("(%s -> %s)", args, expr); } + public int identifierCount() + { + return args.size(); + } + + @Nullable public String getIdentifier() { - Preconditions.checkState(args.size() == 1, "LambdaExpr has no or multiple arguments"); - return args.get(0).toString(); + Preconditions.checkState(args.size() < 2, 
"LambdaExpr has multiple arguments"); + if (args.size() == 1) { + return args.get(0).toString(); + } + return null; } public List getIdentifiers() @@ -661,6 +670,10 @@ class ApplyFunctionExpr implements Expr this.argsExpr = args; this.lambdaExpr = expr; + function.validateArguments(expr, args); + + // apply function expressions are examined during expression selector creation, so precompute and cache the + // binding details of children argsBindingDetails = new ArrayList<>(); BindingDetails accumulator = new BindingDetails(); for (Expr arg : argsExpr) { diff --git a/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java index 06f1520b0ec4..57c937df6ca3 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java @@ -23,12 +23,17 @@ import org.apache.druid.common.config.NullHandling; import org.junit.Assert; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; public class ApplyFunctionTest { private Expr.ObjectBinding bindings; + @Rule + public ExpectedException expectedException = ExpectedException.none(); + @Before public void setup() { @@ -59,6 +64,7 @@ public void testMap() assertExpr("map((x) -> x + 1, map((x) -> x + 1, [1, 2, 3, 4, 5]))", new Long[] {3L, 4L, 5L, 6L, 7L}); assertExpr("map((x) -> x + 1, map((x) -> x + 1, b))", new Long[] {3L, 4L, 5L, 6L, 7L}); + assertExpr("map(() -> 1, [1, 2, 3, 4, 5])", new Long[] {1L, 1L, 1L, 1L, 1L}); } @Test @@ -66,7 +72,7 @@ public void testCartesianMap() { assertExpr("cartesian_map((x, y) -> concat(x, y), ['foo', 'bar', 'baz', 'foobar'], ['bar', 'baz'])", new String[] {"foobar", "foobaz", "barbar", "barbaz", "bazbar", "bazbaz", "foobarbar", "foobarbaz"}); assertExpr("cartesian_map((x, y, z) -> concat(concat(x, y), z), ['foo', 'bar', 'baz', 'foobar'], ['bar', 'baz'], ['omg'])", new 
String[] {"foobaromg", "foobazomg", "barbaromg", "barbazomg", "bazbaromg", "bazbazomg", "foobarbaromg", "foobarbazomg"}); - + assertExpr("cartesian_map(() -> 1, [1, 2], [1, 2, 3])", new Long[] {1L, 1L, 1L, 1L, 1L, 1L}); assertExpr("cartesian_map((x, y) -> concat(x, y), d, d)", new String[] {null}); assertExpr("cartesian_map((x, y) -> concat(x, y), d, f)", new String[0]); if (NullHandling.replaceWithDefault()) { @@ -132,6 +138,24 @@ public void testScoping() assertExpr("fold((b, acc) -> acc + b, map(b -> b + 1, b), fold((b, acc) -> acc + b, map(b -> b + 1, b), 0) + fold((b, acc) -> acc + b, map(b -> b + 1, b), 0))", 60L); } + @Test + public void testInvalidArgCount() + { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("lambda expression argument count does not match fold argument count"); + assertExpr("fold(() -> 1, [1, 1, 1, 1, 1], 0)", null); + + expectedException.expectMessage("lambda expression argument count does not match cartesian_fold argument count"); + assertExpr("cartesian_fold(() -> 1, [1, 1, 1, 1, 1], [1, 1], 0)", null); + + expectedException.expectMessage("lambda expression argument count does not match any argument count"); + assertExpr("any(() -> 1, [1, 2, 3, 4])", null); + + expectedException.expectMessage("lambda expression argument count does not match all argument count"); + assertExpr("all(() -> 0, [1, 2, 3, 4])", null); + + } + private void assertExpr(final String expression, final Object expectedResult) { final Expr expr = Parser.parse(expression, ExprMacroTable.nil()); From ff167d2e87d63e3c5f3853e0d4cfd28c9f12391b Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 7 Jun 2019 04:31:00 -0700 Subject: [PATCH 43/48] check function args at parse time instead of eval time --- .../java/org/apache/druid/math/expr/Expr.java | 1 + .../org/apache/druid/math/expr/Function.java | 279 ++++++++++++------ 2 files changed, 191 insertions(+), 89 deletions(-) diff --git 
a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index 7d1e64489359..e5028b482a7b 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -590,6 +590,7 @@ class FunctionExpr implements Expr this.function = function; this.name = name; this.args = args; + function.validateArguments(args); } @Override diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index 8dd1614a7dc2..283ec526ec91 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -75,17 +75,27 @@ default Set getArrayInputs(List args) return Collections.emptySet(); } + /** + * Validate function arguments + */ + void validateArguments(List args); + /** * Base class for a single variable input {@link Function} implementation */ abstract class UnivariateFunction implements Function { @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + public void validateArguments(List args) { if (args.size() != 1) { throw new IAE("Function[%s] needs 1 argument", name()); } + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { Expr expr = args.get(0); return eval(expr.eval(bindings)); } @@ -99,11 +109,16 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) abstract class BivariateFunction implements Function { @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + public void validateArguments(List args) { if (args.size() != 2) { throw new IAE("Function[%s] needs 2 arguments", name()); } + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { Expr expr1 = args.get(0); Expr expr2 = args.get(1); return eval(expr1.eval(bindings), expr2.eval(bindings)); @@ -199,7 +214,8 @@ protected final ExprEval 
eval(ExprEval x, ExprEval y) */ abstract class ArrayScalarFunction implements Function { - void validateArguments(List args) + @Override + public void validateArguments(List args) { if (args.size() != 2) { throw new IAE("Function[%s] needs 2 argument", name()); @@ -209,7 +225,6 @@ void validateArguments(List args) @Override public Set getScalarInputs(List args) { - validateArguments(args); return ImmutableSet.of(args.get(1)); } @@ -222,7 +237,6 @@ public Set getArrayInputs(List args) @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - validateArguments(args); final ExprEval arrayExpr = args.get(0).eval(bindings); final ExprEval scalarExpr = args.get(1).eval(bindings); if (arrayExpr.asArray() == null) { @@ -239,7 +253,8 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) */ abstract class ArraysFunction implements Function { - void validateArguments(List args) + @Override + public void validateArguments(List args) { if (args.size() != 2) { throw new IAE("Function[%s] needs 2 argument", name()); @@ -255,14 +270,12 @@ public Set getScalarInputs(List args) @Override public Set getArrayInputs(List args) { - validateArguments(args); return ImmutableSet.copyOf(args); } @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - validateArguments(args); final ExprEval arrayExpr1 = args.get(0).eval(bindings); final ExprEval arrayExpr2 = args.get(1).eval(bindings); @@ -287,16 +300,17 @@ public String name() } @Override - public ExprEval apply(List args, Expr.ObjectBinding bindings) + public void validateArguments(List args) { - final int radix; - if (args.size() == 1) { - radix = 10; - } else if (args.size() == 2) { - radix = args.get(1).eval(bindings).asInt(); - } else { + if (args.size() != 1 && args.size() != 2) { throw new IAE("Function[%s] needs 1 or 2 arguments", name()); } + } + + @Override + public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + final int radix = args.size() == 1 ? 
10 : args.get(1).eval(bindings).asInt(); final String input = NullHandling.nullToEmptyIfNeeded(args.get(0).eval(bindings).asString()); if (input == null) { @@ -332,12 +346,16 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + return ExprEval.of(PI); + } + + @Override + public void validateArguments(List args) { if (args.size() > 0) { throw new IAE("Function[%s] needs 0 argument", name()); } - - return ExprEval.of(PI); } } @@ -649,10 +667,6 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() != 1 && args.size() != 2) { - throw new IAE("Function[%s] needs 1 or 2 arguments", name()); - } - ExprEval value1 = args.get(0).eval(bindings); if (value1.type() != ExprType.LONG && value1.type() != ExprType.DOUBLE) { throw new IAE("The first argument to the function[%s] should be integer or double type but get the %s type", name(), value1.type()); @@ -669,6 +683,14 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } + @Override + public void validateArguments(List args) + { + if (args.size() != 1 && args.size() != 2) { + throw new IAE("Function[%s] needs 1 or 2 arguments", name()); + } + } + private ExprEval eval(ExprEval param) { return eval(param, 0); @@ -978,13 +1000,17 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + ExprEval x = args.get(0).eval(bindings); + return x.asBoolean() ? args.get(1).eval(bindings) : args.get(2).eval(bindings); + } + + @Override + public void validateArguments(List args) { if (args.size() != 3) { throw new IAE("Function[%s] needs 3 arguments", name()); } - - ExprEval x = args.get(0).eval(bindings); - return x.asBoolean() ? 
args.get(1).eval(bindings) : args.get(2).eval(bindings); } } @@ -1002,10 +1028,6 @@ public String name() @Override public ExprEval apply(final List args, final Expr.ObjectBinding bindings) { - if (args.size() < 2) { - throw new IAE("Function[%s] must have at least 2 arguments", name()); - } - for (int i = 0; i < args.size(); i += 2) { if (i == args.size() - 1) { // ELSE else_result. @@ -1018,6 +1040,14 @@ public ExprEval apply(final List args, final Expr.ObjectBinding bindings) return ExprEval.of(null); } + + @Override + public void validateArguments(List args) + { + if (args.size() < 2) { + throw new IAE("Function[%s] must have at least 2 arguments", name()); + } + } } /** @@ -1034,10 +1064,6 @@ public String name() @Override public ExprEval apply(final List args, final Expr.ObjectBinding bindings) { - if (args.size() < 3) { - throw new IAE("Function[%s] must have at least 3 arguments", name()); - } - for (int i = 1; i < args.size(); i += 2) { if (i == args.size() - 1) { // ELSE else_result. 
@@ -1050,6 +1076,14 @@ public ExprEval apply(final List args, final Expr.ObjectBinding bindings) return ExprEval.of(null); } + + @Override + public void validateArguments(List args) + { + if (args.size() < 3) { + throw new IAE("Function[%s] must have at least 3 arguments", name()); + } + } } class CastFunc extends BivariateFunction @@ -1124,9 +1158,6 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() != 1 && args.size() != 2) { - throw new IAE("Function[%s] needs 1 or 2 arguments", name()); - } ExprEval value = args.get(0).eval(bindings); if (value.type() != ExprType.STRING) { throw new IAE("first argument should be string type but got %s type", value.type()); @@ -1150,6 +1181,14 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) return toValue(date); } + @Override + public void validateArguments(List args) + { + if (args.size() != 1 && args.size() != 2) { + throw new IAE("Function[%s] needs 1 or 2 arguments", name()); + } + } + protected ExprEval toValue(DateTime date) { return ExprEval.of(date.getMillis()); @@ -1181,12 +1220,17 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + final ExprEval eval = args.get(0).eval(bindings); + return eval.value() == null ? args.get(1).eval(bindings) : eval; + } + + @Override + public void validateArguments(List args) { if (args.size() != 2) { throw new IAE("Function[%s] needs 2 arguments", name()); } - final ExprEval eval = args.get(0).eval(bindings); - return eval.value() == null ? 
args.get(1).eval(bindings) : eval; } } @@ -1225,6 +1269,12 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) return ExprEval.of(builder.toString()); } } + + @Override + public void validateArguments(List args) + { + // anything goes + } } class StrlenFunc implements Function @@ -1237,13 +1287,17 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + final String arg = args.get(0).eval(bindings).asString(); + return arg == null ? ExprEval.ofLong(NullHandling.defaultLongValue()) : ExprEval.of(arg.length()); + } + + @Override + public void validateArguments(List args) { if (args.size() != 1) { throw new IAE("Function[%s] needs 1 argument", name()); } - - final String arg = args.get(0).eval(bindings).asString(); - return arg == null ? ExprEval.ofLong(NullHandling.defaultLongValue()) : ExprEval.of(arg.length()); } } @@ -1258,10 +1312,6 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() < 1) { - throw new IAE("Function[%s] needs 1 or more arguments", name()); - } - final String formatString = NullHandling.nullToEmptyIfNeeded(args.get(0).eval(bindings).asString()); if (formatString == null) { @@ -1275,6 +1325,14 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) return ExprEval.of(StringUtils.nonStrictFormat(formatString, formatArgs)); } + + @Override + public void validateArguments(List args) + { + if (args.size() < 1) { + throw new IAE("Function[%s] needs 1 or more arguments", name()); + } + } } class StrposFunc implements Function @@ -1288,10 +1346,6 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() < 2 || args.size() > 3) { - throw new IAE("Function[%s] needs 2 or 3 arguments", name()); - } - final String haystack = NullHandling.nullToEmptyIfNeeded(args.get(0).eval(bindings).asString()); final String needle = 
NullHandling.nullToEmptyIfNeeded(args.get(1).eval(bindings).asString()); @@ -1309,6 +1363,14 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) return ExprEval.of(haystack.indexOf(needle, fromIndex)); } + + @Override + public void validateArguments(List args) + { + if (args.size() < 2 || args.size() > 3) { + throw new IAE("Function[%s] needs 2 or 3 arguments", name()); + } + } } class SubstringFunc implements Function @@ -1322,10 +1384,6 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() != 3) { - throw new IAE("Function[%s] needs 3 arguments", name()); - } - final String arg = args.get(0).eval(bindings).asString(); if (arg == null) { @@ -1348,6 +1406,14 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) return ExprEval.of(NullHandling.defaultStringValue()); } } + + @Override + public void validateArguments(List args) + { + if (args.size() != 3) { + throw new IAE("Function[%s] needs 3 arguments", name()); + } + } } class RightFunc extends StringLongFunction @@ -1404,10 +1470,6 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() != 3) { - throw new IAE("Function[%s] needs 3 arguments", name()); - } - final String arg = args.get(0).eval(bindings).asString(); final String pattern = NullHandling.nullToEmptyIfNeeded(args.get(1).eval(bindings).asString()); final String replacement = NullHandling.nullToEmptyIfNeeded(args.get(2).eval(bindings).asString()); @@ -1416,6 +1478,14 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } return ExprEval.of(StringUtils.replace(arg, pattern, replacement)); } + + @Override + public void validateArguments(List args) + { + if (args.size() != 3) { + throw new IAE("Function[%s] needs 3 arguments", name()); + } + } } class LowerFunc implements Function @@ -1429,16 +1499,20 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - 
if (args.size() != 1) { - throw new IAE("Function[%s] needs 1 argument", name()); - } - final String arg = args.get(0).eval(bindings).asString(); if (arg == null) { return ExprEval.of(NullHandling.defaultStringValue()); } return ExprEval.of(StringUtils.toLowerCase(arg)); } + + @Override + public void validateArguments(List args) + { + if (args.size() != 1) { + throw new IAE("Function[%s] needs 1 argument", name()); + } + } } class UpperFunc implements Function @@ -1452,16 +1526,20 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() != 1) { - throw new IAE("Function[%s] needs 1 argument", name()); - } - final String arg = args.get(0).eval(bindings).asString(); if (arg == null) { return ExprEval.of(NullHandling.defaultStringValue()); } return ExprEval.of(StringUtils.toUpperCase(arg)); } + + @Override + public void validateArguments(List args) + { + if (args.size() != 1) { + throw new IAE("Function[%s] needs 1 argument", name()); + } + } } class ReverseFunc extends UnivariateFunction @@ -1511,13 +1589,17 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + final ExprEval expr = args.get(0).eval(bindings); + return ExprEval.of(expr.value() == null, ExprType.LONG); + } + + @Override + public void validateArguments(List args) { if (args.size() != 1) { throw new IAE("Function[%s] needs 1 argument", name()); } - - final ExprEval expr = args.get(0).eval(bindings); - return ExprEval.of(expr.value() == null, ExprType.LONG); } } @@ -1531,13 +1613,17 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) + { + final ExprEval expr = args.get(0).eval(bindings); + return ExprEval.of(expr.value() != null, ExprType.LONG); + } + + @Override + public void validateArguments(List args) { if (args.size() != 1) { throw new IAE("Function[%s] needs 1 argument", name()); } - - final ExprEval expr = args.get(0).eval(bindings); - return 
ExprEval.of(expr.value() != null, ExprType.LONG); } } @@ -1552,10 +1638,6 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() != 3) { - throw new IAE("Function[%s] needs 3 arguments", name()); - } - String base = args.get(0).eval(bindings).asString(); int len = args.get(1).eval(bindings).asInt(); String pad = args.get(2).eval(bindings).asString(); @@ -1567,6 +1649,14 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } + + @Override + public void validateArguments(List args) + { + if (args.size() != 3) { + throw new IAE("Function[%s] needs 3 arguments", name()); + } + } } class RpadFunc implements Function @@ -1580,10 +1670,6 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() != 3) { - throw new IAE("Function[%s] needs 3 arguments", name()); - } - String base = args.get(0).eval(bindings).asString(); int len = args.get(1).eval(bindings).asInt(); String pad = args.get(2).eval(bindings).asString(); @@ -1595,6 +1681,14 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } + + @Override + public void validateArguments(List args) + { + if (args.size() != 3) { + throw new IAE("Function[%s] needs 3 arguments", name()); + } + } } class SubMonthFunc implements Function @@ -1608,10 +1702,6 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() != 3) { - throw new IAE("Function[%s] needs 3 arguments", name()); - } - Long left = args.get(0).eval(bindings).asLong(); Long right = args.get(1).eval(bindings).asLong(); DateTimeZone timeZone = DateTimes.inferTzFromString(args.get(2).eval(bindings).asString()); @@ -1623,6 +1713,14 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) } } + + @Override + public void validateArguments(List args) + { + if (args.size() != 3) { + throw new IAE("Function[%s] needs 3 arguments", name()); + } + } } class 
ArrayLengthFunction implements Function @@ -1636,9 +1734,6 @@ public String name() @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - if (args.size() != 1) { - throw new IAE("Function[%s] needs 1 argument", name()); - } final ExprEval expr = args.get(0).eval(bindings); final Object[] array = expr.asArray(); if (array == null) { @@ -1658,6 +1753,14 @@ public Set getArrayInputs(List args) return ImmutableSet.of(args.get(0)); } + @Override + public void validateArguments(List args) + { + if (args.size() != 1) { + throw new IAE("Function[%s] needs 1 argument", name()); + } + } + @Override public Set getScalarInputs(List args) { @@ -1673,7 +1776,8 @@ public String name() return "string_to_array"; } - void validateArguments(List args) + @Override + public void validateArguments(List args) { if (args.size() != 2) { throw new IAE("Function[%s] needs 2 argument", name()); @@ -1683,8 +1787,6 @@ void validateArguments(List args) @Override public ExprEval apply(List args, Expr.ObjectBinding bindings) { - validateArguments(args); - final ExprEval expr = args.get(0).eval(bindings); final String arrayString = expr.asString(); if (arrayString == null) { @@ -1698,7 +1800,6 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) @Override public Set getScalarInputs(List args) { - validateArguments(args); return ImmutableSet.copyOf(args); } } From d0bd362b2eab35602c0316b50e575262cc9067b0 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 10 Jun 2019 15:14:06 -0700 Subject: [PATCH 44/48] more immutable --- .../java/org/apache/druid/math/expr/Expr.java | 35 ++++++++++--------- .../apache/druid/math/expr/ParserTest.java | 7 ++++ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index e5028b482a7b..2396d4664bde 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ 
b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -20,6 +20,7 @@ package org.apache.druid.math.expr; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import com.google.common.math.LongMath; @@ -41,7 +42,8 @@ import java.util.stream.Collectors; /** - * Base interface of Druid expression language abstract syntax tree nodes + * Base interface of Druid expression language abstract syntax tree nodes. All {@link Expr} implementations are expected + * to be immutable. */ public interface Expr { @@ -146,9 +148,9 @@ interface Shuttle */ class BindingDetails { - private final Set freeVariables; - private final Set scalarVariables; - private final Set arrayVariables; + private final ImmutableSet freeVariables; + private final ImmutableSet scalarVariables; + private final ImmutableSet arrayVariables; public BindingDetails() { @@ -162,9 +164,9 @@ public BindingDetails(String identifier) public BindingDetails(Set freeVariables, Set scalarVariables, Set arrayVariables) { - this.freeVariables = freeVariables; - this.scalarVariables = scalarVariables; - this.arrayVariables = arrayVariables; + this.freeVariables = ImmutableSet.copyOf(freeVariables); + this.scalarVariables = ImmutableSet.copyOf(scalarVariables); + this.arrayVariables = ImmutableSet.copyOf(arrayVariables); } /** @@ -494,12 +496,12 @@ public Expr visit(Shuttle shuttle) class LambdaExpr implements Expr { - private final List args; + private final ImmutableList args; private final Expr expr; LambdaExpr(List args, Expr expr) { - this.args = args; + this.args = ImmutableList.copyOf(args); this.expr = expr; } @@ -583,13 +585,13 @@ class FunctionExpr implements Expr { final Function function; final String name; - final List args; + final ImmutableList args; FunctionExpr(Function function, String name, List args) { this.function = function; this.name = name; - this.args = args; + 
this.args = ImmutableList.copyOf(args); function.validateArguments(args); } @@ -659,27 +661,27 @@ class ApplyFunctionExpr implements Expr final ApplyFunction function; final String name; final LambdaExpr lambdaExpr; - final List argsExpr; + final ImmutableList argsExpr; final BindingDetails bindingDetails; final BindingDetails lambdaBindingDetails; - final List argsBindingDetails; + final ImmutableList argsBindingDetails; ApplyFunctionExpr(ApplyFunction function, String name, LambdaExpr expr, List args) { this.function = function; this.name = name; - this.argsExpr = args; + this.argsExpr = ImmutableList.copyOf(args); this.lambdaExpr = expr; function.validateArguments(expr, args); // apply function expressions are examined during expression selector creation, so precompute and cache the // binding details of children - argsBindingDetails = new ArrayList<>(); + ImmutableList.Builder argBindingDetailsBuilder = ImmutableList.builder(); BindingDetails accumulator = new BindingDetails(); for (Expr arg : argsExpr) { BindingDetails argDetails = arg.analyzeInputs(); - argsBindingDetails.add(argDetails); + argBindingDetailsBuilder.add(argDetails); accumulator = accumulator.merge(argDetails); } @@ -695,6 +697,7 @@ class ApplyFunctionExpr implements Expr lambdaBindingDetails = lambdaExpr.analyzeInputs(); bindingDetails = accumulator.merge(lambdaBindingDetails).mergeWithArrays(arrayVariables); + argsBindingDetails = argBindingDetailsBuilder.build(); } @Override diff --git a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java index 772b1c3090f0..aa40eb51ccfa 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java @@ -249,6 +249,13 @@ public void testFunctions() @Test public void testApplyFunctions() { + validateParser( + "map(() -> 1, x)", + "(map ([] -> 1), [x])", + ImmutableList.of("x"), + ImmutableSet.of(), + 
ImmutableSet.of("x") + ); validateParser( "map((x) -> x + 1, x)", "(map ([x] -> (+ x 1)), [x])", From 48ebd0fbdb80e9da70e6ca801a7f11a2eacf24e0 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 10 Jun 2019 15:20:54 -0700 Subject: [PATCH 45/48] more more immutable --- .../apache/druid/math/expr/ApplyFunction.java | 2 +- .../java/org/apache/druid/math/expr/Expr.java | 17 ++++++++--------- .../org/apache/druid/math/expr/Function.java | 3 ++- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java index 63b5daf44720..f50fe8eb4b42 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java +++ b/core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java @@ -39,7 +39,7 @@ /** * Base interface describing the mechanism used to evaluate an {@link ApplyFunctionExpr}, which 'applies' a - * {@link LambdaExpr} to one or more array {@link Expr} + * {@link LambdaExpr} to one or more array {@link Expr}. All {@link ApplyFunction} implementations are immutable. */ public interface ApplyFunction { diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index 2396d4664bde..ee5ff9689e77 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -32,7 +32,6 @@ import org.apache.druid.java.util.common.guava.Comparators; import javax.annotation.Nullable; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; @@ -42,8 +41,8 @@ import java.util.stream.Collectors; /** - * Base interface of Druid expression language abstract syntax tree nodes. All {@link Expr} implementations are expected - * to be immutable. + * Base interface of Druid expression language abstract syntax tree nodes. 
All {@link Expr} implementations are + * immutable. */ public interface Expr { @@ -172,15 +171,15 @@ public BindingDetails(Set freeVariables, Set scalarVariables, Se /** * Get the list of required column inputs to evaluate an expression */ - public List getRequiredColumns() + public ImmutableList getRequiredColumns() { - return new ArrayList<>(freeVariables); + return ImmutableList.copyOf(freeVariables); } /** * Total set of 'free' identifiers of an {@link Expr}, that are not supplied by a {@link LambdaExpr} binding */ - public Set getFreeVariables() + public ImmutableSet getFreeVariables() { return freeVariables; } @@ -188,7 +187,7 @@ public Set getFreeVariables() /** * Set of identifiers which are used with scalar operators and functions */ - public Set getScalarVariables() + public ImmutableSet getScalarVariables() { return scalarVariables; } @@ -196,7 +195,7 @@ public Set getScalarVariables() /** * Set of identifiers which are used with array typed functions and apply functions. */ - public Set getArrayVariables() + public ImmutableSet getArrayVariables() { return arrayVariables; } @@ -531,7 +530,7 @@ public List getIdentifiers() return args.stream().map(IdentifierExpr::toString).collect(Collectors.toList()); } - public List getIdentifierExprs() + public ImmutableList getIdentifierExprs() { return args; } diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index 283ec526ec91..65643e226d77 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -41,7 +41,8 @@ import java.util.stream.Stream; /** - * Base interface describing the mechanism used to evaluate a {@link FunctionExpr} + * Base interface describing the mechanism used to evaluate a {@link FunctionExpr}. All {@link Function} implementations + * are immutable. * * Do NOT remove "unused" members in this class. 
They are used by generated Antlr */ From c9b304b361bcbd1a0122ea918e0ad58f7011071d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 10 Jun 2019 15:28:26 -0700 Subject: [PATCH 46/48] clarify grammar --- core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 b/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 index d193e81d7906..aacbbe9d4290 100644 --- a/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 +++ b/core/src/main/antlr4/org/apache/druid/math/expr/antlr/Expr.g4 @@ -35,7 +35,7 @@ expr : 'null' # null | '[]' # emptyArray ; -lambda : (IDENTIFIER | '(' (IDENTIFIER (',' IDENTIFIER)*)? ')') '->' expr +lambda : (IDENTIFIER | '(' ')' | '(' IDENTIFIER (',' IDENTIFIER)* ')') '->' expr ; fnArgs : expr (',' expr)* # functionArgs From 5032099d2b7bb6aa8bf3d009eadd46fb02e00002 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 18 Jun 2019 13:39:26 -0700 Subject: [PATCH 47/48] fix docs --- docs/content/misc/math-expr.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/misc/math-expr.md b/docs/content/misc/math-expr.md index f42bbd4fe73c..57427a988c35 100644 --- a/docs/content/misc/math-expr.md +++ b/docs/content/misc/math-expr.md @@ -173,8 +173,8 @@ See javadoc of java.lang.Math for detailed explanation for each function. | `array_ordinal(arr,long)` | returns the array element at the 1 based index supplied, or null for an out of range index | | `array_contains(arr,expr)` | returns true if the array contains the element specified by expr, or contains all elements specified by expr if expr is an array | | `array_overlap(arr1,arr2)` | returns true if arr1 and arr2 have any elements in common | -| `array_offset_of(expr)` | returns the 0 based index of the first occurrence of expr in the array, or `null` if no matching elements exist in the array. 
| -| `array_ordinal_of(expr)` | returns the 1 based index of the first occurrence of expr in the array, or `null` if no matching elements exist in the array. | +| `array_offset_of(arr,expr)` | returns the 0 based index of the first occurrence of expr in the array, or `null` if no matching elements exist in the array. | +| `array_ordinal_of(arr,expr)` | returns the 1 based index of the first occurrence of expr in the array, or `null` if no matching elements exist in the array. | | `array_append(arr1,expr)` | appends expr to arr, the resulting array type determined by the type of the first array | | `array_concat(arr1,arr2)` | concatenates 2 arrays, the resulting array type determined by the type of the first array | | `array_to_string(arr,str)` | joins all elements of arr by the delimiter specified by str | From 4aae1d8f416f4632dc8d97ffd9cb31fb72184016 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 19 Jun 2019 01:31:39 -0700 Subject: [PATCH 48/48] empty array is string test, we need a way to make arrays better maybe in the future, or define empty arrays as other types.. --- core/src/test/java/org/apache/druid/math/expr/FunctionTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java index 1e7a2c870b4e..ec0884cefcf4 100644 --- a/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -220,6 +220,8 @@ public void testArrayAppend() { assertExpr("array_append([1, 2, 3], 4)", new Long[]{1L, 2L, 3L, 4L}); assertExpr("array_append([1, 2, 3], 'bar')", new Long[]{1L, 2L, 3L, null}); + assertExpr("array_append([], 1)", new String[]{"1"}); + assertExpr("array_append(cast([], 'LONG_ARRAY'), 1)", new Long[]{1L}); } @Test