-
Notifications
You must be signed in to change notification settings - Fork 3.8k
add computed Expr output types #10370
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
834cb13
472918e
28c0588
57282df
3b5a1fe
8fc1253
2c6fc87
c36afdf
d2d7f9a
8d64ecc
ee4306c
b09b24d
9ff744b
40d0913
5d99b3d
b0b76aa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
| import com.google.common.collect.ImmutableSet; | ||
| import it.unimi.dsi.fastutil.objects.Object2IntArrayMap; | ||
| import it.unimi.dsi.fastutil.objects.Object2IntMap; | ||
| import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; | ||
| import org.apache.druid.java.util.common.IAE; | ||
| import org.apache.druid.java.util.common.RE; | ||
| import org.apache.druid.java.util.common.StringUtils; | ||
|
|
@@ -74,6 +75,15 @@ default boolean hasArrayOutput(LambdaExpr lambdaExpr) | |
| */ | ||
| void validateArguments(LambdaExpr lambdaExpr, List<Expr> args); | ||
|
|
||
| /** | ||
| * Compute the output type of this function for a given lambda and the argument expressions which will be applied as | ||
| * its inputs. | ||
| * | ||
| * @see Expr#getOutputType | ||
| */ | ||
| @Nullable | ||
| ExprType getOutputType(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args); | ||
|
|
||
| /** | ||
| * Base class for "map" functions, which are a class of {@link ApplyFunction} which take a lambda function that is | ||
| * mapped to the values of an {@link IndexableMapLambdaObjectBinding} which is created from the outer | ||
|
|
@@ -87,6 +97,13 @@ public boolean hasArrayOutput(LambdaExpr lambdaExpr) | |
| return true; | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public ExprType getOutputType(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args) | ||
| { | ||
| return ExprType.asArrayType(expr.getOutputType(new LambdaInputBindingTypes(inputTypes, expr, args))); | ||
| } | ||
|
|
||
| /** | ||
| * Evaluate {@link LambdaExpr} against every index position of an {@link IndexableMapLambdaObjectBinding} | ||
| */ | ||
|
|
@@ -274,16 +291,24 @@ ExprEval applyFold(LambdaExpr lambdaExpr, Object accumulator, IndexableFoldLambd | |
| accumulator = evaluated.value(); | ||
| } | ||
| if (accumulator instanceof Boolean) { | ||
| return ExprEval.of((boolean) accumulator, ExprType.LONG); | ||
| return ExprEval.ofLongBoolean((boolean) accumulator); | ||
| } | ||
| return ExprEval.bestEffortOf(accumulator); | ||
| } | ||
|
|
||
| @Override | ||
| public boolean hasArrayOutput(LambdaExpr lambdaExpr) | ||
| { | ||
| Expr.BindingDetails lambdaBindingDetails = lambdaExpr.analyzeInputs(); | ||
| return lambdaBindingDetails.isOutputArray(); | ||
| Expr.BindingAnalysis lambdaBindingAnalysis = lambdaExpr.analyzeInputs(); | ||
| return lambdaBindingAnalysis.isOutputArray(); | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public ExprType getOutputType(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args) | ||
| { | ||
| // output type is accumulator type, which is last argument | ||
| return args.get(args.size() - 1).getOutputType(inputTypes); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -481,6 +506,14 @@ public void validateArguments(LambdaExpr lambdaExpr, List<Expr> args) | |
| ); | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public ExprType getOutputType(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args) | ||
| { | ||
| // output type is input array type | ||
| return args.get(0).getOutputType(inputTypes); | ||
| } | ||
|
|
||
| private <T> Stream<T> filter(T[] array, LambdaExpr expr, SettableLambdaBinding binding) | ||
| { | ||
| return Arrays.stream(array).filter(s -> expr.eval(binding.withBinding(expr.getIdentifier(), s)).asBoolean()); | ||
|
|
@@ -501,7 +534,7 @@ public ExprEval apply(LambdaExpr lambdaExpr, List<Expr> argsExpr, Expr.ObjectBin | |
|
|
||
| final Object[] array = arrayEval.asArray(); | ||
| if (array == null) { | ||
| return ExprEval.of(false, ExprType.LONG); | ||
| return ExprEval.ofLongBoolean(false); | ||
| } | ||
|
|
||
| SettableLambdaBinding lambdaBinding = new SettableLambdaBinding(lambdaExpr, bindings); | ||
|
|
@@ -528,6 +561,13 @@ public void validateArguments(LambdaExpr lambdaExpr, List<Expr> args) | |
| ); | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public ExprType getOutputType(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args) | ||
| { | ||
| return ExprType.LONG; | ||
| } | ||
|
|
||
| public abstract ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bindings); | ||
| } | ||
|
|
||
|
|
@@ -550,7 +590,7 @@ public ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bi | |
| { | ||
| boolean anyMatch = Arrays.stream(values) | ||
| .anyMatch(o -> expr.eval(bindings.withBinding(expr.getIdentifier(), o)).asBoolean()); | ||
| return ExprEval.of(anyMatch, ExprType.LONG); | ||
| return ExprEval.ofLongBoolean(anyMatch); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -573,7 +613,7 @@ public ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bi | |
| { | ||
| boolean allMatch = Arrays.stream(values) | ||
| .allMatch(o -> expr.eval(bindings.withBinding(expr.getIdentifier(), o)).asBoolean()); | ||
| return ExprEval.of(allMatch, ExprType.LONG); | ||
| return ExprEval.ofLongBoolean(allMatch); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -848,4 +888,38 @@ public CartesianFoldLambdaBinding accumulateWithIndex(int index, Object acc) | |
| return this; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Helper that can wrap another {@link Expr.InputBindingTypes} to use to supply the type information of a | ||
| * {@link LambdaExpr} when evaluating {@link ApplyFunctionExpr#getOutputType}. Lambda identifiers do not exist | ||
| * in the underlying {@link Expr.InputBindingTypes}, but can be created by mapping the lambda identifiers to the | ||
| * arguments that will be applied to them, to map the type information. | ||
| */ | ||
| class LambdaInputBindingTypes implements Expr.InputBindingTypes | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. javadocs please |
||
| { | ||
| private final Object2IntMap<String> lambdaIdentifiers; | ||
| private final Expr.InputBindingTypes inputTypes; | ||
| private final List<Expr> args; | ||
|
|
||
| public LambdaInputBindingTypes(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args) | ||
| { | ||
| this.inputTypes = inputTypes; | ||
| this.args = args; | ||
| List<String> identifiers = expr.getIdentifiers(); | ||
| this.lambdaIdentifiers = new Object2IntOpenHashMap<>(args.size()); | ||
| for (int i = 0; i < args.size(); i++) { | ||
| lambdaIdentifiers.put(identifiers.get(i), i); | ||
| } | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public ExprType getType(String name) | ||
| { | ||
| if (lambdaIdentifiers.containsKey(name)) { | ||
| return ExprType.elementType(args.get(lambdaIdentifiers.getInt(name)).getOutputType(inputTypes)); | ||
|
Comment on lines
+919
to
+920
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Willl getType be called in a loop anywhere? It might be better to use
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It shouldn't be called in any sort of hot loop. The eventual usage of this information is so that this happens during a sort of planning phase that is checking to see if it we can make a strongly typed and optimized expression evaluator to use instead of the default. |
||
| } | ||
| return inputTypes.getType(name); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,7 +42,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right) | |
| @Override | ||
| protected ExprEval evalString(@Nullable String left, @Nullable String right) | ||
| { | ||
| return ExprEval.of(Comparators.<String>naturalNullsFirst().compare(left, right) < 0, ExprType.LONG); | ||
| return ExprEval.ofLongBoolean(Comparators.<String>naturalNullsFirst().compare(left, right) < 0); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -57,6 +57,17 @@ protected final double evalDouble(double left, double right) | |
| // Use Double.compare for more consistent NaN handling. | ||
| return Evals.asDouble(Double.compare(left, right) < 0); | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public ExprType getOutputType(InputBindingTypes inputTypes) | ||
| { | ||
| ExprType implicitCast = super.getOutputType(inputTypes); | ||
| if (ExprType.STRING.equals(implicitCast)) { | ||
| return ExprType.LONG; | ||
| } | ||
| return implicitCast; | ||
|
Comment on lines
+65
to
+69
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking at how this function works got me thinking about some stuff... Does this function need to be in sync with the behavior in Can you explain how Also, what does it mean to have an output type of null?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yeah, the behavior of
An output type of null signifies that we couldn't compute what the output type is, most likely because input type information isn't available. When the input to the expressions are actual segments ( I will add all of this stuff to the javadocs.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what should be the expected output type here if the left output type is a string and the right output type is a double?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good eye, you spotted a thing I was purposefully ignoring because I think it is not really great behavior, or consistent with other math functions. The default behavior for those 2 input math operators is to consider the arguments as doubles as you've noticed, regardless of whether or not they are both doubles. I suspect it is implemented like this to allow null values inputs to still work as zeros in 'default' mode ( It is also a bit unintuitive behavior. A string column which contains numeric values will work in one of these math operators, so something like I'm not actually sure what the best behavior here is, the math function behavior seems a bit more intuitive to me, where either input being a string produces a null output, so I chose to use that here. This PR is setting up for vectorized expressions, which are going to be strongly typed before processing, which I think makes this inconsistent/confusing/magical behavior not necessary. I was planning to raise a discussion about this part in a future PR since it isn't actually affecting anything yet, but I think it is good to start talking about it even though it isn't wired up to anything yet. Also, I really think 'default' mode complicates the expression system quite a lot. It would be my preference for the expression system to always behave in an SQL compatible manner, and default mode only come into play on the boundaries for data coming in and going out, but I haven't fully thought through the implications of this and it requires some discussion I think, and might be a bit dramatic of a change.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That might not have been a great example, this might normal behavior in some other databases 😅. I'm going to have a closer read over https://www.postgresql.org/docs/9.0/typeconv-oper.html and https://www.postgresql.org/docs/9.0/typeconv-func.html and maybe reference some other docs too and see if might make sense to different versions of this implicit conversion function for different contexts.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
can you elaborate more on this? How will that look like? While I understand the right behavior is debatable, it may still be best to keep the type in sync with eval even if it's not very intuitive. It may require that the logic of figuring out the type is not reusable as a function
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Ah, its going to be based on using these methods, just the processors will be specialized to deal with the correct type based on the set of input types. Since the output type information isn't used for non-vectorized expressions, I'm trying to model this as the change I want to see in the world and will ensure that this matches the behavior of the vectorized expressions, but I've gone ahead and split operator from function auto conversion and changed it to match existing behavior for now in case it is necessary, and can always consolidate them again in the future.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm going to merge this PR, but we can continue this discussion in the next one, so this stuff can actually be tied into things in a concrete manner. Looking at postgres, its math functions do, at least somewhat, allow for implicit conversion of sting numbers to numbers. So
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks @clintropolis |
||
| } | ||
| } | ||
|
|
||
| class BinLeqExpr extends BinaryEvalOpExprBase | ||
|
|
@@ -75,7 +86,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right) | |
| @Override | ||
| protected ExprEval evalString(@Nullable String left, @Nullable String right) | ||
| { | ||
| return ExprEval.of(Comparators.<String>naturalNullsFirst().compare(left, right) <= 0, ExprType.LONG); | ||
| return ExprEval.ofLongBoolean(Comparators.<String>naturalNullsFirst().compare(left, right) <= 0); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -90,6 +101,17 @@ protected final double evalDouble(double left, double right) | |
| // Use Double.compare for more consistent NaN handling. | ||
| return Evals.asDouble(Double.compare(left, right) <= 0); | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public ExprType getOutputType(InputBindingTypes inputTypes) | ||
| { | ||
| ExprType implicitCast = super.getOutputType(inputTypes); | ||
| if (ExprType.STRING.equals(implicitCast)) { | ||
| return ExprType.LONG; | ||
| } | ||
| return implicitCast; | ||
| } | ||
| } | ||
|
|
||
| class BinGtExpr extends BinaryEvalOpExprBase | ||
|
|
@@ -108,7 +130,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right) | |
| @Override | ||
| protected ExprEval evalString(@Nullable String left, @Nullable String right) | ||
| { | ||
| return ExprEval.of(Comparators.<String>naturalNullsFirst().compare(left, right) > 0, ExprType.LONG); | ||
| return ExprEval.ofLongBoolean(Comparators.<String>naturalNullsFirst().compare(left, right) > 0); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -123,6 +145,17 @@ protected final double evalDouble(double left, double right) | |
| // Use Double.compare for more consistent NaN handling. | ||
| return Evals.asDouble(Double.compare(left, right) > 0); | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public ExprType getOutputType(InputBindingTypes inputTypes) | ||
| { | ||
| ExprType implicitCast = super.getOutputType(inputTypes); | ||
| if (ExprType.STRING.equals(implicitCast)) { | ||
| return ExprType.LONG; | ||
| } | ||
| return implicitCast; | ||
| } | ||
| } | ||
|
|
||
| class BinGeqExpr extends BinaryEvalOpExprBase | ||
|
|
@@ -141,7 +174,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right) | |
| @Override | ||
| protected ExprEval evalString(@Nullable String left, @Nullable String right) | ||
| { | ||
| return ExprEval.of(Comparators.<String>naturalNullsFirst().compare(left, right) >= 0, ExprType.LONG); | ||
| return ExprEval.ofLongBoolean(Comparators.<String>naturalNullsFirst().compare(left, right) >= 0); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -156,6 +189,17 @@ protected final double evalDouble(double left, double right) | |
| // Use Double.compare for more consistent NaN handling. | ||
| return Evals.asDouble(Double.compare(left, right) >= 0); | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public ExprType getOutputType(InputBindingTypes inputTypes) | ||
| { | ||
| ExprType implicitCast = super.getOutputType(inputTypes); | ||
| if (ExprType.STRING.equals(implicitCast)) { | ||
| return ExprType.LONG; | ||
| } | ||
| return implicitCast; | ||
| } | ||
| } | ||
|
|
||
| class BinEqExpr extends BinaryEvalOpExprBase | ||
|
|
@@ -174,7 +218,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right) | |
| @Override | ||
| protected ExprEval evalString(@Nullable String left, @Nullable String right) | ||
| { | ||
| return ExprEval.of(Objects.equals(left, right), ExprType.LONG); | ||
| return ExprEval.ofLongBoolean(Objects.equals(left, right)); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -188,6 +232,17 @@ protected final double evalDouble(double left, double right) | |
| { | ||
| return Evals.asDouble(left == right); | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public ExprType getOutputType(InputBindingTypes inputTypes) | ||
| { | ||
| ExprType implicitCast = super.getOutputType(inputTypes); | ||
| if (ExprType.STRING.equals(implicitCast)) { | ||
| return ExprType.LONG; | ||
| } | ||
| return implicitCast; | ||
| } | ||
| } | ||
|
|
||
| class BinNeqExpr extends BinaryEvalOpExprBase | ||
|
|
@@ -206,7 +261,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right) | |
| @Override | ||
| protected ExprEval evalString(@Nullable String left, @Nullable String right) | ||
| { | ||
| return ExprEval.of(!Objects.equals(left, right), ExprType.LONG); | ||
| return ExprEval.ofLongBoolean(!Objects.equals(left, right)); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -220,6 +275,17 @@ protected final double evalDouble(double left, double right) | |
| { | ||
| return Evals.asDouble(left != right); | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public ExprType getOutputType(InputBindingTypes inputTypes) | ||
| { | ||
| ExprType implicitCast = super.getOutputType(inputTypes); | ||
| if (ExprType.STRING.equals(implicitCast)) { | ||
| return ExprType.LONG; | ||
| } | ||
| return implicitCast; | ||
| } | ||
| } | ||
|
|
||
| class BinAndExpr extends BinaryOpExprBase | ||
|
|
@@ -262,5 +328,4 @@ public ExprEval eval(ObjectBinding bindings) | |
| ExprEval leftVal = left.eval(bindings); | ||
| return leftVal.asBoolean() ? leftVal : right.eval(bindings); | ||
| } | ||
|
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
javadocs please