Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 80 additions & 6 deletions core/src/main/java/org/apache/druid/math/expr/ApplyFunction.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.google.common.collect.ImmutableSet;
import it.unimi.dsi.fastutil.objects.Object2IntArrayMap;
import it.unimi.dsi.fastutil.objects.Object2IntMap;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.RE;
import org.apache.druid.java.util.common.StringUtils;
Expand Down Expand Up @@ -74,6 +75,15 @@ default boolean hasArrayOutput(LambdaExpr lambdaExpr)
*/
void validateArguments(LambdaExpr lambdaExpr, List<Expr> args);

/**
* Compute the output type of this function for a given lambda and the argument expressions which will be applied as
* its inputs.
*
* @see Expr#getOutputType
*/
@Nullable
ExprType getOutputType(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

javadocs please


/**
* Base class for "map" functions, which are a class of {@link ApplyFunction} which take a lambda function that is
* mapped to the values of an {@link IndexableMapLambdaObjectBinding} which is created from the outer
Expand All @@ -87,6 +97,13 @@ public boolean hasArrayOutput(LambdaExpr lambdaExpr)
return true;
}

@Nullable
@Override
public ExprType getOutputType(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args)
{
  // A "map" produces an array whose element type is the lambda's output type, computed with the
  // lambda identifiers bound to the types of the argument expressions that will be applied to them.
  final ExprType elementType = expr.getOutputType(new LambdaInputBindingTypes(inputTypes, expr, args));
  return ExprType.asArrayType(elementType);
}

/**
* Evaluate {@link LambdaExpr} against every index position of an {@link IndexableMapLambdaObjectBinding}
*/
Expand Down Expand Up @@ -274,16 +291,24 @@ ExprEval applyFold(LambdaExpr lambdaExpr, Object accumulator, IndexableFoldLambd
accumulator = evaluated.value();
}
if (accumulator instanceof Boolean) {
return ExprEval.of((boolean) accumulator, ExprType.LONG);
return ExprEval.ofLongBoolean((boolean) accumulator);
}
return ExprEval.bestEffortOf(accumulator);
}

@Override
public boolean hasArrayOutput(LambdaExpr lambdaExpr)
{
Expr.BindingDetails lambdaBindingDetails = lambdaExpr.analyzeInputs();
return lambdaBindingDetails.isOutputArray();
Expr.BindingAnalysis lambdaBindingAnalysis = lambdaExpr.analyzeInputs();
return lambdaBindingAnalysis.isOutputArray();
}

@Nullable
@Override
public ExprType getOutputType(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args)
{
  // A fold's result is whatever the accumulator ends up being, and the accumulator expression
  // is always supplied as the final argument.
  final Expr accumulator = args.get(args.size() - 1);
  return accumulator.getOutputType(inputTypes);
}
}

Expand Down Expand Up @@ -481,6 +506,14 @@ public void validateArguments(LambdaExpr lambdaExpr, List<Expr> args)
);
}

@Nullable
@Override
public ExprType getOutputType(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args)
{
  // Filtering removes elements but never changes their type, so the output type is simply the
  // type of the input array argument.
  final Expr inputArray = args.get(0);
  return inputArray.getOutputType(inputTypes);
}

private <T> Stream<T> filter(T[] array, LambdaExpr expr, SettableLambdaBinding binding)
{
return Arrays.stream(array).filter(s -> expr.eval(binding.withBinding(expr.getIdentifier(), s)).asBoolean());
Expand All @@ -501,7 +534,7 @@ public ExprEval apply(LambdaExpr lambdaExpr, List<Expr> argsExpr, Expr.ObjectBin

final Object[] array = arrayEval.asArray();
if (array == null) {
return ExprEval.of(false, ExprType.LONG);
return ExprEval.ofLongBoolean(false);
}

SettableLambdaBinding lambdaBinding = new SettableLambdaBinding(lambdaExpr, bindings);
Expand All @@ -528,6 +561,13 @@ public void validateArguments(LambdaExpr lambdaExpr, List<Expr> args)
);
}

@Nullable
@Override
public ExprType getOutputType(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args)
{
// match functions (any/all) produce a boolean result, which the expression system represents as a LONG
return ExprType.LONG;
}

public abstract ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bindings);
}

Expand All @@ -550,7 +590,7 @@ public ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bi
{
boolean anyMatch = Arrays.stream(values)
.anyMatch(o -> expr.eval(bindings.withBinding(expr.getIdentifier(), o)).asBoolean());
return ExprEval.of(anyMatch, ExprType.LONG);
return ExprEval.ofLongBoolean(anyMatch);
}
}

Expand All @@ -573,7 +613,7 @@ public ExprEval match(Object[] values, LambdaExpr expr, SettableLambdaBinding bi
{
boolean allMatch = Arrays.stream(values)
.allMatch(o -> expr.eval(bindings.withBinding(expr.getIdentifier(), o)).asBoolean());
return ExprEval.of(allMatch, ExprType.LONG);
return ExprEval.ofLongBoolean(allMatch);
}
}

Expand Down Expand Up @@ -848,4 +888,38 @@ public CartesianFoldLambdaBinding accumulateWithIndex(int index, Object acc)
return this;
}
}

/**
* Helper that can wrap another {@link Expr.InputBindingTypes} to use to supply the type information of a
* {@link LambdaExpr} when evaluating {@link ApplyFunctionExpr#getOutputType}. Lambda identifiers do not exist
* in the underlying {@link Expr.InputBindingTypes}, but can be created by mapping the lambda identifiers to the
* arguments that will be applied to them, to map the type information.
*/
class LambdaInputBindingTypes implements Expr.InputBindingTypes
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

javadocs please

{
private final Object2IntMap<String> lambdaIdentifiers;
private final Expr.InputBindingTypes inputTypes;
private final List<Expr> args;

/**
 * Builds a mapping from lambda identifier name to its position in {@code args}, so that
 * {@code getType} can resolve a lambda identifier to the type of the argument expression
 * that will be applied to it. Identifiers not present in the map fall through to the
 * wrapped {@code inputTypes}.
 *
 * NOTE(review): assumes {@code expr.getIdentifiers()} has at least {@code args.size()}
 * entries — confirm against how callers validate lambda arity.
 */
public LambdaInputBindingTypes(Expr.InputBindingTypes inputTypes, LambdaExpr expr, List<Expr> args)
{
this.inputTypes = inputTypes;
this.args = args;
List<String> identifiers = expr.getIdentifiers();
this.lambdaIdentifiers = new Object2IntOpenHashMap<>(args.size());
for (int i = 0; i < args.size(); i++) {
lambdaIdentifiers.put(identifiers.get(i), i);
}
}

@Nullable
@Override
public ExprType getType(String name)
{
if (lambdaIdentifiers.containsKey(name)) {
return ExprType.elementType(args.get(lambdaIdentifiers.getInt(name)).getOutputType(inputTypes));
Comment on lines +919 to +920
Copy link
Copy Markdown
Contributor

@suneet-s suneet-s Sep 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Will getType be called in a loop anywhere? It might be better to use getOrDefault(..) instead to avoid 2 hashcode computations here (for containsKey and getInt)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It shouldn't be called in any sort of hot loop. The eventual usage of this information is so that this happens during a sort of planning phase that is checking to see if we can make a strongly typed and optimized expression evaluator to use instead of the default.

}
return inputTypes.getType(name);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right)
@Override
protected ExprEval evalString(@Nullable String left, @Nullable String right)
{
return ExprEval.of(Comparators.<String>naturalNullsFirst().compare(left, right) < 0, ExprType.LONG);
return ExprEval.ofLongBoolean(Comparators.<String>naturalNullsFirst().compare(left, right) < 0);
}

@Override
Expand All @@ -57,6 +57,17 @@ protected final double evalDouble(double left, double right)
// Use Double.compare for more consistent NaN handling.
return Evals.asDouble(Double.compare(left, right) < 0);
}

@Nullable
@Override
public ExprType getOutputType(InputBindingTypes inputTypes)
{
ExprType implicitCast = super.getOutputType(inputTypes);
if (ExprType.STRING.equals(implicitCast)) {
return ExprType.LONG;
}
return implicitCast;
Comment on lines +65 to +69
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at how this function works got me thinking about some stuff... Does this function need to be in sync with the behavior in BinaryEvalOpExprBase#eval (I think so 🤔) Since the eval method isn't implemented here, would it be better to implement it in BinaryEvalOpExprBase?

Can you explain how getOutputType would deal with default null handling mode.

Also, what does it mean to have an output type of null?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at how this function works got me thinking about some stuff... Does this function need to be in sync with the behavior in BinaryEvalOpExprBase#eval (I think so 🤔) Since the eval method isn't implemented here, would it be better to implement it in BinaryEvalOpExprBase?

Yeah, the behavior of getOutputType should always match the behavior of eval. In this case, it can't really be pushed down because the math operators also implement BinaryEvalOpExprBase, but do not handle string inputs as numerical outputs. We could put another type in between these logical operators and BinaryEvalOpExprBase that provides it though, I will consider doing that.

Can you explain how getOutputType would deal with default null handling mode.

getOutputType should be fully independent of how druid.generic.useDefaultValueForNull is set since it does not capture (currently) whether or not nulls can happen.

Also, what does it mean to have an output type of null?

An output type of null signifies that we couldn't compute what the output type is, most likely because input type information isn't available. When the input to the expressions are actual segments (QueryableIndexStorageAdapter) then type information should always be available, and a null signifies an input that doesn't exist, but other adapters (and other usages of Expr such as for transforms) might not always have complete type information.

I will add all of this stuff to the javadocs.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what should be the expected output type here if the left output type is a string and the right output type is a double?
As per org.apache.druid.math.expr.BinaryEvalOpExprBase#eval it seems, the runtime output type will be double . As per ExprType.autoTypeConversion it seems to be string.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good eye, you spotted a thing I was purposefully ignoring because I think it is not really great behavior, or consistent with other math functions. The default behavior for those 2 input math operators is to consider the arguments as doubles as you've noticed, regardless of whether or not they are both doubles. I suspect it is implemented like this to allow null-valued inputs to still work as zeros in 'default' mode (druid.generic.useDefaultValueForNull=true), because the current expressions are not strongly typed so these nulls all end up as string values. See this comment for a bit more explanation, and this comment for some of the problems that this causes.

It is also a bit unintuitive behavior. A string column which contains numeric values will work in one of these math operators, so something like 2.0 + '3.1' = 5.1 works magically. But it will also potentially treat the string value as 0 if the string is not a number in default mode, so 2.0 + 'foo' = 2.0 (or null in sql compatible mode). The math functions (min, max, etc) instead treat either input as string as a 0/null output without evaluating the function.

I'm not actually sure what the best behavior here is, the math function behavior seems a bit more intuitive to me, where either input being a string produces a null output, so I chose to use that here. This PR is setting up for vectorized expressions, which are going to be strongly typed before processing, which I think makes this inconsistent/confusing/magical behavior not necessary. I was planning to raise a discussion about this part in a future PR since it isn't actually affecting anything yet, but I think it is good to start talking about it even though it isn't wired up to anything yet.

Also, I really think 'default' mode complicates the expression system quite a lot. It would be my preference for the expression system to always behave in an SQL compatible manner, and default mode only come into play on the boundaries for data coming in and going out, but I haven't fully thought through the implications of this and it requires some discussion I think, and might be a bit dramatic of a change.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so something like 2.0 + '3.1' = 5.1

That might not have been a great example, this might be normal behavior in some other databases 😅. I'm going to have a closer read over https://www.postgresql.org/docs/9.0/typeconv-oper.html and https://www.postgresql.org/docs/9.0/typeconv-func.html and maybe reference some other docs too and see if it might make sense to have different versions of this implicit conversion function for different contexts.

Copy link
Copy Markdown
Contributor

@abhishekagarwal87 abhishekagarwal87 Sep 14, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR is setting up for vectorized expressions, which are going to be strongly typed before processing, which I think makes this inconsistent/confusing/magical behavior not necessary.

can you elaborate more on this? How will that look like?

While I understand the right behavior is debatable, it may still be best to keep the type in sync with eval even if it's not very intuitive. It may require that the logic of figuring out the type is not reusable as a function ExprType.autoTypeConversion and may have to be written differently for some operators. We can, later on, change the different functions, fixing both the eval and getOutputType together. But till then, they will be in sync.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you elaborate more on this? How will that look like?

Ah, it's going to be based on using these methods, just the processors will be specialized to deal with the correct type based on the set of input types. Since the output type information isn't used for non-vectorized expressions, I'm trying to model this as the change I want to see in the world and will ensure that this matches the behavior of the vectorized expressions, but I've gone ahead and split operator from function auto conversion and changed it to match existing behavior for now in case it is necessary, and can always consolidate them again in the future.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to merge this PR, but we can continue this discussion in the next one, so this stuff can actually be tied into things in a concrete manner. Looking at postgres, its math functions do, at least somewhat, allow for implicit conversion of string numbers to numbers. So 2.0 + '3.1' = 5.1 and cos('1') work, but '2.0' + '3.1' does not. In druid currently the first would have the same behavior as postgres, but the 2nd would be null or 0 depending on the value of druid.generic.useDefaultValueForNull, while the 3rd expression would do string concatenation.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @clintropolis

}
}

class BinLeqExpr extends BinaryEvalOpExprBase
Expand All @@ -75,7 +86,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right)
@Override
protected ExprEval evalString(@Nullable String left, @Nullable String right)
{
return ExprEval.of(Comparators.<String>naturalNullsFirst().compare(left, right) <= 0, ExprType.LONG);
return ExprEval.ofLongBoolean(Comparators.<String>naturalNullsFirst().compare(left, right) <= 0);
}

@Override
Expand All @@ -90,6 +101,17 @@ protected final double evalDouble(double left, double right)
// Use Double.compare for more consistent NaN handling.
return Evals.asDouble(Double.compare(left, right) <= 0);
}

@Nullable
@Override
public ExprType getOutputType(InputBindingTypes inputTypes)
{
  // Comparisons always yield a boolean (represented as LONG), so when implicit casting would
  // otherwise leave the operands as strings, the output type is still LONG.
  final ExprType inferred = super.getOutputType(inputTypes);
  return ExprType.STRING.equals(inferred) ? ExprType.LONG : inferred;
}
}

class BinGtExpr extends BinaryEvalOpExprBase
Expand All @@ -108,7 +130,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right)
@Override
protected ExprEval evalString(@Nullable String left, @Nullable String right)
{
return ExprEval.of(Comparators.<String>naturalNullsFirst().compare(left, right) > 0, ExprType.LONG);
return ExprEval.ofLongBoolean(Comparators.<String>naturalNullsFirst().compare(left, right) > 0);
}

@Override
Expand All @@ -123,6 +145,17 @@ protected final double evalDouble(double left, double right)
// Use Double.compare for more consistent NaN handling.
return Evals.asDouble(Double.compare(left, right) > 0);
}

@Nullable
@Override
public ExprType getOutputType(InputBindingTypes inputTypes)
{
  // Comparisons always yield a boolean (represented as LONG), so when implicit casting would
  // otherwise leave the operands as strings, the output type is still LONG.
  final ExprType inferred = super.getOutputType(inputTypes);
  return ExprType.STRING.equals(inferred) ? ExprType.LONG : inferred;
}
}

class BinGeqExpr extends BinaryEvalOpExprBase
Expand All @@ -141,7 +174,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right)
@Override
protected ExprEval evalString(@Nullable String left, @Nullable String right)
{
return ExprEval.of(Comparators.<String>naturalNullsFirst().compare(left, right) >= 0, ExprType.LONG);
return ExprEval.ofLongBoolean(Comparators.<String>naturalNullsFirst().compare(left, right) >= 0);
}

@Override
Expand All @@ -156,6 +189,17 @@ protected final double evalDouble(double left, double right)
// Use Double.compare for more consistent NaN handling.
return Evals.asDouble(Double.compare(left, right) >= 0);
}

@Nullable
@Override
public ExprType getOutputType(InputBindingTypes inputTypes)
{
  // Comparisons always yield a boolean (represented as LONG), so when implicit casting would
  // otherwise leave the operands as strings, the output type is still LONG.
  final ExprType inferred = super.getOutputType(inputTypes);
  return ExprType.STRING.equals(inferred) ? ExprType.LONG : inferred;
}
}

class BinEqExpr extends BinaryEvalOpExprBase
Expand All @@ -174,7 +218,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right)
@Override
protected ExprEval evalString(@Nullable String left, @Nullable String right)
{
return ExprEval.of(Objects.equals(left, right), ExprType.LONG);
return ExprEval.ofLongBoolean(Objects.equals(left, right));
}

@Override
Expand All @@ -188,6 +232,17 @@ protected final double evalDouble(double left, double right)
{
return Evals.asDouble(left == right);
}

@Nullable
@Override
public ExprType getOutputType(InputBindingTypes inputTypes)
{
  // Equality checks always yield a boolean (represented as LONG), so when implicit casting would
  // otherwise leave the operands as strings, the output type is still LONG.
  final ExprType inferred = super.getOutputType(inputTypes);
  return ExprType.STRING.equals(inferred) ? ExprType.LONG : inferred;
}
}

class BinNeqExpr extends BinaryEvalOpExprBase
Expand All @@ -206,7 +261,7 @@ protected BinaryOpExprBase copy(Expr left, Expr right)
@Override
protected ExprEval evalString(@Nullable String left, @Nullable String right)
{
return ExprEval.of(!Objects.equals(left, right), ExprType.LONG);
return ExprEval.ofLongBoolean(!Objects.equals(left, right));
}

@Override
Expand All @@ -220,6 +275,17 @@ protected final double evalDouble(double left, double right)
{
return Evals.asDouble(left != right);
}

@Nullable
@Override
public ExprType getOutputType(InputBindingTypes inputTypes)
{
  // Inequality checks always yield a boolean (represented as LONG), so when implicit casting
  // would otherwise leave the operands as strings, the output type is still LONG.
  final ExprType inferred = super.getOutputType(inputTypes);
  return ExprType.STRING.equals(inferred) ? ExprType.LONG : inferred;
}
}

class BinAndExpr extends BinaryOpExprBase
Expand Down Expand Up @@ -262,5 +328,4 @@ public ExprEval eval(ObjectBinding bindings)
ExprEval leftVal = left.eval(bindings);
return leftVal.asBoolean() ? leftVal : right.eval(bindings);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,19 @@ public String stringify()
protected abstract BinaryOpExprBase copy(Expr left, Expr right);

@Override
public BindingDetails analyzeInputs()
public BindingAnalysis analyzeInputs()
{
// currently all binary operators operate on scalar inputs
return left.analyzeInputs().with(right).withScalarArguments(ImmutableSet.of(left, right));
}

@Nullable
@Override
public ExprType getOutputType(InputBindingTypes inputTypes)
{
  // Binary operators implicitly convert mismatched operand types; the common type chosen by
  // the operator conversion rules is the operator's output type.
  final ExprType leftType = left.getOutputType(inputTypes);
  final ExprType rightType = right.getOutputType(inputTypes);
  return ExprType.operatorAutoTypeConversion(leftType, rightType);
}

@Override
public boolean equals(Object o)
{
Expand Down
Loading