Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,9 @@

package org.apache.druid.query.expressions;

import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExprMacroTable;
Expand Down Expand Up @@ -52,15 +50,15 @@ public class BloomFilterExpressionsTest extends InitializedNullHandlingTest
BloomFilterExpressions.TestExprMacro testMacro = new BloomFilterExpressions.TestExprMacro();
ExprMacroTable macroTable = new ExprMacroTable(ImmutableList.of(createMacro, addMacro, testMacro));

Expr.ObjectBinding inputBindings = InputBindings.withTypedSuppliers(
new ImmutableMap.Builder<String, Pair<ExpressionType, Supplier<Object>>>()
.put("bloomy", new Pair<>(BloomFilterExpressions.BLOOM_FILTER_TYPE, () -> new BloomKFilter(100)))
.put("string", new Pair<>(ExpressionType.STRING, () -> SOME_STRING))
.put("long", new Pair<>(ExpressionType.LONG, () -> SOME_LONG))
.put("double", new Pair<>(ExpressionType.DOUBLE, () -> SOME_DOUBLE))
.put("string_array", new Pair<>(ExpressionType.STRING_ARRAY, () -> SOME_STRING_ARRAY))
.put("long_array", new Pair<>(ExpressionType.LONG_ARRAY, () -> SOME_LONG_ARRAY))
.put("double_array", new Pair<>(ExpressionType.DOUBLE_ARRAY, () -> SOME_DOUBLE_ARRAY))
Expr.ObjectBinding inputBindings = InputBindings.forInputSuppliers(
new ImmutableMap.Builder<String, InputBindings.InputSupplier>()
.put("bloomy", InputBindings.inputSupplier(BloomFilterExpressions.BLOOM_FILTER_TYPE, () -> new BloomKFilter(100)))
.put("string", InputBindings.inputSupplier(ExpressionType.STRING, () -> SOME_STRING))
.put("long", InputBindings.inputSupplier(ExpressionType.LONG, () -> SOME_LONG))
.put("double", InputBindings.inputSupplier(ExpressionType.DOUBLE, () -> SOME_DOUBLE))
.put("string_array", InputBindings.inputSupplier(ExpressionType.STRING_ARRAY, () -> SOME_STRING_ARRAY))
.put("long_array", InputBindings.inputSupplier(ExpressionType.LONG_ARRAY, () -> SOME_LONG_ARRAY))
.put("double_array", InputBindings.inputSupplier(ExpressionType.DOUBLE_ARRAY, () -> SOME_DOUBLE_ARRAY))
.build()
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,9 @@ public static ExprEval ofType(@Nullable ExpressionType type, @Nullable Object va
if (value instanceof List) {
return bestEffortOf(value);
}
if (value instanceof byte[]) {
return new StringExprEval(StringUtils.encodeBase64String((byte[]) value));
}
Comment on lines +518 to +520
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has me wondering, what if an expression actually wants the byte[] how would it be defined so that if one expression returns a byte[] and the next one wants to use it, then it will just be passed through without being base64 encoded in between?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This block is for things that are asking for STRING typed values, (though they might be multi-value strings as well). COMPLEX types will accept bytes as is and try to deserialize them into the appropriate object using a TypeStrategy that wraps the ObjectStrategy.

However, to complicate this answer slightly, where the type passed to this method comes from varies depending on where it is being called. This ofType method is what backs IdentifierExpr which is what feeds input values into expressions. When backed by a segment, the type will be the type which was stored in the segment, etc. For places we don't know though, such as expression transforms, we fall back to using bestEffortOf, which will handle byte[] as a STRING type. It lacks the complex type name so cant handle byte[] to complex object translation, so we turn it into a string because at least then something could do something with it.

Responding to this made me realize that the expression post-agg bindings could be improved with the partial type information derived from the aggregators 'result' type, so I have updated them to use it accordingly in the latest commit.

Back to byte[], we could alternatively consider leaving it as 'unknown' COMPLEX, though that would cause some issue with nested columns which is the other main user of 'bestEffortOf', which uses it to try to derive the type information of these values. Since we don't have a native binary blob type, STRING is most useful here so we can at least preserve the values (and for JSON, byte[] already come in as base64 strings, so byte[] really only appear in other nested formats, such as Avro, Parquet, Protobuf, and ORC).

return of(Evals.asString(value));
case LONG:
if (value instanceof Number) {
Expand Down
210 changes: 193 additions & 17 deletions processing/src/main/java/org/apache/druid/math/expr/InputBindings.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@
package org.apache.druid.math.expr;

import com.google.common.base.Supplier;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.data.input.Row;
import org.apache.druid.java.util.common.UOE;
import org.apache.druid.segment.column.ColumnHolder;

import javax.annotation.Nullable;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import java.util.Objects;

public class InputBindings
{
Expand All @@ -45,11 +48,41 @@ public ExpressionType getType(String name)
}
};

/**
* Empty {@link Expr.ObjectBinding} that doesn't complain about attempts to access type or value for any input
* identifiers, both of which will be nulls. Typically used for evaluating known constant expressions, or finding
* a default or initial value of some expression if all inputs are null.
*/
public static Expr.ObjectBinding nilBindings()
{
return NIL_BINDINGS;
}

/**
* Empty binding that throw a {@link UOE} if anything attempts to lookup an identifier type or value
*/
public static Expr.ObjectBinding validateConstant(Expr expr)
{
return new Expr.ObjectBinding()
{
@Nullable
@Override
public Object get(String name)
{
// Sanity check. Bindings should not be used for a constant expression so explode if something tried
throw new UOE("Expression " + expr.stringify() + " has non-constant inputs.");
}

@Nullable
@Override
public ExpressionType getType(String name)
{
// Sanity check. Bindings should not be used for a constant expression so explode if something tried
throw new UOE("Expression " + expr.stringify() + " has non-constant inputs.");
}
};
}

/**
* Create an {@link Expr.InputBindingInspector} backed by a map of binding identifiers to their {@link ExprType}
*/
Expand All @@ -66,95 +99,238 @@ public ExpressionType getType(String name)
};
}

public static Expr.ObjectBinding singleProvider(ExpressionType type, final Function<String, ?> valueFn)
/**
* Creates a {@link Expr.ObjectBinding} backed by some {@link Row}. {@link ColumnHolder#TIME_COLUMN_NAME} is special
* handled to be backed by {@link Row#getTimestampFromEpoch()}, all other values are ethically sourced from
* {@link Row#getRaw(String)}.
*
* Types are detected and values are coereced via {@link ExprEval#bestEffortOf(Object)} because input types are
* currently unknown.
*/
public static Expr.ObjectBinding forRow(Row row)
{
return new BestEffortInputBindings()
{
@Override
ExprEval compute(String name)
{
if (ColumnHolder.TIME_COLUMN_NAME.equals(name)) {
return ExprEval.ofLong(row.getTimestampFromEpoch());
}
return ExprEval.bestEffortOf(row.getRaw(name));
}
};
}

/**
* Create {@link Expr.ObjectBinding} backed by {@link Map} to provide values for identifiers to evaluate {@link Expr}
*
* Types are detected and values are coereced via {@link ExprEval#bestEffortOf(Object)} because input types are
* currently unknown.
*
* This method is only used for testing and mimics the behavior of {@link #forRow(Row)} except lacks special handling
* for columns named {@link ColumnHolder#TIME_COLUMN_NAME}.
*/
@Deprecated
public static Expr.ObjectBinding forMap(final Map<String, ?> bindings)
{
return new Expr.ObjectBinding()
{
@Nullable
@Override
public Object get(String name)
{
return valueFn.apply(name);
return bindings.get(name);
}

@Nullable
@Override
public ExpressionType getType(String name)
{
return type;
return ExprEval.bestEffortOf(bindings.get(name)).type();
}
};
}

public static Expr.ObjectBinding forFunction(final Function<String, ?> valueFn)
/**
* Create {@link Expr.ObjectBinding} backed by {@link Map} to provide values for identifiers to evaluate {@link Expr}
*
* Types are detected and values are coereced via {@link ExprEval#bestEffortOf(Object)} because input types are
* currently unknown.
*/
public static Expr.ObjectBinding forMap(final Map<String, ?> bindings, Expr.InputBindingInspector inspector)
{
final Expr.InputBindingInspector inputBindingInspector = inspector;
return new BestEffortInputBindings()
{
@Nullable
@Override
public Object get(String name)
{
if (inputBindingInspector.getType(name) != null) {
return bindings.get(name);
}
// we didn't have complete type information on this one, fall through to bestEffortOf
return super.get(name);
}

@Nullable
@Override
public ExpressionType getType(String name)
{
final ExpressionType type = inputBindingInspector.getType(name);
if (type != null) {
return type;
}
// we didn't have complete type information on this one, fall through to bestEffortOf
return super.getType(name);
}

@Override
ExprEval compute(String name)
{
return ExprEval.bestEffortOf(bindings.get(name));
}
};
}

/**
* Create a {@link Expr.ObjectBinding} for a single input value of a known type provided by some {@link Supplier}
*/
public static Expr.ObjectBinding forInputSupplier(ExpressionType type, Supplier<?> supplier)
{
return new Expr.ObjectBinding()
{
@Nullable
@Override
public Object get(String name)
{
return valueFn.apply(name);
return supplier.get();
}

@Nullable
@Override
public ExpressionType getType(String name)
{
return ExprEval.bestEffortOf(valueFn.apply(name)).type();
return type;
}
};
}

/**
* Create {@link Expr.ObjectBinding} backed by {@link Map} to provide values for identifiers to evaluate {@link Expr}
* Create a {@link Expr.ObjectBinding} for a single input value of a known type provided by some {@link Supplier}
*/
public static Expr.ObjectBinding withMap(final Map<String, ?> bindings)
public static Expr.ObjectBinding forInputSupplier(String supplierName, ExpressionType type, Supplier<?> supplier)
{
return new Expr.ObjectBinding()
{
@Nullable
@Override
public Object get(String name)
{
return bindings.get(name);
if (Objects.equals(name, supplierName)) {
return supplier.get();
}
return null;
}

@Nullable
@Override
public ExpressionType getType(String name)
{
return ExprEval.bestEffortOf(bindings.get(name)).type();
if (Objects.equals(name, supplierName)) {
return type;
}
return null;
}
};
}

public static <T> InputSupplier<T> inputSupplier(ExpressionType type, Supplier<T> supplier)
{
return new InputSupplier<>(type, supplier);
}

/**
* Create {@link Expr.ObjectBinding} backed by map of {@link Supplier} to provide values for identifiers to evaluate
* {@link Expr}
*/
public static Expr.ObjectBinding withTypedSuppliers(final Map<String, Pair<ExpressionType, Supplier<Object>>> bindings)
public static Expr.ObjectBinding forInputSuppliers(final Map<String, InputSupplier> bindings)
{
return new Expr.ObjectBinding()
{
@Nullable
@Override
public Object get(String name)
{
Pair<ExpressionType, Supplier<Object>> binding = bindings.get(name);
return binding == null || binding.rhs == null ? null : binding.rhs.get();
InputSupplier<?> binding = bindings.get(name);
return binding == null ? null : binding.get();
}

@Nullable
@Override
public ExpressionType getType(String name)
{
Pair<ExpressionType, Supplier<Object>> binding = bindings.get(name);
InputSupplier<?> binding = bindings.get(name);
if (binding == null) {
return null;
}
return binding.lhs;
return binding.getType();
}
};
}

public static class InputSupplier<T> implements Supplier<T>
{
private final ExpressionType type;
private final Supplier<T> supplier;

private InputSupplier(ExpressionType type, Supplier<T> supplier)
{
this.supplier = supplier;
this.type = type;
}

@Override
public T get()
{
return supplier.get();
}

public ExpressionType getType()
{
return type;
}
}

/**
* {@link Expr.ObjectBinding} backed by a cache populated by {@link ExprEval#bestEffortOf(Object)} for when the input
* type information is totally unknown, for a single row worth of values. The values are cached so that asking for a
* type and getting the value of some input do not repeat computations.
*
* This type is not thread-safe, and not suitable for re-use for processing multiple-rows due to the presence of the
* result cache.
*/
public abstract static class BestEffortInputBindings implements Expr.ObjectBinding
{
private final Map<String, ExprEval> cachedBindings = new HashMap<>();

abstract ExprEval compute(String name);

@Nullable
@Override
public Object get(String name)
{
cachedBindings.computeIfAbsent(name, this::compute);
return cachedBindings.get(name).value();
}

@Nullable
@Override
public ExpressionType getType(String name)
{
cachedBindings.computeIfAbsent(name, this::compute);
return cachedBindings.get(name).type();
}
}
}
Loading