Skip to content
36 changes: 34 additions & 2 deletions core/src/main/java/org/apache/druid/math/expr/ExprEval.java
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,22 @@ public static ExprEval bestEffortOf(@Nullable Object val)
}
return new ArrayExprEval(ExpressionType.LONG_ARRAY, array);
}
if (val instanceof Integer[]) {
final Integer[] inputArray = (Integer[]) val;
final Object[] array = new Object[inputArray.length];
for (int i = 0; i < inputArray.length; i++) {
array[i] = inputArray[i] == null ? null : inputArray[i].longValue();
}
return new ArrayExprEval(ExpressionType.LONG_ARRAY, array);
}
if (val instanceof int[]) {
final int[] longArray = (int[]) val;
final Object[] array = new Object[longArray.length];
for (int i = 0; i < longArray.length; i++) {
array[i] = (long) longArray[i];
}
return new ArrayExprEval(ExpressionType.LONG_ARRAY, array);
}
if (val instanceof Double[]) {
final Double[] inputArray = (Double[]) val;
final Object[] array = new Object[inputArray.length];
Expand Down Expand Up @@ -438,7 +454,7 @@ public static ExprEval bestEffortOf(@Nullable Object val)
final float[] inputArray = (float[]) val;
final Object[] array = new Object[inputArray.length];
for (int i = 0; i < inputArray.length; i++) {
array[i] = inputArray[i];
array[i] = (double) inputArray[i];
}
return new ArrayExprEval(ExpressionType.DOUBLE_ARRAY, array);
}
Expand All @@ -463,6 +479,13 @@ public static ExprEval bestEffortOf(@Nullable Object val)
return ofArray(coerced.lhs, coerced.rhs);
}

// in 'best effort' mode, we couldn't possibly use byte[] as a complex or anything else useful without type
// knowledge, so let's turn it into a base64 encoded string so at least something downstream can use it by decoding
// back into bytes
if (val instanceof byte[]) {
return new StringExprEval(StringUtils.encodeBase64String((byte[]) val));
}
Comment on lines +482 to +487
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we even need to base64 it? Why not just keep it as a byte[]?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going into the expression system; if we don't handle it like this, it ends up as "unknown complex" and is basically useless in that form, since we don't have native byte[]-typed expressions and complex types cannot be cast to anything else (not that a cast would do anything useful here).

With nested columns, leaving it as a byte[] is a problem: we use this method to process inputs and determine their type, and the default case for anything that leaks through that isn't LONG, DOUBLE, or STRING effectively calls Java's toString on the value, so these end up as the not very useful output of byte[].toString.

I could change this to be a parse exception, but even if we did that, I think it would still be useful to feed these into the expressions as a base64 encoded string rather than throwing them away. Maybe in the future it would be nice to have a byte blob type so we don't have to encode it, but until then, I think this is the most useful thing we can do, and it's consistent with the behavior we use to handle byte[] in Rows.objectToStrings that normal string columns go through.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay


if (val != null) {
// is this cool?
return new ComplexExprEval(ExpressionType.UNKNOWN_COMPLEX, val);
Expand Down Expand Up @@ -520,9 +543,18 @@ public static ExprEval ofType(@Nullable ExpressionType type, @Nullable Object va
}
return ofDouble(null);
case COMPLEX:
// json isn't currently defined in druid-core, this can be reworked once
// https://github.com/apache/druid/pull/13698 is merged (or COMPLEX<json> is promoted to a real built-in type)
if ("json".equals(type.getComplexTypeName())) {
return ofComplex(type, value);
}
byte[] bytes = null;
if (value instanceof String) {
bytes = StringUtils.decodeBase64String((String) value);
try {
bytes = StringUtils.decodeBase64String((String) value);
}
catch (IllegalArgumentException ignored) {
}
} else if (value instanceof byte[]) {
bytes = (byte[]) value;
}
Expand Down
68 changes: 2 additions & 66 deletions core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,8 @@

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.math.expr.vector.ExprEvalDoubleVector;
import org.apache.druid.math.expr.vector.ExprEvalLongVector;
import org.apache.druid.math.expr.vector.ExprEvalObjectVector;
import org.apache.druid.math.expr.vector.ExprEvalVector;
import org.apache.druid.math.expr.vector.ExprVectorProcessor;
import org.apache.druid.math.expr.vector.VectorProcessors;

import javax.annotation.Nullable;
import java.util.Objects;
Expand Down Expand Up @@ -152,51 +149,7 @@ public boolean canVectorize(InputBindingInspector inspector)
/**
 * Builds the vectorized processor for this identifier by delegating to
 * {@code VectorProcessors.identifier}, passing along the inspector and this identifier's {@code binding}.
 *
 * @param inspector supplies the type information used to choose the processor
 * @return the processor created by {@code VectorProcessors.identifier}
 */
@Override
public ExprVectorProcessor<?> buildVectorized(VectorInputBindingInspector inspector)
{
  // The type-based dispatch lives in VectorProcessors.identifier. Keeping a second, inline copy of that switch in
  // front of this call made the return below unreachable, because every branch of the switch returned or threw.
  return VectorProcessors.identifier(inspector, binding);
}

@Override
Expand All @@ -218,20 +171,3 @@ public int hashCode()
return Objects.hash(identifier);
}
}

/**
 * Skeleton {@link ExprVectorProcessor} that only remembers and reports its output type. Subclasses are expected to
 * provide the rest of the interface, such as {@code evalVector}.
 *
 * @param <T> vector type produced by the processor
 */
abstract class IdentifierVectorProcessor<T> implements ExprVectorProcessor<T>
{
  /** Type handed to the constructor and returned from {@link #getOutputType()}. */
  private final ExpressionType resultType;

  public IdentifierVectorProcessor(ExpressionType outputType)
  {
    resultType = outputType;
  }

  @Override
  public ExpressionType getOutputType()
  {
    return resultType;
  }
}

Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,55 @@ public ExpressionType getOutputType()
};
}

/**
 * Creates a vector processor that reads the values of {@code binding} directly from the input bindings. The kind of
 * vector that is read is chosen from the type {@code inspector} reports for the binding:
 * <ul>
 *   <li>no type (null): object vector, reported as {@link ExpressionType#STRING}</li>
 *   <li>LONG: long vector together with the binding's null vector</li>
 *   <li>DOUBLE: double vector together with the binding's null vector</li>
 *   <li>STRING: object vector</li>
 * </ul>
 * Any other type results in the exception created by {@code Exprs.cannotVectorize}.
 *
 * @param inspector used to look up the type of {@code binding}
 * @param binding   name of the input to read
 * @return a processor whose output type is the binding's type, or STRING when the type is unknown
 */
public static ExprVectorProcessor<?> identifier(Expr.VectorInputBindingInspector inspector, String binding)
{
  final ExpressionType bindingType = inspector.getType(binding);

  if (bindingType == null) {
    // nil column, we can be anything, so be a string because it's the most flexible
    // (numbers will be populated with default values in default mode and non-null)
    return new IdentifierVectorProcessor<Object[]>(ExpressionType.STRING)
    {
      @Override
      public ExprEvalVector<Object[]> evalVector(Expr.VectorInputBinding bindings)
      {
        return new ExprEvalObjectVector(bindings.getObjectVector(binding));
      }
    };
  }

  final ExprVectorProcessor<?> processor;
  switch (bindingType.getType()) {
    case STRING:
      processor = new IdentifierVectorProcessor<Object[]>(bindingType)
      {
        @Override
        public ExprEvalVector<Object[]> evalVector(Expr.VectorInputBinding bindings)
        {
          return new ExprEvalObjectVector(bindings.getObjectVector(binding));
        }
      };
      break;
    case DOUBLE:
      processor = new IdentifierVectorProcessor<double[]>(bindingType)
      {
        @Override
        public ExprEvalVector<double[]> evalVector(Expr.VectorInputBinding bindings)
        {
          return new ExprEvalDoubleVector(bindings.getDoubleVector(binding), bindings.getNullVector(binding));
        }
      };
      break;
    case LONG:
      processor = new IdentifierVectorProcessor<long[]>(bindingType)
      {
        @Override
        public ExprEvalVector<long[]> evalVector(Expr.VectorInputBinding bindings)
        {
          return new ExprEvalLongVector(bindings.getLongVector(binding), bindings.getNullVector(binding));
        }
      };
      break;
    default:
      throw Exprs.cannotVectorize("[" + binding + "]");
  }
  return processor;
}

public static <T> ExprVectorProcessor<T> parseLong(Expr.VectorInputBindingInspector inspector, Expr arg, int radix)
{
final ExprVectorProcessor<?> processor = new LongOutObjectInFunctionVectorProcessor(
Expand Down Expand Up @@ -889,4 +938,20 @@ private VectorProcessors()
{
// No instantiation
}

/**
 * Skeleton {@link ExprVectorProcessor} that only remembers and reports its output type. Subclasses are expected to
 * provide the rest of the interface, such as {@code evalVector}.
 *
 * @param <T> vector type produced by the processor
 */
abstract static class IdentifierVectorProcessor<T> implements ExprVectorProcessor<T>
{
  /** Type handed to the constructor and returned from {@link #getOutputType()}. */
  private final ExpressionType resultType;

  public IdentifierVectorProcessor(ExpressionType outputType)
  {
    resultType = outputType;
  }

  @Override
  public ExpressionType getOutputType()
  {
    return resultType;
  }
}
}
136 changes: 136 additions & 0 deletions core/src/test/java/org/apache/druid/math/expr/EvalTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.apache.druid.math.expr;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.IAE;
Expand Down Expand Up @@ -813,5 +814,140 @@ public void testEvalOfType()
eval = ExprEval.ofType(ExpressionType.STRING_ARRAY, new Object[] {1.0, 2L, "3", true, false});
Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type());
Assert.assertArrayEquals(new Object[] {"1.0", "2", "3", "true", "false"}, (Object[]) eval.value());

// json type isn't defined in druid-core
ExpressionType json = ExpressionType.fromString("COMPLEX<json>");
eval = ExprEval.ofType(json, ImmutableMap.of("x", 1L, "y", 2L));
Assert.assertEquals(json, eval.type());
Assert.assertEquals(ImmutableMap.of("x", 1L, "y", 2L), eval.value());

eval = ExprEval.ofType(json, "hello");
Assert.assertEquals(json, eval.type());
Assert.assertEquals("hello", eval.value());

ExpressionType stringyComplexThing = ExpressionType.fromString("COMPLEX<somestringything>");
eval = ExprEval.ofType(stringyComplexThing, "notbase64");
Assert.assertEquals(stringyComplexThing, eval.type());
Assert.assertEquals("notbase64", eval.value());
}

/**
 * Checks the type and value {@link ExprEval#bestEffortOf} produces for strings, booleans, byte arrays, numbers,
 * arrays and lists of various element types, and a map.
 */
@Test
public void testBestEffortOf()
{
  // strings
  checkBestEffortScalar(ExpressionType.STRING, "stringy", "stringy");

  // by default, booleans are handled as strings
  checkBestEffortScalar(ExpressionType.STRING, "true", true);

  // byte[] is expected to come back as its base64 encoded string
  checkBestEffortScalar(
      ExpressionType.STRING,
      StringUtils.encodeBase64String(new byte[]{1, 2, 3, 4}),
      new byte[]{1, 2, 3, 4}
  );

  // longs
  checkBestEffortScalar(ExpressionType.LONG, 1L, 1L);
  checkBestEffortScalar(ExpressionType.LONG, 1L, 1);

  try {
    // in strict boolean mode, they are longs
    // NOTE(review): this goes through ExprEval.ofType rather than bestEffortOf - confirm that is intended
    ExpressionProcessing.initializeForStrictBooleansTests(true);
    final ExprEval strictEval = ExprEval.ofType(ExpressionType.LONG, true);
    Assert.assertEquals(ExpressionType.LONG, strictEval.type());
    Assert.assertEquals(1L, strictEval.value());
  }
  finally {
    // reset
    ExpressionProcessing.initializeForTests(null);
  }

  // doubles
  checkBestEffortScalar(ExpressionType.DOUBLE, 1.0, 1.0);
  checkBestEffortScalar(ExpressionType.DOUBLE, 1.0, 1.0f);

  // arrays
  final Object[] oneTwoThreeLongs = new Object[] {1L, 2L, 3L};
  checkBestEffortArray(ExpressionType.LONG_ARRAY, oneTwoThreeLongs, new Object[] {1L, 2L, 3L});
  checkBestEffortArray(
      ExpressionType.LONG_ARRAY,
      new Object[] {1L, 2L, null, 3L},
      new Object[] {1L, 2L, null, 3L}
  );
  checkBestEffortArray(ExpressionType.LONG_ARRAY, oneTwoThreeLongs, ImmutableList.of(1L, 2L, 3L));
  checkBestEffortArray(ExpressionType.LONG_ARRAY, oneTwoThreeLongs, new long[] {1L, 2L, 3L});
  checkBestEffortArray(ExpressionType.LONG_ARRAY, oneTwoThreeLongs, new Object[] {1, 2, 3});
  checkBestEffortArray(ExpressionType.LONG_ARRAY, oneTwoThreeLongs, new int[] {1, 2, 3});

  final Object[] oneTwoThreeDoubles = new Object[] {1.0, 2.0, 3.0};
  checkBestEffortArray(ExpressionType.DOUBLE_ARRAY, oneTwoThreeDoubles, new Object[] {1.0, 2.0, 3.0});
  checkBestEffortArray(
      ExpressionType.DOUBLE_ARRAY,
      new Object[] {null, 1.0, 2.0, 3.0},
      new Object[] {null, 1.0, 2.0, 3.0}
  );
  checkBestEffortArray(ExpressionType.DOUBLE_ARRAY, oneTwoThreeDoubles, new double[] {1.0, 2.0, 3.0});
  checkBestEffortArray(ExpressionType.DOUBLE_ARRAY, oneTwoThreeDoubles, new Object[] {1.0f, 2.0f, 3.0f});
  checkBestEffortArray(ExpressionType.DOUBLE_ARRAY, oneTwoThreeDoubles, new float[] {1.0f, 2.0f, 3.0f});

  final Object[] oneTwoThreeStrings = new Object[] {"1", "2", "3"};
  checkBestEffortArray(ExpressionType.STRING_ARRAY, oneTwoThreeStrings, new Object[] {"1", "2", "3"});
  checkBestEffortArray(ExpressionType.STRING_ARRAY, oneTwoThreeStrings, new String[] {"1", "2", "3"});
  checkBestEffortArray(ExpressionType.STRING_ARRAY, oneTwoThreeStrings, ImmutableList.of("1", "2", "3"));

  // arrays end up as the least restrictive type
  checkBestEffortArray(ExpressionType.DOUBLE_ARRAY, new Object[] {1.0, 2.0}, new Object[] {1.0, 2L});

  // arrays end up as the least restrictive type
  checkBestEffortArray(
      ExpressionType.STRING_ARRAY,
      new Object[] {"1.0", "2", "3", "true", "false"},
      new Object[] {1.0, 2L, "3", true, false}
  );

  // json type isn't defined in druid-core, what happens if we have some nested data?
  checkBestEffortScalar(
      ExpressionType.UNKNOWN_COMPLEX,
      ImmutableMap.of("x", 1L, "y", 2L),
      ImmutableMap.of("x", 1L, "y", 2L)
  );
}

/**
 * Runs {@code input} through {@link ExprEval#bestEffortOf} and asserts the resulting type and value with
 * {@code Assert.assertEquals}.
 */
private static void checkBestEffortScalar(ExpressionType expectedType, Object expectedValue, Object input)
{
  final ExprEval result = ExprEval.bestEffortOf(input);
  Assert.assertEquals(expectedType, result.type());
  Assert.assertEquals(expectedValue, result.value());
}

/**
 * Runs {@code input} through {@link ExprEval#bestEffortOf}, asserts the resulting type, and compares the value,
 * cast to {@code Object[]}, element by element with {@code Assert.assertArrayEquals}.
 */
private static void checkBestEffortArray(ExpressionType expectedType, Object[] expectedValues, Object input)
{
  final ExprEval result = ExprEval.bestEffortOf(input);
  Assert.assertEquals(expectedType, result.type());
  Assert.assertArrayEquals(expectedValues, (Object[]) result.value());
}
}
Loading