Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,49 +48,67 @@ public class ExpressionProcessing
@VisibleForTesting
public static void initializeForTests(@Nullable Boolean allowNestedArrays)
{
INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null);
INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null, null);
}

@VisibleForTesting
public static void initializeForStrictBooleansTests(boolean useStrict)
{
INSTANCE = new ExpressionProcessingConfig(null, useStrict, null);
INSTANCE = new ExpressionProcessingConfig(null, useStrict, null, null);
}

@VisibleForTesting
public static void initializeForHomogenizeNullMultiValueStrings()
{
INSTANCE = new ExpressionProcessingConfig(null, null, null, true);
}

/**
* [['is expression support for'],['nested arrays'],['enabled?']]
*/
public static boolean allowNestedArrays()
{
// this should only be null in a unit test context
// in production this will be injected by the expression processing module
if (INSTANCE == null) {
throw new IllegalStateException(
"Expressions module not initialized, call ExpressionProcessing.initializeForTests()"
);
}
checkInitialized();
return INSTANCE.allowNestedArrays();
}


/**
* All boolean expressions are {@link ExpressionType#LONG}
*/
public static boolean useStrictBooleans()
{
// this should only be null in a unit test context, in production this will be injected by the null handling module
if (INSTANCE == null) {
throw new IllegalStateException("ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests()");
}
checkInitialized();
return INSTANCE.isUseStrictBooleans();
}


/**
* All {@link ExprType#ARRAY} values will be converted to {@link ExpressionType#STRING} by their column selectors
* (not within expression processing) to be treated as multi-value strings instead of native arrays.
*/
public static boolean processArraysAsMultiValueStrings()
{
checkInitialized();
return INSTANCE.processArraysAsMultiValueStrings();
}

/**
* All multi-value string expression input values of 'null', '[]', and '[null]' will be coerced to '[null]'. If false,
* (the default) this will only be done when single value expressions are implicitly mapped across multi-value rows,
* so that the single valued expression will always be evaluated with an input value of 'null'
*/
public static boolean isHomogenizeNullMultiValueStringArrays()
{
checkInitialized();
return INSTANCE.isHomogenizeNullMultiValueStringArrays();
}

private static void checkInitialized()
{
// this should only be null in a unit test context, in production this will be injected by the null handling module
if (INSTANCE == null) {
throw new IllegalStateException(
"ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests()"
"ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests() or one of its variants"
);
}
return INSTANCE.processArraysAsMultiValueStrings();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@ public class ExpressionProcessingConfig
public static final String NESTED_ARRAYS_CONFIG_STRING = "druid.expressions.allowNestedArrays";
public static final String NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING = "druid.expressions.useStrictBooleans";
// Coerce arrays to multi value strings
public static final String
PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING = "druid.expressions.processArraysAsMultiValueStrings";
public static final String PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING =
"druid.expressions.processArraysAsMultiValueStrings";
// Coerce 'null', '[]', and '[null]' into '[null]' for backwards compat with 0.22 and earlier
public static final String HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS =
"druid.expressions.homogenizeNullMultiValueStringArrays";

@JsonProperty("allowNestedArrays")
private final boolean allowNestedArrays;
Expand All @@ -41,27 +44,27 @@ public class ExpressionProcessingConfig
@JsonProperty("processArraysAsMultiValueStrings")
private final boolean processArraysAsMultiValueStrings;

@JsonProperty("homogenizeNullMultiValueStringArrays")
private final boolean homogenizeNullMultiValueStringArrays;

@JsonCreator
public ExpressionProcessingConfig(
@JsonProperty("allowNestedArrays") @Nullable Boolean allowNestedArrays,
@JsonProperty("useStrictBooleans") @Nullable Boolean useStrictBooleans,
@JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean processArraysAsMultiValueStrings
@JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean processArraysAsMultiValueStrings,
@JsonProperty("homogenizeNullMultiValueStringArrays") @Nullable Boolean homogenizeNullMultiValueStringArrays
)
{
this.allowNestedArrays = allowNestedArrays == null
? Boolean.valueOf(System.getProperty(NESTED_ARRAYS_CONFIG_STRING, "false"))
: allowNestedArrays;
if (useStrictBooleans == null) {
this.useStrictBooleans = Boolean.parseBoolean(
System.getProperty(NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING, "false")
);
} else {
this.useStrictBooleans = useStrictBooleans;
}
this.processArraysAsMultiValueStrings
= processArraysAsMultiValueStrings == null
? Boolean.valueOf(System.getProperty(PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING, "false"))
: processArraysAsMultiValueStrings;
this.allowNestedArrays = getWithPropertyFallbackFalse(allowNestedArrays, NESTED_ARRAYS_CONFIG_STRING);
this.useStrictBooleans = getWithPropertyFallbackFalse(useStrictBooleans, NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING);
this.processArraysAsMultiValueStrings = getWithPropertyFallbackFalse(
processArraysAsMultiValueStrings,
PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING
);
this.homogenizeNullMultiValueStringArrays = getWithPropertyFallbackFalse(
homogenizeNullMultiValueStringArrays,
HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS
);
}

public boolean allowNestedArrays()
Expand All @@ -78,4 +81,14 @@ public boolean processArraysAsMultiValueStrings()
{
return processArraysAsMultiValueStrings;
}

public boolean isHomogenizeNullMultiValueStringArrays()
{
return homogenizeNullMultiValueStringArrays;
}

private static boolean getWithPropertyFallbackFalse(@Nullable Boolean value, String property)
{
return value != null ? value : Boolean.valueOf(System.getProperty(property, "false"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.InputBindings;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
Expand Down Expand Up @@ -256,30 +257,43 @@ public static Expr.ObjectBinding createBindings(
final List<String> columns = plan.getAnalysis().getRequiredBindingsList();
final Map<String, Pair<ExpressionType, Supplier<Object>>> suppliers = new HashMap<>();
for (String columnName : columns) {
final ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(columnName);
final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues().isTrue();
final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(columnName);
final boolean multiVal = capabilities != null && capabilities.hasMultipleValues().isTrue();
final Supplier<Object> supplier;
final ExpressionType expressionType = ExpressionType.fromColumnType(columnCapabilities);

if (columnCapabilities == null ||
columnCapabilities.isArray() ||
(plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT) && !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED))
) {
// Unknown ValueType or array type. Try making an Object selector and see if that gives us anything useful.
final ExpressionType expressionType = ExpressionType.fromColumnType(capabilities);

final boolean useObjectSupplierForMultiValueStringArray =
capabilities != null
// if homogenizing null multi-value string arrays, or if a single valued function that must be applied across
// multi-value rows, we can just use the dimension selector, which has the homogenization behavior built-in
&& ((!capabilities.is(ValueType.STRING))
|| (capabilities.is(ValueType.STRING)
&& !ExpressionProcessing.isHomogenizeNullMultiValueStringArrays()
&& !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)
)
)
// expression has array output
&& plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);

final boolean homogenizeNullMultiValueStringArrays =
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Maybe move this inside the if statement at 281?

plan.is(ExpressionPlan.Trait.NEEDS_APPLIED) || ExpressionProcessing.isHomogenizeNullMultiValueStringArrays();

if (capabilities == null || capabilities.isArray() || useObjectSupplierForMultiValueStringArray) {
// Unknown type, array type, or output array uses an Object selector and see if that gives anything useful
supplier = supplierFromObjectSelector(
columnSelectorFactory.makeColumnValueSelector(columnName),
plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)
homogenizeNullMultiValueStringArrays
);
} else if (columnCapabilities.is(ValueType.FLOAT)) {
} else if (capabilities.is(ValueType.FLOAT)) {
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getFloat);
} else if (columnCapabilities.is(ValueType.LONG)) {
} else if (capabilities.is(ValueType.LONG)) {
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getLong);
} else if (columnCapabilities.is(ValueType.DOUBLE)) {
} else if (capabilities.is(ValueType.DOUBLE)) {
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getDouble);
} else if (columnCapabilities.is(ValueType.STRING)) {
} else if (capabilities.is(ValueType.STRING)) {
supplier = supplierFromDimensionSelector(
columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)),
multiVal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.aggregation.AggregationTestHelper;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
Expand Down Expand Up @@ -502,6 +503,57 @@ public void testGroupByExpressionMultiMulti()
TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi");
}

@Test
public void testGroupByExpressionMultiMultiBackwardsCompat0dot22andOlder()
{
try {
ExpressionProcessing.initializeForHomogenizeNullMultiValueStrings();
if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
expectedException.expect(RuntimeException.class);
expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality.");
}
GroupByQuery query = GroupByQuery
.builder()
.setDataSource("xx")
.setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
.setGranularity(Granularities.ALL)
.setDimensions(new DefaultDimensionSpec("texpr", "texpr"))
.setVirtualColumns(
new ExpressionVirtualColumn(
"texpr",
"cartesian_map((x,y) -> concat(x, y), tags, othertags)",
ColumnType.STRING,
TestExprMacroTable.INSTANCE
)
)
.setLimit(5)
.setAggregatorSpecs(new CountAggregatorFactory("count"))
.setContext(context)
.build();

Sequence<ResultRow> result = helper.runQueryOnSegmentsObjs(
ImmutableList.of(
new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))
),
query
);

List<ResultRow> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1u1", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1u2", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2u1", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2u2", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t3u1", "count", 2L)
);

TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi");
}
finally {
ExpressionProcessing.initializeForTests(null);
}
}

@Test
public void testGroupByExpressionMultiMultiAuto()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import junitparams.JUnitParamsRunner;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.HumanReadableBytes;
import org.apache.druid.java.util.common.IAE;
Expand Down Expand Up @@ -56,7 +55,6 @@
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.junit.Test;
import org.junit.runner.RunWith;

import java.util.Arrays;
import java.util.Collections;
Expand All @@ -65,7 +63,6 @@
/**
* Tests for array functions and array types
*/
@RunWith(JUnitParamsRunner.class)
public class CalciteArraysQueryTest extends BaseCalciteQueryTest
{
// test some query stuffs, sort of limited since no native array column types so either need to use constructor or
Expand Down
Loading