From a009ce2a99dda0a9f6a5ebe90da840d9af95caae Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 May 2021 11:58:39 -0700 Subject: [PATCH 01/14] add single input string expression dimension vector selector and better expression planning --- .../druid/math/expr/FunctionalExpr.java | 7 +- .../vector/VectorGroupByEngine.java | 22 +- .../timeseries/TimeseriesQueryEngine.java | 7 +- .../druid/segment/ColumnProcessors.java | 22 +- .../apache/druid/segment/VirtualColumns.java | 11 + .../segment/column/ColumnCapabilities.java | 1 + .../druid/segment/virtual/ExpressionPlan.java | 174 +++- .../segment/virtual/ExpressionPlanner.java | 14 +- .../segment/virtual/ExpressionSelectors.java | 2 +- .../virtual/ExpressionVectorSelectors.java | 9 +- .../virtual/ExpressionVirtualColumn.java | 56 +- ...tionExpressionDimensionVectorSelector.java | 103 +++ .../virtual/VirtualizedColumnInspector.java | 60 ++ .../VirtualizedColumnSelectorFactory.java | 22 +- .../virtual/ExpressionPlannerTest.java | 813 ++++++++++++++++++ .../ExpressionVectorSelectorsTest.java | 6 +- 16 files changed, 1227 insertions(+), 102 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/segment/virtual/SingleStringDeferredEvaluationExpressionDimensionVectorSelector.java create mode 100644 processing/src/main/java/org/apache/druid/segment/virtual/VirtualizedColumnInspector.java create mode 100644 processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java diff --git a/core/src/main/java/org/apache/druid/math/expr/FunctionalExpr.java b/core/src/main/java/org/apache/druid/math/expr/FunctionalExpr.java index 00f381b64566..ea2c43c741ac 100644 --- a/core/src/main/java/org/apache/druid/math/expr/FunctionalExpr.java +++ b/core/src/main/java/org/apache/druid/math/expr/FunctionalExpr.java @@ -67,6 +67,11 @@ public List getIdentifiers() return args.stream().map(IdentifierExpr::toString).collect(Collectors.toList()); } + public List stringifyIdentifiers() + { + return 
args.stream().map(IdentifierExpr::stringify).collect(Collectors.toList()); + } + ImmutableList getIdentifierExprs() { return args; @@ -98,7 +103,7 @@ public ExprEval eval(ObjectBinding bindings) @Override public String stringify() { - return StringUtils.format("(%s) -> %s", ARG_JOINER.join(getIdentifiers()), expr.stringify()); + return StringUtils.format("(%s) -> %s", ARG_JOINER.join(stringifyIdentifiers()), expr.stringify()); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java index 848c18586e81..f66d51ac3cc0 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -42,6 +42,7 @@ import org.apache.druid.query.groupby.epinephelinae.HashVectorGrouper; import org.apache.druid.query.groupby.epinephelinae.VectorGrouper; import org.apache.druid.query.vector.VectorCursorGranularizer; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.VirtualColumns; @@ -59,7 +60,6 @@ import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; -import java.util.function.Function; import java.util.stream.Collectors; public class VectorGroupByEngine @@ -75,18 +75,18 @@ public static boolean canVectorize( @Nullable final Filter filter ) { - Function capabilitiesFunction = name -> - query.getVirtualColumns().getColumnCapabilitiesWithFallback(adapter, name); + final ColumnInspector inspector = query.getVirtualColumns().wrapInspector(adapter); - return canVectorizeDimensions(capabilitiesFunction, query.getDimensions()) - && query.getDimensions().stream().allMatch(DimensionSpec::canVectorize) - && 
query.getAggregatorSpecs().stream().allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(adapter)) + return adapter.canVectorize(filter, query.getVirtualColumns(), false) + && canVectorizeDimensions(inspector, query.getDimensions()) && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), adapter) - && adapter.canVectorize(filter, query.getVirtualColumns(), false); + && query.getAggregatorSpecs() + .stream() + .allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(inspector)); } public static boolean canVectorizeDimensions( - final Function capabilitiesFunction, + final ColumnInspector inspector, final List dimensions ) { @@ -94,6 +94,10 @@ public static boolean canVectorizeDimensions( .stream() .allMatch( dimension -> { + if (!dimension.canVectorize()) { + return false; + } + if (dimension.mustDecorate()) { // group by on multi value dimensions are not currently supported // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. @@ -102,7 +106,7 @@ public static boolean canVectorizeDimensions( } // Now check column capabilities. 
- final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); + final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension()); // null here currently means the column does not exist, nil columns can be vectorized if (columnCapabilities == null) { return true; diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java index 75cb4986ee70..113dd9b4487d 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java @@ -39,6 +39,7 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.vector.VectorCursorGranularizer; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.SegmentMissingException; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.VirtualColumns; @@ -94,10 +95,12 @@ public Sequence> process(final TimeseriesQuery que final Granularity gran = query.getGranularity(); final boolean descending = query.isDescending(); + final ColumnInspector inspector = query.getVirtualColumns().wrapInspector(adapter); + final boolean doVectorize = QueryContexts.getVectorize(query).shouldVectorize( - query.getAggregatorSpecs().stream().allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(adapter)) + adapter.canVectorize(filter, query.getVirtualColumns(), descending) && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), adapter) - && adapter.canVectorize(filter, query.getVirtualColumns(), descending) + && query.getAggregatorSpecs().stream().allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(inspector)) ); final Sequence> result; diff --git 
a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java index 18307121d2e9..7743b8966e78 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java @@ -219,12 +219,26 @@ private static ColumnCapabilities computeDimensionSpecCapabilities( } else if (dimensionSpec.getExtractionFn() != null) { // DimensionSpec is applying an extractionFn but *not* decorating. We have some insight into how the // extractionFn will behave, so let's use it. + final boolean dictionaryEncoded; + final boolean unique; + final boolean sorted; + if (columnCapabilities != null) { + dictionaryEncoded = columnCapabilities.isDictionaryEncoded().isTrue(); + unique = columnCapabilities.areDictionaryValuesUnique().isTrue(); + sorted = columnCapabilities.areDictionaryValuesSorted().isTrue(); + } else { + dictionaryEncoded = false; + unique = false; + sorted = false; + } return new ColumnCapabilitiesImpl() .setType(ValueType.STRING) - .setDictionaryValuesSorted(dimensionSpec.getExtractionFn().preservesOrdering()) - .setDictionaryValuesUnique(dimensionSpec.getExtractionFn().getExtractionType() - == ExtractionFn.ExtractionType.ONE_TO_ONE) + .setDictionaryEncoded(dictionaryEncoded) + .setDictionaryValuesSorted(sorted && dimensionSpec.getExtractionFn().preservesOrdering()) + .setDictionaryValuesUnique( + unique && dimensionSpec.getExtractionFn().getExtractionType() == ExtractionFn.ExtractionType.ONE_TO_ONE + ) .setHasMultipleValues(dimensionSpec.mustDecorate() || mayBeMultiValue(columnCapabilities)); } else { // No transformation. Pass through underlying types. 
@@ -319,7 +333,7 @@ private static T makeVectorProcessorInternal( switch (capabilities.getType()) { case STRING: // if column is not uniquely dictionary encoded, use an object selector - if (capabilities.isDictionaryEncoded().isFalse() || capabilities.areDictionaryValuesUnique().isFalse()) { + if (capabilities.isDictionaryEncoded().isFalse()) { return processorFactory.makeObjectProcessor( capabilities, objectSelectorFn.apply(selectorFactory) diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java index 3b141d4498ec..c38574080e03 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java @@ -43,6 +43,7 @@ import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; +import org.apache.druid.segment.virtual.VirtualizedColumnInspector; import org.apache.druid.segment.virtual.VirtualizedColumnSelectorFactory; import javax.annotation.Nullable; @@ -423,6 +424,16 @@ public ColumnSelectorFactory wrap(final ColumnSelectorFactory baseFactory) } } + + public ColumnInspector wrapInspector(ColumnInspector inspector) + { + if (virtualColumns.isEmpty()) { + return inspector; + } else { + return new VirtualizedColumnInspector(inspector, this); + } + } + @Override public byte[] getCacheKey() { diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index 1d85d2b82a8f..e6d0920748fe 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -41,6 +41,7 @@ public interface ColumnCapabilities * * If ValueType is COMPLEX, 
then the typeName associated with it. */ + @Nullable String getComplexTypeName(); /** diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java index b1ab3a9010f4..96cf5017806a 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java @@ -19,10 +19,14 @@ package org.apache.druid.segment.virtual; +import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprType; import org.apache.druid.math.expr.Parser; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ValueType; import javax.annotation.Nullable; @@ -57,7 +61,8 @@ public enum Trait */ UNKNOWN_INPUTS, /** - * expression has inputs whose type was incomplete, such as unknown multi-valuedness + * expression has inputs whose type was incomplete, such as unknown multi-valuedness, which are not explicitly + * used as multi-valued/array inputs */ INCOMPLETE_INPUTS, /** @@ -74,6 +79,7 @@ public enum Trait VECTORIZABLE } + private final ColumnInspector baseInputInspector; private final Expr expression; private final Expr.BindingAnalysis analysis; private final EnumSet traits; @@ -86,6 +92,7 @@ public enum Trait private final List unappliedInputs; ExpressionPlan( + ColumnInspector baseInputInspector, Expr expression, Expr.BindingAnalysis analysis, EnumSet traits, @@ -95,6 +102,7 @@ public enum Trait List unappliedInputs ) { + this.baseInputInspector = baseInputInspector; this.expression = expression; this.analysis = analysis; this.traits = traits; @@ -104,16 +112,28 @@ public enum Trait this.unappliedInputs = unappliedInputs; } + /** + 
* An expression with no inputs is a constant + */ public boolean isConstant() { return analysis.getRequiredBindings().isEmpty(); } + /** + * Gets the original expression that was planned + */ public Expr getExpression() { return expression; } + /** + * If an expression uses a multi-valued input in a scalar manner, the expression can be automatically transformed + * to map these values across the expression, applying the original expression to every value. + * + * @see Parser#applyUnappliedBindings(Expr, Expr.BindingAnalysis, List) + */ public Expr getAppliedExpression() { if (is(Trait.NEEDS_APPLIED)) { @@ -122,61 +142,183 @@ public Expr getAppliedExpression() return expression; } + /** + * If an expression uses a multi-valued input in a scalar manner, and the expression contains an accumulator such as + * for use as part of an aggregator, the expression can be automatically transformed to fold the accumulator across + * the values of the original expression. + * + * @see Parser#foldUnappliedBindings(Expr, Expr.BindingAnalysis, List, String) + */ public Expr getAppliedFoldExpression(String accumulatorId) { if (is(Trait.NEEDS_APPLIED)) { + Preconditions.checkState( + !unappliedInputs.contains(accumulatorId), + "Accumulator cannot be implicitly transformed, if it is an ARRAY or multi-valued type it must" + + " be used explicitly as such" + ); return Parser.foldUnappliedBindings(expression, analysis, unappliedInputs, accumulatorId); } return expression; } - public Expr.BindingAnalysis getAnalysis() - { - return analysis; - } - - public boolean is(Trait... flags) - { - return is(traits, flags); - } - - public boolean any(Trait... flags) - { - return any(traits, flags); - } - + /** + * The output type of the original expression. 
+ * + * Note that this might not be the true for the expressions provided by {@link #getAppliedExpression()} + * or {@link #getAppliedFoldExpression(String)}, should the expression have any unapplied inputs + */ @Nullable public ExprType getOutputType() { return outputType; } + /** + * If and only if the column has a single input, get the {@link ValueType} of that input + */ @Nullable public ValueType getSingleInputType() { return singleInputType; } + /** + * If and only if the expression has a single input, get the name of that input + */ public String getSingleInputName() { return Iterables.getOnlyElement(analysis.getRequiredBindings()); } + /** + * Get set of inputs which were completely missing information, possibly a non-existent column or from a column + * selector factory with incomplete information + */ public Set getUnknownInputs() { return unknownInputs; } + /** + * Returns basic analysis of the inputs to an {@link Expr} and how they are used + * + * @see Expr.BindingAnalysis + */ + public Expr.BindingAnalysis getAnalysis() + { + return analysis; + } + + /** + * Tries to construct the most appropriate {@link ColumnCapabilities} for this plan given the {@link #outputType} and + * {@link #traits} inferred by the {@link ExpressionPlanner}, optionally with the help of hint {@link ValueType}. 
+ * + * If no output type was able to be inferred during planning, returns null + */ + @Nullable + public ColumnCapabilities inferColumnCapabilities(@Nullable ValueType hint) + { + if (outputType != null) { + final ValueType inferredValueType = ExprType.toValueType(outputType); + + if (inferredValueType.isNumeric()) { + // if float was explicitly specified preserve it, because it will currently never be the computed output type + // since there is no float expression type + if (ValueType.FLOAT == hint) { + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT); + } + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(inferredValueType); + } + + // null constants can sometimes trip up the type inference to report STRING, so check if explicitly supplied + // output type is numeric and stick with that if so + if (hint != null && hint.isNumeric()) { + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(hint); + } + + // fancy string stuffs + if (ValueType.STRING == inferredValueType) { + // constant strings are supported as dimension selectors, set them as dictionary encoded and unique for all the + // bells and whistles the engines have to offer + if (isConstant()) { + return ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities() + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setDictionaryValuesSorted(true) + .setHasNulls(expression.isNullLiteral()); + } + + // single input strings also have an optimization which allow defering evaluation time until dictionary encoded + // column lookup, so if the underlying column is a dictionary encoded string then we can report as such + if (any(Trait.SINGLE_INPUT_SCALAR, Trait.SINGLE_INPUT_MAPPABLE)) { + ColumnCapabilities underlyingCapabilities = baseInputInspector.getColumnCapabilities(getSingleInputName()); + if (underlyingCapabilities != null) { + // since we don't know if the expression is 1:1 or if it retains ordering we can 
only piggy back only + // report as dictionary encoded, but it still allows us to use algorithms which work with dictionaryIds + // to create a dictionary encoded selector instead of an object selector to defer expression evaluation + // until query time + return ColumnCapabilitiesImpl.copyOf(underlyingCapabilities) + .setDictionaryValuesSorted(false) + .setDictionaryValuesUnique(false) + .setHasNulls(true); + } + } + } + + // we don't have to check for unknown input here because output type is unable to be inferred if we don't know + // the complete set of input types + if (any(Trait.NON_SCALAR_OUTPUT, Trait.NEEDS_APPLIED)) { + // if the hint requested a string, use a string + if (ValueType.STRING == hint) { + return ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.STRING); + } + // maybe something is looking for a little fun and wants arrays? let whatever it is through + return ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ExprType.toValueType(outputType)); + } + + // if we got here, lets call it single value string output, non-dictionary encoded + return ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities(); + } + // we don't know what we don't know + return null; + } + + /** + * Returns true if all of the supplied traits are true in this plan + */ + public boolean is(Trait... flags) + { + return is(traits, flags); + } + + /** + * Returns true if any of the supplied traits are true in this plan + */ + public boolean any(Trait... flags) + { + return any(traits, flags); + } + + /** + * Returns true if all of the supplied traits are true in the supplied set + */ static boolean is(EnumSet traits, Trait... args) { return Arrays.stream(args).allMatch(traits::contains); } + /** + * Returns true if any of the supplied traits are true in the supplied set + */ static boolean any(EnumSet traits, Trait... 
args) { return Arrays.stream(args).anyMatch(traits::contains); } + /** + * Returns true if none of the supplied traits are true in the supplied set + */ static boolean none(EnumSet traits, Trait... args) { return Arrays.stream(args).noneMatch(traits::contains); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java index 71fb9fac9ff8..c693df5ec9a8 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java @@ -155,8 +155,7 @@ public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) final boolean shouldComputeOutput = ExpressionPlan.none( traits, ExpressionPlan.Trait.UNKNOWN_INPUTS, - ExpressionPlan.Trait.INCOMPLETE_INPUTS, - ExpressionPlan.Trait.NEEDS_APPLIED + ExpressionPlan.Trait.INCOMPLETE_INPUTS ); if (shouldComputeOutput) { @@ -177,7 +176,7 @@ public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) } // vectorized expressions do not support incomplete, multi-valued inputs or outputs, or implicit mapping - // they also do support unknown inputs, but they also do not currently have to deal with them, as missing + // they also do not support unknown inputs, but they also do not currently have to deal with them, as missing // capabilites is indicative of a non-existent column instead of an unknown schema. 
If this ever changes, // this check should also change boolean supportsVector = ExpressionPlan.none( @@ -193,8 +192,17 @@ public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) // due to unknown inputs, but that's ok here since it just means it doesnt exist outputType = expression.getOutputType(inspector); traits.add(ExpressionPlan.Trait.VECTORIZABLE); + } else if (supportsVector && ExpressionPlan.is(traits, ExpressionPlan.Trait.SINGLE_INPUT_SCALAR)) { + // single input scalar expressions are currently vectorizable even if the expression is not + // this is kind of gross because this is actually only vectorizable if you're using a dictionary encoded + // vector selector, and not a vector object selector, but hopefully things check that a dictionary exists + // and prefer to use it... + outputType = expression.getOutputType(inspector); + traits.add(ExpressionPlan.Trait.VECTORIZABLE); } + return new ExpressionPlan( + inspector, expression, analysis, traits, diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index fc6ddfff6064..079270459bd1 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -184,7 +184,7 @@ public static DimensionSelector makeDimensionSelector( { final ExpressionPlan plan = ExpressionPlanner.plan(columnSelectorFactory, expression); - if (plan.is(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE)) { + if (plan.any(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE)) { final String column = plan.getSingleInputName(); if (plan.getSingleInputType() == ValueType.STRING) { return new SingleStringInputDimensionSelector( diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java index b6ef0e8aea8b..8da71f4c6e03 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java @@ -23,6 +23,7 @@ import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprType; import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.ExprUtils; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; @@ -54,7 +55,13 @@ public static SingleValueDimensionVectorSelector makeSingleValueDimensionVectorS String constant = plan.getExpression().eval(ExprUtils.nilBindings()).asString(); return ConstantVectorSelectors.singleValueDimensionVectorSelector(factory.getReadableVectorInspector(), constant); } - throw new IllegalStateException("Only constant expressions currently support dimension selectors"); + if (plan.is(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR) && ExprType.STRING == plan.getOutputType()) { + return new SingleStringDeferredEvaluationExpressionDimensionVectorSelector( + factory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(plan.getSingleInputName())), + plan.getExpression() + ); + } + throw new IllegalStateException("Only constant and single input string expressions currently support dictionary encoded selectors"); } public static VectorValueSelector makeVectorValueSelector( diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java index 343cd8a4978e..63fd816fad07 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java +++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java @@ -30,7 +30,6 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprMacroTable; -import org.apache.druid.math.expr.ExprType; import org.apache.druid.math.expr.Parser; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.dimension.DimensionSpec; @@ -188,59 +187,22 @@ public ColumnCapabilities capabilities(String columnName) public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName) { final ExpressionPlan plan = ExpressionPlanner.plan(inspector, parsedExpression.get()); - - if (plan.getOutputType() != null) { - - final ExprType inferredOutputType = plan.getOutputType(); - if (outputType != null && ExprType.fromValueType(outputType) != inferredOutputType) { + final ColumnCapabilities inferred = plan.inferColumnCapabilities(outputType); + // if we can infer the column capabilities from the expression plan, then use that + if (inferred != null) { + // explicit outputType is used as a hint, how did it compare to the planners inferred output type? 
+ if (inferred.getType() != outputType) { log.warn( "Projected output type %s of expression %s does not match provided type %s", - plan.getOutputType(), + inferred.getType(), expression, outputType ); } - final ValueType valueType = ExprType.toValueType(inferredOutputType); - - if (valueType.isNumeric()) { - // if float was explicitly specified preserve it, because it will currently never be the computed output type - if (ValueType.FLOAT == outputType) { - return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT); - } - return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(valueType); - } - - // null constants can sometimes trip up the type inference to report STRING, so check if explicitly supplied - // output type is numeric and stick with that if so - if (outputType != null && outputType.isNumeric()) { - return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(outputType); - } - - // array types shouldn't escape the expression system currently, so coerce anything past this point into some - // style of string - - // we don't have to check for unknown input here because output type is unable to be inferred if we don't know - // the complete set of input types - if (plan.any(ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.NEEDS_APPLIED)) { - // always a multi-value string since wider engine does not yet support array types - return new ColumnCapabilitiesImpl().setType(ValueType.STRING).setHasMultipleValues(true); - } - - // constant strings are supported as dimension selectors, set them as dictionary encoded and unique - if (plan.isConstant()) { - return new ColumnCapabilitiesImpl().setType(ValueType.STRING) - .setDictionaryEncoded(true) - .setDictionaryValuesUnique(true) - .setDictionaryValuesSorted(true) - .setHasMultipleValues(false); - } - - // if we got here, lets call it single value string output, non-dictionary encoded - return new ColumnCapabilitiesImpl().setType(ValueType.STRING) - 
.setHasMultipleValues(false) - .setDictionaryEncoded(false); + return inferred; } - // fallback to + + // fallback to default capabilities return capabilities(columnName); } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringDeferredEvaluationExpressionDimensionVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringDeferredEvaluationExpressionDimensionVectorSelector.java new file mode 100644 index 000000000000..69da268836b2 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringDeferredEvaluationExpressionDimensionVectorSelector.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.virtual; + +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.segment.DimensionDictionarySelector; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; + +import javax.annotation.Nullable; + +/** + * lol + * + * @see SingleStringInputDimensionSelector + */ +public class SingleStringDeferredEvaluationExpressionDimensionVectorSelector implements SingleValueDimensionVectorSelector +{ + private final SingleValueDimensionVectorSelector selector; + private final Expr expression; + private final SingleInputBindings bindings = new SingleInputBindings(); + + public SingleStringDeferredEvaluationExpressionDimensionVectorSelector( + SingleValueDimensionVectorSelector selector, + Expr expression + ) + { + // Verify selector has a working dictionary. + if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN + || !selector.nameLookupPossibleInAdvance()) { + throw new ISE("Selector of class[%s] does not have a dictionary, cannot use it.", selector.getClass().getName()); + } + this.selector = selector; + this.expression = expression; + } + + @Override + public int getValueCardinality() + { + return CARDINALITY_UNKNOWN; + } + + @Nullable + @Override + public String lookupName(int id) + { + final String value; + + value = selector.lookupName(id); + + bindings.set(value); + return expression.eval(bindings).asString(); + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return selector.nameLookupPossibleInAdvance(); + } + + @Nullable + @Override + public IdLookup idLookup() + { + return null; + } + + @Override + public int[] getRowVector() + { + return selector.getRowVector(); + } + + @Override + public int getMaxVectorSize() + { + return selector.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return selector.getCurrentVectorSize(); + } 
+} diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/VirtualizedColumnInspector.java b/processing/src/main/java/org/apache/druid/segment/virtual/VirtualizedColumnInspector.java new file mode 100644 index 000000000000..bba7f7672c9c --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/virtual/VirtualizedColumnInspector.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.virtual; + +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.column.ColumnCapabilities; + +import javax.annotation.Nullable; + +/** + * Provides {@link ColumnCapabilities} for both virtual and non-virtual columns by building on top of another base + * {@link ColumnInspector}. + * + * {@link VirtualColumns} are provided with the base inspector so that they may potentially infer output types to + * construct the appropriate capabilities for virtual columns, while the base inspector directly supplies the + * capabilities for non-virtual columns. 
+ */ +public class VirtualizedColumnInspector implements ColumnInspector +{ + protected final VirtualColumns virtualColumns; + protected final ColumnInspector baseInspector; + + public VirtualizedColumnInspector( + ColumnInspector baseInspector, + VirtualColumns virtualColumns + ) + { + this.virtualColumns = virtualColumns; + this.baseInspector = baseInspector; + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String columnName) + { + if (virtualColumns.exists(columnName)) { + return virtualColumns.getColumnCapabilities(baseInspector, columnName); + } else { + return baseInspector.getColumnCapabilities(columnName); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/VirtualizedColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/virtual/VirtualizedColumnSelectorFactory.java index d37de2ea3c6e..15fce041970f 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/VirtualizedColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/VirtualizedColumnSelectorFactory.java @@ -25,22 +25,21 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.VirtualColumns; -import org.apache.druid.segment.column.ColumnCapabilities; -import javax.annotation.Nullable; - -public class VirtualizedColumnSelectorFactory implements ColumnSelectorFactory +/** + * {@link ColumnSelectorFactory} which can create selectors for both virtual and non-virtual columns + */ +public class VirtualizedColumnSelectorFactory extends VirtualizedColumnInspector implements ColumnSelectorFactory { private final ColumnSelectorFactory baseFactory; - private final VirtualColumns virtualColumns; public VirtualizedColumnSelectorFactory( ColumnSelectorFactory baseFactory, VirtualColumns virtualColumns ) { + super(baseFactory, virtualColumns); this.baseFactory = Preconditions.checkNotNull(baseFactory, 
"baseFactory"); - this.virtualColumns = Preconditions.checkNotNull(virtualColumns, "virtualColumns"); } @Override @@ -62,15 +61,4 @@ public ColumnValueSelector makeColumnValueSelector(String columnName) return baseFactory.makeColumnValueSelector(columnName); } } - - @Nullable - @Override - public ColumnCapabilities getColumnCapabilities(String columnName) - { - if (virtualColumns.exists(columnName)) { - return virtualColumns.getColumnCapabilities(baseFactory, columnName); - } else { - return baseFactory.getColumnCapabilities(columnName); - } - } } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java new file mode 100644 index 000000000000..145aeec3f9f2 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java @@ -0,0 +1,813 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.virtual; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprType; +import org.apache.druid.math.expr.Parser; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import javax.annotation.Nullable; +import java.util.Map; + +public class ExpressionPlannerTest extends InitializedNullHandlingTest +{ + public static final ColumnInspector SYNTHETIC_INSPECTOR = new ColumnInspector() + { + private final Map capabilitiesMap = + ImmutableMap.builder() + .put( + "l1", + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG) + ) + .put( + "l2", + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG) + ) + .put( + "f1", + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT) + ) + .put( + "f2", + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT) + ) + .put( + "d1", + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE) + ) + .put( + "d2", + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE) + ) + .put( + "s1", + ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities() + ) + .put( + // segment style single value dictionary encoded with unique sorted dictionary + "s2", + new ColumnCapabilitiesImpl().setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setHasBitmapIndexes(true) + .setDictionaryValuesSorted(true) + .setDictionaryValuesUnique(true) + 
.setHasMultipleValues(false) + ) + .put( + // dictionary encoded but not unique or sorted, maybe an indexed table from a join result + "s3", + new ColumnCapabilitiesImpl().setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setHasBitmapIndexes(false) + .setDictionaryValuesSorted(false) + .setDictionaryValuesUnique(false) + .setHasMultipleValues(false) + ) + .put( + // string with unknown multi-valuedness + "s4", + new ColumnCapabilitiesImpl().setType(ValueType.STRING) + ) + .put( + // dictionary encoded multi valued string dimension f + "m1", + new ColumnCapabilitiesImpl().setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setHasBitmapIndexes(true) + .setDictionaryValuesUnique(true) + .setDictionaryValuesSorted(true) + .setHasMultipleValues(true) + ) + .put( + // simple multi valued string dimension f + "m2", + new ColumnCapabilitiesImpl().setType(ValueType.STRING) + .setDictionaryEncoded(false) + .setHasBitmapIndexes(false) + .setDictionaryValuesUnique(false) + .setDictionaryValuesSorted(false) + .setHasMultipleValues(true) + ) + .put( + "sa1", + ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.STRING_ARRAY) + ) + .put( + "sa2", + ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.STRING_ARRAY) + ) + .put( + "la1", + ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.LONG_ARRAY) + ) + .put( + "la2", + ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.LONG_ARRAY) + ) + .put( + "da1", + ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.DOUBLE_ARRAY) + ) + .put( + "da2", + ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.DOUBLE_ARRAY) + ) + .build(); + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + return capabilitiesMap.get(column); + } + }; + + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + @Test + public void testUnknown() + { + // column has no 
capabilities + // the vectorize query engine contracts is such that the lack of column capabilities is indicative of a nil column + // so this is vectorizable + // for non-vectorized expression processing, this will probably end up using a selector that examines inputs on a + // row by row basis to determine if the expression needs applied to multi-valued inputs + + ExpressionPlan thePlan = plan("concat(x, 'x')"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT, + ExpressionPlan.Trait.CONSTANT + ) + ); + // this expression has no "unapplied bindings", nothing to apply + Assert.assertEquals("concat(\"x\", 'x')", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("concat(\"x\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + ColumnCapabilities inferred = thePlan.inferColumnCapabilities(null); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.STRING, inferred.getType()); + Assert.assertNull(inferred.getComplexTypeName()); + Assert.assertTrue(inferred.hasNulls().isTrue()); + Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue()); + Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(inferred.hasBitmapIndexes()); + Assert.assertFalse(inferred.hasSpatialIndexes()); + + // what if both inputs are unknown, can we know things? 
+ thePlan = plan("x * y"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.UNKNOWN_INPUTS + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.VECTORIZABLE, + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT, + ExpressionPlan.Trait.CONSTANT + ) + ); + + Assert.assertEquals("(\"x\" * \"y\")", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("(\"x\" * \"y\")", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertNull(thePlan.getOutputType()); + Assert.assertNull(thePlan.inferColumnCapabilities(null)); + // no we cannot + } + + @Test + public void testScalarStringNondictionaryEncoded() + { + ExpressionPlan thePlan = plan("concat(s1, 'x')"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertEquals("concat(\"s1\", 'x')", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("concat(\"s1\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + ColumnCapabilities inferred = thePlan.inferColumnCapabilities(null); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.STRING, inferred.getType()); + Assert.assertNull(inferred.getComplexTypeName()); + Assert.assertTrue(inferred.hasNulls().isTrue()); + Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue()); + 
Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue()); + Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(inferred.hasBitmapIndexes()); + Assert.assertFalse(inferred.hasSpatialIndexes()); + } + + @Test + public void testScalarNumeric() + { + ExpressionPlan thePlan = plan("l1 + 5"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertEquals("(\"l1\" + 5)", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("(\"l1\" + 5)", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals("(\"l1\" + 5)", thePlan.getAppliedFoldExpression("l1").stringify()); + Assert.assertEquals(ExprType.LONG, thePlan.getOutputType()); + ColumnCapabilities inferred = thePlan.inferColumnCapabilities(null); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.LONG, inferred.getType()); + Assert.assertNull(inferred.getComplexTypeName()); + if (NullHandling.sqlCompatible()) { + Assert.assertTrue(inferred.hasNulls().isMaybeTrue()); + } else { + Assert.assertFalse(inferred.hasNulls().isMaybeTrue()); + } + Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue()); + Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(inferred.hasBitmapIndexes()); + Assert.assertFalse(inferred.hasSpatialIndexes()); + + thePlan = plan("l1 + 5.0"); + Assert.assertEquals(ExprType.DOUBLE, thePlan.getOutputType()); + + thePlan = plan("d1 * d2"); + Assert.assertTrue( 
+ thePlan.is( + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertEquals("(\"d1\" * \"d2\")", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("(\"d1\" * \"d2\")", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals("(\"d1\" * \"d2\")", thePlan.getAppliedFoldExpression("d1").stringify()); + Assert.assertEquals(ExprType.DOUBLE, thePlan.getOutputType()); + inferred = thePlan.inferColumnCapabilities(null); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.DOUBLE, inferred.getType()); + Assert.assertNull(inferred.getComplexTypeName()); + if (NullHandling.sqlCompatible()) { + Assert.assertTrue(inferred.hasNulls().isMaybeTrue()); + } else { + Assert.assertFalse(inferred.hasNulls().isMaybeTrue()); + } + Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue()); + Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(inferred.hasBitmapIndexes()); + Assert.assertFalse(inferred.hasSpatialIndexes()); + } + + @Test + public void testScalarStringDictionaryEncoded() + { + ExpressionPlan thePlan = plan("concat(s2, 'x')"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + 
ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertEquals("concat(\"s2\", 'x')", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("concat(\"s2\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + ColumnCapabilities inferred = thePlan.inferColumnCapabilities(null); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.STRING, inferred.getType()); + Assert.assertNull(inferred.getComplexTypeName()); + Assert.assertTrue(inferred.hasNulls().isTrue()); + Assert.assertTrue(inferred.isDictionaryEncoded().isTrue()); + Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue()); + Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); + Assert.assertTrue(inferred.hasBitmapIndexes()); + Assert.assertFalse(inferred.hasSpatialIndexes()); + + // multiple input columns + thePlan = plan("concat(s2, s3)"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertEquals("concat(\"s2\", \"s3\")", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("concat(\"s2\", \"s3\")", thePlan.getAppliedFoldExpression("__acc").stringify()); + // what if s3 is an accumulator instead? 
nope, still no NEEDS_APPLIED so nothing to do + Assert.assertEquals( + "concat(\"s2\", \"s3\")", + thePlan.getAppliedFoldExpression("s3").stringify() + ); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + inferred = thePlan.inferColumnCapabilities(null); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.STRING, inferred.getType()); + Assert.assertNull(inferred.getComplexTypeName()); + Assert.assertTrue(inferred.hasNulls().isTrue()); + Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue()); + Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(inferred.hasBitmapIndexes()); + Assert.assertFalse(inferred.hasSpatialIndexes()); + } + + @Test + public void testMultiValueStringDictionaryEncoded() + { + ExpressionPlan thePlan = plan("concat(m1, 'x')"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + ColumnCapabilities inferred = thePlan.inferColumnCapabilities(null); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.STRING, inferred.getType()); + Assert.assertNull(inferred.getComplexTypeName()); + Assert.assertTrue(inferred.hasNulls().isMaybeTrue()); + Assert.assertTrue(inferred.isDictionaryEncoded().isTrue()); + Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue()); + Assert.assertTrue(inferred.hasMultipleValues().isTrue()); + 
Assert.assertTrue(inferred.hasBitmapIndexes()); + Assert.assertFalse(inferred.hasSpatialIndexes()); + + thePlan = plan("concat(s1, m2)"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + Assert.assertEquals( + "map((\"m2\") -> concat(\"s1\", \"m2\"), \"m2\")", + thePlan.getAppliedExpression().stringify() + ); + Assert.assertEquals( + "fold((\"m2\", \"s1\") -> concat(\"s1\", \"m2\"), \"m2\", \"s1\")", + thePlan.getAppliedFoldExpression("s1").stringify() + ); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + inferred = thePlan.inferColumnCapabilities(null); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.STRING, inferred.getType()); + Assert.assertTrue(inferred.hasMultipleValues().isTrue()); + + thePlan = plan("concat(m1, m2)"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + // whoa + Assert.assertEquals( + "cartesian_map((\"m1\", \"m2\") -> concat(\"m1\", \"m2\"), \"m1\", \"m2\")", + thePlan.getAppliedExpression().stringify() + ); + // sort of funny, but technically correct + Assert.assertEquals( + "cartesian_fold((\"m1\", \"m2\", \"__acc\") -> concat(\"m1\", \"m2\"), \"m1\", \"m2\", \"__acc\")", + thePlan.getAppliedFoldExpression("__acc").stringify() + ); + inferred = thePlan.inferColumnCapabilities(null); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.STRING, inferred.getType()); + 
Assert.assertTrue(inferred.hasMultipleValues().isTrue()); + } + + @Test + public void testMultiValueStringDictionaryEncodedIllegalAccumulator() + { + expectedException.expect(IllegalStateException.class); + expectedException.expectMessage( + "Accumulator cannot be implicitly transformed, if it is an ARRAY or multi-valued type it must be used explicitly as such" + ); + ExpressionPlan thePlan = plan("concat(m1, 'x')"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + + thePlan = plan("concat(m1, m2)"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + // what happens if we try to use a multi-valued input that was not explicitly used as multi-valued as the + // accumulator? 
+ thePlan.getAppliedFoldExpression("m1"); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + } + + @Test + public void testIncompleteString() + { + ExpressionPlan thePlan = plan("concat(s4, 'x')"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.INCOMPLETE_INPUTS + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + // incomplete inputs are not transformed either, rather this will need to be detected and handled on a row-by-row + // basis + Assert.assertEquals("concat(\"s4\", 'x')", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("concat(\"s4\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); + // incomplete and unknown skip output type since we don't reliably know + Assert.assertNull(thePlan.getOutputType()); + Assert.assertNull(thePlan.inferColumnCapabilities(null)); + } + + @Test + public void testArrayOutput() + { + // its ok to use scalar inputs to array expressions, string columns cant help it if sometimes they are single + // valued and sometimes they are multi-valued + ExpressionPlan thePlan = plan("array_append(s1, 'x')"); + assertArrayInAndOut(thePlan); + // with a string hint, it should look like a multi-valued string + ColumnCapabilities inferred = thePlan.inferColumnCapabilities(ValueType.STRING); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.STRING, inferred.getType()); + Assert.assertNull(inferred.getComplexTypeName()); + Assert.assertTrue(inferred.hasNulls().isMaybeTrue()); + Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue()); + 
Assert.assertTrue(inferred.hasMultipleValues().isTrue()); + Assert.assertFalse(inferred.hasBitmapIndexes()); + Assert.assertFalse(inferred.hasSpatialIndexes()); + // with no hint though, let the array free + inferred = thePlan.inferColumnCapabilities(ValueType.STRING_ARRAY); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.STRING_ARRAY, inferred.getType()); + Assert.assertNull(inferred.getComplexTypeName()); + Assert.assertTrue(inferred.hasNulls().isMaybeTrue()); + Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue()); + Assert.assertTrue(inferred.hasMultipleValues().isTrue()); + Assert.assertFalse(inferred.hasBitmapIndexes()); + Assert.assertFalse(inferred.hasSpatialIndexes()); + + Assert.assertEquals("array_append(\"s1\", 'x')", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("array_append(\"s1\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals(ExprType.STRING_ARRAY, thePlan.getOutputType()); + + // multi-valued are cool too + thePlan = plan("array_append(m1, 'x')"); + assertArrayInAndOut(thePlan); + + // what about incomplete inputs with arrays? they are not reported as incomplete because they are treated as arrays + thePlan = plan("array_append(s4, 'x')"); + assertArrayInAndOut(thePlan); + Assert.assertEquals(ExprType.STRING_ARRAY, thePlan.getOutputType()); + + // what about if it is the scalar argument? 
there it is + thePlan = plan("array_append(m1, s4)"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + // incomplete and unknown skip output type since we don't reliably know + Assert.assertNull(thePlan.getOutputType()); + + // array types are cool too + thePlan = plan("array_append(sa1, 'x')"); + assertArrayInAndOut(thePlan); + + thePlan = plan("array_append(sa1, 'x')"); + assertArrayInAndOut(thePlan); + } + + + @Test + public void testScalarOutputMultiValueInput() + { + ExpressionPlan thePlan = plan("array_to_string(array_append(s1, 'x'), ',')"); + assertArrayInput(thePlan); + ColumnCapabilities inferred = thePlan.inferColumnCapabilities(ValueType.STRING); + Assert.assertNotNull(inferred); + Assert.assertEquals(ValueType.STRING, inferred.getType()); + Assert.assertNull(inferred.getComplexTypeName()); + Assert.assertTrue(inferred.hasNulls().isTrue()); + Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue()); + Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue()); + Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(inferred.hasBitmapIndexes()); + Assert.assertFalse(inferred.hasSpatialIndexes()); + + Assert.assertEquals("array_to_string(array_append(\"s1\", 'x'), ',')", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals( + "array_to_string(array_append(\"s1\", 'x'), ',')", + thePlan.getAppliedFoldExpression("__acc").stringify() + ); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + + // what about a multi-valued input + thePlan = 
plan("array_to_string(array_append(s1, m1), ',')"); + assertArrayInput(thePlan); + + Assert.assertEquals( + "array_to_string(map((\"m1\") -> array_append(\"s1\", \"m1\"), \"m1\"), ',')", + thePlan.getAppliedExpression().stringify() + ); + Assert.assertEquals( + "array_to_string(fold((\"m1\", \"s1\") -> array_append(\"s1\", \"m1\"), \"m1\", \"s1\"), ',')", + thePlan.getAppliedFoldExpression("s1").stringify() + ); + // why is this null + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + } + + @Test + public void testScalarOutputArrayInput() + { + ExpressionPlan thePlan = plan("array_to_string(array_append(sa1, 'x'), ',')"); + assertArrayInput(thePlan); + + Assert.assertEquals("array_to_string(array_append(\"sa1\", 'x'), ',')", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals( + "array_to_string(array_append(\"sa1\", 'x'), ',')", + thePlan.getAppliedFoldExpression("__acc").stringify() + ); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + + + thePlan = plan("array_to_string(array_concat(sa1, sa2), ',')"); + assertArrayInput(thePlan); + Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); + + thePlan = plan("fold((x, acc) -> acc + x, array_concat(la1, la2), 0)"); + assertArrayInput(thePlan); + Assert.assertEquals( + "fold((\"x\", \"acc\") -> (\"acc\" + \"x\"), array_concat(\"la1\", \"la2\"), 0)", + thePlan.getAppliedExpression().stringify() + ); + Assert.assertEquals( + "fold((\"x\", \"acc\") -> (\"acc\" + \"x\"), array_concat(\"la1\", \"la2\"), 0)", + thePlan.getAppliedFoldExpression("__acc").stringify() + ); + Assert.assertEquals(ExprType.LONG, thePlan.getOutputType()); + + thePlan = plan("fold((x, acc) -> acc * x, array_concat(da1, da2), 0.0)"); + assertArrayInput(thePlan); + Assert.assertEquals( + "fold((\"x\", \"acc\") -> (\"acc\" * \"x\"), array_concat(\"da1\", \"da2\"), 0.0)", + thePlan.getAppliedExpression().stringify() + ); + Assert.assertEquals( + "fold((\"x\", \"acc\") -> (\"acc\" * \"x\"), 
array_concat(\"da1\", \"da2\"), 0.0)", + thePlan.getAppliedFoldExpression("__acc").stringify() + ); + Assert.assertEquals(ExprType.DOUBLE, thePlan.getOutputType()); + } + + @Test + public void testArrayConstruction() + { + ExpressionPlan thePlan = plan("array(l1, l2)"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + Assert.assertEquals(ExprType.LONG_ARRAY, thePlan.getOutputType()); + + thePlan = plan("array(l1, d1)"); + Assert.assertEquals(ExprType.DOUBLE_ARRAY, thePlan.getOutputType()); + thePlan = plan("array(l1, d1, s1)"); + Assert.assertEquals(ExprType.STRING_ARRAY, thePlan.getOutputType()); + } + + private static ExpressionPlan plan(String expression) + { + return ExpressionPlanner.plan(SYNTHETIC_INSPECTOR, Parser.parse(expression, TestExprMacroTable.INSTANCE)); + } + + private static void assertArrayInput(ExpressionPlan thePlan) + { + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.NON_SCALAR_INPUTS + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT, + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + } + + private static void assertArrayInAndOut(ExpressionPlan thePlan) + { + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + 
ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java index c53676bea3ae..40955f69b3a6 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java @@ -88,7 +88,11 @@ public class ExpressionVectorSelectorsTest "long2", "float2", "double2", - "string3" + "string3", + "string1 + string3", + "concat(string1, string2, string3)", + "concat(string1, 'x')", + "concat(string1, nonexistent)" ); private static final int ROWS_PER_SEGMENT = 100_000; From 09913fc3d932bd1921d88b330140e828f00ac1f0 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 May 2021 16:34:20 -0700 Subject: [PATCH 02/14] better --- .../druid/segment/virtual/ExpressionPlan.java | 15 +- .../segment/virtual/ExpressionPlanner.java | 20 +- .../segment/virtual/ExpressionSelectors.java | 5 +- .../virtual/ExpressionPlannerTest.java | 236 +++++++++++------- .../calcite/CalciteParameterQueryTest.java | 3 +- .../druid/sql/calcite/CalciteQueryTest.java | 55 +--- 6 files changed, 173 insertions(+), 161 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java index 96cf5017806a..649e6b5a1e52 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java @@ -44,16 +44,18 @@ public enum Trait */ CONSTANT, /** - * expression has a single, single valued input, and is dictionary encoded if the value is a string + * expression has a 
single, single valued input, and is dictionary encoded if the value is a string, and does + * not produce non-scalar output */ SINGLE_INPUT_SCALAR, /** * expression has a single input, which may produce single or multi-valued output, but if so, it must be implicitly - * mappable (i.e. the expression is not treating its input as an array and not wanting to output an array) + * mappable (i.e. the expression is not treating its input as an array and does not produce non-scalar output) */ SINGLE_INPUT_MAPPABLE, /** - * expression must be implicitly mapped across the multiple values per row of known multi-value inputs + * expression must be implicitly mapped across the multiple values per row of known multi-value inputs, the final + * output will be multi-valued */ NEEDS_APPLIED, /** @@ -62,15 +64,15 @@ public enum Trait UNKNOWN_INPUTS, /** * expression has inputs whose type was incomplete, such as unknown multi-valuedness, which are not explicitly - * used as multi-valued/array inputs + * used as possibly multi-valued/array inputs */ INCOMPLETE_INPUTS, /** - * expression explicitly using multi-valued inputs as array inputs + * expression explicitly using multi-valued inputs as array inputs or has array inputs */ NON_SCALAR_INPUTS, /** - * expression produces explict multi-valued output, or implicit multi-valued output via mapping + * expression produces explict multi-valued output */ NON_SCALAR_OUTPUT, /** @@ -259,6 +261,7 @@ public ColumnCapabilities inferColumnCapabilities(@Nullable ValueType hint) // to create a dictionary encoded selector instead of an object selector to defer expression evaluation // until query time return ColumnCapabilitiesImpl.copyOf(underlyingCapabilities) + .setType(ValueType.STRING) .setDictionaryValuesSorted(false) .setDictionaryValuesUnique(false) .setHasNulls(true); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java index c693df5ec9a8..e180b9eb0368 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java @@ -167,14 +167,10 @@ public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) traits.add(ExpressionPlan.Trait.NON_SCALAR_OUTPUT); // single input mappable may not produce array output explicitly, only through implicit mapping + traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR); traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE); } - // if implicit mapping is in play, output will be multi-valued but may still use SINGLE_INPUT_MAPPABLE optimization - if (ExpressionPlan.is(traits, ExpressionPlan.Trait.NEEDS_APPLIED)) { - traits.add(ExpressionPlan.Trait.NON_SCALAR_OUTPUT); - } - // vectorized expressions do not support incomplete, multi-valued inputs or outputs, or implicit mapping // they also do not support unknown inputs, but they also do not currently have to deal with them, as missing // capabilites is indicative of a non-existent column instead of an unknown schema. If this ever changes, @@ -197,8 +193,18 @@ public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) // this is kind of gross because this is actually only vectorizable if you're using a dictionary encoded // vector selector, and not a vector object selector, but hopefully things check that a dictionary exists // and prefer to use it... 
- outputType = expression.getOutputType(inspector); - traits.add(ExpressionPlan.Trait.VECTORIZABLE); + // but the least we can do is check that the column is in fact dictionary encoded to try to sway the hands of fate + ColumnCapabilities columnCapabilities = + inspector.getColumnCapabilities(Iterables.getOnlyElement(analysis.getRequiredBindings())); + if (columnCapabilities != null + && columnCapabilities.isDictionaryEncoded().isTrue() + && columnCapabilities.hasMultipleValues().isFalse()) { + outputType = expression.getOutputType(inspector); + // must be a string + if (outputType == ExprType.STRING) { + traits.add(ExpressionPlan.Trait.VECTORIZABLE); + } + } } return new ExpressionPlan( diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 079270459bd1..d8cde468ac7d 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -139,7 +139,7 @@ public static ColumnValueSelector makeExprEvalSelector( ExpressionPlan plan ) { - if (plan.is(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR)) { + if (plan.is(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR) && !plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT)) { final String column = plan.getSingleInputName(); final ValueType inputType = plan.getSingleInputType(); if (inputType == ValueType.LONG) { @@ -184,7 +184,8 @@ public static DimensionSelector makeDimensionSelector( { final ExpressionPlan plan = ExpressionPlanner.plan(columnSelectorFactory, expression); - if (plan.any(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE)) { + if (plan.any(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE) + && !plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT)) { final String column = plan.getSingleInputName(); if 
(plan.getSingleInputType() == ValueType.STRING) { return new SingleStringInputDimensionSelector( diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java index 145aeec3f9f2..c6d273e3d894 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java @@ -44,36 +44,36 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest private final Map capabilitiesMap = ImmutableMap.builder() .put( - "l1", + "long1", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG) ) .put( - "l2", + "long2", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG) ) .put( - "f1", + "float1", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT) ) .put( - "f2", + "float2", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT) ) .put( - "d1", + "double1", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE) ) .put( - "d2", + "double2", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE) ) .put( - "s1", + "scalar_string", ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities() ) .put( // segment style single value dictionary encoded with unique sorted dictionary - "s2", + "scalar_dictionary_string", new ColumnCapabilitiesImpl().setType(ValueType.STRING) .setDictionaryEncoded(true) .setHasBitmapIndexes(true) @@ -83,7 +83,7 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest ) .put( // dictionary encoded but not unique or sorted, maybe an indexed table from a join result - "s3", + "scalar_dictionary_string_nonunique", new ColumnCapabilitiesImpl().setType(ValueType.STRING) .setDictionaryEncoded(true) .setHasBitmapIndexes(false) @@ -93,12 +93,12 @@ 
public class ExpressionPlannerTest extends InitializedNullHandlingTest ) .put( // string with unknown multi-valuedness - "s4", + "string_unknown", new ColumnCapabilitiesImpl().setType(ValueType.STRING) ) .put( - // dictionary encoded multi valued string dimension f - "m1", + // dictionary encoded multi valued string dimension + "multi_dictionary_string", new ColumnCapabilitiesImpl().setType(ValueType.STRING) .setDictionaryEncoded(true) .setHasBitmapIndexes(true) @@ -107,8 +107,8 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest .setHasMultipleValues(true) ) .put( - // simple multi valued string dimension f - "m2", + // simple multi valued string dimension unsorted + "multi_dictionary_string_nonunique", new ColumnCapabilitiesImpl().setType(ValueType.STRING) .setDictionaryEncoded(false) .setHasBitmapIndexes(false) @@ -117,27 +117,27 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest .setHasMultipleValues(true) ) .put( - "sa1", + "string_array_1", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.STRING_ARRAY) ) .put( - "sa2", + "string_array_2", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.STRING_ARRAY) ) .put( - "la1", + "long_array_1", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.LONG_ARRAY) ) .put( - "la2", + "long_array_2", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.LONG_ARRAY) ) .put( - "da1", + "double_array_1", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.DOUBLE_ARRAY) ) .put( - "da2", + "double_array_2", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.DOUBLE_ARRAY) ) .build(); @@ -224,7 +224,7 @@ public void testUnknown() @Test public void testScalarStringNondictionaryEncoded() { - ExpressionPlan thePlan = plan("concat(s1, 'x')"); + ExpressionPlan thePlan = plan("concat(scalar_string, 'x')"); Assert.assertTrue( thePlan.is( ExpressionPlan.Trait.VECTORIZABLE @@ -241,8 +241,8 @@ 
public void testScalarStringNondictionaryEncoded() ExpressionPlan.Trait.NON_SCALAR_OUTPUT ) ); - Assert.assertEquals("concat(\"s1\", 'x')", thePlan.getAppliedExpression().stringify()); - Assert.assertEquals("concat(\"s1\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals("concat(\"scalar_string\", 'x')", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("concat(\"scalar_string\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); ColumnCapabilities inferred = thePlan.inferColumnCapabilities(null); Assert.assertNotNull(inferred); @@ -260,7 +260,7 @@ public void testScalarStringNondictionaryEncoded() @Test public void testScalarNumeric() { - ExpressionPlan thePlan = plan("l1 + 5"); + ExpressionPlan thePlan = plan("long1 + 5"); Assert.assertTrue( thePlan.is( ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, @@ -277,9 +277,9 @@ public void testScalarNumeric() ExpressionPlan.Trait.NON_SCALAR_OUTPUT ) ); - Assert.assertEquals("(\"l1\" + 5)", thePlan.getAppliedExpression().stringify()); - Assert.assertEquals("(\"l1\" + 5)", thePlan.getAppliedFoldExpression("__acc").stringify()); - Assert.assertEquals("(\"l1\" + 5)", thePlan.getAppliedFoldExpression("l1").stringify()); + Assert.assertEquals("(\"long1\" + 5)", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("(\"long1\" + 5)", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals("(\"long1\" + 5)", thePlan.getAppliedFoldExpression("long1").stringify()); Assert.assertEquals(ExprType.LONG, thePlan.getOutputType()); ColumnCapabilities inferred = thePlan.inferColumnCapabilities(null); Assert.assertNotNull(inferred); @@ -297,10 +297,10 @@ public void testScalarNumeric() Assert.assertFalse(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); - thePlan = plan("l1 + 5.0"); + thePlan = plan("long1 + 5.0"); Assert.assertEquals(ExprType.DOUBLE, 
thePlan.getOutputType()); - thePlan = plan("d1 * d2"); + thePlan = plan("double1 * double2"); Assert.assertTrue( thePlan.is( ExpressionPlan.Trait.VECTORIZABLE @@ -317,9 +317,9 @@ public void testScalarNumeric() ExpressionPlan.Trait.NON_SCALAR_OUTPUT ) ); - Assert.assertEquals("(\"d1\" * \"d2\")", thePlan.getAppliedExpression().stringify()); - Assert.assertEquals("(\"d1\" * \"d2\")", thePlan.getAppliedFoldExpression("__acc").stringify()); - Assert.assertEquals("(\"d1\" * \"d2\")", thePlan.getAppliedFoldExpression("d1").stringify()); + Assert.assertEquals("(\"double1\" * \"double2\")", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("(\"double1\" * \"double2\")", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals("(\"double1\" * \"double2\")", thePlan.getAppliedFoldExpression("double1").stringify()); Assert.assertEquals(ExprType.DOUBLE, thePlan.getOutputType()); inferred = thePlan.inferColumnCapabilities(null); Assert.assertNotNull(inferred); @@ -341,7 +341,7 @@ public void testScalarNumeric() @Test public void testScalarStringDictionaryEncoded() { - ExpressionPlan thePlan = plan("concat(s2, 'x')"); + ExpressionPlan thePlan = plan("concat(scalar_dictionary_string, 'x')"); Assert.assertTrue( thePlan.is( ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, @@ -358,8 +358,11 @@ public void testScalarStringDictionaryEncoded() ExpressionPlan.Trait.NON_SCALAR_OUTPUT ) ); - Assert.assertEquals("concat(\"s2\", 'x')", thePlan.getAppliedExpression().stringify()); - Assert.assertEquals("concat(\"s2\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals("concat(\"scalar_dictionary_string\", 'x')", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals( + "concat(\"scalar_dictionary_string\", 'x')", + thePlan.getAppliedFoldExpression("__acc").stringify() + ); Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); ColumnCapabilities inferred = thePlan.inferColumnCapabilities(null); 
Assert.assertNotNull(inferred); @@ -374,7 +377,7 @@ public void testScalarStringDictionaryEncoded() Assert.assertFalse(inferred.hasSpatialIndexes()); // multiple input columns - thePlan = plan("concat(s2, s3)"); + thePlan = plan("concat(scalar_dictionary_string, scalar_dictionary_string_nonunique)"); Assert.assertTrue( thePlan.is( ExpressionPlan.Trait.VECTORIZABLE @@ -391,12 +394,18 @@ public void testScalarStringDictionaryEncoded() ExpressionPlan.Trait.NON_SCALAR_OUTPUT ) ); - Assert.assertEquals("concat(\"s2\", \"s3\")", thePlan.getAppliedExpression().stringify()); - Assert.assertEquals("concat(\"s2\", \"s3\")", thePlan.getAppliedFoldExpression("__acc").stringify()); - // what if s3 is an accumulator instead? nope, still no NEEDS_APPLIED so nothing to do Assert.assertEquals( - "concat(\"s2\", \"s3\")", - thePlan.getAppliedFoldExpression("s3").stringify() + "concat(\"scalar_dictionary_string\", \"scalar_dictionary_string_nonunique\")", + thePlan.getAppliedExpression().stringify() + ); + Assert.assertEquals( + "concat(\"scalar_dictionary_string\", \"scalar_dictionary_string_nonunique\")", + thePlan.getAppliedFoldExpression("__acc").stringify() + ); + // what if scalar_dictionary_string_nonunique is an accumulator instead? 
nope, still no NEEDS_APPLIED so nothing to do + Assert.assertEquals( + "concat(\"scalar_dictionary_string\", \"scalar_dictionary_string_nonunique\")", + thePlan.getAppliedFoldExpression("scalar_dictionary_string_nonunique").stringify() ); Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); inferred = thePlan.inferColumnCapabilities(null); @@ -410,17 +419,35 @@ public void testScalarStringDictionaryEncoded() Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); + + // array output of dictionary encoded string are not considered single scalar/mappable, nor vectorizable + thePlan = plan("array(scalar_dictionary_string)"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); } @Test public void testMultiValueStringDictionaryEncoded() { - ExpressionPlan thePlan = plan("concat(m1, 'x')"); + ExpressionPlan thePlan = plan("concat(multi_dictionary_string, 'x')"); Assert.assertTrue( thePlan.is( ExpressionPlan.Trait.NEEDS_APPLIED, - ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, - ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE ) ); Assert.assertFalse( @@ -428,6 +455,7 @@ public void testMultiValueStringDictionaryEncoded() ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.VECTORIZABLE ) ); @@ -444,11 +472,10 @@ public void testMultiValueStringDictionaryEncoded() Assert.assertTrue(inferred.hasBitmapIndexes()); 
Assert.assertFalse(inferred.hasSpatialIndexes()); - thePlan = plan("concat(s1, m2)"); + thePlan = plan("concat(scalar_string, multi_dictionary_string_nonunique)"); Assert.assertTrue( thePlan.is( - ExpressionPlan.Trait.NEEDS_APPLIED, - ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ExpressionPlan.Trait.NEEDS_APPLIED ) ); Assert.assertFalse( @@ -456,16 +483,17 @@ public void testMultiValueStringDictionaryEncoded() ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.VECTORIZABLE ) ); Assert.assertEquals( - "map((\"m2\") -> concat(\"s1\", \"m2\"), \"m2\")", + "map((\"multi_dictionary_string_nonunique\") -> concat(\"scalar_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string_nonunique\")", thePlan.getAppliedExpression().stringify() ); Assert.assertEquals( - "fold((\"m2\", \"s1\") -> concat(\"s1\", \"m2\"), \"m2\", \"s1\")", - thePlan.getAppliedFoldExpression("s1").stringify() + "fold((\"multi_dictionary_string_nonunique\", \"scalar_string\") -> concat(\"scalar_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string_nonunique\", \"scalar_string\")", + thePlan.getAppliedFoldExpression("scalar_string").stringify() ); Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); inferred = thePlan.inferColumnCapabilities(null); @@ -473,11 +501,10 @@ public void testMultiValueStringDictionaryEncoded() Assert.assertEquals(ValueType.STRING, inferred.getType()); Assert.assertTrue(inferred.hasMultipleValues().isTrue()); - thePlan = plan("concat(m1, m2)"); + thePlan = plan("concat(multi_dictionary_string, multi_dictionary_string_nonunique)"); Assert.assertTrue( thePlan.is( - ExpressionPlan.Trait.NEEDS_APPLIED, - ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ExpressionPlan.Trait.NEEDS_APPLIED ) ); Assert.assertFalse( @@ -485,24 +512,41 @@ public void testMultiValueStringDictionaryEncoded() 
ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.VECTORIZABLE ) ); Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); // whoa Assert.assertEquals( - "cartesian_map((\"m1\", \"m2\") -> concat(\"m1\", \"m2\"), \"m1\", \"m2\")", + "cartesian_map((\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\") -> concat(\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string\", \"multi_dictionary_string_nonunique\")", thePlan.getAppliedExpression().stringify() ); // sort of funny, but technically correct Assert.assertEquals( - "cartesian_fold((\"m1\", \"m2\", \"__acc\") -> concat(\"m1\", \"m2\"), \"m1\", \"m2\", \"__acc\")", + "cartesian_fold((\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\", \"__acc\") -> concat(\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string\", \"multi_dictionary_string_nonunique\", \"__acc\")", thePlan.getAppliedFoldExpression("__acc").stringify() ); inferred = thePlan.inferColumnCapabilities(null); Assert.assertNotNull(inferred); Assert.assertEquals(ValueType.STRING, inferred.getType()); Assert.assertTrue(inferred.hasMultipleValues().isTrue()); + + thePlan = plan("array_append(multi_dictionary_string, 'foo')"); + Assert.assertTrue( + thePlan.is( + ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ) + ); + Assert.assertFalse( + thePlan.is( + ExpressionPlan.Trait.NEEDS_APPLIED, + ExpressionPlan.Trait.INCOMPLETE_INPUTS, + ExpressionPlan.Trait.UNKNOWN_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.VECTORIZABLE + ) + ); } @Test @@ -512,12 +556,11 @@ public void testMultiValueStringDictionaryEncodedIllegalAccumulator() expectedException.expectMessage( "Accumulator cannot be implicitly transformed, if it is an ARRAY or multi-valued type it must be used explicitly as such" ); - ExpressionPlan 
thePlan = plan("concat(m1, 'x')"); + ExpressionPlan thePlan = plan("concat(multi_dictionary_string, 'x')"); Assert.assertTrue( thePlan.is( ExpressionPlan.Trait.NEEDS_APPLIED, - ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, - ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE ) ); Assert.assertFalse( @@ -525,16 +568,16 @@ public void testMultiValueStringDictionaryEncodedIllegalAccumulator() ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.VECTORIZABLE ) ); Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); - thePlan = plan("concat(m1, m2)"); + thePlan = plan("concat(multi_dictionary_string, multi_dictionary_string_nonunique)"); Assert.assertTrue( thePlan.is( - ExpressionPlan.Trait.NEEDS_APPLIED, - ExpressionPlan.Trait.NON_SCALAR_OUTPUT + ExpressionPlan.Trait.NEEDS_APPLIED ) ); Assert.assertFalse( @@ -542,19 +585,20 @@ public void testMultiValueStringDictionaryEncodedIllegalAccumulator() ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.NON_SCALAR_INPUTS, + ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.VECTORIZABLE ) ); // what happens if we try to use a multi-valued input that was not explicitly used as multi-valued as the // accumulator? 
- thePlan.getAppliedFoldExpression("m1"); + thePlan.getAppliedFoldExpression("multi_dictionary_string"); Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); } @Test public void testIncompleteString() { - ExpressionPlan thePlan = plan("concat(s4, 'x')"); + ExpressionPlan thePlan = plan("concat(string_unknown, 'x')"); Assert.assertTrue( thePlan.is( ExpressionPlan.Trait.INCOMPLETE_INPUTS @@ -573,8 +617,8 @@ public void testIncompleteString() ); // incomplete inputs are not transformed either, rather this will need to be detected and handled on a row-by-row // basis - Assert.assertEquals("concat(\"s4\", 'x')", thePlan.getAppliedExpression().stringify()); - Assert.assertEquals("concat(\"s4\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals("concat(\"string_unknown\", 'x')", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("concat(\"string_unknown\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); // incomplete and unknown skip output type since we don't reliably know Assert.assertNull(thePlan.getOutputType()); Assert.assertNull(thePlan.inferColumnCapabilities(null)); @@ -585,7 +629,7 @@ public void testArrayOutput() { // its ok to use scalar inputs to array expressions, string columns cant help it if sometimes they are single // valued and sometimes they are multi-valued - ExpressionPlan thePlan = plan("array_append(s1, 'x')"); + ExpressionPlan thePlan = plan("array_append(scalar_string, 'x')"); assertArrayInAndOut(thePlan); // with a string hint, it should look like a multi-valued string ColumnCapabilities inferred = thePlan.inferColumnCapabilities(ValueType.STRING); @@ -612,21 +656,21 @@ public void testArrayOutput() Assert.assertFalse(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); - Assert.assertEquals("array_append(\"s1\", 'x')", thePlan.getAppliedExpression().stringify()); - Assert.assertEquals("array_append(\"s1\", 'x')", 
thePlan.getAppliedFoldExpression("__acc").stringify()); + Assert.assertEquals("array_append(\"scalar_string\", 'x')", thePlan.getAppliedExpression().stringify()); + Assert.assertEquals("array_append(\"scalar_string\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); Assert.assertEquals(ExprType.STRING_ARRAY, thePlan.getOutputType()); // multi-valued are cool too - thePlan = plan("array_append(m1, 'x')"); + thePlan = plan("array_append(multi_dictionary_string, 'x')"); assertArrayInAndOut(thePlan); // what about incomplete inputs with arrays? they are not reported as incomplete because they are treated as arrays - thePlan = plan("array_append(s4, 'x')"); + thePlan = plan("array_append(string_unknown, 'x')"); assertArrayInAndOut(thePlan); Assert.assertEquals(ExprType.STRING_ARRAY, thePlan.getOutputType()); // what about if it is the scalar argument? there it is - thePlan = plan("array_append(m1, s4)"); + thePlan = plan("array_append(multi_dictionary_string, string_unknown)"); Assert.assertTrue( thePlan.is( ExpressionPlan.Trait.NON_SCALAR_INPUTS, @@ -647,10 +691,10 @@ public void testArrayOutput() Assert.assertNull(thePlan.getOutputType()); // array types are cool too - thePlan = plan("array_append(sa1, 'x')"); + thePlan = plan("array_append(string_array_1, 'x')"); assertArrayInAndOut(thePlan); - thePlan = plan("array_append(sa1, 'x')"); + thePlan = plan("array_append(string_array_1, 'x')"); assertArrayInAndOut(thePlan); } @@ -658,7 +702,7 @@ public void testArrayOutput() @Test public void testScalarOutputMultiValueInput() { - ExpressionPlan thePlan = plan("array_to_string(array_append(s1, 'x'), ',')"); + ExpressionPlan thePlan = plan("array_to_string(array_append(scalar_string, 'x'), ',')"); assertArrayInput(thePlan); ColumnCapabilities inferred = thePlan.inferColumnCapabilities(ValueType.STRING); Assert.assertNotNull(inferred); @@ -672,24 +716,27 @@ public void testScalarOutputMultiValueInput() Assert.assertFalse(inferred.hasBitmapIndexes()); 
Assert.assertFalse(inferred.hasSpatialIndexes()); - Assert.assertEquals("array_to_string(array_append(\"s1\", 'x'), ',')", thePlan.getAppliedExpression().stringify()); Assert.assertEquals( - "array_to_string(array_append(\"s1\", 'x'), ',')", + "array_to_string(array_append(\"scalar_string\", 'x'), ',')", + thePlan.getAppliedExpression().stringify() + ); + Assert.assertEquals( + "array_to_string(array_append(\"scalar_string\", 'x'), ',')", thePlan.getAppliedFoldExpression("__acc").stringify() ); Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); // what about a multi-valued input - thePlan = plan("array_to_string(array_append(s1, m1), ',')"); + thePlan = plan("array_to_string(array_append(scalar_string, multi_dictionary_string), ',')"); assertArrayInput(thePlan); Assert.assertEquals( - "array_to_string(map((\"m1\") -> array_append(\"s1\", \"m1\"), \"m1\"), ',')", + "array_to_string(map((\"multi_dictionary_string\") -> array_append(\"scalar_string\", \"multi_dictionary_string\"), \"multi_dictionary_string\"), ',')", thePlan.getAppliedExpression().stringify() ); Assert.assertEquals( - "array_to_string(fold((\"m1\", \"s1\") -> array_append(\"s1\", \"m1\"), \"m1\", \"s1\"), ',')", - thePlan.getAppliedFoldExpression("s1").stringify() + "array_to_string(fold((\"multi_dictionary_string\", \"scalar_string\") -> array_append(\"scalar_string\", \"multi_dictionary_string\"), \"multi_dictionary_string\", \"scalar_string\"), ',')", + thePlan.getAppliedFoldExpression("scalar_string").stringify() ); // why is this null Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); @@ -698,41 +745,44 @@ public void testScalarOutputMultiValueInput() @Test public void testScalarOutputArrayInput() { - ExpressionPlan thePlan = plan("array_to_string(array_append(sa1, 'x'), ',')"); + ExpressionPlan thePlan = plan("array_to_string(array_append(string_array_1, 'x'), ',')"); assertArrayInput(thePlan); - Assert.assertEquals("array_to_string(array_append(\"sa1\", 'x'), ',')", 
thePlan.getAppliedExpression().stringify()); Assert.assertEquals( - "array_to_string(array_append(\"sa1\", 'x'), ',')", + "array_to_string(array_append(\"string_array_1\", 'x'), ',')", + thePlan.getAppliedExpression().stringify() + ); + Assert.assertEquals( + "array_to_string(array_append(\"string_array_1\", 'x'), ',')", thePlan.getAppliedFoldExpression("__acc").stringify() ); Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); - thePlan = plan("array_to_string(array_concat(sa1, sa2), ',')"); + thePlan = plan("array_to_string(array_concat(string_array_1, string_array_2), ',')"); assertArrayInput(thePlan); Assert.assertEquals(ExprType.STRING, thePlan.getOutputType()); - thePlan = plan("fold((x, acc) -> acc + x, array_concat(la1, la2), 0)"); + thePlan = plan("fold((x, acc) -> acc + x, array_concat(long_array_1, long_array_2), 0)"); assertArrayInput(thePlan); Assert.assertEquals( - "fold((\"x\", \"acc\") -> (\"acc\" + \"x\"), array_concat(\"la1\", \"la2\"), 0)", + "fold((\"x\", \"acc\") -> (\"acc\" + \"x\"), array_concat(\"long_array_1\", \"long_array_2\"), 0)", thePlan.getAppliedExpression().stringify() ); Assert.assertEquals( - "fold((\"x\", \"acc\") -> (\"acc\" + \"x\"), array_concat(\"la1\", \"la2\"), 0)", + "fold((\"x\", \"acc\") -> (\"acc\" + \"x\"), array_concat(\"long_array_1\", \"long_array_2\"), 0)", thePlan.getAppliedFoldExpression("__acc").stringify() ); Assert.assertEquals(ExprType.LONG, thePlan.getOutputType()); - thePlan = plan("fold((x, acc) -> acc * x, array_concat(da1, da2), 0.0)"); + thePlan = plan("fold((x, acc) -> acc * x, array_concat(double_array_1, double_array_2), 0.0)"); assertArrayInput(thePlan); Assert.assertEquals( - "fold((\"x\", \"acc\") -> (\"acc\" * \"x\"), array_concat(\"da1\", \"da2\"), 0.0)", + "fold((\"x\", \"acc\") -> (\"acc\" * \"x\"), array_concat(\"double_array_1\", \"double_array_2\"), 0.0)", thePlan.getAppliedExpression().stringify() ); Assert.assertEquals( - "fold((\"x\", \"acc\") -> (\"acc\" * \"x\"), 
array_concat(\"da1\", \"da2\"), 0.0)", + "fold((\"x\", \"acc\") -> (\"acc\" * \"x\"), array_concat(\"double_array_1\", \"double_array_2\"), 0.0)", thePlan.getAppliedFoldExpression("__acc").stringify() ); Assert.assertEquals(ExprType.DOUBLE, thePlan.getOutputType()); @@ -741,7 +791,7 @@ public void testScalarOutputArrayInput() @Test public void testArrayConstruction() { - ExpressionPlan thePlan = plan("array(l1, l2)"); + ExpressionPlan thePlan = plan("array(long1, long2)"); Assert.assertTrue( thePlan.is( ExpressionPlan.Trait.NON_SCALAR_OUTPUT @@ -760,9 +810,9 @@ public void testArrayConstruction() ); Assert.assertEquals(ExprType.LONG_ARRAY, thePlan.getOutputType()); - thePlan = plan("array(l1, d1)"); + thePlan = plan("array(long1, double1)"); Assert.assertEquals(ExprType.DOUBLE_ARRAY, thePlan.getOutputType()); - thePlan = plan("array(l1, d1, s1)"); + thePlan = plan("array(long1, double1, scalar_string)"); Assert.assertEquals(ExprType.STRING_ARRAY, thePlan.getOutputType()); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java index 03087832abe4..d7464d9e2e98 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java @@ -651,7 +651,6 @@ public void testNullParameter() throws Exception { // contrived example of using null as an sql parameter to at least test the codepath because lots of things dont // actually work as null and things like 'IS NULL' fail to parse in calcite if expressed as 'IS ?' 
- cannotVectorize(); // this will optimize out the 3rd argument because 2nd argument will be constant and not null testQuery( @@ -703,7 +702,7 @@ public void testNullParameter() throws Exception ValueType.STRING ) ) - .setDimensions(dimensions(new DefaultDimensionSpec("v0", "v0", ValueType.STRING))) + .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ValueType.STRING))) .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index e93af44d30f2..2b5b16395f93 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -288,8 +288,8 @@ public void testSelectConstantExpressionFromTable() throws Exception @Test public void testGroupByWithPostAggregatorReferencingTimeFloorColumnOnTimeseries() throws Exception { + // cannot vectorize due to virtual columns cannotVectorize(); - testQuery( "SELECT TIME_FORMAT(\"date\", 'yyyy-MM'), SUM(x)\n" + "FROM (\n" @@ -7415,9 +7415,8 @@ public void testSelectDistinctWithCascadeExtractionFilter() throws Exception @Test public void testSelectDistinctWithStrlenFilter() throws Exception { - // Cannot vectorize due to usage of expressions. + // cannot vectorize due to virtual columns cannotVectorize(); - testQuery( "SELECT distinct dim1 FROM druid.foo " + "WHERE CHARACTER_LENGTH(dim1) = 3 OR CAST(CHARACTER_LENGTH(dim1) AS varchar) = 3", @@ -9110,10 +9109,6 @@ public void testCountDistinctOfSubstring() throws Exception public void testCountDistinctOfTrim() throws Exception { // Test a couple different syntax variants of TRIM. - - // Cannot vectorize due to virtual columns. 
- cannotVectorize(); - testQuery( "SELECT COUNT(DISTINCT TRIM(BOTH ' ' FROM dim1)) FROM druid.foo WHERE TRIM(dim1) <> ''", ImmutableList.of( @@ -9230,9 +9225,6 @@ public void testRegexpExtract() throws Exception @Test public void testRegexpExtractFilterViaNotNullCheck() throws Exception { - // Cannot vectorize due to extractionFn in dimension spec. - cannotVectorize(); - testQuery( "SELECT COUNT(*)\n" + "FROM foo\n" @@ -10005,9 +9997,8 @@ public void testGroupByFloorTimeAndOneOtherDimensionWithOrderBy() throws Excepti @Test public void testGroupByStringLength() throws Exception { - // Cannot vectorize due to virtual columns. + // cannot vectorize due to virtual columns cannotVectorize(); - testQuery( "SELECT CHARACTER_LENGTH(dim1), COUNT(*) FROM druid.foo GROUP BY CHARACTER_LENGTH(dim1)", ImmutableList.of( @@ -12497,9 +12488,8 @@ public void testGroupByExtractYear() throws Exception @Test public void testGroupByFormatYearAndMonth() throws Exception { - // Cannot vectorize due to virtual columns. + // cannot vectorize due to virtual columns cannotVectorize(); - testQuery( "SELECT\n" + " TIME_FORMAt(__time, 'yyyy MM') AS \"year\",\n" @@ -12804,9 +12794,6 @@ public void testGroupByTimeAndOtherDimension() throws Exception @Test public void testGroupingSets() throws Exception { - // Cannot vectorize due to virtual columns. - cannotVectorize(); - testQuery( "SELECT dim2, gran, SUM(cnt), GROUPING(dim2, gran)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -12869,10 +12856,6 @@ public void testGroupingSets() throws Exception public void testGroupingAggregatorDifferentOrder() throws Exception { requireMergeBuffers(3); - - // Cannot vectorize due to virtual columns. 
- cannotVectorize(); - testQuery( "SELECT dim2, gran, SUM(cnt), GROUPING(gran, dim2)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13022,9 +13005,6 @@ public void testGroupingSetsWithNumericDimension() throws Exception @Test public void testGroupByRollup() throws Exception { - // Cannot vectorize due to virtual columns. - cannotVectorize(); - testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13080,9 +13060,6 @@ public void testGroupByRollup() throws Exception @Test public void testGroupByRollupDifferentOrder() throws Exception { - // Cannot vectorize due to virtual columns. - cannotVectorize(); - // Like "testGroupByRollup", but the ROLLUP exprs are in the reverse order. testQuery( "SELECT dim2, gran, SUM(cnt)\n" @@ -13138,9 +13115,6 @@ public void testGroupByRollupDifferentOrder() throws Exception @Test public void testGroupByCube() throws Exception { - // Cannot vectorize due to virtual columns. - cannotVectorize(); - testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13199,9 +13173,6 @@ public void testGroupByCube() throws Exception @Test public void testGroupingSetsWithDummyDimension() throws Exception { - // Cannot vectorize due to virtual columns. - cannotVectorize(); - testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13260,9 +13231,6 @@ public void testGroupingSetsWithDummyDimension() throws Exception @Test public void testGroupingSetsNoSuperset() throws Exception { - // Cannot vectorize due to virtual columns. - cannotVectorize(); - // Note: the grouping sets are reordered in the output of this query, but this is allowed. 
testQuery( "SELECT dim2, gran, SUM(cnt)\n" @@ -13316,9 +13284,6 @@ public void testGroupingSetsNoSuperset() throws Exception @Test public void testGroupingSetsWithOrderByDimension() throws Exception { - // Cannot vectorize due to virtual columns. - cannotVectorize(); - testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13389,9 +13354,6 @@ public void testGroupingSetsWithOrderByDimension() throws Exception @Test public void testGroupingSetsWithOrderByAggregator() throws Exception { - // Cannot vectorize due to virtual columns. - cannotVectorize(); - testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13457,9 +13419,6 @@ public void testGroupingSetsWithOrderByAggregator() throws Exception @Test public void testGroupingSetsWithOrderByAggregatorWithLimit() throws Exception { - // Cannot vectorize due to virtual columns. - cannotVectorize(); - testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -17600,9 +17559,6 @@ public void testTimeStampAddConversion() throws Exception @Test public void testGroupingSetsWithLimit() throws Exception { - // Cannot vectorize due to virtual columns. - cannotVectorize(); - testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -17667,9 +17623,6 @@ public void testGroupingSetsWithLimit() throws Exception @Test public void testGroupingSetsWithLimitOrderByGran() throws Exception { - // Cannot vectorize due to virtual columns. 
- cannotVectorize(); - testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" From 22e8f2faa38b6a91c1588d51040d80124dd52eab Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 May 2021 18:24:28 -0700 Subject: [PATCH 03/14] fixes --- .../hll/sql/HllSketchSqlAggregatorTest.java | 3 --- .../theta/sql/ThetaSketchSqlAggregatorTest.java | 3 --- ...edBucketsHistogramQuantileSqlAggregatorTest.java | 4 ---- .../histogram/sql/QuantileSqlAggregatorTest.java | 1 - .../query/timeseries/TimeseriesQueryEngine.java | 2 +- .../druid/sql/calcite/BaseCalciteQueryTest.java | 13 ++++++++----- 6 files changed, 9 insertions(+), 17 deletions(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 2c71fa429ebb..6c0363041c83 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -139,9 +139,6 @@ public DruidOperatorTable createOperatorTable() @Test public void testApproxCountDistinctHllSketch() throws Exception { - // Can't vectorize due to SUBSTRING expression. 
- cannotVectorize(); - final List expectedResults; if (NullHandling.replaceWithDefault()) { diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index 963a69c80110..aa967d3bde4b 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -137,9 +137,6 @@ public DruidOperatorTable createOperatorTable() @Test public void testApproxCountDistinctThetaSketch() throws Exception { - // Cannot vectorize due to SUBSTRING. - cannotVectorize(); - final List expectedResults; if (NullHandling.replaceWithDefault()) { diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java index 948d63dfe070..35341c24de71 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java @@ -123,8 +123,6 @@ public DruidOperatorTable createOperatorTable() @Test public void testQuantileOnFloatAndLongs() throws Exception { - cannotVectorize(); - final List expectedResults = ImmutableList.of( new Object[]{ 1.0299999713897705, @@ -238,8 +236,6 @@ public void testQuantileOnFloatAndLongs() throws Exception @Test public void testQuantileOnCastedString() throws Exception { - 
cannotVectorize(); - testQuery( "SELECT\n" + "APPROX_QUANTILE_FIXED_BUCKETS(CAST(dim1 AS DOUBLE), 0.01, 20, 0.0, 10.0),\n" diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java index 48993ca9a122..720d2fa8b1ca 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java @@ -121,7 +121,6 @@ public DruidOperatorTable createOperatorTable() @Test public void testQuantileOnFloatAndLongs() throws Exception { - cannotVectorize(); testQuery( "SELECT\n" + "APPROX_QUANTILE(m1, 0.01),\n" diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java index 113dd9b4487d..e93cc973c6d7 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java @@ -67,7 +67,7 @@ public class TimeseriesQueryEngine @VisibleForTesting public TimeseriesQueryEngine() { - this.bufferPool = new StupidPool<>("dummy", () -> ByteBuffer.allocate(1000000)); + this.bufferPool = new StupidPool<>("dummy", () -> ByteBuffer.allocate(10000000)); } @Inject diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index 645f9b9e0b9c..14a46b1235fc 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -218,12 +218,15 @@ public DateTimeZone getSqlTimeZone() // Add 
additional context to the given context map for when the // timeseries query has timestamp_floor expression on the timestamp dimension - public static Map getTimeseriesContextWithFloorTime(Map context, - String timestampResultField) + public static Map getTimeseriesContextWithFloorTime( + Map context, + String timestampResultField + ) { - return ImmutableMap.builder().putAll(context) - .put(TimeseriesQuery.CTX_TIMESTAMP_RESULT_FIELD, timestampResultField) - .build(); + return ImmutableMap.builder() + .putAll(context) + .put(TimeseriesQuery.CTX_TIMESTAMP_RESULT_FIELD, timestampResultField) + .build(); } // Matches QUERY_CONTEXT_LOS_ANGELES From 516a4a5aae1a4769c1a3bcd01d39405b7c2eeb43 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 May 2021 18:44:00 -0700 Subject: [PATCH 04/14] oops --- .../apache/druid/segment/virtual/ExpressionSelectors.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index d8cde468ac7d..079270459bd1 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -139,7 +139,7 @@ public static ColumnValueSelector makeExprEvalSelector( ExpressionPlan plan ) { - if (plan.is(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR) && !plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT)) { + if (plan.is(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR)) { final String column = plan.getSingleInputName(); final ValueType inputType = plan.getSingleInputType(); if (inputType == ValueType.LONG) { @@ -184,8 +184,7 @@ public static DimensionSelector makeDimensionSelector( { final ExpressionPlan plan = ExpressionPlanner.plan(columnSelectorFactory, expression); - if (plan.any(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE) - && 
!plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT)) { + if (plan.any(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE)) { final String column = plan.getSingleInputName(); if (plan.getSingleInputType() == ValueType.STRING) { return new SingleStringInputDimensionSelector( From 79468b182bf7ccf23b81d656ee4ce378cf6c29a9 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 May 2021 21:10:12 -0700 Subject: [PATCH 05/14] rework how vector processor factories choose string processors, fix to be less aggressive about vectorizing --- .../hll/sql/HllSketchSqlAggregatorTest.java | 3 + .../sql/ThetaSketchSqlAggregatorTest.java | 3 + ...etsHistogramQuantileSqlAggregatorTest.java | 4 ++ .../sql/QuantileSqlAggregatorTest.java | 2 + .../GroupByVectorColumnProcessorFactory.java | 12 ++++ .../druid/segment/ColumnProcessors.java | 4 +- .../segment/VectorColumnProcessorFactory.java | 18 ++++++ .../segment/virtual/ExpressionPlanner.java | 17 ------ .../calcite/CalciteParameterQueryTest.java | 1 + .../druid/sql/calcite/CalciteQueryTest.java | 55 +++++++++++++++++-- 10 files changed, 96 insertions(+), 23 deletions(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 6c0363041c83..2c71fa429ebb 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -139,6 +139,9 @@ public DruidOperatorTable createOperatorTable() @Test public void testApproxCountDistinctHllSketch() throws Exception { + // Can't vectorize due to SUBSTRING expression. 
+ cannotVectorize(); + final List expectedResults; if (NullHandling.replaceWithDefault()) { diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index aa967d3bde4b..963a69c80110 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -137,6 +137,9 @@ public DruidOperatorTable createOperatorTable() @Test public void testApproxCountDistinctThetaSketch() throws Exception { + // Cannot vectorize due to SUBSTRING. + cannotVectorize(); + final List expectedResults; if (NullHandling.replaceWithDefault()) { diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java index 35341c24de71..948d63dfe070 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java @@ -123,6 +123,8 @@ public DruidOperatorTable createOperatorTable() @Test public void testQuantileOnFloatAndLongs() throws Exception { + cannotVectorize(); + final List expectedResults = ImmutableList.of( new Object[]{ 1.0299999713897705, @@ -236,6 +238,8 @@ public void testQuantileOnFloatAndLongs() throws Exception @Test public void testQuantileOnCastedString() throws Exception { + 
cannotVectorize(); + testQuery( "SELECT\n" + "APPROX_QUANTILE_FIXED_BUCKETS(CAST(dim1 AS DOUBLE), 0.01, 20, 0.0, 10.0),\n" diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java index 720d2fa8b1ca..5d803ef4fe76 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java @@ -121,6 +121,8 @@ public DruidOperatorTable createOperatorTable() @Test public void testQuantileOnFloatAndLongs() throws Exception { + cannotVectorize(); + testQuery( "SELECT\n" + "APPROX_QUANTILE(m1, 0.01),\n" diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java index 069751d1eace..d6f4e898813e 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java @@ -117,4 +117,16 @@ public GroupByVectorColumnSelector makeObjectProcessor( } return NilGroupByVectorColumnSelector.INSTANCE; } + + /** + * The group by engine vector processor has a more relaxed approach to choosing to use a dictionary encoded string + * selector + */ + @Override + public boolean useDictionaryEncodedSelector(ColumnCapabilities capabilities) + { + Preconditions.checkArgument(capabilities != null, "Capabilities must not be null"); + Preconditions.checkArgument(capabilities.getType() == ValueType.STRING, "Must only be called on a STRING column"); + return 
capabilities.isDictionaryEncoded().isTrue(); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java index 7743b8966e78..195c1d696b5c 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java @@ -332,8 +332,8 @@ private static T makeVectorProcessorInternal( switch (capabilities.getType()) { case STRING: - // if column is not uniquely dictionary encoded, use an object selector - if (capabilities.isDictionaryEncoded().isFalse()) { + // let the processor factory decide if it prefers to use an object selector or dictionary encoded selector + if (!processorFactory.useDictionaryEncodedSelector(capabilities)) { return processorFactory.makeObjectProcessor( capabilities, objectSelectorFn.apply(selectorFactory) diff --git a/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java index b2df1f9a37dc..66e2a178a443 100644 --- a/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java @@ -19,7 +19,9 @@ package org.apache.druid.segment; +import com.google.common.base.Preconditions; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -86,4 +88,20 @@ T makeMultiValueDimensionProcessor( * cases where the dictionary does not exist or is not expected to be useful. 
*/ T makeObjectProcessor(@SuppressWarnings("unused") ColumnCapabilities capabilities, VectorObjectSelector selector); + + /** + * The processor factory can influence the decision on whether or not to prefer a dictionary encoded column value + * selector over a an object selector by examining the {@link ColumnCapabilities}. + * + * By default, all processor factories prefer to use a dictionary encoded selector if the column has a dictionary + * available ({@link ColumnCapabilities#isDictionaryEncoded()} is true), and there is a unique mapping of dictionary + * id to value ({@link ColumnCapabilities#areDictionaryValuesUnique()} is true), but this can be overridden + * if there is more appropriate behavior for a given processor. + */ + default boolean useDictionaryEncodedSelector(ColumnCapabilities capabilities) + { + Preconditions.checkArgument(capabilities != null, "Capabilities must not be null"); + Preconditions.checkArgument(capabilities.getType() == ValueType.STRING, "Must only be called on a STRING column"); + return capabilities.isDictionaryEncoded().and(capabilities.areDictionaryValuesUnique()).isTrue(); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java index e180b9eb0368..38f21f8022dc 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java @@ -188,23 +188,6 @@ public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) // due to unknown inputs, but that's ok here since it just means it doesnt exist outputType = expression.getOutputType(inspector); traits.add(ExpressionPlan.Trait.VECTORIZABLE); - } else if (supportsVector && ExpressionPlan.is(traits, ExpressionPlan.Trait.SINGLE_INPUT_SCALAR)) { - // single input scalar expressions are currently vectorizable even if the expression is not - 
// this is kind of gross because this is actually only vectorizable if you're using a dictionary encoded - // vector selector, and not a vector object selector, but hopefully things check that a dictionary exists - // and prefer to use it... - // but the least we can do is check that the column is in fact dictionary encoded to try to sway the hands of fate - ColumnCapabilities columnCapabilities = - inspector.getColumnCapabilities(Iterables.getOnlyElement(analysis.getRequiredBindings())); - if (columnCapabilities != null - && columnCapabilities.isDictionaryEncoded().isTrue() - && columnCapabilities.hasMultipleValues().isFalse()) { - outputType = expression.getOutputType(inspector); - // must be a string - if (outputType == ExprType.STRING) { - traits.add(ExpressionPlan.Trait.VECTORIZABLE); - } - } } return new ExpressionPlan( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java index 5582bc1407fa..110b15254820 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java @@ -649,6 +649,7 @@ public void testWrongTypeParameter() throws Exception @Test public void testNullParameter() throws Exception { + cannotVectorize(); // contrived example of using null as an sql parameter to at least test the codepath because lots of things dont // actually work as null and things like 'IS NULL' fail to parse in calcite if expressed as 'IS ?' 
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 1cf6e8b6475f..976d61a0e202 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -291,8 +291,8 @@ public void testSelectConstantExpressionFromTable() throws Exception @Test public void testGroupByWithPostAggregatorReferencingTimeFloorColumnOnTimeseries() throws Exception { - // cannot vectorize due to virtual columns cannotVectorize(); + testQuery( "SELECT TIME_FORMAT(\"date\", 'yyyy-MM'), SUM(x)\n" + "FROM (\n" @@ -7448,8 +7448,9 @@ public void testSelectDistinctWithCascadeExtractionFilter() throws Exception @Test public void testSelectDistinctWithStrlenFilter() throws Exception { - // cannot vectorize due to virtual columns + // Cannot vectorize due to usage of expressions. cannotVectorize(); + testQuery( "SELECT distinct dim1 FROM druid.foo " + "WHERE CHARACTER_LENGTH(dim1) = 3 OR CAST(CHARACTER_LENGTH(dim1) AS varchar) = 3", @@ -9173,6 +9174,10 @@ public void testCountDistinctOfSubstring() throws Exception public void testCountDistinctOfTrim() throws Exception { // Test a couple different syntax variants of TRIM. + + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT COUNT(DISTINCT TRIM(BOTH ' ' FROM dim1)) FROM druid.foo WHERE TRIM(dim1) <> ''", ImmutableList.of( @@ -9289,6 +9294,9 @@ public void testRegexpExtract() throws Exception @Test public void testRegexpExtractFilterViaNotNullCheck() throws Exception { + // Cannot vectorize due to extractionFn in dimension spec. 
+ cannotVectorize(); + testQuery( "SELECT COUNT(*)\n" + "FROM foo\n" @@ -10061,8 +10069,9 @@ public void testGroupByFloorTimeAndOneOtherDimensionWithOrderBy() throws Excepti @Test public void testGroupByStringLength() throws Exception { - // cannot vectorize due to virtual columns + // Cannot vectorize due to virtual columns. cannotVectorize(); + testQuery( "SELECT CHARACTER_LENGTH(dim1), COUNT(*) FROM druid.foo GROUP BY CHARACTER_LENGTH(dim1)", ImmutableList.of( @@ -12905,8 +12914,9 @@ public void testGroupByExtractYear() throws Exception @Test public void testGroupByFormatYearAndMonth() throws Exception { - // cannot vectorize due to virtual columns + // Cannot vectorize due to virtual columns. cannotVectorize(); + testQuery( "SELECT\n" + " TIME_FORMAt(__time, 'yyyy MM') AS \"year\",\n" @@ -13211,6 +13221,9 @@ public void testGroupByTimeAndOtherDimension() throws Exception @Test public void testGroupingSets() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt), GROUPING(dim2, gran)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13273,6 +13286,10 @@ public void testGroupingSets() throws Exception public void testGroupingAggregatorDifferentOrder() throws Exception { requireMergeBuffers(3); + + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt), GROUPING(gran, dim2)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13422,6 +13439,9 @@ public void testGroupingSetsWithNumericDimension() throws Exception @Test public void testGroupByRollup() throws Exception { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13477,6 +13497,9 @@ public void testGroupByRollup() throws Exception @Test public void testGroupByRollupDifferentOrder() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + // Like "testGroupByRollup", but the ROLLUP exprs are in the reverse order. testQuery( "SELECT dim2, gran, SUM(cnt)\n" @@ -13532,6 +13555,9 @@ public void testGroupByRollupDifferentOrder() throws Exception @Test public void testGroupByCube() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13590,6 +13616,9 @@ public void testGroupByCube() throws Exception @Test public void testGroupingSetsWithDummyDimension() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13648,6 +13677,9 @@ public void testGroupingSetsWithDummyDimension() throws Exception @Test public void testGroupingSetsNoSuperset() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + // Note: the grouping sets are reordered in the output of this query, but this is allowed. testQuery( "SELECT dim2, gran, SUM(cnt)\n" @@ -13701,6 +13733,9 @@ public void testGroupingSetsNoSuperset() throws Exception @Test public void testGroupingSetsWithOrderByDimension() throws Exception { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13771,6 +13806,9 @@ public void testGroupingSetsWithOrderByDimension() throws Exception @Test public void testGroupingSetsWithOrderByAggregator() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -13836,6 +13874,9 @@ public void testGroupingSetsWithOrderByAggregator() throws Exception @Test public void testGroupingSetsWithOrderByAggregatorWithLimit() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -17976,6 +18017,9 @@ public void testTimeStampAddConversion() throws Exception @Test public void testGroupingSetsWithLimit() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" @@ -18040,6 +18084,9 @@ public void testGroupingSetsWithLimit() throws Exception @Test public void testGroupingSetsWithLimitOrderByGran() throws Exception { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" From 66eceb51fe6ecbf1f113b1bfec0aaeee1265935e Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 13 May 2021 00:07:11 -0700 Subject: [PATCH 06/14] oops --- .../sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java | 4 ---- .../aggregation/histogram/sql/QuantileSqlAggregatorTest.java | 2 -- 2 files changed, 6 deletions(-) diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java index 948d63dfe070..35341c24de71 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java @@ -123,8 +123,6 @@ public DruidOperatorTable createOperatorTable() @Test public void testQuantileOnFloatAndLongs() throws Exception { - cannotVectorize(); - final List expectedResults = ImmutableList.of( new Object[]{ 1.0299999713897705, @@ -238,8 +236,6 @@ public void testQuantileOnFloatAndLongs() throws Exception @Test public void testQuantileOnCastedString() throws Exception { - cannotVectorize(); - testQuery( "SELECT\n" + "APPROX_QUANTILE_FIXED_BUCKETS(CAST(dim1 AS DOUBLE), 0.01, 20, 0.0, 10.0),\n" diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java index 5d803ef4fe76..720d2fa8b1ca 100644 --- 
a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java @@ -121,8 +121,6 @@ public DruidOperatorTable createOperatorTable() @Test public void testQuantileOnFloatAndLongs() throws Exception { - cannotVectorize(); - testQuery( "SELECT\n" + "APPROX_QUANTILE(m1, 0.01),\n" From 1d3f5901a154ca35d8df0f8d7dffc60c0100472e Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 13 May 2021 03:32:35 -0700 Subject: [PATCH 07/14] javadocs, renaming --- .../segment/virtual/ExpressionSelectors.java | 2 +- .../virtual/ExpressionVectorSelectors.java | 2 +- ...redEvaluationExpressionDimensionSelector.java} | 15 +++++++++++---- ...luationExpressionDimensionVectorSelector.java} | 14 ++++++++++---- .../virtual/ExpressionVirtualColumnTest.java | 6 +++--- 5 files changed, 26 insertions(+), 13 deletions(-) rename processing/src/main/java/org/apache/druid/segment/virtual/{SingleStringInputDimensionSelector.java => SingleStringInputDeferredEvaluationExpressionDimensionSelector.java} (81%) rename processing/src/main/java/org/apache/druid/segment/virtual/{SingleStringDeferredEvaluationExpressionDimensionVectorSelector.java => SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java} (75%) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 079270459bd1..d32d6895a828 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -187,7 +187,7 @@ public static DimensionSelector makeDimensionSelector( if (plan.any(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE)) { final String column = 
plan.getSingleInputName(); if (plan.getSingleInputType() == ValueType.STRING) { - return new SingleStringInputDimensionSelector( + return new SingleStringInputDeferredEvaluationExpressionDimensionSelector( columnSelectorFactory.makeDimensionSelector(DefaultDimensionSpec.of(column)), expression ); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java index 8da71f4c6e03..95e1f6c3eb85 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java @@ -56,7 +56,7 @@ public static SingleValueDimensionVectorSelector makeSingleValueDimensionVectorS return ConstantVectorSelectors.singleValueDimensionVectorSelector(factory.getReadableVectorInspector(), constant); } if (plan.is(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR) && ExprType.STRING == plan.getOutputType()) { - return new SingleStringDeferredEvaluationExpressionDimensionVectorSelector( + return new SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector( factory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(plan.getSingleInputName())), plan.getExpression() ); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionSelector.java similarity index 81% rename from processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java rename to processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionSelector.java index 0a6cee87afd3..f4f58388da8b 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java +++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionSelector.java @@ -34,16 +34,23 @@ import javax.annotation.Nullable; /** - * A DimensionSelector decorator that computes an expression on top of it. See {@link ExpressionSelectors} for details - * on how expression selectors are constructed. + * A {@link DimensionSelector} decorator that directly exposes the underlying dictionary id in {@link #getRow}, + * saving expression computation until {@link #lookupName} is called. This allows for performing operations like + * grouping on the native dictionary ids, and deferring expression evaluation until after which can dramatically + * reduce the total number of evaluations. + * + * @see ExpressionSelectors for details on how expression selectors are constructed. + * + * @see SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector for the vectorized version of + * this selector. */ -public class SingleStringInputDimensionSelector implements DimensionSelector +public class SingleStringInputDeferredEvaluationExpressionDimensionSelector implements DimensionSelector { private final DimensionSelector selector; private final Expr expression; private final SingleInputBindings bindings = new SingleInputBindings(); - public SingleStringInputDimensionSelector( + public SingleStringInputDeferredEvaluationExpressionDimensionSelector( final DimensionSelector selector, final Expr expression ) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringDeferredEvaluationExpressionDimensionVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java similarity index 75% rename from processing/src/main/java/org/apache/druid/segment/virtual/SingleStringDeferredEvaluationExpressionDimensionVectorSelector.java rename to 
processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java index 69da268836b2..5b7d1551bd2a 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringDeferredEvaluationExpressionDimensionVectorSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java @@ -28,17 +28,23 @@ import javax.annotation.Nullable; /** - * lol + * A {@link SingleValueDimensionVectorSelector} decorator that directly exposes the underlying dictionary ids in + * {@link #getRowVector}, saving expression computation until {@link #lookupName} is called. This allows for + * performing operations like grouping on the native dictionary ids, and deferring expression evaluation until + * after, which can dramatically reduce the total number of evaluations. * - * @see SingleStringInputDimensionSelector + * @see ExpressionVectorSelectors for details on how expression vector selectors are constructed. + * + * @see SingleStringInputDeferredEvaluationExpressionDimensionSelector for the non-vectorized version of this selector. 
*/ -public class SingleStringDeferredEvaluationExpressionDimensionVectorSelector implements SingleValueDimensionVectorSelector +public class SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector + implements SingleValueDimensionVectorSelector { private final SingleValueDimensionVectorSelector selector; private final Expr expression; private final SingleInputBindings bindings = new SingleInputBindings(); - public SingleStringDeferredEvaluationExpressionDimensionVectorSelector( + public SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector( SingleValueDimensionVectorSelector selector, Expr expression ) diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java index aefb99364ed8..93c86fd3db37 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java @@ -268,7 +268,7 @@ public void testMultiObjectSelectorMakesRightSelector() { DimensionSpec spec = new DefaultDimensionSpec("expr", "expr"); - // do some ugly faking to test if SingleStringInputDimensionSelector is created for multi-value expressions when possible + // do some ugly faking to test if SingleStringInputDeferredEvaluationExpressionDimensionSelector is created for multi-value expressions when possible ColumnSelectorFactory factory = new ColumnSelectorFactory() { @Override @@ -331,7 +331,7 @@ public String lookupName(int id) @Override public boolean nameLookupPossibleInAdvance() { - // fake this so when SingleStringInputDimensionSelector it doesn't explode + // fake this so when SingleStringInputDeferredEvaluationExpressionDimensionSelector it doesn't explode return true; } @@ -365,7 +365,7 @@ public ColumnCapabilities getColumnCapabilities(String column) final BaseObjectColumnValueSelector 
selectorExplicit = SCALE_LIST_SELF_EXPLICIT.makeDimensionSelector(spec, factory); - Assert.assertTrue(selectorImplicit instanceof SingleStringInputDimensionSelector); + Assert.assertTrue(selectorImplicit instanceof SingleStringInputDeferredEvaluationExpressionDimensionSelector); Assert.assertTrue(selectorExplicit instanceof ExpressionMultiValueDimensionSelector); } From a3e98ba30f041931de82177d8112a9eb0e6d6e7a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 17 May 2021 22:22:07 -0700 Subject: [PATCH 08/14] more javadocs --- .../vector/GroupByVectorColumnProcessorFactory.java | 11 ++++++++++- .../druid/segment/VectorColumnProcessorFactory.java | 7 +++++++ .../java/org/apache/druid/segment/VirtualColumns.java | 9 ++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java index d6f4e898813e..f37d07812591 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java @@ -120,7 +120,16 @@ public GroupByVectorColumnSelector makeObjectProcessor( /** * The group by engine vector processor has a more relaxed approach to choosing to use a dictionary encoded string - * selector + * selector over an object selector than some of the other {@link VectorColumnProcessorFactory} implementations. + * + * Basically, if a valid dictionary exists, we will use it to group on dictionary ids (so that we can use + * {@link SingleValueStringGroupByVectorColumnSelector} whenever possible instead of + * {@link DictionaryBuildingSingleValueStringGroupByVectorColumnSelector}). 
+ * + * We do this even for things like virtual columns that have a single string input, because it allows deferring + * accessing any of the actual string values, which involves at minimum reading utf8 byte values and converting + * them to string form (if not already cached), and in the case of expressions, computing the expression output for + * the the string input. */ @Override public boolean useDictionaryEncodedSelector(ColumnCapabilities capabilities) diff --git a/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java index 66e2a178a443..d3d569747ebb 100644 --- a/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java @@ -97,6 +97,13 @@ T makeMultiValueDimensionProcessor( * available ({@link ColumnCapabilities#isDictionaryEncoded()} is true), and there is a unique mapping of dictionary * id to value ({@link ColumnCapabilities#areDictionaryValuesUnique()} is true), but this can be overridden * if there is more appropriate behavior for a given processor. + * + * For processors, this means by default only actual dictionary encoded string columns (likely from real segments) + * will use {@link SingleValueDimensionVectorSelector} and {@link MultiValueDimensionVectorSelector}, while + * processors on things like string expression virtual columns will prefer to use {@link VectorObjectSelector}. In + * other words, it is geared towards use cases where there is a clear opportunity to benefit to deferring having to + * deal with the actual string value in exchange for the increased complexity of dealing with dictionary encoded + * selectors. 
*/ default boolean useDictionaryEncodedSelector(ColumnCapabilities capabilities) { diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java index c38574080e03..effcf33c0cff 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java @@ -415,6 +415,10 @@ public VirtualColumn[] getVirtualColumns() return virtualColumns.toArray(new VirtualColumn[0]); } + /** + * Creates a {@link VirtualizedColumnSelectorFactory} which can create column selectors for {@link #virtualColumns} + * in addition to selectors for all physical columns in the underlying factory. + */ public ColumnSelectorFactory wrap(final ColumnSelectorFactory baseFactory) { if (virtualColumns.isEmpty()) { @@ -424,7 +428,10 @@ public ColumnSelectorFactory wrap(final ColumnSelectorFactory baseFactory) } } - + /** + * Creates a {@link VirtualizedColumnInspector} that provides {@link ColumnCapabilities} information for all + * {@link #virtualColumns} in addition to the capabilities of all physical columns in the underlying inspector. 
+ */ + public ColumnInspector wrapInspector(ColumnInspector inspector) { if (virtualColumns.isEmpty()) { From 60910e29406211a55ff52413087d3f1c7f6a841a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 17 May 2021 23:40:01 -0700 Subject: [PATCH 09/14] benchmarks --- .../druid/benchmark/query/SqlExpressionBenchmark.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index 0cf24ec4f78f..67632086dcc0 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -178,7 +178,11 @@ public String getFormatString() // 26: group by string expr with non-expr agg "SELECT CONCAT(string2, '-', long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2", // 27: group by string expr with expr agg - "SELECT CONCAT(string2, '-', long2), SUM(long1 * double4) FROM foo GROUP BY 1 ORDER BY 2" + "SELECT CONCAT(string2, '-', long2), SUM(long1 * double4) FROM foo GROUP BY 1 ORDER BY 2", + // 28: group by single input string low cardinality expr with expr agg + "SELECT CONCAT(string2, '-', 'foo'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2", + // 29: group by single input string high cardinality expr with expr agg + "SELECT CONCAT(string3, '-', 'foo'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2" ); @Param({"5000000"}) private int rowsPerSegment; @@ -217,7 +221,9 @@ public String getFormatString() "24", "25", "26", - "27" + "27", + "28", + "29" }) private String query; From 99d0b7469b2f6584d86e1dcdb1de08e22819bdb4 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 18 May 2021 01:10:03 -0700 Subject: [PATCH 10/14] use string expression vector processor with vector size 1 instead of expr.eval --- .../query/SqlExpressionBenchmark.java | 5 +-
...tionExpressionDimensionVectorSelector.java | 79 ++++++++++++++++--- .../SqlVectorizedExpressionSanityTest.java | 1 + 3 files changed, 73 insertions(+), 12 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index 67632086dcc0..cb5ce5f45d6d 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -188,7 +188,10 @@ public String getFormatString() @Param({"5000000"}) private int rowsPerSegment; - @Param({"false", "force"}) + @Param({ + "false", + "force" + }) private String vectorize; @Param({ diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java index 5b7d1551bd2a..e22e11b585e0 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java @@ -21,6 +21,8 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprType; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; @@ -41,8 +43,8 @@ public class SingleStringInputDeferredEvaluationExpressionDimensionVectorSelecto implements SingleValueDimensionVectorSelector { private final SingleValueDimensionVectorSelector selector; - private final Expr 
expression; - private final SingleInputBindings bindings = new SingleInputBindings(); + private final ExprVectorProcessor stringProcessor; + private final StringVectorInputBindings inputBinding; public SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector( SingleValueDimensionVectorSelector selector, @@ -52,10 +54,14 @@ public SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector( // Verify selector has a working dictionary. if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN || !selector.nameLookupPossibleInAdvance()) { - throw new ISE("Selector of class[%s] does not have a dictionary, cannot use it.", selector.getClass().getName()); + throw new ISE( + "Selector of class[%s] does not have a dictionary, cannot use it.", + selector.getClass().getName() + ); } this.selector = selector; - this.expression = expression; + this.inputBinding = new StringVectorInputBindings(); + this.stringProcessor = expression.buildVectorized(inputBinding); } @Override @@ -68,18 +74,14 @@ public int getValueCardinality() @Override public String lookupName(int id) { - final String value; - - value = selector.lookupName(id); - - bindings.set(value); - return expression.eval(bindings).asString(); + inputBinding.currentValue[0] = selector.lookupName(id); + return stringProcessor.evalVector(inputBinding).values()[0]; } @Override public boolean nameLookupPossibleInAdvance() { - return selector.nameLookupPossibleInAdvance(); + return true; } @Nullable @@ -106,4 +108,59 @@ public int getCurrentVectorSize() { return selector.getCurrentVectorSize(); } + + private static final class StringVectorInputBindings implements Expr.VectorInputBinding + { + private final String[] currentValue = new String[1]; + + @Nullable + @Override + public ExprType getType(String name) + { + return ExprType.STRING; + } + + @Override + public int getMaxVectorSize() + { + return 1; + } + + @Override + public int getCurrentVectorSize() + { + return 1; + } + + 
@Override + public int getCurrentVectorId() + { + return -1; + } + + @Override + public T[] getObjectVector(String name) + { + return (T[]) currentValue; + } + + @Override + public long[] getLongVector(String name) + { + throw new UnsupportedOperationException("attempt to get long[] from string[] only scalar binding"); + } + + @Override + public double[] getDoubleVector(String name) + { + throw new UnsupportedOperationException("attempt to get double[] from string[] only scalar binding"); + } + + @Nullable + @Override + public boolean[] getNullVector(String name) + { + throw new UnsupportedOperationException("attempt to get boolean[] null vector from string[] only scalar binding"); + } + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java index 7fb7dde42fac..de788fadc7b9 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java @@ -85,6 +85,7 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe "SELECT (long1 * long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2", "SELECT string2, SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2", "SELECT string1 + string2, COUNT(*) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT CONCAT(string1, '-', 'foo'), COUNT(*) FROM foo GROUP BY 1 ORDER BY 2", "SELECT CONCAT(string1, '-', string2), string3, COUNT(*) FROM foo GROUP BY 1,2 ORDER BY 3", "SELECT CONCAT(string1, '-', string2, '-', long1, '-', double1, '-', float1) FROM foo GROUP BY 1" ); From 63f6277c2d5c7c304058087e998b7fa1b2ba4b6d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 10 Jun 2021 21:35:01 -0700 Subject: [PATCH 11/14] better logging --- .../virtual/ExpressionVirtualColumn.java | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git 
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java index 63fd816fad07..f507d0030732 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java @@ -191,13 +191,24 @@ public ColumnCapabilities capabilities(ColumnInspector inspector, String columnN // if we can infer the column capabilities from the expression plan, then use that if (inferred != null) { // explicit outputType is used as a hint, how did it compare to the planners inferred output type? - if (inferred.getType() != outputType) { - log.warn( - "Projected output type %s of expression %s does not match provided type %s", - inferred.getType(), - expression, - outputType - ); + if (inferred.getType() != outputType && outputType != null) { + // if both sides are numeric, let it slide and log at debug level + // but mismatches involving strings and arrays might be worth knowing about so warn + if (!inferred.getType().isNumeric() && !outputType.isNumeric()) { + log.warn( + "Projected output type %s of expression %s does not match provided type %s", + inferred.getType(), + expression, + outputType + ); + } else { + log.debug( + "Projected output type %s of expression %s does not match provided type %s", + inferred.getType(), + expression, + outputType + ); + } } return inferred; } From fdb7a540c369705f944695ecd6e01f4e01763b18 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 2 Jul 2021 05:33:42 -0700 Subject: [PATCH 12/14] javadocs, surprising number of the the --- .../org/apache/druid/concurrent/LifecycleLock.java | 2 +- .../java/org/apache/druid/math/expr/Function.java | 2 +- docs/operations/security-overview.md | 2 +- docs/querying/caching.md | 2 +- docs/querying/datasource.md | 4 ++-- docs/querying/sorting-orders.md | 2 +- 
.../org/apache/druid/extendedset/intset/IntSet.java | 2 +- .../java/org/apache/hadoop/fs/HadoopFsWrapper.java | 2 +- .../aggregation/histogram/ApproximateHistogram.java | 2 +- .../histogram/FixedBucketsHistogram.java | 2 +- .../vector/GroupByVectorColumnProcessorFactory.java | 2 +- .../topn/types/TopNColumnAggregatesProcessor.java | 2 +- ...EvaluationExpressionDimensionVectorSelector.java | 13 ++++++++++--- .../druid/server/coordinator/BalancerStrategy.java | 2 +- 14 files changed, 24 insertions(+), 17 deletions(-) diff --git a/core/src/main/java/org/apache/druid/concurrent/LifecycleLock.java b/core/src/main/java/org/apache/druid/concurrent/LifecycleLock.java index a5b8bad51ad4..8d59e6bbf667 100644 --- a/core/src/main/java/org/apache/druid/concurrent/LifecycleLock.java +++ b/core/src/main/java/org/apache/druid/concurrent/LifecycleLock.java @@ -257,7 +257,7 @@ public boolean canStop() } /** - * Finalizes stopping the the LifecycleLock. This method must be called before exit from stop() on this object, + * Finalizes stopping the LifecycleLock. This method must be called before exit from stop() on this object, * usually in a finally block. If you're using a restartable object, use {@link #exitStopAndReset()} instead. * * @throws IllegalMonitorStateException if {@link #canStop()} is not yet called on this LifecycleLock diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index a0b8e59a17ff..ed7082eba0e9 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -647,7 +647,7 @@ public ExprEval apply(List args, Expr.ObjectBinding bindings) if (evals.isEmpty()) { // The GREATEST/LEAST functions are not in the SQL standard. Emulate the behavior of postgres (return null if // all expressions are null, otherwise skip null values) since it is used as a base for a wide number of - // databases. 
This also matches the behavior the the long/double greatest/least post aggregators. Some other + // databases. This also matches the behavior the long/double greatest/least post aggregators. Some other // databases (e.g., MySQL) return null if any expression is null. // https://www.postgresql.org/docs/9.5/functions-conditional.html // https://dev.mysql.com/doc/refman/8.0/en/comparison-operators.html#function_least diff --git a/docs/operations/security-overview.md b/docs/operations/security-overview.md index c51e7d203df1..e1a1dd7ff83b 100644 --- a/docs/operations/security-overview.md +++ b/docs/operations/security-overview.md @@ -122,7 +122,7 @@ For more information, see [TLS support](tls-support.md) and [Simple SSLContext P ## Authentication and authorization -You can configure authentication and authorization to control access to the the Druid APIs. Then configure users, roles, and permissions, as described in the following sections. Make the configuration changes in the `common.runtime.properties` file on all Druid servers in the cluster. +You can configure authentication and authorization to control access to the Druid APIs. Then configure users, roles, and permissions, as described in the following sections. Make the configuration changes in the `common.runtime.properties` file on all Druid servers in the cluster. Within Druid's operating context, authenticators control the way user identities are verified. Authorizers employ user roles to relate authenticated users to the datasources they are permitted to access. You can set the finest-grained permissions on a per-datasource basis. diff --git a/docs/querying/caching.md b/docs/querying/caching.md index 3ee0191601dc..5d6affe4bdfd 100644 --- a/docs/querying/caching.md +++ b/docs/querying/caching.md @@ -63,7 +63,7 @@ For instance, whole-query caching is a good option when you have queries that in - On Historicals, the default. 
Enable segment-level cache population on Historicals for larger production clusters to prevent Brokers from having to merge all query results. When you enable cache population on Historicals instead of Brokers, the Historicals merge their own local results and put less strain on the Brokers. -- On ingestion tasks in the Peon or Indexer service. Larger production clusters should enable segment-level cache population on task services only to prevent Brokers from having to merge all query results. When you enable cache population on task execution services instead of Brokers, the the task execution services to merge their own local results and put less strain on the Brokers. +- On ingestion tasks in the Peon or Indexer service. Larger production clusters should enable segment-level cache population on task services only to prevent Brokers from having to merge all query results. When you enable cache population on task execution services instead of Brokers, the task execution services to merge their own local results and put less strain on the Brokers. Task executor services only support caches that store data locally. For example the `caffeine` cache. This restriction exists because the cache stores results at the level of intermediate partial segments generated by the ingestion tasks. These intermediate partial segments may not be identical across task replicas. Therefore task executor services ignore remote cache types such as `memcached`. diff --git a/docs/querying/datasource.md b/docs/querying/datasource.md index 8fd121df533b..a9dbaab4417d 100644 --- a/docs/querying/datasource.md +++ b/docs/querying/datasource.md @@ -54,7 +54,7 @@ The table datasource is the most common type. This is the kind of datasource you [data ingestion](../ingestion/index.md). They are split up into segments, distributed around the cluster, and queried in parallel. -In [Druid SQL](sql.md#from), table datasources reside in the the `druid` schema. 
This is the default schema, so table +In [Druid SQL](sql.md#from), table datasources reside in the `druid` schema. This is the default schema, so table datasources can be referenced as either `druid.dataSourceName` or simply `dataSourceName`. In native queries, table datasources can be referenced using their names as strings (as in the example above), or by @@ -92,7 +92,7 @@ SELECT k, v FROM lookup.countries Lookup datasources correspond to Druid's key-value [lookup](lookups.md) objects. In [Druid SQL](sql.md#from), -they reside in the the `lookup` schema. They are preloaded in memory on all servers, so they can be accessed rapidly. +they reside in the `lookup` schema. They are preloaded in memory on all servers, so they can be accessed rapidly. They can be joined onto regular tables using the [join operator](#join). Lookup datasources are key-value oriented and always have exactly two columns: `k` (the key) and `v` (the value), and diff --git a/docs/querying/sorting-orders.md b/docs/querying/sorting-orders.md index 34f80572bf82..9609d1216521 100644 --- a/docs/querying/sorting-orders.md +++ b/docs/querying/sorting-orders.md @@ -49,7 +49,7 @@ This sorting order will try to parse all string values as numbers. Unparseable v When comparing two unparseable values (e.g., "hello" and "world"), this ordering will sort by comparing the unparsed strings lexicographically. ## Strlen -Sorts values by the their string lengths. When there is a tie, this comparator falls back to using the String compareTo method. +Sorts values by their string lengths. When there is a tie, this comparator falls back to using the String compareTo method. ## Version Sorts values as versions, e.g.: "10.0 sorts after 9.0", "1.0.0-SNAPSHOT sorts after 1.0.0". 
diff --git a/extendedset/src/main/java/org/apache/druid/extendedset/intset/IntSet.java b/extendedset/src/main/java/org/apache/druid/extendedset/intset/IntSet.java index df90c1c1d0bc..8342978e2fdb 100755 --- a/extendedset/src/main/java/org/apache/druid/extendedset/intset/IntSet.java +++ b/extendedset/src/main/java/org/apache/druid/extendedset/intset/IntSet.java @@ -77,7 +77,7 @@ interface IntIterator extends org.roaringbitmap.IntIterator int next(); /** - * Skips all the elements before the the specified element, so that + * Skips all the elements before the specified element, so that * {@link #next()} gives the given element or, if it does not exist, the * element immediately after according to the sorting provided by this * set. diff --git a/extensions-core/hdfs-storage/src/main/java/org/apache/hadoop/fs/HadoopFsWrapper.java b/extensions-core/hdfs-storage/src/main/java/org/apache/hadoop/fs/HadoopFsWrapper.java index a57677115db5..610d6110f488 100644 --- a/extensions-core/hdfs-storage/src/main/java/org/apache/hadoop/fs/HadoopFsWrapper.java +++ b/extensions-core/hdfs-storage/src/main/java/org/apache/hadoop/fs/HadoopFsWrapper.java @@ -26,7 +26,7 @@ import java.lang.reflect.Method; /** - * This wrapper class is created to be able to access some of the the "protected" methods inside Hadoop's + * This wrapper class is created to be able to access some of the "protected" methods inside Hadoop's * FileSystem class. Those are supposed to become public eventually or more appropriate alternatives would be * provided. * This is a hack and should be removed when no longer necessary. 
diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogram.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogram.java index ed67b8d21a8d..fa3772518b3e 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogram.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogram.java @@ -1520,7 +1520,7 @@ public double sum(final float b) * * @param probabilities array of probabilities * - * @return an array of length probabilities.length representing the the approximate sample quantiles + * @return an array of length probabilities.length representing the approximate sample quantiles * corresponding to the given probabilities */ diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java index e10203caf23e..754e0df2f9eb 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java @@ -94,7 +94,7 @@ public class FixedBucketsHistogram public static final byte SPARSE_ENCODING_MODE = 0x02; /** - * Determines how the the histogram handles outliers. + * Determines how the histogram handles outliers. * * Ignore: do not track outliers at all * Overflow: track outlier counts in upperOutlierCount and lowerOutlierCount. 
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java index f37d07812591..a141113bad5b 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java @@ -129,7 +129,7 @@ public GroupByVectorColumnSelector makeObjectProcessor( * We do this even for things like virtual columns that have a single string input, because it allows deferring * accessing any of the actual string values, which involves at minimum reading utf8 byte values and converting * them to string form (if not already cached), and in the case of expressions, computing the expression output for - * the the string input. + * the string input. */ @Override public boolean useDictionaryEncodedSelector(ColumnCapabilities capabilities) diff --git a/processing/src/main/java/org/apache/druid/query/topn/types/TopNColumnAggregatesProcessor.java b/processing/src/main/java/org/apache/druid/query/topn/types/TopNColumnAggregatesProcessor.java index ac5b21ff123e..d7c2fcd5b2b6 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/types/TopNColumnAggregatesProcessor.java +++ b/processing/src/main/java/org/apache/druid/query/topn/types/TopNColumnAggregatesProcessor.java @@ -115,7 +115,7 @@ long scanAndAggregate( void initAggregateStore(); /** - * Closes all on heap {@link Aggregator} associated withe the aggregates processor + * Closes all on heap {@link Aggregator} associated with the aggregates processor */ void closeAggregators(); } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java 
b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java index e22e11b585e0..56813a491f50 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector.java @@ -44,7 +44,7 @@ public class SingleStringInputDeferredEvaluationExpressionDimensionVectorSelecto { private final SingleValueDimensionVectorSelector selector; private final ExprVectorProcessor stringProcessor; - private final StringVectorInputBindings inputBinding; + private final StringLookupVectorInputBindings inputBinding; public SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector( SingleValueDimensionVectorSelector selector, @@ -60,7 +60,7 @@ public SingleStringInputDeferredEvaluationExpressionDimensionVectorSelector( ); } this.selector = selector; - this.inputBinding = new StringVectorInputBindings(); + this.inputBinding = new StringLookupVectorInputBindings(); this.stringProcessor = expression.buildVectorized(inputBinding); } @@ -109,7 +109,14 @@ public int getCurrentVectorSize() return selector.getCurrentVectorSize(); } - private static final class StringVectorInputBindings implements Expr.VectorInputBinding + /** + * Special single element vector input bindings used for processing the string value for {@link #lookupName(int)} + * + * Vector size is fixed to 1 because {@link #lookupName} operates on a single dictionary value at a time. If a + * bulk lookup method is ever added, these vector bindings should be modified to process the results with actual + * vectors. 
+ */ + private static final class StringLookupVectorInputBindings implements Expr.VectorInputBinding { private final String[] currentValue = new String[1]; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/BalancerStrategy.java b/server/src/main/java/org/apache/druid/server/coordinator/BalancerStrategy.java index db451693674a..3d0ec0f6665b 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/BalancerStrategy.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/BalancerStrategy.java @@ -37,7 +37,7 @@ public interface BalancerStrategy { /** - * Find the best server to move a {@link DataSegment} to according the the balancing strategy. + * Find the best server to move a {@link DataSegment} to according to the balancing strategy. * @param proposalSegment segment to move * @param serverHolders servers to consider as move destinations * @return The server to move to, or null if no move should be made or no server is suitable From ced4a02b37983c5d9ca7328829074e2843635551 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 2 Jul 2021 05:38:45 -0700 Subject: [PATCH 13/14] more --- .../org/apache/druid/segment/ColumnProcessors.java | 2 +- .../apache/druid/segment/virtual/ExpressionPlan.java | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java index 195c1d696b5c..ffb06ed4e3a5 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java @@ -239,7 +239,7 @@ private static ColumnCapabilities computeDimensionSpecCapabilities( .setDictionaryValuesUnique( unique && dimensionSpec.getExtractionFn().getExtractionType() == ExtractionFn.ExtractionType.ONE_TO_ONE ) - .setHasMultipleValues(dimensionSpec.mustDecorate() || mayBeMultiValue(columnCapabilities)); + 
.setHasMultipleValues(mayBeMultiValue(columnCapabilities)); } else { // No transformation. Pass through underlying types. return columnCapabilities; diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java index 649e6b5a1e52..59ef0349b346 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java @@ -219,7 +219,7 @@ public Expr.BindingAnalysis getAnalysis() * If no output type was able to be inferred during planning, returns null */ @Nullable - public ColumnCapabilities inferColumnCapabilities(@Nullable ValueType hint) + public ColumnCapabilities inferColumnCapabilities(@Nullable ValueType outputTypeHint) { if (outputType != null) { final ValueType inferredValueType = ExprType.toValueType(outputType); @@ -227,7 +227,7 @@ public ColumnCapabilities inferColumnCapabilities(@Nullable ValueType hint) if (inferredValueType.isNumeric()) { // if float was explicitly specified preserve it, because it will currently never be the computed output type // since there is no float expression type - if (ValueType.FLOAT == hint) { + if (ValueType.FLOAT == outputTypeHint) { return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT); } return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(inferredValueType); @@ -235,8 +235,8 @@ public ColumnCapabilities inferColumnCapabilities(@Nullable ValueType hint) // null constants can sometimes trip up the type inference to report STRING, so check if explicitly supplied // output type is numeric and stick with that if so - if (hint != null && hint.isNumeric()) { - return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(hint); + if (outputTypeHint != null && outputTypeHint.isNumeric()) { + return 
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(outputTypeHint); } // fancy string stuffs @@ -273,7 +273,7 @@ public ColumnCapabilities inferColumnCapabilities(@Nullable ValueType hint) // the complete set of input types if (any(Trait.NON_SCALAR_OUTPUT, Trait.NEEDS_APPLIED)) { // if the hint requested a string, use a string - if (ValueType.STRING == hint) { + if (ValueType.STRING == outputTypeHint) { return ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.STRING); } // maybe something is looking for a little fun and wants arrays? let whatever it is through From 76677bf65059faea1e3886fc92830af3f417caff Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 2 Jul 2021 05:42:25 -0700 Subject: [PATCH 14/14] simplify --- .../druid/segment/virtual/ExpressionPlanner.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java index 38f21f8022dc..4eb91eb7ae1d 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java @@ -80,17 +80,13 @@ public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) // SINGLE_INPUT_MAPPABLE // is set when a single input string column, which can be multi-valued, but if so, it must be implicitly mappable // (i.e. 
the expression is not treating its input as an array and not wanting to output an array) - if (capabilities != null) { + if (capabilities != null && !analysis.hasInputArrays() && !analysis.isOutputArray()) { boolean isSingleInputMappable = false; - boolean isSingleInputScalar = capabilities.hasMultipleValues().isFalse() && - !analysis.hasInputArrays() && - !analysis.isOutputArray(); + boolean isSingleInputScalar = capabilities.hasMultipleValues().isFalse(); if (capabilities.getType() == ValueType.STRING) { isSingleInputScalar &= capabilities.isDictionaryEncoded().isTrue(); isSingleInputMappable = capabilities.isDictionaryEncoded().isTrue() && - !capabilities.hasMultipleValues().isUnknown() && - !analysis.hasInputArrays() && - !analysis.isOutputArray(); + !capabilities.hasMultipleValues().isUnknown(); } // if satisfied, set single input output type and flags