From 1e4750a1535cb54e54ed7b5962511fe86745df7a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 18 Mar 2021 05:14:40 -0700 Subject: [PATCH 1/3] vector group by support for string expressions --- .../query/SqlExpressionBenchmark.java | 10 +- .../math/expr/BinaryMathOperatorExpr.java | 10 +- .../java/org/apache/druid/math/expr/Expr.java | 30 +++++ .../org/apache/druid/math/expr/ExprType.java | 10 ++ .../org/apache/druid/math/expr/Function.java | 16 +++ ...StringOutMultiStringInVectorProcessor.java | 68 +++++++++++ ...ngOutStringsInFunctionVectorProcessor.java | 5 +- .../expr/vector/VectorStringProcessors.java | 102 +++++++++++++++++ .../druid/math/expr/VectorExprSanityTest.java | 9 ++ ...alueStringGroupByVectorColumnSelector.java | 107 ++++++++++++++++++ .../GroupByVectorColumnProcessorFactory.java | 4 +- .../vector/VectorGroupByEngine.java | 8 +- .../query/groupby/GroupByQueryRunnerTest.java | 73 ++++++++++-- .../virtual/VectorizedVirtualColumnTest.java | 2 - .../druid/sql/calcite/CalciteQueryTest.java | 72 +++++++++++- .../SqlVectorizedExpressionSanityTest.java | 5 +- 16 files changed, 498 insertions(+), 33 deletions(-) create mode 100644 core/src/main/java/org/apache/druid/math/expr/vector/StringOutMultiStringInVectorProcessor.java create mode 100644 core/src/main/java/org/apache/druid/math/expr/vector/VectorStringProcessors.java create mode 100644 processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index 0fbe44bbfdac..0cf24ec4f78f 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -174,7 +174,11 @@ public String getFormatString() // 24: group by long expr with non-expr agg "SELECT (long1 * long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2", // 25: group by non-expr with expr agg - "SELECT string2, SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2" + "SELECT string2, SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2", + // 26: group by string expr with non-expr agg + "SELECT CONCAT(string2, '-', long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2", + // 27: group by string expr with expr agg + "SELECT CONCAT(string2, '-', long2), SUM(long1 * double4) FROM foo GROUP BY 1 ORDER BY 2" ); @Param({"5000000"}) @@ -211,7 +215,9 @@ public String getFormatString() "22", "23", "24", - "25" + "25", + "26", + "27" }) private String query; diff --git a/core/src/main/java/org/apache/druid/math/expr/BinaryMathOperatorExpr.java b/core/src/main/java/org/apache/druid/math/expr/BinaryMathOperatorExpr.java index d7daac98c25b..384249bbeaaa 100644 --- a/core/src/main/java/org/apache/druid/math/expr/BinaryMathOperatorExpr.java +++ b/core/src/main/java/org/apache/druid/math/expr/BinaryMathOperatorExpr.java @@ -24,6 +24,7 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.math.expr.vector.ExprVectorProcessor; import org.apache.druid.math.expr.vector.VectorMathProcessors; +import org.apache.druid.math.expr.vector.VectorStringProcessors; import javax.annotation.Nullable; @@ -63,12 +64,19 @@ protected double evalDouble(double left, double right) @Override public boolean canVectorize(InputBindingInspector inspector) { - return inspector.areNumeric(left, right) && inspector.canVectorize(left, right); + return inspector.areScalar(left, right) && inspector.canVectorize(left, right); } @Override public ExprVectorProcessor buildVectorized(VectorInputBindingInspector inspector) { + ExprType type = ExprTypeConversion.operator( + left.getOutputType(inspector), + right.getOutputType(inspector) + ); + if (ExprType.STRING.equals(type)) { + return VectorStringProcessors.concat(inspector, left, right); + } return VectorMathProcessors.plus(inspector, left, right); } } diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index 3c0e4c741c70..4df2bf845d2a 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -213,6 +213,36 @@ default boolean areNumeric(Expr... args) return areNumeric(Arrays.asList(args)); } + /** + * Check if all provided {@link Expr} can infer the output type as {@link ExprType#isScalar()} (non-array) with a + * value of true. + * + * There must be at least one expression with a computable scalar output type for this method to return true. + */ + default boolean areScalar(List args) + { + boolean scalar = true; + for (Expr arg : args) { + ExprType argType = arg.getOutputType(this); + if (argType == null) { + continue; + } + scalar &= argType.isScalar(); + } + return scalar; + } + + /** + * Check if all provided {@link Expr} can infer the output type as {@link ExprType#isScalar()} (non-array) with a + * value of true. + * + * There must be at least one expression with a computable scalar output type for this method to return true. + */ + default boolean areScalar(Expr... args) + { + return areScalar(Arrays.asList(args)); + } + /** * Check if every provided {@link Expr} computes {@link Expr#canVectorize(InputBindingInspector)} to a value of true */ diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprType.java b/core/src/main/java/org/apache/druid/math/expr/ExprType.java index 4c9949d824e3..eaacf5612af9 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprType.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprType.java @@ -42,6 +42,11 @@ public boolean isNumeric() return isNumeric(this); } + public boolean isScalar() + { + return isScalar(this); + } + /** * The expression system does not distinguish between {@link ValueType#FLOAT} and {@link ValueType#DOUBLE}, and * cannot currently handle {@link ValueType#COMPLEX} inputs. This method will convert {@link ValueType#FLOAT} to @@ -131,6 +136,11 @@ public static boolean isNumeric(@Nullable ExprType type) return LONG.equals(type) || DOUBLE.equals(type); } + public static boolean isScalar(@Nullable ExprType exprType) + { + return !isArray(exprType); + } + public static boolean isArray(@Nullable ExprType type) { return LONG_ARRAY.equals(type) || DOUBLE_ARRAY.equals(type) || STRING_ARRAY.equals(type); diff --git a/core/src/main/java/org/apache/druid/math/expr/Function.java b/core/src/main/java/org/apache/druid/math/expr/Function.java index 5322cd64cd79..874d1edfed62 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Function.java +++ b/core/src/main/java/org/apache/druid/math/expr/Function.java @@ -30,6 +30,7 @@ import org.apache.druid.math.expr.vector.ExprVectorProcessor; import org.apache.druid.math.expr.vector.VectorMathProcessors; import org.apache.druid.math.expr.vector.VectorProcessors; +import org.apache.druid.math.expr.vector.VectorStringProcessors; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; @@ -2191,6 +2192,21 @@ public ExprType getOutputType(Expr.InputBindingInspector inspector, List a { return ExprType.STRING; } + + @Override + public boolean canVectorize(Expr.InputBindingInspector inspector, List args) + { + return inspector.areScalar(args) && inspector.canVectorize(args); + } + + @Override + public ExprVectorProcessor asVectorProcessor( + Expr.VectorInputBindingInspector inspector, + List args + ) + { + return VectorStringProcessors.concat(inspector, args); + } } class StrlenFunc implements Function diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/StringOutMultiStringInVectorProcessor.java b/core/src/main/java/org/apache/druid/math/expr/vector/StringOutMultiStringInVectorProcessor.java new file mode 100644 index 000000000000..8c6844551c23 --- /dev/null +++ b/core/src/main/java/org/apache/druid/math/expr/vector/StringOutMultiStringInVectorProcessor.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.math.expr.vector; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprType; + +/** + * many strings enter, one string leaves... + */ +public abstract class StringOutMultiStringInVectorProcessor implements ExprVectorProcessor +{ + final ExprVectorProcessor[] inputs; + final int maxVectorSize; + final String[] outValues; + final boolean sqlCompatible = NullHandling.sqlCompatible(); + + protected StringOutMultiStringInVectorProcessor( + ExprVectorProcessor[] inputs, + int maxVectorSize + ) + { + this.inputs = inputs; + this.maxVectorSize = maxVectorSize; + this.outValues = new String[maxVectorSize]; + } + + @Override + public ExprType getOutputType() + { + return ExprType.STRING; + } + + @Override + public ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + final int currentSize = bindings.getCurrentVectorSize(); + final String[][] in = new String[inputs.length][]; + for (int i = 0; i < inputs.length; i++) { + in[i] = inputs[i].evalVector(bindings).values(); + } + + for (int i = 0; i < currentSize; i++) { + processIndex(in, i); + } + return new ExprEvalStringVector(outValues); + } + + abstract void processIndex(String[][] in, int i); +} diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/StringOutStringsInFunctionVectorProcessor.java b/core/src/main/java/org/apache/druid/math/expr/vector/StringOutStringsInFunctionVectorProcessor.java index b744b03743a2..35f38bc09d6c 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/StringOutStringsInFunctionVectorProcessor.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/StringOutStringsInFunctionVectorProcessor.java @@ -19,7 +19,6 @@ package org.apache.druid.math.expr.vector; -import org.apache.druid.common.config.NullHandling; import org.apache.druid.math.expr.ExprType; import javax.annotation.Nullable; @@ -27,8 +26,6 @@ public abstract class StringOutStringsInFunctionVectorProcessor extends BivariateFunctionVectorObjectProcessor { - final boolean sqlCompatible = NullHandling.sqlCompatible(); - protected StringOutStringsInFunctionVectorProcessor( ExprVectorProcessor left, ExprVectorProcessor right, @@ -44,7 +41,7 @@ protected StringOutStringsInFunctionVectorProcessor( } @Nullable - abstract String processValue(@Nullable String leftVal, @Nullable String rightVal); + protected abstract String processValue(@Nullable String leftVal, @Nullable String rightVal); @Override void processIndex(String[] strings, String[] strings2, int i) diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/VectorStringProcessors.java b/core/src/main/java/org/apache/druid/math/expr/vector/VectorStringProcessors.java new file mode 100644 index 000000000000..18669b03f4e2 --- /dev/null +++ b/core/src/main/java/org/apache/druid/math/expr/vector/VectorStringProcessors.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.math.expr.vector; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprType; + +import javax.annotation.Nullable; +import java.util.List; + +public class VectorStringProcessors +{ + public static ExprVectorProcessor concat(Expr.VectorInputBindingInspector inspector, Expr left, Expr right) + { + final ExprVectorProcessor processor; + if (NullHandling.sqlCompatible()) { + processor = new StringOutStringsInFunctionVectorProcessor( + left.buildVectorized(inspector), + right.buildVectorized(inspector), + inspector.getMaxVectorSize() + ) + { + @Nullable + @Override + protected String processValue(@Nullable String leftVal, @Nullable String rightVal) + { + return leftVal + rightVal; + } + }; + } else { + processor = new StringOutStringsInFunctionVectorProcessor( + left.buildVectorized(inspector), + right.buildVectorized(inspector), + inspector.getMaxVectorSize() + ) + { + @Nullable + @Override + protected String processValue(@Nullable String leftVal, @Nullable String rightVal) + { + return NullHandling.nullToEmptyIfNeeded(leftVal) + NullHandling.nullToEmptyIfNeeded(rightVal); + } + }; + } + return processor; + } + + public static ExprVectorProcessor concat(Expr.VectorInputBindingInspector inspector, List inputs) + { + final ExprVectorProcessor[] inputProcessors = new ExprVectorProcessor[inputs.size()]; + for (int i = 0; i < inputs.size(); i++) { + inputProcessors[i] = CastToTypeVectorProcessor.cast(inputs.get(i).buildVectorized(inspector), ExprType.STRING); + } + final ExprVectorProcessor processor = new StringOutMultiStringInVectorProcessor( + inputProcessors, + inspector.getMaxVectorSize() + ) + { + @Override + void processIndex(String[][] in, int i) + { + // Result of concatenation is null if any of the Values is null. + // e.g. 'select CONCAT(null, "abc") as c;' will return null as per Standard SQL spec. + String first = NullHandling.nullToEmptyIfNeeded(in[0][i]); + if (first == null) { + outValues[i] = null; + return; + } + final StringBuilder builder = new StringBuilder(first); + for (int inputNumber = 1; inputNumber < in.length; inputNumber++) { + final String s = NullHandling.nullToEmptyIfNeeded(in[inputNumber][i]); + if (s == null) { + outValues[i] = null; + return; + } else { + builder.append(s); + } + } + outValues[i] = builder.toString(); + } + }; + return processor; + } +} diff --git a/core/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java b/core/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java index 7fd55048e236..6d769e067133 100644 --- a/core/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java @@ -202,6 +202,15 @@ public void testCast() testFunctions(types, templates, args); } + @Test + public void testStringFns() + { + testExpression("s1 + s2", types); + testExpression("s1 + '-' + s2", types); + testExpression("concat(s1, s2)", types); + testExpression("concat(s1,'-',s2,'-',l1,'-',d1)", types); + } + static void testFunctions(Map types, String[] templates, String[] args) { for (String template : templates) { diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java new file mode 100644 index 000000000000..d83166f328fc --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import java.util.ArrayList; +import java.util.List; + +/** + * A {@link GroupByVectorColumnSelector} that builds an internal String<->Integer dictionary, used for grouping + * single-valued STRING columns which are not natively dictionary encoded, e.g. expression virtual columns. + * + * This is effectively the {@link VectorGroupByEngine} analog of + * {@link org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingStringGroupByColumnSelectorStrategy} + */ +public class DictionaryBuildingSingleValueStringGroupByVectorColumnSelector implements GroupByVectorColumnSelector +{ + private static final int GROUP_BY_MISSING_VALUE = -1; + + private final VectorObjectSelector selector; + + private int nextId = 0; + private final List dictionary = new ArrayList<>(); + private final Object2IntOpenHashMap reverseDictionary = new Object2IntOpenHashMap<>(); + + { + reverseDictionary.defaultReturnValue(-1); + } + + public DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(VectorObjectSelector selector) + { + this.selector = selector; + } + + + @Override + public int getGroupingKeySize() + { + return Integer.BYTES; + } + + @Override + public void writeKeys( + final WritableMemory keySpace, + final int keySize, + final int keyOffset, + final int startRow, + final int endRow + ) + { + final Object[] vector = selector.getObjectVector(); + + for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) { + final String value = (String) vector[i]; + final int dictId = reverseDictionary.getInt(value); + if (dictId < 0) { + dictionary.add(value); + reverseDictionary.put(value, nextId); + keySpace.putInt(j, nextId); + nextId++; + } else { + keySpace.putInt(j, dictId); + } + } + } + + @Override + public void writeKeyToResultRow( + final Memory keyMemory, + final int keyOffset, + final ResultRow resultRow, + final int resultRowPosition + ) + { + final int id = keyMemory.getInt(keyOffset); + // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map. + if (id != GROUP_BY_MISSING_VALUE) { + final String value = dictionary.get(id); + resultRow.set(resultRowPosition, value); + } else { + resultRow.set(resultRowPosition, NullHandling.defaultStringValue()); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java index 6f332def41b3..069751d1eace 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java @@ -113,9 +113,7 @@ public GroupByVectorColumnSelector makeObjectProcessor( ) { if (ValueType.STRING.equals(capabilities.getType())) { - throw new UnsupportedOperationException( - "Vectorized groupBys on non-dictionary encoded string columns with object selectors are not yet implemented" - ); + return new DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(selector); } return NilGroupByVectorColumnSelector.INSTANCE; } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java index 374ba664f8f1..848c18586e81 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -46,7 +46,6 @@ import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnCapabilities; -import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorCursor; @@ -108,12 +107,7 @@ public static boolean canVectorizeDimensions( if (columnCapabilities == null) { return true; } - // strings must be single valued, dictionary encoded, and have unique dictionary entries - if (ValueType.STRING.equals(columnCapabilities.getType())) { - return columnCapabilities.hasMultipleValues().isFalse() && - columnCapabilities.isDictionaryEncoded().isTrue() && - columnCapabilities.areDictionaryValuesUnique().isTrue(); - } + // must be single valued return columnCapabilities.hasMultipleValues().isFalse(); }); } diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java index 70818e962904..eb1ea6274418 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java @@ -1016,10 +1016,6 @@ public void testGroupByWithStringPostAggregator() @Test public void testGroupByWithStringVirtualColumn() { - // Cannot vectorize due to virtual columns. - // all virtual columns are single input column, so it will work for group by v1, even with multi-value inputs - cannotVectorize(); - GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) @@ -1080,6 +1076,69 @@ public void testGroupByWithStringVirtualColumn() TestHelper.assertExpectedObjects(expectedResults, results, "virtual-column"); } + @Test + public void testGroupByWithStringVirtualColumnVectorizable() + { + GroupByQuery query = makeQueryBuilder() + .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) + .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) + .setVirtualColumns( + new ExpressionVirtualColumn( + "vc", + "cast(quality, 'STRING')", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setDimensions(new DefaultDimensionSpec("vc", "alias")) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index")) + .setGranularity(QueryRunnerTestHelper.DAY_GRAN) + .build(); + + List expectedResults = Arrays.asList( + makeRow(query, "2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L), + makeRow(query, "2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), + makeRow( + query, + "2011-04-01", + "alias", + "entertainment", + "rows", + 1L, + "idx", + 158L + ), + makeRow(query, "2011-04-01", "alias", "health", "rows", 1L, "idx", 120L), + makeRow(query, "2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), + makeRow(query, "2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), + makeRow(query, "2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L), + makeRow(query, "2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L), + makeRow(query, "2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L), + + makeRow(query, "2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L), + makeRow(query, "2011-04-02", "alias", "business", "rows", 1L, "idx", 112L), + makeRow( + query, + "2011-04-02", + "alias", + "entertainment", + "rows", + 1L, + "idx", + 166L + ), + makeRow(query, "2011-04-02", "alias", "health", "rows", 1L, "idx", 113L), + makeRow(query, "2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), + makeRow(query, "2011-04-02", "alias", "news", "rows", 1L, "idx", 114L), + makeRow(query, "2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L), + makeRow(query, "2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L), + makeRow(query, "2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L) + ); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, "virtual-column"); + } + @Test public void testGroupByWithDurationGranularity() { @@ -6336,9 +6395,6 @@ public void testSubqueryWithFirstLast() @Test public void testGroupByWithSubtotalsSpecOfDimensionsPrefixes() { - // Cannot vectorize due to usage of expressions. - cannotVectorize(); - if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } @@ -6452,9 +6508,6 @@ public void testGroupByWithSubtotalsSpecOfDimensionsPrefixes() @Test public void testGroupByWithSubtotalsSpecGeneral() { - // Cannot vectorize due to usage of expressions. - cannotVectorize(); - if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java index 3aa6eef06f82..850c4d52ad0e 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java @@ -155,8 +155,6 @@ public void testGroupByMultiValueStringUnknown() @Test public void testGroupBySingleValueStringNotDictionaryEncoded() { - // cannot currently group by string columns that are not dictionary encoded - cannotVectorize(); testGroupBy(new ColumnCapabilitiesImpl() .setType(ValueType.STRING) .setDictionaryEncoded(false) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 35641cf97120..3ce598f00489 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -8936,9 +8936,6 @@ public void testRegexpExtractFilterViaNotNullCheck() throws Exception @Test public void testRegexpLikeFilter() throws Exception { - // Cannot vectorize due to usage of regex filter. - cannotVectorize(); - testQuery( "SELECT COUNT(*)\n" + "FROM foo\n" @@ -14424,6 +14421,75 @@ public void testConcat() throws Exception ); } + @Test + public void testConcatGroup() throws Exception + { + testQuery( + "SELECT CONCAT(dim1, '-', dim1, '_', dim1) as dimX FROM foo GROUP BY 1", + ImmutableList.of( + new GroupByQuery.Builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setVirtualColumns(expressionVirtualColumn( + "v0", + "concat(\"dim1\",'-',\"dim1\",'_',\"dim1\")", + ValueType.STRING + )) + .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0"))) + .setGranularity(Granularities.ALL) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"-_"}, + new Object[]{"1-1_1"}, + new Object[]{"10.1-10.1_10.1"}, + new Object[]{"2-2_2"}, + new Object[]{"abc-abc_abc"}, + new Object[]{"def-def_def"} + ) + ); + + final List secondResults; + if (useDefault) { + secondResults = ImmutableList.of( + new Object[]{"10.1x2.0999910.1"}, + new Object[]{"1ax4.099991"}, + new Object[]{"2x3.099992"}, + new Object[]{"abcx6.09999abc"}, + new Object[]{"ax1.09999"}, + new Object[]{"defabcx5.09999def"} + ); + } else { + secondResults = ImmutableList.of( + new Object[]{null}, + new Object[]{"1ax4.099991"}, + new Object[]{"2x3.099992"}, + new Object[]{"ax1.09999"}, + new Object[]{"defabcx5.09999def"} + ); + } + testQuery( + "SELECT CONCAT(dim1, CONCAT(dim2,'x'), m2, 9999, dim1) as dimX FROM foo GROUP BY 1", + ImmutableList.of( + new GroupByQuery.Builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setVirtualColumns(expressionVirtualColumn( + "v0", + "concat(\"dim1\",concat(\"dim2\",'x'),\"m2\",9999,\"dim1\")", + ValueType.STRING + )) + .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0"))) + .setGranularity(Granularities.ALL) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + + ), + secondResults + ); + } + @Test public void testTextcat() throws Exception { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java index b97b7f1a489b..7fb7dde42fac 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java @@ -83,7 +83,10 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2", "SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1", "SELECT (long1 * long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2", - "SELECT string2, SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2" + "SELECT string2, SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT string1 + string2, COUNT(*) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT CONCAT(string1, '-', string2), string3, COUNT(*) FROM foo GROUP BY 1,2 ORDER BY 3", + "SELECT CONCAT(string1, '-', string2, '-', long1, '-', double1, '-', float1) FROM foo GROUP BY 1" ); private static final int ROWS_PER_SEGMENT = 100_000; From e68a6ba0df7f8b37029fee381470d4b1b335d31f Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 18 Mar 2021 14:43:48 -0700 Subject: [PATCH 2/3] fix test --- .../apache/druid/query/filter/sql/BloomDimFilterSqlTest.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java index f29fbc2b07c5..2377dc3e299b 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java @@ -177,9 +177,6 @@ public void testBloomFilterExprFilter() throws Exception @Test public void testBloomFilterVirtualColumn() throws Exception { - // Cannot vectorize due to expression virtual columns. - cannotVectorize(); - BloomKFilter filter = new BloomKFilter(1500); filter.addString("def-foo"); byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter); From a8a40ac4ce27363883d40e54caeb2ae56c0e2c91 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 8 Apr 2021 04:21:18 -0700 Subject: [PATCH 3/3] comments, javadoc --- .../expr/vector/BivariateFunctionVectorObjectProcessor.java | 5 ++++- .../druid/math/expr/vector/VectorStringProcessors.java | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/BivariateFunctionVectorObjectProcessor.java b/core/src/main/java/org/apache/druid/math/expr/vector/BivariateFunctionVectorObjectProcessor.java index ff92b9f6a896..2f1279a6d2d5 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/BivariateFunctionVectorObjectProcessor.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/BivariateFunctionVectorObjectProcessor.java @@ -25,7 +25,10 @@ import java.lang.reflect.Array; /** - * Base {@link ExprVectorProcessor} for expressions and functions with 2 'object' typed inputs (strings, arrays) + * Base {@link ExprVectorProcessor} for expressions and functions with 2 'object' typed inputs (strings, arrays). + * + * In SQL compatible null handling mode, for a row with either left or right input as a null value, it will be handled + * by {@link #processNull(int)} instead of {@link #processIndex(Object, Object, int)}. */ public abstract class BivariateFunctionVectorObjectProcessor implements ExprVectorProcessor diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/VectorStringProcessors.java b/core/src/main/java/org/apache/druid/math/expr/vector/VectorStringProcessors.java index 18669b03f4e2..bbfbd68634ab 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/VectorStringProcessors.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/VectorStringProcessors.java @@ -42,6 +42,7 @@ public static ExprVectorProcessor concat(Expr.VectorInputBindingInspector @Override protected String processValue(@Nullable String leftVal, @Nullable String rightVal) { + // in sql compatible mode, nulls are handled by super class and never make it here... return leftVal + rightVal; } };