From 48fc029c36bebf11ebc0402dd6c82faa6e849c73 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 27 Jun 2024 04:04:53 -0700 Subject: [PATCH 1/3] fix vector grouping expression deferred evaluation to only consider dictionary encoded strings as fixed width --- .../query/groupby/DeferExpressionDimensions.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java b/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java index 3c6621d90387..093be582d2a1 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java @@ -71,7 +71,7 @@ public boolean useDeferredGroupBySelector( return false; } - if (!capabilities.isNumeric() && !capabilities.isDictionaryEncoded().isTrue()) { + if (!capabilities.isNumeric() && !DeferExpressionDimensions.isDictionaryEncodedString(capabilities)) { // Not fixed-width. return false; } @@ -106,7 +106,7 @@ public boolean useDeferredGroupBySelector( allNumericInputs = allNumericInputs && capabilities.isNumeric(); - if (!capabilities.isNumeric() && !capabilities.isDictionaryEncoded().isTrue()) { + if (!capabilities.isNumeric() && !DeferExpressionDimensions.isDictionaryEncodedString(capabilities)) { // Not fixed-width. return false; } @@ -162,6 +162,16 @@ public String toString() return jsonName; } + + /** + * {@link VectorColumnSelectorFactory} currently can only make dictionary encoded selectors for string types, so + * we can only consider them as fixed width + */ + private static boolean isDictionaryEncodedString(ColumnCapabilities capabilities) + { + return capabilities.isDictionaryEncoded().isTrue() && capabilities.is(ValueType.STRING); + } + /** * Whether the given expression can be deferred innately by the selector created by * {@link ExpressionVirtualColumn#makeSingleValueVectorDimensionSelector(DimensionSpec, VectorColumnSelectorFactory)}. From eb044c5c374a58af5c709357508581f277a201fd Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 27 Jun 2024 04:45:25 -0700 Subject: [PATCH 2/3] tests --- .../groupby/DeferExpressionDimensions.java | 19 +- .../virtual/ExpressionPlannerTest.java | 481 ++++++++++++++++++ 2 files changed, 495 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java b/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java index 093be582d2a1..86df6dfd5a9f 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java @@ -71,7 +71,7 @@ public boolean useDeferredGroupBySelector( return false; } - if (!capabilities.isNumeric() && !DeferExpressionDimensions.isDictionaryEncodedString(capabilities)) { + if (!capabilities.isNumeric() && !DeferExpressionDimensions.isDictionaryEncodedScalarString(capabilities)) { // Not fixed-width. return false; } @@ -106,7 +106,7 @@ public boolean useDeferredGroupBySelector( allNumericInputs = allNumericInputs && capabilities.isNumeric(); - if (!capabilities.isNumeric() && !DeferExpressionDimensions.isDictionaryEncodedString(capabilities)) { + if (!capabilities.isNumeric() && !DeferExpressionDimensions.isDictionaryEncodedScalarString(capabilities)) { // Not fixed-width. return false; } @@ -165,11 +165,20 @@ public String toString() /** * {@link VectorColumnSelectorFactory} currently can only make dictionary encoded selectors for string types, so - * we can only consider them as fixed width + * we can only consider them as fixed width. Additionally, to err on the side of safety, multi-value string columns + * are also not considered fixed width because expressions process multi-value dimensions as single rows, so we would + * need all dictionary ids to be present in the combined key. + * + * At the time of this javadoc, vector group by does not support multi-value dimensions anyway, so this isn't really + * a problem, but if it did, we could consider allowing them if we ensure that all multi-value inputs are used as + * scalars and so the expression can be applied separately to each individual dictionary id (e.g. the equivalent of + * {@link ExpressionPlan.Trait#SINGLE_INPUT_MAPPABLE} but for all multi-value string inputs of the expression). */ - private static boolean isDictionaryEncodedString(ColumnCapabilities capabilities) + private static boolean isDictionaryEncodedScalarString(ColumnCapabilities capabilities) { - return capabilities.isDictionaryEncoded().isTrue() && capabilities.is(ValueType.STRING); + return capabilities.isDictionaryEncoded().isTrue() && + capabilities.is(ValueType.STRING) && + capabilities.hasMultipleValues().isFalse(); } /** diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java index 9b4d1b84af20..fcae823b6262 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java @@ -29,6 +29,7 @@ import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.Parser; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.groupby.DeferExpressionDimensions; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; @@ -234,6 +235,28 @@ public void testUnknown() Assert.assertNull(thePlan.getOutputType()); Assert.assertNull(thePlan.inferColumnCapabilities(null)); // no we cannot + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -269,6 +292,28 @@ public void testScalarStringNondictionaryEncoded() Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -348,6 +393,27 @@ public void testScalarNumeric() Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -387,6 +453,30 @@ public void testScalarStringDictionaryEncoded() Assert.assertTrue(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + // innately deferrable + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + + // multiple input columns thePlan = plan("concat(scalar_dictionary_string, scalar_dictionary_string_nonunique)"); Assert.assertTrue( @@ -430,6 +520,29 @@ public void testScalarStringDictionaryEncoded() Assert.assertFalse(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + + // array output of dictionary encoded string are not considered single scalar/mappable, nor vectorizable thePlan = plan("array(scalar_dictionary_string)"); Assert.assertTrue( @@ -448,6 +561,27 @@ public void testScalarStringDictionaryEncoded() ExpressionPlan.Trait.VECTORIZABLE ) ); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -481,6 +615,29 @@ public void testMultiValueStringDictionaryEncoded() Assert.assertTrue(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + + thePlan = plan("concat(scalar_string, multi_dictionary_string_nonunique)"); Assert.assertTrue( thePlan.is( @@ -510,6 +667,28 @@ public void testMultiValueStringDictionaryEncoded() Assert.assertEquals(ValueType.STRING, inferred.getType()); Assert.assertTrue(inferred.hasMultipleValues().isTrue()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + thePlan = plan("concat(multi_dictionary_string, multi_dictionary_string_nonunique)"); Assert.assertTrue( thePlan.is( @@ -541,6 +720,28 @@ public void testMultiValueStringDictionaryEncoded() Assert.assertEquals(ValueType.STRING, inferred.getType()); Assert.assertTrue(inferred.hasMultipleValues().isTrue()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + thePlan = plan("array_append(multi_dictionary_string, 'foo')"); Assert.assertTrue( thePlan.is( @@ -556,6 +757,27 @@ public void testMultiValueStringDictionaryEncoded() ExpressionPlan.Trait.VECTORIZABLE ) ); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -582,6 +804,27 @@ public void testMultiValueStringDictionaryEncodedIllegalAccumulator() ) ); Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); thePlan = plan("concat(multi_dictionary_string, multi_dictionary_string_nonunique)"); Assert.assertTrue( @@ -631,6 +874,28 @@ public void testIncompleteString() // incomplete and unknown skip output type since we don't reliably know Assert.assertNull(thePlan.getOutputType()); Assert.assertNull(thePlan.inferColumnCapabilities(null)); + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -667,14 +932,78 @@ public void testArrayOutput() Assert.assertEquals("array_append(\"scalar_string\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); Assert.assertEquals(ExpressionType.STRING_ARRAY, thePlan.getOutputType()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + // multi-valued are cool too thePlan = plan("array_append(multi_dictionary_string, 'x')"); assertArrayInAndOut(thePlan); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); // what about incomplete inputs with arrays? they are not reported as incomplete because they are treated as arrays thePlan = plan("array_append(string_unknown, 'x')"); assertArrayInAndOut(thePlan); Assert.assertEquals(ExpressionType.STRING_ARRAY, thePlan.getOutputType()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); // what about if it is the scalar argument? there it is thePlan = plan("array_append(multi_dictionary_string, string_unknown)"); @@ -696,13 +1025,76 @@ public void testArrayOutput() ); // incomplete and unknown skip output type since we don't reliably know Assert.assertNull(thePlan.getOutputType()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); // array types are cool too thePlan = plan("array_append(string_array_1, 'x')"); assertArrayInAndOut(thePlan); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); thePlan = plan("array_append(string_array_1, 'x')"); assertArrayInAndOut(thePlan); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @@ -732,6 +1124,28 @@ public void testScalarOutputMultiValueInput() ); Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + // what about a multi-valued input thePlan = plan("array_to_string(array_append(scalar_string, multi_dictionary_string), ',')"); Assert.assertTrue( @@ -761,6 +1175,28 @@ public void testScalarOutputMultiValueInput() ); // why is this null Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType()); + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -864,6 +1300,29 @@ public void testNestedColumnExpression() ColumnType.NESTED_DATA.getComplexTypeName(), inferred.getComplexTypeName() ); + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + // all numeric inputs so these are true + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -895,6 +1354,28 @@ public void testDictionaryComplexStringOutput() inferred.getType() ); Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue()); + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } private static ExpressionPlan plan(String expression) From 783eb64e2ef21e1e8e0897c4dcd8d14269b419ee Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 27 Jun 2024 12:45:21 -0700 Subject: [PATCH 3/3] fix strange --- .../apache/druid/query/groupby/DeferExpressionDimensions.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java b/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java index 86df6dfd5a9f..9f6a88f08b2d 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java @@ -71,7 +71,7 @@ public boolean useDeferredGroupBySelector( return false; } - if (!capabilities.isNumeric() && !DeferExpressionDimensions.isDictionaryEncodedScalarString(capabilities)) { + if (!capabilities.isNumeric() && !isDictionaryEncodedScalarString(capabilities)) { // Not fixed-width. return false; } @@ -106,7 +106,7 @@ public boolean useDeferredGroupBySelector( allNumericInputs = allNumericInputs && capabilities.isNumeric(); - if (!capabilities.isNumeric() && !DeferExpressionDimensions.isDictionaryEncodedScalarString(capabilities)) { + if (!capabilities.isNumeric() && !isDictionaryEncodedScalarString(capabilities)) { // Not fixed-width. return false; }