From ccb340312e955363ecf7ff4275a73a98ede1a30a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 8 Feb 2022 23:03:26 -0800 Subject: [PATCH 1/3] fix bugs with multi-value string array expression handling --- .../java/org/apache/druid/math/expr/Expr.java | 5 + .../druid/math/expr/IdentifierExpr.java | 6 + .../apache/druid/math/expr/ParserTest.java | 6 + .../generator/GeneratorBasicSchemas.java | 7 + .../druid/segment/virtual/ExpressionPlan.java | 4 + .../segment/virtual/ExpressionPlanner.java | 11 +- .../segment/virtual/ExpressionSelectors.java | 16 +- ...tCachingExpressionColumnValueSelector.java | 2 +- .../segment/generator/SegmentGenerator.java | 69 ++++ .../virtual/ExpressionSelectorsTest.java | 330 ++++++++++++++++++ .../sql/calcite/CalciteArraysQueryTest.java | 38 +- .../CalciteMultiValueStringQueryTest.java | 52 ++- 12 files changed, 506 insertions(+), 40 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java index 0c5ffae6e293..a2cc91a57ad4 100644 --- a/core/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java @@ -63,6 +63,11 @@ default boolean isNullLiteral() return false; } + default boolean isIdentifier() + { + return false; + } + /** * Returns the value of expr if expr is a literal, or throws an exception otherwise. * diff --git a/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java b/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java index 49e9c96671af..a41d7a995754 100644 --- a/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java +++ b/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java @@ -86,6 +86,12 @@ public String getBinding() return binding; } + @Override + public boolean isIdentifier() + { + return true; + } + @Nullable @Override public String getIdentifierIfIdentifier() diff --git a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java index 019595495253..51ae5da75323 100644 --- a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java @@ -726,6 +726,7 @@ private void validateLiteral(String expr, ExpressionType type, Object expected, Expr parsedFlat = Parser.parse(expr, ExprMacroTable.nil(), true); Assert.assertTrue(parsed.isLiteral()); Assert.assertTrue(parsedFlat.isLiteral()); + Assert.assertFalse(parsed.isIdentifier()); Assert.assertEquals(type, parsed.getOutputType(emptyBinding)); Assert.assertEquals(type, parsedFlat.getOutputType(emptyBinding)); Assert.assertEquals(expected, parsed.getLiteralValue()); @@ -770,6 +771,11 @@ private void validateParser( ) { final Expr parsed = Parser.parse(expression, ExprMacroTable.nil()); + if (parsed instanceof IdentifierExpr) { + Assert.assertTrue(parsed.isIdentifier()); + } else { + Assert.assertFalse(parsed.isIdentifier()); + } final Expr.BindingAnalysis deets = parsed.analyzeInputs(); Assert.assertEquals(expression, expected, parsed.toString()); Assert.assertEquals(expression, identifiers, deets.getRequiredBindingsList()); diff --git a/processing/src/main/java/org/apache/druid/segment/generator/GeneratorBasicSchemas.java b/processing/src/main/java/org/apache/druid/segment/generator/GeneratorBasicSchemas.java index 71252506c876..1338cf5734cb 100644 --- a/processing/src/main/java/org/apache/druid/segment/generator/GeneratorBasicSchemas.java +++ b/processing/src/main/java/org/apache/druid/segment/generator/GeneratorBasicSchemas.java @@ -321,6 +321,13 @@ public class GeneratorBasicSchemas GeneratorColumnSchema.makeLazyDiscreteUniform("string4", ValueType.STRING, false, 1, null, 1, 10_000), GeneratorColumnSchema.makeLazyDiscreteUniform("string5", ValueType.STRING, false, 1, 0.3, 1, 1_000_000), + // multi string dims + GeneratorColumnSchema.makeSequential("multi-string1", ValueType.STRING, false, 8, null, 0, 10000), + GeneratorColumnSchema.makeLazyZipf("multi-string2", ValueType.STRING, false, 8, null, 1, 100, 1.5), + GeneratorColumnSchema.makeLazyZipf("multi-string3", ValueType.STRING, false, 16, 0.1, 1, 1_000_000, 2.0), + GeneratorColumnSchema.makeLazyDiscreteUniform("multi-string4", ValueType.STRING, false, 4, null, 1, 10_000), + GeneratorColumnSchema.makeLazyDiscreteUniform("multi-string5", ValueType.STRING, false, 8, 0.3, 1, 1_000_000), + // numeric dims GeneratorColumnSchema.makeSequential("long1", ValueType.LONG, false, 1, null, 0, 10000), GeneratorColumnSchema.makeLazyZipf("long2", ValueType.LONG, false, 1, null, 1, 101, 1.5), diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java index 52ac996d71e2..b0b57e09178c 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java @@ -45,6 +45,10 @@ public enum Trait * expression has no inputs and can be optimized into a constant selector */ CONSTANT, + /** + * expression is a simple identifier expression, do not transform + */ + IDENTIFIER, /** * expression has a single, single valued input, and is dictionary encoded if the value is a string, and does * not produce non-scalar output diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java index ccf642d45f73..bd77c6ca32e8 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlanner.java @@ -69,6 +69,8 @@ public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) // check and set traits which allow optimized selectors to be created if (columns.isEmpty()) { traits.add(ExpressionPlan.Trait.CONSTANT); + } else if (expression.isIdentifier()) { + traits.add(ExpressionPlan.Trait.IDENTIFIER); } else if (columns.size() == 1) { final String column = Iterables.getOnlyElement(columns); final ColumnCapabilities capabilities = inspector.getColumnCapabilities(column); @@ -105,7 +107,14 @@ public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) // if we didn't eliminate this expression as a single input scalar or mappable expression, it might need // automatic transformation to map across multi-valued inputs (or row by row detection in the worst case) - if (ExpressionPlan.none(traits, ExpressionPlan.Trait.SINGLE_INPUT_SCALAR)) { + if ( + ExpressionPlan.none( + traits, + ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, + ExpressionPlan.Trait.CONSTANT, + ExpressionPlan.Trait.IDENTIFIER + ) + ) { final Set definitelyMultiValued = new HashSet<>(); final Set definitelyArray = new HashSet<>(); for (String column : analysis.getRequiredBindings()) { diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index ec5f8bff7c4c..3f6e2820dfd0 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -296,7 +296,8 @@ public static Expr.ObjectBinding createBindings( } else if (capabilities.is(ValueType.STRING)) { supplier = supplierFromDimensionSelector( columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)), - multiVal + multiVal, + homogenizeNullMultiValueStringArrays ); } else { // complex type just pass straight through @@ -349,7 +350,8 @@ public ExpressionType getType(String name) * * @see org.apache.druid.segment.BaseNullableColumnValueSelector#isNull() for why this only works in the numeric case */ - private static Supplier makeNullableNumericSupplier( + @VisibleForTesting + public static Supplier makeNullableNumericSupplier( ColumnValueSelector selector, Supplier supplier ) @@ -371,7 +373,7 @@ private static Supplier makeNullableNumericSupplier( * arrays if specified. */ @VisibleForTesting - static Supplier supplierFromDimensionSelector(final DimensionSelector selector, boolean coerceArray) + static Supplier supplierFromDimensionSelector(final DimensionSelector selector, boolean coerceArray, boolean homogenize) { Preconditions.checkNotNull(selector, "selector"); return () -> { @@ -381,8 +383,12 @@ static Supplier supplierFromDimensionSelector(final DimensionSelector se return selector.lookupName(row.get(0)); } else { // column selector factories hate you and use [] and [null] interchangeably for nullish data - if (row.size() == 0) { - return new Object[]{null}; + if (selector.getObject() == null) { + if (homogenize) { + return new Object[]{null}; + } else { + return null; + } } final Object[] strings = new Object[row.size()]; // noinspection SSBasedInspection diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java index 68a8522233c3..22700f20fa19 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java @@ -64,7 +64,7 @@ public SingleStringInputCachingExpressionColumnValueSelector( this.selector = Preconditions.checkNotNull(selector, "selector"); this.expression = Preconditions.checkNotNull(expression, "expression"); - final Supplier inputSupplier = ExpressionSelectors.supplierFromDimensionSelector(selector, false); + final Supplier inputSupplier = ExpressionSelectors.supplierFromDimensionSelector(selector, false, false); this.bindings = InputBindings.singleProvider(ExpressionType.STRING, name -> inputSupplier.get()); if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN) { diff --git a/processing/src/test/java/org/apache/druid/segment/generator/SegmentGenerator.java b/processing/src/test/java/org/apache/druid/segment/generator/SegmentGenerator.java index 2ce9296e58a8..4f21658aaa95 100644 --- a/processing/src/test/java/org/apache/druid/segment/generator/SegmentGenerator.java +++ b/processing/src/test/java/org/apache/druid/segment/generator/SegmentGenerator.java @@ -35,6 +35,7 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.serde.ComplexMetrics; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; @@ -211,6 +212,57 @@ public QueryableIndex generate( return retVal; } + public IncrementalIndex generateIncrementalIndex( + + final DataSegment dataSegment, + final GeneratorSchemaInfo schemaInfo, + final Granularity granularity, + final int numRows + ) + { + // In case we need to generate hyperUniques. + ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde()); + + final String dataHash = Hashing.sha256() + .newHasher() + .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8) + .putString(schemaInfo.toString(), StandardCharsets.UTF_8) + .putString(granularity.toString(), StandardCharsets.UTF_8) + .putInt(numRows) + .hash() + .toString(); + + + final DataGenerator dataGenerator = new DataGenerator( + schemaInfo.getColumnSchemas(), + dataSegment.getId().hashCode(), /* Use segment identifier hashCode as seed */ + schemaInfo.getDataInterval(), + numRows + ); + + final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder() + .withDimensionsSpec(schemaInfo.getDimensionsSpec()) + .withMetrics(schemaInfo.getAggsArray()) + .withRollup(schemaInfo.isWithRollup()) + .withQueryGranularity(granularity) + .build(); + + final List rows = new ArrayList<>(); + + for (int i = 0; i < numRows; i++) { + final InputRow row = dataGenerator.nextRow(); + rows.add(row); + + if ((i + 1) % 20000 == 0) { + log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment); + } + } + + log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment); + + return makeIncrementalIndex(dataSegment.getId(), dataHash, 0, rows, indexSchema); + } + @Override public void close() throws IOException { @@ -236,6 +288,23 @@ private QueryableIndex makeIndex( .buildMMappedIndex(); } + private IncrementalIndex makeIncrementalIndex( + final SegmentId identifier, + final String dataHash, + final int indexNumber, + final List rows, + final IncrementalIndexSchema indexSchema + ) + { + return IndexBuilder + .create() + .schema(indexSchema) + .tmpDir(new File(getSegmentDir(identifier, dataHash), String.valueOf(indexNumber))) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .rows(rows) + .buildIncrementalIndex(); + } + private File getSegmentDir(final SegmentId identifier, final String dataHash) { return new File(cacheDir, StringUtils.format("%s_%s", identifier, dataHash)); diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java index 56d6d2d7ee1e..93e7b10727fa 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java @@ -31,26 +31,42 @@ import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.math.expr.Parser; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseSingleValueDimensionSelector; +import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.Cursor; import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexStorageAdapter; +import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.TestObjectColumnSelector; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.generator.GeneratorBasicSchemas; +import org.apache.druid.segment.generator.GeneratorSchemaInfo; +import org.apache.druid.segment.generator.SegmentGenerator; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; import org.apache.druid.segment.incremental.IndexSizeExceededException; import org.apache.druid.segment.incremental.OnheapIncrementalIndex; import org.apache.druid.testing.InitializedNullHandlingTest; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; +import org.apache.druid.utils.CloseableUtils; +import org.junit.AfterClass; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; import java.util.ArrayList; @@ -59,6 +75,319 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest { + private static Closer CLOSER; + private static QueryableIndex QUERYABLE_INDEX; + private static QueryableIndexStorageAdapter QUERYABLE_INDEX_STORAGE_ADAPTER; + private static IncrementalIndex INCREMENTAL_INDEX; + private static IncrementalIndexStorageAdapter INCREMENTAL_INDEX_STORAGE_ADAPTER; + private static List ADAPTERS; + + @BeforeClass + public static void setup() + { + CLOSER = Closer.create(); + final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench"); + + final DataSegment dataSegment = DataSegment.builder() + .dataSource("foo") + .interval(schemaInfo.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(); + final SegmentGenerator segmentGenerator = CLOSER.register(new SegmentGenerator()); + + final int numRows = 10_000; + INCREMENTAL_INDEX = CLOSER.register( + segmentGenerator.generateIncrementalIndex(dataSegment, schemaInfo, Granularities.HOUR, numRows) + ); + INCREMENTAL_INDEX_STORAGE_ADAPTER = new IncrementalIndexStorageAdapter(INCREMENTAL_INDEX); + + QUERYABLE_INDEX = CLOSER.register( + segmentGenerator.generate(dataSegment, schemaInfo, Granularities.HOUR, numRows) + ); + QUERYABLE_INDEX_STORAGE_ADAPTER = new QueryableIndexStorageAdapter(QUERYABLE_INDEX); + + ADAPTERS = ImmutableList.of(INCREMENTAL_INDEX_STORAGE_ADAPTER, QUERYABLE_INDEX_STORAGE_ADAPTER); + } + + @AfterClass + public static void teardown() + { + CloseableUtils.closeAndSuppressExceptions(CLOSER, throwable -> {}); + } + + @Test + public void test_single_value_string_bindings() + { + final String columnName = "string3"; + for (StorageAdapter adapter : ADAPTERS) { + Sequence cursorSequence = adapter.makeCursors( + null, + adapter.getInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + List flatten = cursorSequence.toList(); + + for (Cursor cursor : flatten) { + ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); + ExpressionPlan plan = ExpressionPlanner.plan( + adapter, + Parser.parse("\"string3\"", TestExprMacroTable.INSTANCE) + ); + ExpressionPlan plan2 = ExpressionPlanner.plan( + adapter, + Parser.parse( + "concat(\"string3\", 'foo')", + TestExprMacroTable.INSTANCE + ) + ); + + Expr.ObjectBinding bindings = ExpressionSelectors.createBindings(factory, plan); + Expr.ObjectBinding bindings2 = ExpressionSelectors.createBindings(factory, plan2); + + DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(columnName)); + ColumnValueSelector valueSelector = factory.makeColumnValueSelector(columnName); + + // realtime index needs to handle as multi-value in case any new values are added during processing + final boolean isMultiVal = factory.getColumnCapabilities(columnName) == null || + factory.getColumnCapabilities(columnName).hasMultipleValues().isMaybeTrue(); + while (!cursor.isDone()) { + Object dimSelectorVal = dimSelector.getObject(); + Object valueSelectorVal = valueSelector.getObject(); + Object bindingVal = bindings.get(columnName); + Object bindingVal2 = bindings2.get(columnName); + if (dimSelectorVal == null) { + Assert.assertNull(dimSelectorVal); + Assert.assertNull(valueSelectorVal); + Assert.assertNull(bindingVal); + if (isMultiVal) { + Assert.assertNull(((Object[]) bindingVal2)[0]); + } else { + Assert.assertNull(bindingVal2); + } + + } else { + if (isMultiVal) { + Assert.assertEquals(dimSelectorVal, ((Object[]) bindingVal)[0]); + Assert.assertEquals(valueSelectorVal, ((Object[]) bindingVal)[0]); + Assert.assertEquals(dimSelectorVal, ((Object[]) bindingVal2)[0]); + Assert.assertEquals(valueSelectorVal, ((Object[]) bindingVal2)[0]); + } else { + Assert.assertEquals(dimSelectorVal, bindingVal); + Assert.assertEquals(valueSelectorVal, bindingVal); + Assert.assertEquals(dimSelectorVal, bindingVal2); + Assert.assertEquals(valueSelectorVal, bindingVal2); + } + } + + cursor.advance(); + } + } + } + } + + @Test + public void test_multi_value_string_bindings() + { + final String columnName = "multi-string3"; + for (StorageAdapter adapter : ADAPTERS) { + Sequence cursorSequence = adapter.makeCursors( + null, + adapter.getInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + List flatten = cursorSequence.toList(); + + for (Cursor cursor : flatten) { + ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); + + // identifier, uses dimension selector supplier supplier, no null coercion + ExpressionPlan plan = ExpressionPlanner.plan( + adapter, + Parser.parse("\"multi-string3\"", TestExprMacroTable.INSTANCE) + ); + // array output, uses object selector supplier, no null coercion + ExpressionPlan plan2 = ExpressionPlanner.plan( + adapter, + Parser.parse( + "array_append(\"multi-string3\", 'foo')", + TestExprMacroTable.INSTANCE + ) + ); + // array input, uses dimension selector supplier, no null coercion + ExpressionPlan plan3 = ExpressionPlanner.plan( + adapter, + Parser.parse( + "array_length(\"multi-string3\")", + TestExprMacroTable.INSTANCE + ) + ); + // used as scalar, has null coercion + ExpressionPlan plan4 = ExpressionPlanner.plan( + adapter, + Parser.parse( + "concat(\"multi-string3\", 'foo')", + TestExprMacroTable.INSTANCE + ) + ); + Expr.ObjectBinding bindings = ExpressionSelectors.createBindings(factory, plan); + Expr.ObjectBinding bindings2 = ExpressionSelectors.createBindings(factory, plan2); + Expr.ObjectBinding bindings3 = ExpressionSelectors.createBindings(factory, plan3); + Expr.ObjectBinding bindings4 = ExpressionSelectors.createBindings(factory, plan4); + + DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(columnName)); + ColumnValueSelector valueSelector = factory.makeColumnValueSelector(columnName); + + while (!cursor.isDone()) { + Object dimSelectorVal = dimSelector.getObject(); + Object valueSelectorVal = valueSelector.getObject(); + Object bindingVal = bindings.get(columnName); + Object bindingVal2 = bindings2.get(columnName); + Object bindingVal3 = bindings3.get(columnName); + Object bindingVal4 = bindings4.get(columnName); + + if (dimSelectorVal == null) { + Assert.assertNull(dimSelectorVal); + Assert.assertNull(valueSelectorVal); + Assert.assertNull(bindingVal); + Assert.assertNull(bindingVal2); + Assert.assertNull(bindingVal3); + // binding4 has null coercion + Assert.assertArrayEquals(new Object[]{null}, (Object[]) bindingVal4); + } else { + Assert.assertArrayEquals(((List) dimSelectorVal).toArray(), (Object[]) bindingVal); + Assert.assertArrayEquals(((List) valueSelectorVal).toArray(), (Object[]) bindingVal); + Assert.assertArrayEquals(((List) dimSelectorVal).toArray(), (Object[]) bindingVal2); + Assert.assertArrayEquals(((List) valueSelectorVal).toArray(), (Object[]) bindingVal2); + Assert.assertArrayEquals(((List) dimSelectorVal).toArray(), (Object[]) bindingVal3); + Assert.assertArrayEquals(((List) valueSelectorVal).toArray(), (Object[]) bindingVal3); + } + + cursor.advance(); + } + } + } + } + + @Test + public void test_long_bindings() + { + final String columnName = "long3"; + for (StorageAdapter adapter : ADAPTERS) { + Sequence cursorSequence = adapter.makeCursors( + null, + adapter.getInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + List flatten = cursorSequence.toList(); + + for (Cursor cursor : flatten) { + ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); + // an assortment of plans + ExpressionPlan plan = ExpressionPlanner.plan( + adapter, + Parser.parse("\"long3\"", TestExprMacroTable.INSTANCE) + ); + ExpressionPlan plan2 = ExpressionPlanner.plan( + adapter, + Parser.parse( + "\"long3\" + 3", + TestExprMacroTable.INSTANCE + ) + ); + + Expr.ObjectBinding bindings = ExpressionSelectors.createBindings(factory, plan); + Expr.ObjectBinding bindings2 = ExpressionSelectors.createBindings(factory, plan2); + + ColumnValueSelector valueSelector = factory.makeColumnValueSelector(columnName); + + while (!cursor.isDone()) { + Object bindingVal = bindings.get(columnName); + Object bindingVal2 = bindings2.get(columnName); + if (valueSelector.isNull()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertNull(bindingVal); + Assert.assertNull(bindingVal2); + } else { + Assert.assertEquals(valueSelector.getObject(), bindingVal); + Assert.assertEquals(valueSelector.getLong(), bindingVal); + Assert.assertEquals(valueSelector.getObject(), bindingVal2); + Assert.assertEquals(valueSelector.getLong(), bindingVal2); + } + cursor.advance(); + } + } + } + } + + @Test + public void test_double_bindings() + { + final String columnName = "double3"; + for (StorageAdapter adapter : ADAPTERS) { + Sequence cursorSequence = adapter.makeCursors( + null, + adapter.getInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + List flatten = cursorSequence.toList(); + + for (Cursor cursor : flatten) { + ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); + // an assortment of plans + ExpressionPlan plan = ExpressionPlanner.plan( + adapter, + Parser.parse("\"double3\"", TestExprMacroTable.INSTANCE) + ); + ExpressionPlan plan2 = ExpressionPlanner.plan( + adapter, + Parser.parse( + "\"double3\" + 3.0", + TestExprMacroTable.INSTANCE + ) + ); + + Expr.ObjectBinding bindings = ExpressionSelectors.createBindings(factory, plan); + Expr.ObjectBinding bindings2 = ExpressionSelectors.createBindings(factory, plan2); + + ColumnValueSelector valueSelector = factory.makeColumnValueSelector(columnName); + + while (!cursor.isDone()) { + Object bindingVal = bindings.get(columnName); + Object bindingVal2 = bindings2.get(columnName); + if (valueSelector.isNull()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertNull(bindingVal); + Assert.assertNull(bindingVal2); + } else { + Assert.assertEquals(valueSelector.getObject(), bindingVal); + Assert.assertEquals(valueSelector.getDouble(), bindingVal); + Assert.assertEquals(valueSelector.getObject(), bindingVal2); + Assert.assertEquals(valueSelector.getDouble(), bindingVal2); + } + cursor.advance(); + } + } + } + } + @Test public void test_canMapOverDictionary_oneSingleValueInput() { @@ -153,6 +482,7 @@ public void test_supplierFromDimensionSelector() final SettableSupplier settableSupplier = new SettableSupplier<>(); final Supplier supplier = ExpressionSelectors.supplierFromDimensionSelector( dimensionSelectorFromSupplier(settableSupplier), + false, false ); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 8815af63c83b..d18a6c1676f1 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -513,8 +513,7 @@ public void testArrayOverlapFilterNonLiteral() throws Exception .build() ), ImmutableList.of( - new Object[]{"[\"a\",\"b\"]"}, - new Object[]{useDefault ? "" : null} + new Object[]{"[\"a\",\"b\"]"} ) ); } @@ -654,10 +653,10 @@ public void testArrayLength() throws Exception ImmutableList.of( new Object[]{"", 2, 1L}, new Object[]{"10.1", 2, 1L}, - new Object[]{"1", 1, 1L}, - new Object[]{"2", 1, 1L}, - new Object[]{"abc", 1, 1L}, - new Object[]{"def", 1, 1L} + useDefault ? new Object[]{"2", 1, 1L} : new Object[]{"1", 1, 1L}, + useDefault ? new Object[]{"1", 0, 1L} : new Object[]{"2", 1, 1L}, + new Object[]{"abc", useDefault ? 0 : null, 1L}, + new Object[]{"def", useDefault ? 0 : null, 1L} ) ); } @@ -785,14 +784,14 @@ public void testArrayPrependAppend() throws Exception ImmutableList results; if (useDefault) { results = ImmutableList.of( - new Object[]{"foo,null", "null,foo", 3L}, + new Object[]{"", "", 3L}, new Object[]{"foo,a,b", "a,b,foo", 1L}, new Object[]{"foo,b,c", "b,c,foo", 1L}, new Object[]{"foo,d", "d,foo", 1L} ); } else { results = ImmutableList.of( - new Object[]{"foo,null", "null,foo", 2L}, + new Object[]{null, null, 2L}, new Object[]{"foo,", ",foo", 1L}, new Object[]{"foo,a,b", "a,b,foo", 1L}, new Object[]{"foo,b,c", "b,c,foo", 1L}, @@ -1205,8 +1204,14 @@ public void testArrayOffsetOf() throws Exception .setContext(QUERY_CONTEXT_DEFAULT) .build() ), - ImmutableList.of( - new Object[]{useDefault ? -1 : null, 4L}, + useDefault + ? ImmutableList.of( + new Object[]{0, 4L}, + new Object[]{-1, 1L}, + new Object[]{1, 1L} + ) + : ImmutableList.of( + new Object[]{null, 4L}, new Object[]{0, 1L}, new Object[]{1, 1L} ) @@ -1248,8 +1253,15 @@ public void testArrayOrdinalOf() throws Exception .setContext(QUERY_CONTEXT_DEFAULT) .build() ), - ImmutableList.of( - new Object[]{useDefault ? -1 : null, 4L}, + useDefault + ? ImmutableList.of( + new Object[]{0, 3L}, + new Object[]{-1, 1L}, + new Object[]{1, 1L}, + new Object[]{2, 1L} + ) + : ImmutableList.of( + new Object[]{null, 4L}, new Object[]{1, 1L}, new Object[]{2, 1L} ) @@ -1321,14 +1333,12 @@ public void testArrayToStringToMultiValueString() throws Exception ImmutableList results; if (useDefault) { results = ImmutableList.of( - new Object[]{ImmutableList.of("", "d"), 3L}, new Object[]{ImmutableList.of("a", "b", "d"), 1L}, new Object[]{ImmutableList.of("b", "c", "d"), 1L}, new Object[]{ImmutableList.of("d", "d"), 1L} ); } else { results = ImmutableList.of( - new Object[]{null, 2L}, new Object[]{ImmutableList.of("", "d"), 1L}, new Object[]{ImmutableList.of("a", "b", "d"), 1L}, new Object[]{ImmutableList.of("b", "c", "d"), 1L}, diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java index 33b105bde07f..d851337b25a0 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java @@ -263,10 +263,7 @@ public void testMultiValueStringOverlapFilterNonLiteral() throws Exception .context(QUERY_CONTEXT_DEFAULT) .build() ), - ImmutableList.of( - new Object[]{"[\"a\",\"b\"]"}, - new Object[]{useDefault ? "" : null} - ) + ImmutableList.of(new Object[]{"[\"a\",\"b\"]"}) ); } @@ -403,10 +400,10 @@ public void testMultiValueStringLength() throws Exception ImmutableList.of( new Object[]{"", 2, 1L}, new Object[]{"10.1", 2, 1L}, - new Object[]{"1", 1, 1L}, - new Object[]{"2", 1, 1L}, - new Object[]{"abc", 1, 1L}, - new Object[]{"def", 1, 1L} + useDefault ? new Object[]{"2", 1, 1L} : new Object[]{"1", 1, 1L}, + useDefault ? new Object[]{"1", 0, 1L} : new Object[]{"2", 1, 1L}, + new Object[]{"abc", useDefault ? 0 : null, 1L}, + new Object[]{"def", useDefault ? 0 : null, 1L} ) ); } @@ -540,14 +537,14 @@ public void testMultiValueStringPrependAppend() throws Exception ImmutableList results; if (useDefault) { results = ImmutableList.of( - new Object[]{"foo,null", "null,foo", 3L}, + new Object[]{"", "", 3L}, new Object[]{"foo,a,b", "a,b,foo", 1L}, new Object[]{"foo,b,c", "b,c,foo", 1L}, new Object[]{"foo,d", "d,foo", 1L} ); } else { results = ImmutableList.of( - new Object[]{"foo,null", "null,foo", 2L}, + new Object[]{null, null, 2L}, new Object[]{"foo,", ",foo", 1L}, new Object[]{"foo,a,b", "a,b,foo", 1L}, new Object[]{"foo,b,c", "b,c,foo", 1L}, @@ -830,8 +827,14 @@ public void testMultiValueStringOffsetOf() throws Exception .setContext(QUERY_CONTEXT_DEFAULT) .build() ), - ImmutableList.of( - new Object[]{useDefault ? -1 : null, 4L}, + useDefault + ? ImmutableList.of( + new Object[]{0, 4L}, + new Object[]{-1, 1L}, + new Object[]{1, 1L} + ) + : ImmutableList.of( + new Object[]{null, 4L}, new Object[]{0, 1L}, new Object[]{1, 1L} ) @@ -873,8 +876,15 @@ public void testMultiValueStringOrdinalOf() throws Exception .setContext(QUERY_CONTEXT_DEFAULT) .build() ), - ImmutableList.of( - new Object[]{useDefault ? -1 : null, 4L}, + useDefault + ? ImmutableList.of( + new Object[]{0, 3L}, + new Object[]{-1, 1L}, + new Object[]{1, 1L}, + new Object[]{2, 1L} + ) + : ImmutableList.of( + new Object[]{null, 4L}, new Object[]{1, 1L}, new Object[]{2, 1L} ) @@ -946,8 +956,7 @@ public void testMultiValueStringToStringToMultiValueString() throws Exception ImmutableList results; if (useDefault) { results = ImmutableList.of( - new Object[]{"d", 7L}, - new Object[]{"", 3L}, + new Object[]{"d", 4L}, new Object[]{"b", 2L}, new Object[]{"a", 1L}, new Object[]{"c", 1L} @@ -955,7 +964,6 @@ public void testMultiValueStringToStringToMultiValueString() throws Exception } else { results = ImmutableList.of( new Object[]{"d", 5L}, - new Object[]{null, 2L}, new Object[]{"b", 2L}, new Object[]{"", 1L}, new Object[]{"a", 1L}, @@ -1137,9 +1145,13 @@ public void testMultiValueListFilterComposed() throws Exception .setContext(QUERY_CONTEXT_DEFAULT) .build() ), - ImmutableList.of( + useDefault ? ImmutableList.of( new Object[]{0, 4L}, new Object[]{1, 2L} + ) : ImmutableList.of( + new Object[]{null, 2L}, + new Object[]{0, 2L}, + new Object[]{1, 2L} ) ); } @@ -1181,7 +1193,9 @@ public void testMultiValueListFilterComposedDeny() throws Exception .setContext(QUERY_CONTEXT_DEFAULT) .build() ), - useDefault ? ImmutableList.of(new Object[]{1, 6L}) : ImmutableList.of(new Object[]{1, 4L}, new Object[]{0, 2L}) + useDefault + ? ImmutableList.of(new Object[]{0, 3L}, new Object[]{1, 3L}) + : ImmutableList.of(new Object[]{1, 4L}, new Object[]{null, 2L}) ); } From 7ba59dc0d2cb65429602a1a92924f4ee22408d43 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 9 Feb 2022 00:57:57 -0800 Subject: [PATCH 2/3] better --- .../org/apache/druid/segment/virtual/ExpressionSelectors.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 3f6e2820dfd0..670fcf41cf46 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -383,7 +383,7 @@ static Supplier supplierFromDimensionSelector(final DimensionSelector se return selector.lookupName(row.get(0)); } else { // column selector factories hate you and use [] and [null] interchangeably for nullish data - if (selector.getObject() == null) { + if (row.size() == 0 || (row.size() == 1 && selector.getObject() == null)) { if (homogenize) { return new Object[]{null}; } else { From f8da8ccc7db7efe4cf2fc57862b88d135e220878 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 9 Feb 2022 05:05:26 -0800 Subject: [PATCH 3/3] fix tests --- .../druid/query/MultiValuedDimensionTest.java | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index c6ef667fe20a..99ee52a76a3a 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -816,10 +816,19 @@ public void testGroupByExpressionArrayExpressionFilter() query ); - List expectedResults = Arrays.asList( - GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", NullHandling.replaceWithDefault() ? -1L : null, "count", 6L), - GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", 1L, "count", 2L) - ); + List expectedResults; + if (NullHandling.replaceWithDefault()) { + expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", -1L, "count", 4L), + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", 0L, "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", 1L, "count", 2L) + ); + } else { + expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", null, "count", 6L), + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", 1L, "count", 2L) + ); + } TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-auto"); } @@ -858,7 +867,7 @@ public void testGroupByExpressionArrayFnArg() ); List expectedResults = Arrays.asList( - GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", NullHandling.replaceWithDefault() ? null : "foo", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot1, foot2, foot3", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot3, foot4, foot5", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot5, foot6, foot7", "count", 2L) @@ -977,7 +986,7 @@ public void testGroupByExpressionFoldArrayToStringWithConcats() .setVirtualColumns( new ExpressionVirtualColumn( "tt", - "fold((tag, acc) -> concat(concat(acc, case_searched(acc == '', '', ', '), concat('foo', tag)))), tags, '')", + "fold((tag, acc) -> concat(concat(acc, case_searched(acc == '', '', ', '), concat('foo', tag))), tags, '')", ColumnType.STRING, TestExprMacroTable.INSTANCE ) @@ -995,7 +1004,7 @@ public void testGroupByExpressionFoldArrayToStringWithConcats() ); List expectedResults = Arrays.asList( - GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foo", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", NullHandling.replaceWithDefault() ? null : "foo", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot1, foot2, foot3", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot3, foot4, foot5", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot5, foot6, foot7", "count", 2L)