From 02de082c66918a473339e4229f75dea5c90718b0 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 31 Jan 2023 21:05:34 -0800 Subject: [PATCH 01/11] various nested column fixes changes: * modified druid schema column type computation to special case COMPLEX handling to choose COMPLEX if any column in any segment is COMPLEX * NestedFieldVirtualColumn can now work correctly on any type of column, returning either a column selector if a root path, or nil selector if not * fixed a random bug with NilVectorSelector when using a vector size larger than the default and druid.generic.useDefaultValueForNull=false would have the nulls vector set to all false instead of true * fixed an overly aggressive check in ExprEval.ofType when handling complex types which would try to treat any string as base64 without gracefully falling back if it was not in fact base64 encoded, along with special handling for complex * added ExpressionVectorSelectors.castValueSelectorToObject and ExpressionVectorSelectors.castObjectSelectorToNumeric as convenience methods to cast vector selectors using cast expressions without the trouble of constructing an expression. 
the polymorphic nature of the non-vectorized engine (and significantly larger overhead of non-vectorized expression processing) made adding similar methods for non-vectorized selectors less attractive and so have not been added at this time * more tests more better --- .../org/apache/druid/math/expr/ExprEval.java | 11 +- .../druid/math/expr/IdentifierExpr.java | 68 +--- .../math/expr/vector/VectorProcessors.java | 65 ++++ .../org/apache/druid/math/expr/EvalTest.java | 15 + .../CompressedNestedDataComplexColumn.java | 5 +- .../nested/NestedDataColumnSupplier.java | 133 +++++-- .../nested/NestedDataComplexTypeSerde.java | 2 +- .../segment/vector/NilVectorSelector.java | 5 +- .../virtual/ExpressionVectorSelectors.java | 43 +++ .../virtual/NestedFieldVirtualColumn.java | 347 +++++++++++++++--- .../groupby/NestedDataGroupByQueryTest.java | 189 ++++++---- .../query/scan/NestedDataScanQueryTest.java | 6 +- .../nested/NestedDataColumnSupplierTest.java | 4 +- .../segment/vector/NilVectorSelectorTest.java | 85 +++++ .../ExpressionVectorSelectorsCastTest.java | 209 +++++++++++ .../resources/simple-nested-test-data.json | 2 +- .../calcite/schema/SegmentMetadataCache.java | 19 +- .../calcite/CalciteNestedDataQueryTest.java | 343 ++++++++++++++++- 18 files changed, 1322 insertions(+), 229 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/segment/vector/NilVectorSelectorTest.java create mode 100644 processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsCastTest.java diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java index 80c5b9731c99..33876c7be44e 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -520,9 +520,18 @@ public static ExprEval ofType(@Nullable ExpressionType type, @Nullable Object va } return ofDouble(null); case COMPLEX: + // json isn't 
currently defined in druid-core, this can be reworked once + // https://github.com/apache/druid/pull/13698 is merged (or COMPLEX is promoted to a real built-in type(s) + if ("json".equals(type.getComplexTypeName())) { + return ofComplex(type, value); + } byte[] bytes = null; if (value instanceof String) { - bytes = StringUtils.decodeBase64String((String) value); + try { + bytes = StringUtils.decodeBase64String((String) value); + } + catch (IllegalArgumentException ignored) { + } } else if (value instanceof byte[]) { bytes = (byte[]) value; } diff --git a/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java b/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java index 01622a5b2687..63b5535f8445 100644 --- a/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java +++ b/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java @@ -21,11 +21,8 @@ import org.apache.commons.lang.StringEscapeUtils; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.math.expr.vector.ExprEvalDoubleVector; -import org.apache.druid.math.expr.vector.ExprEvalLongVector; -import org.apache.druid.math.expr.vector.ExprEvalObjectVector; -import org.apache.druid.math.expr.vector.ExprEvalVector; import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.VectorProcessors; import javax.annotation.Nullable; import java.util.Objects; @@ -152,51 +149,7 @@ public boolean canVectorize(InputBindingInspector inspector) @Override public ExprVectorProcessor buildVectorized(VectorInputBindingInspector inspector) { - ExpressionType inputType = inspector.getType(binding); - - if (inputType == null) { - // nil column, we can be anything, so be a string because it's the most flexible - // (numbers will be populated with default values in default mode and non-null) - return new IdentifierVectorProcessor(ExpressionType.STRING) - { - @Override - public ExprEvalVector evalVector(VectorInputBinding 
bindings) - { - return new ExprEvalObjectVector(bindings.getObjectVector(binding)); - } - }; - } - switch (inputType.getType()) { - case LONG: - return new IdentifierVectorProcessor(inputType) - { - @Override - public ExprEvalVector evalVector(VectorInputBinding bindings) - { - return new ExprEvalLongVector(bindings.getLongVector(binding), bindings.getNullVector(binding)); - } - }; - case DOUBLE: - return new IdentifierVectorProcessor(inputType) - { - @Override - public ExprEvalVector evalVector(VectorInputBinding bindings) - { - return new ExprEvalDoubleVector(bindings.getDoubleVector(binding), bindings.getNullVector(binding)); - } - }; - case STRING: - return new IdentifierVectorProcessor(inputType) - { - @Override - public ExprEvalVector evalVector(VectorInputBinding bindings) - { - return new ExprEvalObjectVector(bindings.getObjectVector(binding)); - } - }; - default: - throw Exprs.cannotVectorize(this); - } + return VectorProcessors.identifier(inspector, binding); } @Override @@ -218,20 +171,3 @@ public int hashCode() return Objects.hash(identifier); } } - -abstract class IdentifierVectorProcessor implements ExprVectorProcessor -{ - private final ExpressionType outputType; - - public IdentifierVectorProcessor(ExpressionType outputType) - { - this.outputType = outputType; - } - - @Override - public ExpressionType getOutputType() - { - return outputType; - } -} - diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java b/core/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java index ee066065ad15..e23e03eef389 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java @@ -157,6 +157,55 @@ public ExpressionType getOutputType() }; } + public static ExprVectorProcessor identifier(Expr.VectorInputBindingInspector inspector, String binding) + { + ExpressionType inputType = inspector.getType(binding); + + if 
(inputType == null) { + // nil column, we can be anything, so be a string because it's the most flexible + // (numbers will be populated with default values in default mode and non-null) + return new IdentifierVectorProcessor(ExpressionType.STRING) + { + @Override + public ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + return new ExprEvalObjectVector(bindings.getObjectVector(binding)); + } + }; + } + switch (inputType.getType()) { + case LONG: + return new IdentifierVectorProcessor(inputType) + { + @Override + public ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + return new ExprEvalLongVector(bindings.getLongVector(binding), bindings.getNullVector(binding)); + } + }; + case DOUBLE: + return new IdentifierVectorProcessor(inputType) + { + @Override + public ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + return new ExprEvalDoubleVector(bindings.getDoubleVector(binding), bindings.getNullVector(binding)); + } + }; + case STRING: + return new IdentifierVectorProcessor(inputType) + { + @Override + public ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + return new ExprEvalObjectVector(bindings.getObjectVector(binding)); + } + }; + default: + throw Exprs.cannotVectorize("[" + binding + "]"); + } + } + public static ExprVectorProcessor parseLong(Expr.VectorInputBindingInspector inspector, Expr arg, int radix) { final ExprVectorProcessor processor = new LongOutObjectInFunctionVectorProcessor( @@ -889,4 +938,20 @@ private VectorProcessors() { // No instantiation } + + abstract static class IdentifierVectorProcessor implements ExprVectorProcessor + { + private final ExpressionType outputType; + + public IdentifierVectorProcessor(ExpressionType outputType) + { + this.outputType = outputType; + } + + @Override + public ExpressionType getOutputType() + { + return outputType; + } + } } diff --git a/core/src/test/java/org/apache/druid/math/expr/EvalTest.java 
b/core/src/test/java/org/apache/druid/math/expr/EvalTest.java index b15f5f810639..c5d71d306a8f 100644 --- a/core/src/test/java/org/apache/druid/math/expr/EvalTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/EvalTest.java @@ -813,5 +813,20 @@ public void testEvalOfType() eval = ExprEval.ofType(ExpressionType.STRING_ARRAY, new Object[] {1.0, 2L, "3", true, false}); Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); Assert.assertArrayEquals(new Object[] {"1.0", "2", "3", "true", "false"}, (Object[]) eval.value()); + + // json type isn't defined in druid-core + ExpressionType json = ExpressionType.fromString("COMPLEX"); + eval = ExprEval.ofType(json, ImmutableMap.of("x", 1L, "y", 2L)); + Assert.assertEquals(json, eval.type()); + Assert.assertEquals(ImmutableMap.of("x", 1L, "y", 2L), eval.value()); + + eval = ExprEval.ofType(json, "hello"); + Assert.assertEquals(json, eval.type()); + Assert.assertEquals("hello", eval.value()); + + ExpressionType stringyComplexThing = ExpressionType.fromString("COMPLEX"); + eval = ExprEval.ofType(stringyComplexThing, "notbase64"); + Assert.assertEquals(stringyComplexThing, eval.type()); + Assert.assertEquals("notbase64", eval.value()); } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index 88b8a5dd615b..77919c4bffd8 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -510,6 +510,7 @@ private ColumnHolder readNestedFieldColumn(String field) metadata.getBitmapSerdeFactory().getObjectStrategy(), columnBuilder.getFileMapper() ); + final boolean hasNull = localDictionarySupplier.get().get(0) == 0; Supplier> columnSupplier = () -> { FixedIndexed localDict = localDictionarySupplier.get(); return 
closer.register(new NestedFieldLiteralDictionaryEncodedColumn( @@ -521,13 +522,13 @@ private ColumnHolder readNestedFieldColumn(String field) longDictionarySupplier.get(), doubleDictionarySupplier.get(), localDict, - localDict.get(0) == 0 + hasNull ? rBitmaps.get(0) : metadata.getBitmapSerdeFactory().getBitmapFactory().makeEmptyImmutableBitmap() )); }; columnBuilder.setHasMultipleValues(false) - .setHasNulls(true) + .setHasNulls(hasNull) .setDictionaryEncodedColumnSupplier(columnSupplier); columnBuilder.setIndexSupplier( new NestedFieldLiteralColumnIndexSupplier( diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java index 4e1ecbb994ed..4e0c4825dd4f 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java @@ -20,7 +20,6 @@ package org.apache.druid.segment.nested; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Supplier; import org.apache.druid.collections.bitmap.ImmutableBitmap; @@ -46,35 +45,24 @@ public class NestedDataColumnSupplier implements Supplier { - private final byte version; - private final NestedDataColumnMetadata metadata; - private final GenericIndexed fields; - private final NestedLiteralTypeInfo fieldInfo; - private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; - private final ImmutableBitmap nullValues; - private final GenericIndexed stringDictionary; - private final Supplier frontCodedStringDictionarySupplier; - private final Supplier> longDictionarySupplier; - private final Supplier> doubleDictionarySupplier; - private final ColumnConfig columnConfig; - private final SmooshedFileMapper fileMapper; - - @Nullable - 
private final ColumnType simpleType; - - public NestedDataColumnSupplier( + public static NestedDataColumnSupplier read( ByteBuffer bb, ColumnBuilder columnBuilder, ColumnConfig columnConfig, ObjectMapper jsonMapper ) { - this(bb, columnBuilder, columnConfig, jsonMapper, ColumnType.LONG.getStrategy(), ColumnType.DOUBLE.getStrategy()); + return read( + bb, + columnBuilder, + columnConfig, + jsonMapper, + ColumnType.LONG.getStrategy(), + ColumnType.DOUBLE.getStrategy() + ); } - // strictly for testing? - @VisibleForTesting - public NestedDataColumnSupplier( + public static NestedDataColumnSupplier read( ByteBuffer bb, ColumnBuilder columnBuilder, ColumnConfig columnConfig, @@ -83,11 +71,22 @@ public NestedDataColumnSupplier( TypeStrategy doubleTypeStrategy ) { - this.version = bb.get(); + final byte version = bb.get(); if (version == 0x03 || version == 0x04) { try { + final NestedDataColumnMetadata metadata; + final GenericIndexed fields; + final NestedLiteralTypeInfo fieldInfo; + final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; + final ImmutableBitmap nullValues; + final GenericIndexed stringDictionary; + final Supplier frontCodedStringDictionarySupplier; + final Supplier> longDictionarySupplier; + final Supplier> doubleDictionarySupplier; final SmooshedFileMapper mapper = columnBuilder.getFileMapper(); + ColumnType simpleType; + metadata = jsonMapper.readValue( IndexMerger.SERIALIZER_UTILS.readString(bb), NestedDataColumnMetadata.class @@ -95,9 +94,13 @@ public NestedDataColumnSupplier( fields = GenericIndexed.read(bb, GenericIndexed.STRING_STRATEGY, mapper); fieldInfo = NestedLiteralTypeInfo.read(bb, fields.size()); - if (fields.size() == 1 && - ((version == 0x03 && NestedPathFinder.JQ_PATH_ROOT.equals(fields.get(0))) || - (version == 0x04 && NestedPathFinder.JSON_PATH_ROOT.equals(fields.get(0)))) + if (fields.size() == 0) { + // all nulls, in the future we'll deal with this better... 
but for now lets just call it a string because + // it is the most permissive (besides json) + simpleType = ColumnType.STRING; + } else if (fields.size() == 1 && + ((version == 0x03 && NestedPathFinder.JQ_PATH_ROOT.equals(fields.get(0))) || + (version == 0x04 && NestedPathFinder.JSON_PATH_ROOT.equals(fields.get(0)))) ) { simpleType = fieldInfo.getTypes(0).getSingleType(); } else { @@ -106,6 +109,7 @@ public NestedDataColumnSupplier( final ByteBuffer stringDictionaryBuffer = loadInternalFile( mapper, + metadata, NestedDataColumnSerializer.STRING_DICTIONARY_FILE_NAME ); @@ -139,6 +143,7 @@ public NestedDataColumnSupplier( } final ByteBuffer longDictionaryBuffer = loadInternalFile( mapper, + metadata, NestedDataColumnSerializer.LONG_DICTIONARY_FILE_NAME ); longDictionarySupplier = FixedIndexed.read( @@ -149,6 +154,7 @@ public NestedDataColumnSupplier( ); final ByteBuffer doubleDictionaryBuffer = loadInternalFile( mapper, + metadata, NestedDataColumnSerializer.DOUBLE_DICTIONARY_FILE_NAME ); doubleDictionarySupplier = FixedIndexed.read( @@ -157,7 +163,7 @@ public NestedDataColumnSupplier( metadata.getByteOrder(), Double.BYTES ); - final ByteBuffer rawBuffer = loadInternalFile(mapper, NestedDataColumnSerializer.RAW_FILE_NAME); + final ByteBuffer rawBuffer = loadInternalFile(mapper, metadata, NestedDataColumnSerializer.RAW_FILE_NAME); compressedRawColumnSupplier = CompressedVariableSizedBlobColumnSupplier.fromByteBuffer( NestedDataColumnSerializer.getInternalFileName( metadata.getFileNameBase(), NestedDataColumnSerializer.RAW_FILE_NAME @@ -168,11 +174,31 @@ public NestedDataColumnSupplier( ); if (metadata.hasNulls()) { columnBuilder.setHasNulls(true); - final ByteBuffer nullIndexBuffer = loadInternalFile(mapper, NestedDataColumnSerializer.NULL_BITMAP_FILE_NAME); + final ByteBuffer nullIndexBuffer = loadInternalFile( + mapper, + metadata, + NestedDataColumnSerializer.NULL_BITMAP_FILE_NAME + ); nullValues = 
metadata.getBitmapSerdeFactory().getObjectStrategy().fromByteBufferWithSize(nullIndexBuffer); } else { nullValues = metadata.getBitmapSerdeFactory().getBitmapFactory().makeEmptyImmutableBitmap(); } + + return new NestedDataColumnSupplier( + version, + metadata, + fields, + fieldInfo, + compressedRawColumnSupplier, + nullValues, + stringDictionary, + frontCodedStringDictionarySupplier, + longDictionarySupplier, + doubleDictionarySupplier, + columnConfig, + Preconditions.checkNotNull(mapper, "Null fileMapper"), + simpleType + ); } catch (IOException ex) { throw new RE(ex, "Failed to deserialize V%s column.", version); @@ -180,10 +206,53 @@ public NestedDataColumnSupplier( } else { throw new RE("Unknown version " + version); } + } + + private final byte version; + private final NestedDataColumnMetadata metadata; + private final GenericIndexed fields; + private final NestedLiteralTypeInfo fieldInfo; + private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; + private final ImmutableBitmap nullValues; + private final GenericIndexed stringDictionary; + private final Supplier frontCodedStringDictionarySupplier; + private final Supplier> longDictionarySupplier; + private final Supplier> doubleDictionarySupplier; + private final ColumnConfig columnConfig; + private final SmooshedFileMapper fileMapper; - fileMapper = Preconditions.checkNotNull(columnBuilder.getFileMapper(), "Null fileMapper"); + @Nullable + private final ColumnType simpleType; + private NestedDataColumnSupplier( + byte version, + NestedDataColumnMetadata metadata, + GenericIndexed fields, + NestedLiteralTypeInfo fieldInfo, + CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, + ImmutableBitmap nullValues, + GenericIndexed stringDictionary, + Supplier frontCodedStringDictionarySupplier, + Supplier> longDictionarySupplier, + Supplier> doubleDictionarySupplier, + ColumnConfig columnConfig, + SmooshedFileMapper fileMapper, + @Nullable ColumnType simpleType + ) + { + 
this.version = version; + this.metadata = metadata; + this.fields = fields; + this.fieldInfo = fieldInfo; + this.compressedRawColumnSupplier = compressedRawColumnSupplier; + this.nullValues = nullValues; + this.stringDictionary = stringDictionary; + this.frontCodedStringDictionarySupplier = frontCodedStringDictionarySupplier; + this.longDictionarySupplier = longDictionarySupplier; + this.doubleDictionarySupplier = doubleDictionarySupplier; this.columnConfig = columnConfig; + this.fileMapper = fileMapper; + this.simpleType = simpleType; } @Override @@ -261,7 +330,11 @@ private NestedDataColumnV4 makeV4() ); } - private ByteBuffer loadInternalFile(SmooshedFileMapper fileMapper, String internalFileName) throws IOException + private static ByteBuffer loadInternalFile( + SmooshedFileMapper fileMapper, + NestedDataColumnMetadata metadata, + String internalFileName + ) throws IOException { return fileMapper.mapFile( NestedDataColumnSerializer.getInternalFileName(metadata.getFileNameBase(), internalFileName) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java index fec5dcfeaa11..868b735e213f 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java @@ -83,7 +83,7 @@ public void deserializeColumn( ColumnConfig columnConfig ) { - NestedDataColumnSupplier supplier = new NestedDataColumnSupplier(buffer, builder, columnConfig, OBJECT_MAPPER); + NestedDataColumnSupplier supplier = NestedDataColumnSupplier.read(buffer, builder, columnConfig, OBJECT_MAPPER); ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); capabilitiesBuilder.setDictionaryEncoded(true); capabilitiesBuilder.setDictionaryValuesSorted(true); diff --git 
a/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java index 3e1bed6076cf..35b3834105f8 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java @@ -24,6 +24,7 @@ import org.apache.druid.segment.QueryableIndexStorageAdapter; import javax.annotation.Nullable; +import java.util.Arrays; public class NilVectorSelector implements VectorValueSelector, VectorObjectSelector, SingleValueDimensionVectorSelector, IdLookup @@ -82,9 +83,11 @@ public static NilVectorSelector create(final VectorSizeInspector vectorSizeInspe DEFAULT_OBJECT_VECTOR ); } else { + final boolean[] nulls = new boolean[vectorSizeInspector.getMaxVectorSize()]; + Arrays.fill(nulls, NullHandling.sqlCompatible()); return new NilVectorSelector( vectorSizeInspector, - new boolean[vectorSizeInspector.getMaxVectorSize()], + nulls, new int[vectorSizeInspector.getMaxVectorSize()], new long[vectorSizeInspector.getMaxVectorSize()], new float[vectorSizeInspector.getMaxVectorSize()], diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java index 3ee12ff88564..77c40b551e10 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java @@ -24,10 +24,14 @@ import org.apache.druid.math.expr.ExprType; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor; import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.VectorProcessors; import 
org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.vector.ConstantVectorSelectors; +import org.apache.druid.segment.vector.ReadableVectorInspector; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -103,6 +107,45 @@ public static VectorObjectSelector makeVectorObjectSelector( return new ExpressionVectorObjectSelector(processor, bindings); } + public static VectorObjectSelector castValueSelectorToObject( + ReadableVectorInspector inspector, + String columnName, + VectorValueSelector selector, + ColumnType selectorType, + ColumnType castTo + ) + { + ExpressionVectorInputBinding binding = new ExpressionVectorInputBinding(inspector); + binding.addNumeric(columnName, ExpressionType.fromColumnType(selectorType), selector); + return new ExpressionVectorObjectSelector( + CastToTypeVectorProcessor.cast( + VectorProcessors.identifier(binding, columnName), + ExpressionType.fromColumnType(castTo) + ), + binding + ); + } + + public static VectorValueSelector castObjectSelectorToNumeric( + ReadableVectorInspector inspector, + String columnName, + VectorObjectSelector selector, + ColumnType selectorType, + ColumnType castTo + ) + { + Preconditions.checkArgument(castTo.isNumeric(), "Must cast to a numeric type to make a value selector"); + ExpressionVectorInputBinding binding = new ExpressionVectorInputBinding(inspector); + binding.addObjectSelector(columnName, ExpressionType.fromColumnType(selectorType), selector); + return new ExpressionVectorValueSelector( + CastToTypeVectorProcessor.cast( + VectorProcessors.identifier(binding, columnName), + ExpressionType.fromColumnType(castTo) + ), + binding + ); + } + private static Expr.VectorInputBinding createVectorBindings( 
Expr.BindingAnalysis bindingAnalysis, VectorColumnSelectorFactory vectorColumnSelectorFactory diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index df06eb44f321..2a3d1e08aefb 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -24,10 +24,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; import com.google.common.primitives.Doubles; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.guava.GuavaUtils; +import org.apache.druid.java.util.common.Numbers; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseSingleValueDimensionSelector; import org.apache.druid.segment.ColumnInspector; @@ -35,13 +39,19 @@ import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.NilColumnValueSelector; import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; +import 
org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.column.ValueTypes; +import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.ReadableOffset; import org.apache.druid.segment.nested.NestedDataComplexColumn; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; @@ -59,6 +69,7 @@ import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; +import java.nio.ByteBuffer; import java.util.Collections; import java.util.List; import java.util.Objects; @@ -205,7 +216,7 @@ public DimensionSelector makeDimensionSelector( // this dimension selector is used for realtime queries, nested paths are not themselves dictionary encoded until // written to segment, so we fall back to processing the structured data from a column value selector on the // complex column - ColumnValueSelector valueSelector = makeColumnValueSelector(dimensionSpec.getOutputName(), factory); + ColumnValueSelector valueSelector = makeColumnValueSelector(dimensionSpec.getOutputName(), factory); return new FieldDimensionSelector(valueSelector); } @@ -216,7 +227,7 @@ public ColumnValueSelector makeColumnValueSelector( ) { // this column value selector is used for realtime queries, so we always process StructuredData - final ColumnValueSelector baseSelector = factory.makeColumnValueSelector(this.columnName); + final ColumnValueSelector baseSelector = factory.makeColumnValueSelector(this.columnName); // processFromRaw is true that means JSON_QUERY, which can return partial results, otherwise this virtual column // is JSON_VALUE which only returns literals, so use the literal value selector instead @@ -233,20 +244,40 @@ public DimensionSelector makeDimensionSelector( ReadableOffset offset ) { - final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(columnSelector, columnName); - if (column == null) { - // complex column itself 
didn't exist + ColumnHolder holder = columnSelector.getColumnHolder(columnName); + if (holder == null) { + // column doesn't exist return DimensionSelector.constant(null); } if (hasNegativeArrayIndex) { - return new FieldDimensionSelector( - new RawFieldLiteralColumnValueSelector( - column.makeColumnValueSelector(offset), - parts - ) - ); + // if the path has negative array elements, then we have to use the 'raw' processing of the FieldDimensionSelector + // created with the column selector factory instead of using the optimized nested field column, return null + // to fall through + return null; + } + + BaseColumn theColumn = holder.getColumn(); + if (theColumn instanceof NestedDataComplexColumn) { + final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; + return column.makeDimensionSelector(parts, offset, dimensionSpec.getExtractionFn()); + } + + // ok, so not a nested column, but we can still do stuff + if (!parts.isEmpty()) { + // we are being asked for a path that will never exist, so we are null selector + return DimensionSelector.constant(null); + } + + // the path was the 'root', we're in luck, spit out a selector that behaves the same way as a nested column + if (theColumn instanceof DictionaryEncodedColumn) { + final DictionaryEncodedColumn column = (DictionaryEncodedColumn) theColumn; + return new AutoCastingValueSelector(column.makeDimensionSelector(offset, dimensionSpec.getExtractionFn())); } - return column.makeDimensionSelector(parts, offset, dimensionSpec.getExtractionFn()); + return ValueTypes.makeNumericWrappingDimensionSelector( + holder.getCapabilities().getType(), + theColumn.makeColumnValueSelector(offset), + dimensionSpec.getExtractionFn() + ); } @@ -258,18 +289,37 @@ public ColumnValueSelector makeColumnValueSelector( ReadableOffset offset ) { - final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(columnSelector, this.columnName); - if (column == null) { + ColumnHolder holder = 
columnSelector.getColumnHolder(this.columnName); + if (holder == null) { return NilColumnValueSelector.instance(); } + BaseColumn theColumn = holder.getColumn(); - // processFromRaw is true, that means JSON_QUERY, which can return partial results, otherwise this virtual column - // is JSON_VALUE which only returns literals, so we can use the nested columns value selector - return processFromRaw - ? new RawFieldColumnSelector(column.makeColumnValueSelector(offset), parts) - : hasNegativeArrayIndex - ? new RawFieldLiteralColumnValueSelector(column.makeColumnValueSelector(offset), parts) - : column.makeColumnValueSelector(parts, offset); + if (processFromRaw || hasNegativeArrayIndex) { + // if the path has negative array elements, or has set the flag to process 'raw' values explicitly (JSON_QUERY), + // then we use the 'raw' processing of the RawFieldColumnSelector/RawFieldLiteralColumnValueSelector created + // with the column selector factory instead of using the optimized nested field column + return null; + } + + // "JSON_VALUE", which only returns literals, on a NestedDataComplexColumn, so we can use the fields value selector + if (theColumn instanceof NestedDataComplexColumn) { + final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; + return column.makeColumnValueSelector(parts, offset); + } + + // ok, so not a nested column, but we can still do stuff + if (!parts.isEmpty()) { + // we are being asked for a path that will never exist, so we are null selector + return NilColumnValueSelector.instance(); + } + + // the path was the 'root', we're in luck, spit out a selector that behaves the same way as a nested column + if (theColumn instanceof DictionaryEncodedColumn) { + final DictionaryEncodedColumn column = (DictionaryEncodedColumn) theColumn; + return new AutoCastingValueSelector(column.makeDimensionSelector(offset, null)); + } + return theColumn.makeColumnValueSelector(offset); } @Override @@ -286,12 +336,26 @@ public 
SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector ReadableVectorOffset offset ) { - final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(columnSelector, columnName); - if (column == null) { + ColumnHolder holder = columnSelector.getColumnHolder(columnName); + if (holder == null) { return NilVectorSelector.create(offset); } + BaseColumn theColumn = holder.getColumn(); + + if (theColumn instanceof NestedDataComplexColumn) { + final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; + return column.makeSingleValueDimensionVectorSelector(parts, offset); + } - return column.makeSingleValueDimensionVectorSelector(parts, offset); + // not a nested column + if (!parts.isEmpty()) { + // we are being asked for a path that will never exist, so we are null selector + return NilVectorSelector.create(offset); + } + + // we will not end up here unless underlying column capabilities lied about something being dictionary encoded... + // so no need for magic casting like nonvectorized engine + return ((DictionaryEncodedColumn) theColumn).makeSingleValueDimensionVectorSelector(offset); } @Nullable @@ -302,15 +366,36 @@ public VectorObjectSelector makeVectorObjectSelector( ReadableVectorOffset offset ) { - final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(columnSelector, this.columnName); - if (column == null) { + ColumnHolder holder = columnSelector.getColumnHolder(this.columnName); + if (holder == null) { return NilVectorSelector.create(offset); } + BaseColumn theColumn = holder.getColumn(); + + // processFromRaw is true, that means JSON_QUERY, which can return partial results, otherwise this virtual column // is JSON_VALUE which only returns literals, so we can use the nested columns value selector - return processFromRaw - ? 
new RawFieldVectorObjectSelector(column.makeVectorObjectSelector(offset), parts) - : column.makeVectorObjectSelector(parts, offset); + if (theColumn instanceof NestedDataComplexColumn) { + final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; + if (processFromRaw) { + return new RawFieldVectorObjectSelector(column.makeVectorObjectSelector(offset), parts); + } + return column.makeVectorObjectSelector(parts, offset); + } + if (!parts.isEmpty()) { + return NilVectorSelector.create(offset); + } + ColumnCapabilities capabilities = holder.getCapabilities(); + if (capabilities.isNumeric()) { + return ExpressionVectorSelectors.castValueSelectorToObject( + offset, + this.columnName, + theColumn.makeVectorValueSelector(offset), + capabilities.toColumnType(), + expectedType + ); + } + return theColumn.makeVectorObjectSelector(offset); } @Nullable @@ -321,11 +406,30 @@ public VectorValueSelector makeVectorValueSelector( ReadableVectorOffset offset ) { - final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(columnSelector, this.columnName); - if (column == null) { + ColumnHolder holder = columnSelector.getColumnHolder(this.columnName); + if (holder == null) { + return NilVectorSelector.create(offset); + } + BaseColumn theColumn = holder.getColumn(); + if (!(theColumn instanceof NestedDataComplexColumn)) { + + if (parts.isEmpty()) { + ColumnCapabilities capabilities = holder.getCapabilities(); + if (theColumn instanceof DictionaryEncodedColumn) { + return ExpressionVectorSelectors.castObjectSelectorToNumeric( + offset, + this.columnName, + theColumn.makeVectorObjectSelector(offset), + capabilities.toColumnType(), + expectedType + ); + } + return theColumn.makeVectorValueSelector(offset); + } return NilVectorSelector.create(offset); } + final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; // if column is numeric, it has a vector value selector, so we can directly make a vector value selector // if we are 
missing an expectedType, then we've got nothing else to work with so try it anyway if (column.isNumeric(parts) || expectedType == null) { @@ -482,12 +586,14 @@ public ColumnCapabilities capabilities(String columnName) // JSON_QUERY always returns a StructuredData return ColumnCapabilitiesImpl.createDefault() .setType(NestedDataComplexTypeSerde.TYPE) - .setHasMultipleValues(false); + .setHasMultipleValues(false) + .setHasNulls(true); } // this should only be used for 'realtime' queries, so don't indicate that we are dictionary encoded or have indexes // from here return ColumnCapabilitiesImpl.createDefault() - .setType(expectedType != null ? expectedType : ColumnType.STRING); + .setType(expectedType != null ? expectedType : ColumnType.STRING) + .setHasNulls(expectedType == null || (expectedType.isNumeric() && NullHandling.sqlCompatible())); } @Override @@ -497,7 +603,8 @@ public ColumnCapabilities capabilities(ColumnInspector inspector, String columnN // JSON_QUERY always returns a StructuredData return ColumnCapabilitiesImpl.createDefault() .setType(NestedDataComplexTypeSerde.TYPE) - .setHasMultipleValues(false); + .setHasMultipleValues(false) + .setHasNulls(true); } // ColumnInspector isn't really enough... 
we need the ability to read the complex column itself to examine // the nested fields type information to really be accurate here, so we rely on the expectedType to guide us @@ -508,7 +615,8 @@ public ColumnCapabilities capabilities(ColumnInspector inspector, String columnN .setDictionaryEncoded(true) .setDictionaryValuesSorted(true) .setDictionaryValuesUnique(true) - .setHasBitmapIndexes(true); + .setHasBitmapIndexes(true) + .setHasNulls(expectedType == null || (expectedType.isNumeric() && NullHandling.sqlCompatible())); } return capabilities(columnName); } @@ -577,30 +685,21 @@ public RawFieldLiteralColumnValueSelector(ColumnValueSelector baseSelector, List public double getDouble() { Object o = getObject(); - if (o instanceof Number) { - return ((Number) o).doubleValue(); - } - return 0.0; + return Numbers.tryParseDouble(o, 0.0); } @Override public float getFloat() { Object o = getObject(); - if (o instanceof Number) { - return ((Number) o).floatValue(); - } - return 0f; + return Numbers.tryParseFloat(o, 0.0f); } @Override public long getLong() { Object o = getObject(); - if (o instanceof Number) { - return ((Number) o).longValue(); - } - return 0L; + return Numbers.tryParseLong(o, 0L); } @Override @@ -613,7 +712,8 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public boolean isNull() { - return !(getObject() instanceof Number); + Object o = getObject(); + return !(o instanceof Number || (o instanceof String && Doubles.tryParse((String) o) != null)); } @Nullable @@ -650,8 +750,8 @@ public RawFieldColumnSelector(ColumnValueSelector baseSelector, List valueSelector; - public FieldDimensionSelector(ColumnValueSelector valueSelector) + public FieldDimensionSelector(ColumnValueSelector valueSelector) { this.valueSelector = valueSelector; } @@ -780,4 +884,133 @@ protected String getValue() return String.valueOf(val); } } + + /** + * DimensionSelector that implements implicit casting when used via + */ + private static class 
AutoCastingValueSelector implements DimensionSelector + { + private final DimensionSelector baseSelector; + + public AutoCastingValueSelector(DimensionSelector baseSelector) + { + this.baseSelector = baseSelector; + } + + @Override + public IndexedInts getRow() + { + return baseSelector.getRow(); + } + + @Override + public ValueMatcher makeValueMatcher(@Nullable String value) + { + return baseSelector.makeValueMatcher(value); + } + + @Override + public ValueMatcher makeValueMatcher(Predicate predicate) + { + return baseSelector.makeValueMatcher(predicate); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + baseSelector.inspectRuntimeShape(inspector); + } + + @Nullable + @Override + public Object getObject() + { + return baseSelector.getObject(); + } + + @Override + public Class classOfObject() + { + return baseSelector.classOfObject(); + } + + @Override + public int getValueCardinality() + { + return baseSelector.getValueCardinality(); + } + + @Nullable + @Override + public String lookupName(int id) + { + return baseSelector.lookupName(id); + } + + @Nullable + @Override + public ByteBuffer lookupNameUtf8(int id) + { + return baseSelector.lookupNameUtf8(id); + } + + @Override + public boolean supportsLookupNameUtf8() + { + return baseSelector.supportsLookupNameUtf8(); + } + + @Override + public float getFloat() + { + final IndexedInts row = getRow(); + if (row.size() != 1) { + return 0f; + } + return Numbers.tryParseFloat(lookupName(row.get(0)), 0f); + } + + @Override + public double getDouble() + { + final IndexedInts row = getRow(); + if (row.size() != 1) { + return 0.0; + } + return Numbers.tryParseDouble(lookupName(row.get(0)), 0.0); + } + + @Override + public long getLong() + { + final IndexedInts row = getRow(); + if (row.size() != 1) { + return 0L; + } + return Numbers.tryParseLong(lookupName(row.get(0)), 0L); + } + + @Override + public boolean isNull() + { + final IndexedInts row = getRow(); + if (row.size() != 1) { + 
return true; + } + return Doubles.tryParse(lookupName(row.get(0))) == null; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return baseSelector.nameLookupPossibleInAdvance(); + } + + @Nullable + @Override + public IdLookup idLookup() + { + return baseSelector.idLookup(); + } + } } diff --git a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java index dac3295e3caa..9f33735c03da 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java @@ -50,7 +50,6 @@ import org.junit.Before; import org.junit.Rule; import org.junit.Test; -import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -65,8 +64,6 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest { private static final Logger LOG = new Logger(NestedDataGroupByQueryTest.class); - @Rule - public ExpectedException expectedException = ExpectedException.none(); @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); @@ -78,8 +75,6 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest private final TrinaryFn> segmentsGenerator; private final String segmentsName; - private boolean cannotVectorize = false; - public NestedDataGroupByQueryTest( GroupByQueryConfig config, TrinaryFn> segmentGenerator, @@ -103,7 +98,7 @@ public Map getContext() { return ImmutableMap.of( QueryContexts.VECTORIZE_KEY, vectorize.toString(), - QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, "true" + QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize.toString() ); } @@ -127,19 +122,6 @@ public static Collection constructorFeeder() @Before public void setup() { - if (!"segments".equals(segmentsName)) { - if 
(GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) { - expectedException.expect(RuntimeException.class); - expectedException.expectMessage( - "GroupBy v1 does not support dimension selectors with unknown cardinality." - ); - } else if (vectorize == QueryContexts.Vectorize.FORCE) { - expectedException.expect(RuntimeException.class); - expectedException.expectMessage( - "Cannot vectorize!" - ); - } - } } @After @@ -162,12 +144,8 @@ public void testGroupBySomeField() .build(); - Sequence seq = helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery); - - List results = seq.toList(); - verifyResults( - groupQuery.getResultRowSignature(), - results, + runResults( + groupQuery, ImmutableList.of( new Object[]{null, 8L}, new Object[]{"100", 2L}, @@ -177,6 +155,51 @@ public void testGroupBySomeField() ); } + @Test + public void testGroupByRegularColumns() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions( + DefaultDimensionSpec.of("v0"), + DefaultDimensionSpec.of("v1"), + new DefaultDimensionSpec("v2", "v2", ColumnType.LONG), + new DefaultDimensionSpec("v3", "v3", ColumnType.LONG), + new DefaultDimensionSpec("v4", "v4", ColumnType.STRING), + new DefaultDimensionSpec("v5", "v5", ColumnType.LONG) + ) + .setVirtualColumns( + new NestedFieldVirtualColumn("dim", "$", "v0", ColumnType.STRING), + new NestedFieldVirtualColumn("dim", "$.x", "v1", ColumnType.STRING), + new NestedFieldVirtualColumn("dim", "$", "v2", ColumnType.LONG), + new NestedFieldVirtualColumn("count", "$", "v3", ColumnType.LONG), + new NestedFieldVirtualColumn("count", "$", "v4", ColumnType.STRING), + new NestedFieldVirtualColumn("count", "$.x", "v5", ColumnType.LONG) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + runResults( + groupQuery, + 
NullHandling.replaceWithDefault() + ? ImmutableList.of( + new Object[]{"100", null, 100L, 1L, "1", 0L, 2L}, + new Object[]{"hello", null, 0L, 1L, "1", 0L, 12L}, + new Object[]{"world", null, 0L, 1L, "1", 0L, 2L} + ) + : ImmutableList.of( + new Object[]{"100", null, 100L, 1L, "1", null, 2L}, + new Object[]{"hello", null, null, 1L, "1", null, 12L}, + new Object[]{"world", null, null, 1L, "1", null, 2L} + ), + "incremental".equals(segmentsName), + true + ); + } + @Test public void testGroupBySomeFieldWithFilter() { @@ -197,12 +220,8 @@ public void testGroupBySomeFieldWithFilter() .build(); - Sequence seq = helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery); - - List results = seq.toList(); - verifyResults( - groupQuery.getResultRowSignature(), - results, + runResults( + groupQuery, ImmutableList.of( new Object[]{null, 8L}, new Object[]{"100", 2L}, @@ -232,14 +251,7 @@ public void testGroupByNoFieldWithFilter() .build(); - Sequence seq = helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery); - - List results = seq.toList(); - verifyResults( - groupQuery.getResultRowSignature(), - results, - ImmutableList.of(new Object[]{null, 16L}) - ); + runResults(groupQuery, ImmutableList.of(new Object[]{null, 16L})); } @Test @@ -266,26 +278,12 @@ public void testGroupBySomeFieldWithNonExistentAgg() .build(); - Sequence seq = helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery); - - List results = seq.toList(); - - verifyResults( - groupQuery.getResultRowSignature(), - results, - ImmutableList.of(new Object[]{null, NullHandling.defaultLongValue()}) - ); + runResults(groupQuery, ImmutableList.of(new Object[]{null, NullHandling.defaultLongValue()})); } @Test public void testGroupByNonExistentVirtualColumn() { - if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) { - expectedException.expect(RuntimeException.class); - 
expectedException.expectMessage( - "GroupBy v1 does not support dimension selectors with unknown cardinality." - ); - } GroupByQuery groupQuery = GroupByQuery.builder() .setDataSource("test_datasource") .setGranularity(Granularities.ALL) @@ -304,19 +302,86 @@ public void testGroupByNonExistentVirtualColumn() .setContext(getContext()) .build(); - - Sequence seq = helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery); - - List results = seq.toList(); - verifyResults( - groupQuery.getResultRowSignature(), - results, + runResults( + groupQuery, NullHandling.sqlCompatible() ? ImmutableList.of(new Object[]{null, 16L}) - : ImmutableList.of(new Object[]{"foo", 16L}) + : ImmutableList.of(new Object[]{"foo", 16L}), + true, + false ); } + private void runResults(GroupByQuery groupQuery, List expectedResults) + { + runResults(groupQuery, expectedResults, false, false); + } + + private void runResults(GroupByQuery groupQuery, List expectedResults, boolean hasUnknownCardinality, boolean hasNonStringOutput) + { + if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) { + if (hasUnknownCardinality) { + Throwable t = Assert.assertThrows( + RuntimeException.class, + () -> helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery).toList() + ); + Assert.assertEquals( + "java.lang.UnsupportedOperationException: GroupBy v1 does not support dimension selectors with unknown cardinality.", + t.getMessage() + ); + return; + } + if (hasNonStringOutput) { + Throwable t = Assert.assertThrows( + RuntimeException.class, + () -> helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery).toList() + ); + Assert.assertEquals( + "java.lang.UnsupportedOperationException: GroupBy v1 only supports dimensions with an outputType of STRING.", + t.getMessage() + ); + return; + } + } + if (!"segments".equals(segmentsName)) { + if 
(GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) { + Throwable t = Assert.assertThrows( + RuntimeException.class, + () -> helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery) + .toList() + ); + Assert.assertEquals( + "java.lang.UnsupportedOperationException: GroupBy v1 does not support dimension selectors with unknown cardinality.", + t.getMessage() + ); + } else if (vectorize == QueryContexts.Vectorize.FORCE) { + Throwable t = Assert.assertThrows( + RuntimeException.class, + () -> helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery) + .toList() + ); + Assert.assertEquals( + "java.util.concurrent.ExecutionException: java.lang.RuntimeException: org.apache.druid.java.util.common.ISE: Cannot vectorize!", + t.getMessage() + ); + return; + } + } else { + + Sequence seq = helper.runQueryOnSegmentsObjs( + segmentsGenerator.apply(helper, tempFolder, closer), + groupQuery + ); + + List results = seq.toList(); + verifyResults( + groupQuery.getResultRowSignature(), + results, + expectedResults + ); + } + } + private static void verifyResults(RowSignature rowSignature, List results, List expected) { LOG.info("results:\n%s", results); diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index 9ac5a70b72f3..268c19bf9177 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -160,7 +160,11 @@ public void testIngestAndScanSegmentsRealtime() throws Exception new NestedFieldVirtualColumn("nest", "$.x", "x"), new NestedFieldVirtualColumn("nester", "$.x[0]", "x_0"), new NestedFieldVirtualColumn("nester", "$.y.c[1]", "y_c_1"), - new NestedFieldVirtualColumn("nester", "$.", "nester_root") + new 
NestedFieldVirtualColumn("nester", "$.", "nester_root"), + new NestedFieldVirtualColumn("dim", "$", "dim_root"), + new NestedFieldVirtualColumn("dim", "$.x", "dim_path"), + new NestedFieldVirtualColumn("count", "$", "count_root"), + new NestedFieldVirtualColumn("count", "$.x", "count_path") ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(100) diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index ce3e40e6556e..38764b093af4 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -172,7 +172,7 @@ public void testBasicFunctionality() throws IOException { ColumnBuilder bob = new ColumnBuilder(); bob.setFileMapper(fileMapper); - NestedDataColumnSupplier supplier = new NestedDataColumnSupplier( + NestedDataColumnSupplier supplier = NestedDataColumnSupplier.read( baseBuffer, bob, () -> 0, @@ -191,7 +191,7 @@ public void testConcurrency() throws ExecutionException, InterruptedException // if this test ever starts being to be a flake, there might be thread safety issues ColumnBuilder bob = new ColumnBuilder(); bob.setFileMapper(fileMapper); - NestedDataColumnSupplier supplier = new NestedDataColumnSupplier( + NestedDataColumnSupplier supplier = NestedDataColumnSupplier.read( baseBuffer, bob, () -> 0, diff --git a/processing/src/test/java/org/apache/druid/segment/vector/NilVectorSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/vector/NilVectorSelectorTest.java new file mode 100644 index 000000000000..2f4c93e862ed --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/vector/NilVectorSelectorTest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.collections.bitmap.WrappedRoaringBitmap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.segment.QueryableIndexStorageAdapter; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Test; + +import java.util.concurrent.ThreadLocalRandom; + +public class NilVectorSelectorTest extends InitializedNullHandlingTest +{ + private static final int NUM_ROWS = 10_000; + + @Test + public void testDefaultSizedVector() + { + testVectorSize(QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE); + } + + @Test + public void testCustomSizeVector() + { + testVectorSize(1024); + } + + private static void testVectorSize(int vectorSize) + { + NoFilterVectorOffset offset = new NoFilterVectorOffset(vectorSize, 0, NUM_ROWS); + testOffset(offset); + WrappedRoaringBitmap bitmap = new WrappedRoaringBitmap(); + int numSet = 0; + for (int i = 0; i < NUM_ROWS; i++) { + if (ThreadLocalRandom.current().nextDouble() > 0.2) { + bitmap.add(i); + numSet++; + } + } + BitmapVectorOffset bitmapOffset = new BitmapVectorOffset(vectorSize, bitmap.toImmutableBitmap(), 0, NUM_ROWS); + testOffset(bitmapOffset); + } + + private static void 
testOffset(VectorOffset offset) + { + NilVectorSelector nil = NilVectorSelector.create(offset); + while (!offset.isDone()) { + final int[] dict = nil.getRowVector(); + final long[] longs = nil.getLongVector(); + final double[] doubles = nil.getDoubleVector(); + final float[] floats = nil.getFloatVector(); + final boolean[] nulls = nil.getNullVector(); + final Object[] objects = nil.getObjectVector(); + + for (int i = 0; i < offset.getCurrentVectorSize(); i++) { + Assert.assertEquals(0, dict[i]); + Assert.assertEquals(0L, longs[i]); + Assert.assertEquals(0.0, doubles[i], 0.0); + Assert.assertEquals(0f, floats[i], 0.0); + Assert.assertEquals(NullHandling.sqlCompatible(), nulls[i]); + Assert.assertNull(objects[i]); + } + offset.advance(); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsCastTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsCastTest.java new file mode 100644 index 000000000000..e13820d4cc2e --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsCastTest.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.virtual; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexStorageAdapter; +import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.generator.GeneratorBasicSchemas; +import org.apache.druid.segment.generator.GeneratorSchemaInfo; +import org.apache.druid.segment.generator.SegmentGenerator; +import org.apache.druid.segment.vector.VectorCursor; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; + +public class ExpressionVectorSelectorsCastTest +{ + private static final int ROWS_PER_SEGMENT = 10_000; + + private static QueryableIndex INDEX; + private static Closer CLOSER; + + @BeforeClass + public static void setupClass() + { + CLOSER = Closer.create(); + + final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench"); + + final DataSegment dataSegment = DataSegment.builder() + .dataSource("foo") + .interval(schemaInfo.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(); + + final SegmentGenerator segmentGenerator = CLOSER.register(new SegmentGenerator()); + INDEX = CLOSER.register( + segmentGenerator.generate(dataSegment, 
schemaInfo, Granularities.HOUR, ROWS_PER_SEGMENT) + ); + } + + @AfterClass + public static void teardownClass() throws IOException + { + CLOSER.close(); + } + + @Test + public void testCastObjectSelectorToValueSelector() + { + testCast(INDEX, "string1", ColumnType.LONG, CLOSER); + testCast(INDEX, "string2", ColumnType.DOUBLE, CLOSER); + testCast(INDEX, "string3", ColumnType.FLOAT, CLOSER); + } + + @Test + public void testCastValueSelectorSelectorToObjectSelector() + { + testCast(INDEX, "long1", ColumnType.STRING, CLOSER); + testCast(INDEX, "long2", ColumnType.STRING, CLOSER); + testCast(INDEX, "double1", ColumnType.STRING, CLOSER); + testCast(INDEX, "double2", ColumnType.STRING, CLOSER); + testCast(INDEX, "float1", ColumnType.STRING, CLOSER); + testCast(INDEX, "float2", ColumnType.STRING, CLOSER); + } + + public static void testCast( + QueryableIndex index, + String column, + ColumnType castTo, + Closer closer + ) + { + final VirtualColumns virtualColumns = VirtualColumns.create( + ImmutableList.of( + new ExpressionVirtualColumn( + "v", + "cast(" + column + ", '" + ExpressionType.fromColumnType(castTo) + "')", + castTo, + TestExprMacroTable.INSTANCE + ) + ) + ); + final QueryableIndexStorageAdapter storageAdapter = new QueryableIndexStorageAdapter(index); + VectorCursor cursor = storageAdapter.makeVectorCursor( + null, + index.getDataInterval(), + virtualColumns, + false, + 512, + null + ); + closer.register(cursor); + + ColumnCapabilities capabilities = INDEX.getColumnCapabilities(column); + + if (capabilities.isNumeric() && castTo.is(ValueType.STRING)) { + // numeric -> string + verifyNumericToString(column, castTo, cursor); + } else { + // string -> numeric + verifyStringToNumeric(column, castTo, cursor); + } + } + + private static void verifyStringToNumeric(String column, ColumnType castTo, VectorCursor cursor) + { + VectorValueSelector selector = cursor.getColumnSelectorFactory().makeValueSelector("v"); + VectorValueSelector castSelector = 
ExpressionVectorSelectors.castObjectSelectorToNumeric( + cursor.getColumnSelectorFactory().getReadableVectorInspector(), + column, + cursor.getColumnSelectorFactory().makeObjectSelector(column), + ColumnType.STRING, + castTo + ); + while (!cursor.isDone()) { + boolean[] nulls; + boolean[] castNulls; + switch (castTo.getType()) { + case LONG: + nulls = selector.getNullVector(); + castNulls = castSelector.getNullVector(); + long[] longs = selector.getLongVector(); + long[] castLongs = castSelector.getLongVector(); + for (int i = 0; i < selector.getCurrentVectorSize(); i++) { + if (nulls != null) { + Assert.assertEquals(nulls[i], castNulls[i]); + } + Assert.assertEquals(longs[i], castLongs[i]); + } + break; + case DOUBLE: + nulls = selector.getNullVector(); + castNulls = selector.getNullVector(); + double[] doubles = selector.getDoubleVector(); + double[] castDoubles = castSelector.getDoubleVector(); + for (int i = 0; i < selector.getCurrentVectorSize(); i++) { + if (nulls != null) { + Assert.assertEquals(nulls[i], castNulls[i]); + } + Assert.assertEquals(doubles[i], castDoubles[i], 0.0); + } + break; + } + + cursor.advance(); + } + } + + private static void verifyNumericToString(String column, ColumnType castTo, VectorCursor cursor) + { + VectorObjectSelector objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v"); + VectorObjectSelector castSelector = ExpressionVectorSelectors.castValueSelectorToObject( + cursor.getColumnSelectorFactory().getReadableVectorInspector(), + column, + cursor.getColumnSelectorFactory().makeValueSelector(column), + cursor.getColumnSelectorFactory().getColumnCapabilities(column).toColumnType(), + castTo + ); + while (!cursor.isDone()) { + switch (castTo.getType()) { + case STRING: + Object[] objects = objectSelector.getObjectVector(); + Object[] otherObjects = castSelector.getObjectVector(); + Assert.assertEquals(objectSelector.getCurrentVectorSize(), castSelector.getCurrentVectorSize()); + for (int i = 0; i < 
objectSelector.getCurrentVectorSize(); i++) { + Assert.assertEquals(objects[i], otherObjects[i]); + } + break; + } + + cursor.advance(); + } + } +} diff --git a/processing/src/test/resources/simple-nested-test-data.json b/processing/src/test/resources/simple-nested-test-data.json index 17e16ce27361..3def4206ac9c 100644 --- a/processing/src/test/resources/simple-nested-test-data.json +++ b/processing/src/test/resources/simple-nested-test-data.json @@ -1,7 +1,7 @@ {"timestamp": "2021-01-01", "dim": "hello", "nest": {"x": 100, "y": 200, "z": 300}, "nester":{ "x": ["a", "b", "c"], "y": {"a": "a", "b": "b", "c": [1, 2, 3]}}, "variant": {"a": ["hello", "world"], "b": {"x": "hello", "y": "world"}}, "list":[{"x": 5, "y": 10}, {"x": 15, "y": 22}]} {"timestamp": "2021-01-01", "dim": "hello", "nester":{ "x": ["x", "y", "z"]}, "list":[{"x": 35, "y": 310}, {"x": 315, "y": 322}]} {"timestamp": "2021-01-01", "dim": "hello", "nest":{ "x": 300, "y": 800}, "nester": "hello"} -{"timestamp": "2021-01-01", "dim": "hello", "nest":{ "y": 500}, "list":[{"x": 115, "y": 410}, {"x": 415, "y": 422}]} +{"timestamp": "2021-01-01", "dim": "100", "nest":{ "y": 500}, "list":[{"x": 115, "y": 410}, {"x": 415, "y": 422}]} {"timestamp": "2021-01-02", "dim": "world", "nest": {"x": 200, "y": 100, "z": 101}, "nester":{ "x": ["x", "y", "z"], "y": {"a": "b", "b": "c", "c": [4, 5, 6]}}, "variant": {"b": ["hello", "world"], "c": {"x": ["hello"], "y": "world"}}} {"timestamp": "2021-01-02", "dim": "hello", "nester":{ "x": ["x", "y", "z"]}} {"timestamp": "2021-01-02", "dim": "hello", "nest":{ "x": 300, "y": 800}, "nester": 1} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java index f9181e0914a8..2e5aa9f58bfb 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java @@ 
-54,6 +54,7 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.join.JoinableFactory; +import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.apache.druid.server.QueryLifecycleFactory; import org.apache.druid.server.SegmentManager; import org.apache.druid.server.coordination.DruidServerMetadata; @@ -103,6 +104,7 @@ public class SegmentMetadataCache private static final EmittingLogger log = new EmittingLogger(SegmentMetadataCache.class); private static final int MAX_SEGMENTS_PER_QUERY = 15000; private static final long DEFAULT_NUM_ROWS = 0; + private static final Interner ROW_SIGNATURE_INTERNER = Interners.newWeakInterner(); private final QueryLifecycleFactory queryLifecycleFactory; private final SegmentMetadataCacheConfig config; @@ -119,8 +121,6 @@ public class SegmentMetadataCache */ private final ConcurrentMap tables = new ConcurrentHashMap<>(); - private static final Interner ROW_SIGNATURE_INTERNER = Interners.newWeakInterner(); - /** * DataSource -> Segment -> AvailableSegmentMetadata(contains RowSignature) for that segment. * Use SortedMap for segments so they are merged in deterministic order, from older to newer. 
@@ -798,7 +798,20 @@ DatasourceTable.PhysicalDatasourceMetadata buildDruidTable(final String dataSour rowSignature.getColumnType(column) .orElseThrow(() -> new ISE("Encountered null type for column [%s]", column)); - columnTypes.putIfAbsent(column, columnType); + columnTypes.compute(column, (c, existingType) -> { + if (existingType == null) { + return columnType; + } + if (columnType == null) { + return existingType; + } + // if any are json, are all json + if (NestedDataComplexTypeSerde.TYPE.equals(columnType) || NestedDataComplexTypeSerde.TYPE.equals(existingType)) { + return NestedDataComplexTypeSerde.TYPE; + } + // "existing type" is the 'newest' type, since we iterate the segments list by newest start time + return existingType; + }); } } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index a8d0c94df73e..288fa6c0c2f2 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -28,7 +28,9 @@ import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.InputRowParser; +import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.MapInputRowParser; +import org.apache.druid.data.input.impl.StringDimensionSchema; import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.guice.DruidInjectorBuilder; @@ -76,11 +78,14 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest { private static final String DATA_SOURCE = "nested"; + private static final String DATA_SOURCE_MIXED = "nested_mix"; + private static final String DATA_SOURCE_MIXED_2 = "nested_mix_2"; private static final List> RAW_ROWS = 
ImmutableList.of( ImmutableMap.builder() .put("t", "2000-01-01") .put("string", "aaa") + .put("string_sparse", "zzz") .put("nest", ImmutableMap.of("x", 100L, "y", 2.02, "z", "300", "mixed", 1L, "mixed2", "1")) .put( "nester", @@ -103,6 +108,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ImmutableMap.builder() .put("t", "2000-01-01") .put("string", "ddd") + .put("string_sparse", "yyy") .put("long", 2L) .build(), ImmutableMap.builder() @@ -126,6 +132,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .build() ); + private static final InputRowParser> PARSER = new MapInputRowParser( new TimeAndDimsParseSpec( new TimestampSpec("t", "iso", null), @@ -135,9 +142,26 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .add(new NestedDataDimensionSchema("nest")) .add(new NestedDataDimensionSchema("nester")) .add(new NestedDataDimensionSchema("long")) + .add(new NestedDataDimensionSchema("string_sparse")) + .build() + ).build() + ) + ); + + private static final InputRowParser> PARSER_MIX = new MapInputRowParser( + new TimeAndDimsParseSpec( + new TimestampSpec("t", "iso", null), + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new StringDimensionSchema("string")) + .add(new NestedDataDimensionSchema("nest")) + .add(new NestedDataDimensionSchema("nester")) + .add(new LongDimensionSchema("long")) + .add(new StringDimensionSchema("string_sparse")) .build() ).build() - )); + ) + ); private static final List ROWS = RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, PARSER)).collect(Collectors.toList()); @@ -174,7 +198,72 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .rows(ROWS) .buildMMappedIndex(); - return new SpecificSegmentsQuerySegmentWalker(conglomerate).add( + final QueryableIndex indexMix11 = + IndexBuilder.create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + 
.schema( + new IncrementalIndexSchema.Builder() + .withMetrics( + new CountAggregatorFactory("cnt") + ) + .withDimensionsSpec(PARSER) + .withRollup(false) + .build() + ) + .rows(ROWS) + .buildMMappedIndex(); + + final QueryableIndex indexMix12 = + IndexBuilder.create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withMetrics( + new CountAggregatorFactory("cnt") + ) + .withDimensionsSpec(PARSER_MIX) + .withRollup(false) + .build() + ) + .rows(ROWS) + .buildMMappedIndex(); + + final QueryableIndex indexMix21 = + IndexBuilder.create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withMetrics( + new CountAggregatorFactory("cnt") + ) + .withDimensionsSpec(PARSER_MIX) + .withRollup(false) + .build() + ) + .rows(ROWS) + .buildMMappedIndex(); + + final QueryableIndex indexMix22 = + IndexBuilder.create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withMetrics( + new CountAggregatorFactory("cnt") + ) + .withDimensionsSpec(PARSER) + .withRollup(false) + .build() + ) + .rows(ROWS) + .buildMMappedIndex(); + + SpecificSegmentsQuerySegmentWalker walker = new SpecificSegmentsQuerySegmentWalker(conglomerate); + walker.add( DataSegment.builder() .dataSource(DATA_SOURCE) .interval(index.getDataInterval()) @@ -183,7 +272,45 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .size(0) .build(), index + ).add( + DataSegment.builder() + .dataSource(DATA_SOURCE_MIXED) + .interval(index.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(), + indexMix11 + ).add( + DataSegment.builder() + .dataSource(DATA_SOURCE_MIXED) + 
.interval(index.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(1)) + .size(0) + .build(), + indexMix12 + ).add( + DataSegment.builder() + .dataSource(DATA_SOURCE_MIXED_2) + .interval(index.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(), + indexMix21 + ).add( + DataSegment.builder() + .dataSource(DATA_SOURCE_MIXED_2) + .interval(index.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(1)) + .size(0) + .build(), + indexMix22 ); + + return walker; } @Test @@ -443,6 +570,218 @@ public void testGroupByRootSingleTypeString() ); } + @Test + public void testGroupByRootSingleTypeLongMixed1() + { + testQuery( + "SELECT " + + "long, " + + "SUM(cnt) " + + "FROM druid.nested_mix GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("long", "d0", ColumnType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{1L, 2L}, + new Object[]{2L, 4L}, + new Object[]{3L, 2L}, + new Object[]{4L, 2L}, + new Object[]{5L, 4L} + ), + RowSignature.builder() + .add("long", ColumnType.LONG) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeStringMixed1() + { + testQuery( + "SELECT " + + "string, " + + "SUM(cnt) " + + "FROM druid.nested_mix GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("string", "d0") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + 
ImmutableList.of( + new Object[]{"aaa", 4L}, + new Object[]{"bbb", 2L}, + new Object[]{"ccc", 2L}, + new Object[]{"ddd", 4L}, + new Object[]{"eee", 2L} + ), + RowSignature.builder() + .add("string", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeStringMixed1Sparse() + { + testQuery( + "SELECT " + + "string_sparse, " + + "SUM(cnt) " + + "FROM druid.nested_mix GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("string_sparse", "d0") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.defaultStringValue(), 10L}, + new Object[]{"yyy", 2L}, + new Object[]{"zzz", 2L} + ), + RowSignature.builder() + .add("string_sparse", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeLongMixed2() + { + testQuery( + "SELECT " + + "long, " + + "SUM(cnt) " + + "FROM druid.nested_mix_2 GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED_2) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("long", "d0", ColumnType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{1L, 2L}, + new Object[]{2L, 4L}, + new Object[]{3L, 2L}, + new Object[]{4L, 2L}, + new Object[]{5L, 4L} + ), + RowSignature.builder() + .add("long", ColumnType.LONG) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeStringMixed2() + { + 
testQuery( + "SELECT " + + "string, " + + "SUM(cnt) " + + "FROM druid.nested_mix_2 GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED_2) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("string", "d0") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"aaa", 4L}, + new Object[]{"bbb", 2L}, + new Object[]{"ccc", 2L}, + new Object[]{"ddd", 4L}, + new Object[]{"eee", 2L} + ), + RowSignature.builder() + .add("string", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeStringMixed2Sparse() + { + testQuery( + "SELECT " + + "string_sparse, " + + "SUM(cnt) " + + "FROM druid.nested_mix_2 GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED_2) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("string_sparse", "d0") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.defaultStringValue(), 10L}, + new Object[]{"yyy", 2L}, + new Object[]{"zzz", 2L} + ), + RowSignature.builder() + .add("string_sparse", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + @Test public void testGroupByJsonValues() { From e5cb7d9222a798bbb5c492c031e1ccd355f74e24 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 1 Feb 2023 19:02:34 -0800 Subject: [PATCH 02/11] better --- .../nested/NestedDataColumnSupplier.java | 4 +- .../segment/vector/NilVectorSelector.java | 4 +- .../virtual/NestedFieldVirtualColumn.java | 55 ++++++++++++++----- 
.../ExpressionVectorSelectorsCastTest.java | 19 +++++++ 4 files changed, 63 insertions(+), 19 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java index 4e0c4825dd4f..f517f21a415b 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java @@ -84,7 +84,7 @@ public static NestedDataColumnSupplier read( final Supplier frontCodedStringDictionarySupplier; final Supplier> longDictionarySupplier; final Supplier> doubleDictionarySupplier; - final SmooshedFileMapper mapper = columnBuilder.getFileMapper(); + final SmooshedFileMapper mapper = Preconditions.checkNotNull(columnBuilder.getFileMapper(), "Null fileMapper"); ColumnType simpleType; metadata = jsonMapper.readValue( @@ -196,7 +196,7 @@ public static NestedDataColumnSupplier read( longDictionarySupplier, doubleDictionarySupplier, columnConfig, - Preconditions.checkNotNull(mapper, "Null fileMapper"), + mapper, simpleType ); } diff --git a/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java index 35b3834105f8..556929aca344 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java @@ -37,9 +37,7 @@ public class NilVectorSelector private static final Object[] DEFAULT_OBJECT_VECTOR = new Object[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; static { - for (int i = 0; i < DEFAULT_NULLS_VECTOR.length; i++) { - DEFAULT_NULLS_VECTOR[i] = NullHandling.sqlCompatible(); - } + Arrays.fill(DEFAULT_NULLS_VECTOR, NullHandling.sqlCompatible()); } private final VectorSizeInspector vectorSizeInspector; diff --git 
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index 2a3d1e08aefb..83b3b1bfdf96 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -31,6 +31,7 @@ import org.apache.druid.java.util.common.Numbers; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseSingleValueDimensionSelector; @@ -78,16 +79,16 @@ * Optimized virtual column that can make direct selectors into a {@link NestedDataComplexColumn} or any associated * nested fields ({@link org.apache.druid.segment.nested.NestedFieldLiteralDictionaryEncodedColumn}) including using * their indexes. - * + *

* This virtual column is used for the SQL operators JSON_VALUE (if {@link #processFromRaw} is set to false) or * JSON_QUERY (if it is true), and accepts 'JSONPath' or 'jq' syntax string representations of paths, or a parsed * list of {@link NestedPathPart} in order to determine what should be selected from the column. - * + *

* Type information for nested fields is completely absent in the SQL planner, so it guesses the best it can to set * {@link #expectedType} from the context of how something is being used, e.g. an aggregators default type or an * explicit cast, or, if using the 'RETURNING' syntax which explicitly specifies type. This might not be the same as * if it had actual type information, but, we try to stick with whatever we chose there to do the best we can for now. - * + *

* Since {@link #capabilities(ColumnInspector, String)} is determined by the {@link #expectedType}, the results will * be best effor cast to the expected type if the column is not natively the expected type so that this column can * fulfill the contract of the type of selector that is likely to be created to read this column. @@ -217,7 +218,7 @@ public DimensionSelector makeDimensionSelector( // written to segment, so we fall back to processing the structured data from a column value selector on the // complex column ColumnValueSelector valueSelector = makeColumnValueSelector(dimensionSpec.getOutputName(), factory); - return new FieldDimensionSelector(valueSelector); + return dimensionSpec.decorate(new FieldDimensionSelector(valueSelector)); } @Override @@ -247,7 +248,7 @@ public DimensionSelector makeDimensionSelector( ColumnHolder holder = columnSelector.getColumnHolder(columnName); if (holder == null) { // column doesn't exist - return DimensionSelector.constant(null); + return dimensionSpec.decorate(DimensionSelector.constant(null)); } if (hasNegativeArrayIndex) { // if the path has negative array elements, then we have to use the 'raw' processing of the FieldDimensionSelector @@ -256,10 +257,19 @@ public DimensionSelector makeDimensionSelector( return null; } + return dimensionSpec.decorate(makeDimensionSelectorUndecorated(holder, offset, dimensionSpec.getExtractionFn())); + } + + private DimensionSelector makeDimensionSelectorUndecorated( + ColumnHolder holder, + ReadableOffset offset, + @Nullable ExtractionFn extractionFn + ) + { BaseColumn theColumn = holder.getColumn(); if (theColumn instanceof NestedDataComplexColumn) { final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; - return column.makeDimensionSelector(parts, offset, dimensionSpec.getExtractionFn()); + return column.makeDimensionSelector(parts, offset, extractionFn); } // ok, so not a nested column, but we can still do stuff @@ -271,12 +281,12 @@ public DimensionSelector 
makeDimensionSelector( // the path was the 'root', we're in luck, spit out a selector that behaves the same way as a nested column if (theColumn instanceof DictionaryEncodedColumn) { final DictionaryEncodedColumn column = (DictionaryEncodedColumn) theColumn; - return new AutoCastingValueSelector(column.makeDimensionSelector(offset, dimensionSpec.getExtractionFn())); + return new AutoCastingValueSelector(column.makeDimensionSelector(offset, extractionFn)); } return ValueTypes.makeNumericWrappingDimensionSelector( holder.getCapabilities().getType(), theColumn.makeColumnValueSelector(offset), - dimensionSpec.getExtractionFn() + extractionFn ); } @@ -338,10 +348,18 @@ public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector { ColumnHolder holder = columnSelector.getColumnHolder(columnName); if (holder == null) { - return NilVectorSelector.create(offset); + return dimensionSpec.decorate(NilVectorSelector.create(offset)); } - BaseColumn theColumn = holder.getColumn(); + return dimensionSpec.decorate(makeSingleValueVectorDimensionSelectorUndecorated(holder, offset)); + } + + private SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelectorUndecorated( + ColumnHolder holder, + ReadableVectorOffset offset + ) + { + BaseColumn theColumn = holder.getColumn(); if (theColumn instanceof NestedDataComplexColumn) { final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; return column.makeSingleValueDimensionVectorSelector(parts, offset); @@ -358,6 +376,7 @@ public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector return ((DictionaryEncodedColumn) theColumn).makeSingleValueDimensionVectorSelector(offset); } + @Nullable @Override public VectorObjectSelector makeVectorObjectSelector( @@ -445,6 +464,7 @@ public VectorValueSelector makeVectorValueSelector( @Nullable private boolean[] nullVector = null; private int id = ReadableVectorInspector.NULL_ID; + @Override public long[] getLongVector() { @@ 
-593,7 +613,8 @@ public ColumnCapabilities capabilities(String columnName) // from here return ColumnCapabilitiesImpl.createDefault() .setType(expectedType != null ? expectedType : ColumnType.STRING) - .setHasNulls(expectedType == null || (expectedType.isNumeric() && NullHandling.sqlCompatible())); + .setHasNulls(expectedType == null || (expectedType.isNumeric() + && NullHandling.sqlCompatible())); } @Override @@ -616,7 +637,8 @@ public ColumnCapabilities capabilities(ColumnInspector inspector, String columnN .setDictionaryValuesSorted(true) .setDictionaryValuesUnique(true) .setHasBitmapIndexes(true) - .setHasNulls(expectedType == null || (expectedType.isNumeric() && NullHandling.sqlCompatible())); + .setHasNulls(expectedType == null || (expectedType.isNumeric() + && NullHandling.sqlCompatible())); } return capabilities(columnName); } @@ -671,7 +693,7 @@ public String toString() /** * Process the "raw" data to extract literals with {@link NestedPathFinder#findLiteral(Object, List)}. Like * {@link RawFieldColumnSelector} but only literals and does not wrap the results in {@link StructuredData}. - * + *

* This is used as a selector on realtime data when the native field columns are not available. */ public static class RawFieldLiteralColumnValueSelector extends RawFieldColumnSelector @@ -886,7 +908,12 @@ protected String getValue() } /** - * DimensionSelector that implements implicit casting when used via + * {@link DimensionSelector} that provides implicit numeric casting when used as a value selector, trying best effort + * to implement {@link #getLong()}, {@link #getDouble()}, {@link #getFloat()}, {@link #isNull()} on top of some + * other {@link DimensionSelector}. + *

+ * This is used as a fall-back when making a selector and the underlying column is NOT a + * {@link NestedDataComplexColumn}, whose field {@link DimensionSelector} natively implement this behavior. */ private static class AutoCastingValueSelector implements DimensionSelector { diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsCastTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsCastTest.java index e13820d4cc2e..6f733ee0b91c 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsCastTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsCastTest.java @@ -175,6 +175,22 @@ private static void verifyStringToNumeric(String column, ColumnType castTo, Vect Assert.assertEquals(doubles[i], castDoubles[i], 0.0); } break; + + case FLOAT: + nulls = selector.getNullVector(); + castNulls = selector.getNullVector(); + float[] floats = selector.getFloatVector(); + float[] castFloats = castSelector.getFloatVector(); + for (int i = 0; i < selector.getCurrentVectorSize(); i++) { + if (nulls != null) { + Assert.assertEquals(nulls[i], castNulls[i]); + } + Assert.assertEquals(floats[i], castFloats[i], 0.0); + } + break; + default: + Assert.fail("this shouldn't happen"); + return; } cursor.advance(); @@ -201,6 +217,9 @@ private static void verifyNumericToString(String column, ColumnType castTo, Vect Assert.assertEquals(objects[i], otherObjects[i]); } break; + default: + Assert.fail("this shouldn't happen"); + return; } cursor.advance(); From 7a60699e69b4a167a5e9cda4b73d634dcb35c22d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 1 Feb 2023 19:06:36 -0800 Subject: [PATCH 03/11] pointless --- .../apache/druid/segment/nested/NestedDataColumnSupplier.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java 
b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java index f517f21a415b..507a531f4bf2 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java @@ -20,7 +20,6 @@ package org.apache.druid.segment.nested; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Preconditions; import com.google.common.base.Supplier; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.java.util.common.ISE; @@ -75,6 +74,7 @@ public static NestedDataColumnSupplier read( if (version == 0x03 || version == 0x04) { try { + final SmooshedFileMapper mapper = columnBuilder.getFileMapper(); final NestedDataColumnMetadata metadata; final GenericIndexed fields; final NestedLiteralTypeInfo fieldInfo; @@ -84,7 +84,6 @@ public static NestedDataColumnSupplier read( final Supplier frontCodedStringDictionarySupplier; final Supplier> longDictionarySupplier; final Supplier> doubleDictionarySupplier; - final SmooshedFileMapper mapper = Preconditions.checkNotNull(columnBuilder.getFileMapper(), "Null fileMapper"); ColumnType simpleType; metadata = jsonMapper.readValue( From 6ab287fa405427cc7f843201ccdc49e7aec7e300 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 1 Feb 2023 20:13:31 -0800 Subject: [PATCH 04/11] constant fix --- .../druid/segment/virtual/NestedFieldVirtualColumn.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index 83b3b1bfdf96..af3f7e1f3ea4 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -248,7 +248,7 @@ public 
DimensionSelector makeDimensionSelector( ColumnHolder holder = columnSelector.getColumnHolder(columnName); if (holder == null) { // column doesn't exist - return dimensionSpec.decorate(DimensionSelector.constant(null)); + return dimensionSpec.decorate(DimensionSelector.constant(null, dimensionSpec.getExtractionFn())); } if (hasNegativeArrayIndex) { // if the path has negative array elements, then we have to use the 'raw' processing of the FieldDimensionSelector @@ -275,7 +275,7 @@ private DimensionSelector makeDimensionSelectorUndecorated( // ok, so not a nested column, but we can still do stuff if (!parts.isEmpty()) { // we are being asked for a path that will never exist, so we are null selector - return DimensionSelector.constant(null); + return DimensionSelector.constant(null, extractionFn); } // the path was the 'root', we're in luck, spit out a selector that behaves the same way as a nested column From 77e148547d095dd800a5079b8695e44ee2326350 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 3 Feb 2023 02:10:40 -0800 Subject: [PATCH 05/11] modernize calcite tests to not use parsers, tidy up nested field virtual column --- ...ressedBigDecimalSqlAggregatorTestBase.java | 20 ++- .../segment/vector/NilVectorSelector.java | 4 +- .../virtual/NestedFieldVirtualColumn.java | 103 ++++++++------- .../calcite/CalciteNestedDataQueryTest.java | 76 ++++++----- .../sql/calcite/util/TestDataBuilder.java | 120 +++++++++--------- 5 files changed, 162 insertions(+), 161 deletions(-) diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java index 5d1c67c2f38a..21dc9d9f0323 100644 --- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java +++ 
b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java @@ -25,10 +25,8 @@ import com.google.common.collect.ImmutableList; import com.google.inject.Injector; import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.java.util.common.StringUtils; @@ -55,22 +53,20 @@ import java.io.IOException; import java.util.Collections; import java.util.List; -import java.util.Map; import java.util.stream.Collectors; public abstract class CompressedBigDecimalSqlAggregatorTestBase extends BaseCalciteQueryTest { - private static final InputRowParser> PARSER = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec(TestDataBuilder.TIMESTAMP_COLUMN, "iso", null), - new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3", "m2")) - ) - ) + private static final InputRowSchema SCHEMA = new InputRowSchema( + new TimestampSpec(TestDataBuilder.TIMESTAMP_COLUMN, "iso", null), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3", "m2")) + ), + null ); private static final List ROWS1 = - TestDataBuilder.RAW_ROWS1.stream().map(m -> TestDataBuilder.createRow(m, PARSER)).collect(Collectors.toList()); + TestDataBuilder.RAW_ROWS1.stream().map(m -> TestDataBuilder.createRow(m, SCHEMA)).collect(Collectors.toList()); @Override public void configureGuice(DruidInjectorBuilder builder) diff --git a/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java 
b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java index 556929aca344..762f2e73fc77 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java @@ -82,7 +82,9 @@ public static NilVectorSelector create(final VectorSizeInspector vectorSizeInspe ); } else { final boolean[] nulls = new boolean[vectorSizeInspector.getMaxVectorSize()]; - Arrays.fill(nulls, NullHandling.sqlCompatible()); + if (NullHandling.sqlCompatible()) { + Arrays.fill(nulls, true); + } return new NilVectorSelector( vectorSizeInspector, nulls, diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index af3f7e1f3ea4..1936b38b14b4 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -251,6 +251,7 @@ public DimensionSelector makeDimensionSelector( return dimensionSpec.decorate(DimensionSelector.constant(null, dimensionSpec.getExtractionFn())); } if (hasNegativeArrayIndex) { + // negative array elements in a path expression mean that values should be fetched 'from the end' of the array // if the path has negative array elements, then we have to use the 'raw' processing of the FieldDimensionSelector // created with the column selector factory instead of using the optimized nested field column, return null // to fall through @@ -272,22 +273,25 @@ private DimensionSelector makeDimensionSelectorUndecorated( return column.makeDimensionSelector(parts, offset, extractionFn); } - // ok, so not a nested column, but we can still do stuff - if (!parts.isEmpty()) { - // we are being asked for a path that will never exist, so we are null selector - return DimensionSelector.constant(null, 
extractionFn); + // not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts + if (parts.isEmpty()) { + // dictionary encoded columns do not typically implement the value selector methods (getLong, getDouble, getFloat) + // nothing *should* be using a dimension selector to call the numeric getters, but just in case... wrap their + // selector in a "best effort" casting selector to implement them + if (theColumn instanceof DictionaryEncodedColumn) { + final DictionaryEncodedColumn column = (DictionaryEncodedColumn) theColumn; + return new BestEffortCastingValueSelector(column.makeDimensionSelector(offset, extractionFn)); + } + // for non-dictionary encoded columns, wrap a value selector to make it appear as a dimension selector + return ValueTypes.makeNumericWrappingDimensionSelector( + holder.getCapabilities().getType(), + theColumn.makeColumnValueSelector(offset), + extractionFn + ); } - // the path was the 'root', we're in luck, spit out a selector that behaves the same way as a nested column - if (theColumn instanceof DictionaryEncodedColumn) { - final DictionaryEncodedColumn column = (DictionaryEncodedColumn) theColumn; - return new AutoCastingValueSelector(column.makeDimensionSelector(offset, extractionFn)); - } - return ValueTypes.makeNumericWrappingDimensionSelector( - holder.getCapabilities().getType(), - theColumn.makeColumnValueSelector(offset), - extractionFn - ); + // we are not a nested column and are being asked for a path that will never exist, so we are nil selector + return DimensionSelector.constant(null, extractionFn); } @@ -318,18 +322,21 @@ public ColumnValueSelector makeColumnValueSelector( return column.makeColumnValueSelector(parts, offset); } - // ok, so not a nested column, but we can still do stuff - if (!parts.isEmpty()) { - // we are being asked for a path that will never exist, so we are null selector - return NilColumnValueSelector.instance(); + // not a nested column, but we can still 
do stuff if the path is the 'root', indicated by an empty path parts + if (parts.isEmpty()) { + // dictionary encoded columns do not typically implement the value selector methods (getLong, getDouble, getFloat) + // so we want to wrap their selector in a "best effort" casting selector to implement them + if (theColumn instanceof DictionaryEncodedColumn) { + final DictionaryEncodedColumn column = (DictionaryEncodedColumn) theColumn; + return new BestEffortCastingValueSelector(column.makeDimensionSelector(offset, null)); + } + // otherwise it is probably cool to pass through the value selector directly, if numbers make sense the selector + // very likely implemented them, and everyone implements getObject if not + return theColumn.makeColumnValueSelector(offset); } - // the path was the 'root', we're in luck, spit out a selector that behaves the same way as a nested column - if (theColumn instanceof DictionaryEncodedColumn) { - final DictionaryEncodedColumn column = (DictionaryEncodedColumn) theColumn; - return new AutoCastingValueSelector(column.makeDimensionSelector(offset, null)); - } - return theColumn.makeColumnValueSelector(offset); + // we are not a nested column and are being asked for a path that will never exist, so we are nil selector + return NilColumnValueSelector.instance(); } @Override @@ -365,15 +372,15 @@ private SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelecto return column.makeSingleValueDimensionVectorSelector(parts, offset); } - // not a nested column - if (!parts.isEmpty()) { - // we are being asked for a path that will never exist, so we are null selector - return NilVectorSelector.create(offset); + // not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts + if (parts.isEmpty()) { + // we will not end up here unless underlying column capabilities lied about something being dictionary encoded... 
+ // so no need for magic casting like nonvectorized engine + return ((DictionaryEncodedColumn) theColumn).makeSingleValueDimensionVectorSelector(offset); } - // we will not end up here unless underlying column capabilities lied about something being dictionary encoded... - // so no need for magic casting like nonvectorized engine - return ((DictionaryEncodedColumn) theColumn).makeSingleValueDimensionVectorSelector(offset); + // we are not a nested column and are being asked for a path that will never exist, so we are nil selector + return NilVectorSelector.create(offset); } @@ -391,7 +398,6 @@ public VectorObjectSelector makeVectorObjectSelector( } BaseColumn theColumn = holder.getColumn(); - // processFromRaw is true, that means JSON_QUERY, which can return partial results, otherwise this virtual column // is JSON_VALUE which only returns literals, so we can use the nested columns value selector if (theColumn instanceof NestedDataComplexColumn) { @@ -401,20 +407,23 @@ public VectorObjectSelector makeVectorObjectSelector( } return column.makeVectorObjectSelector(parts, offset); } - if (!parts.isEmpty()) { - return NilVectorSelector.create(offset); - } - ColumnCapabilities capabilities = holder.getCapabilities(); - if (capabilities.isNumeric()) { - return ExpressionVectorSelectors.castValueSelectorToObject( - offset, - this.columnName, - theColumn.makeVectorValueSelector(offset), - capabilities.toColumnType(), - expectedType - ); + // not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts + if (parts.isEmpty()) { + ColumnCapabilities capabilities = holder.getCapabilities(); + if (capabilities.isNumeric()) { + return ExpressionVectorSelectors.castValueSelectorToObject( + offset, + this.columnName, + theColumn.makeVectorValueSelector(offset), + capabilities.toColumnType(), + expectedType + ); + } + return theColumn.makeVectorObjectSelector(offset); } - return theColumn.makeVectorObjectSelector(offset); + + // we are 
not a nested column and are being asked for a path that will never exist, so we are nil selector + return NilVectorSelector.create(offset); } @Nullable @@ -915,11 +924,11 @@ protected String getValue() * This is used as a fall-back when making a selector and the underlying column is NOT a * {@link NestedDataComplexColumn}, whose field {@link DimensionSelector} natively implement this behavior. */ - private static class AutoCastingValueSelector implements DimensionSelector + private static class BestEffortCastingValueSelector implements DimensionSelector { private final DimensionSelector baseSelector; - public AutoCastingValueSelector(DimensionSelector baseSelector) + public BestEffortCastingValueSelector(DimensionSelector baseSelector) { this.baseSelector = baseSelector; } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 288fa6c0c2f2..f11a7c2717d0 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -25,13 +25,11 @@ import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.InputRowParser; import org.apache.druid.data.input.impl.LongDimensionSchema; -import org.apache.druid.data.input.impl.MapInputRowParser; import org.apache.druid.data.input.impl.StringDimensionSchema; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.guice.NestedDataModule; @@ -72,7 +70,6 @@ import java.io.IOException; import 
java.util.List; -import java.util.Map; import java.util.stream.Collectors; public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest @@ -132,39 +129,38 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .build() ); - - private static final InputRowParser> PARSER = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("t", "iso", null), - DimensionsSpec.builder().setDimensions( - ImmutableList.builder() - .add(new NestedDataDimensionSchema("string")) - .add(new NestedDataDimensionSchema("nest")) - .add(new NestedDataDimensionSchema("nester")) - .add(new NestedDataDimensionSchema("long")) - .add(new NestedDataDimensionSchema("string_sparse")) - .build() - ).build() - ) + private static final InputRowSchema ALL_JSON_COLUMNS = new InputRowSchema( + new TimestampSpec("t", "iso", null), + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new NestedDataDimensionSchema("string")) + .add(new NestedDataDimensionSchema("nest")) + .add(new NestedDataDimensionSchema("nester")) + .add(new NestedDataDimensionSchema("long")) + .add(new NestedDataDimensionSchema("string_sparse")) + .build() + ).build(), + null ); - private static final InputRowParser> PARSER_MIX = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("t", "iso", null), - DimensionsSpec.builder().setDimensions( - ImmutableList.builder() - .add(new StringDimensionSchema("string")) - .add(new NestedDataDimensionSchema("nest")) - .add(new NestedDataDimensionSchema("nester")) - .add(new LongDimensionSchema("long")) - .add(new StringDimensionSchema("string_sparse")) - .build() - ).build() - ) + private static final InputRowSchema JSON_AND_SCALAR_MIX = new InputRowSchema( + new TimestampSpec("t", "iso", null), + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new StringDimensionSchema("string")) + .add(new NestedDataDimensionSchema("nest")) + .add(new NestedDataDimensionSchema("nester")) + .add(new 
LongDimensionSchema("long")) + .add(new StringDimensionSchema("string_sparse")) + .build() + ).build(), + null ); - private static final List ROWS = - RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, PARSER)).collect(Collectors.toList()); + RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, ALL_JSON_COLUMNS)).collect(Collectors.toList()); + + private static final List ROWS_MIX = + RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, JSON_AND_SCALAR_MIX)).collect(Collectors.toList()); @Override public void configureGuice(DruidInjectorBuilder builder) @@ -191,7 +187,7 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .withMetrics( new CountAggregatorFactory("cnt") ) - .withDimensionsSpec(PARSER) + .withDimensionsSpec(ALL_JSON_COLUMNS.getDimensionsSpec()) .withRollup(false) .build() ) @@ -207,7 +203,7 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .withMetrics( new CountAggregatorFactory("cnt") ) - .withDimensionsSpec(PARSER) + .withDimensionsSpec(ALL_JSON_COLUMNS.getDimensionsSpec()) .withRollup(false) .build() ) @@ -223,11 +219,11 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .withMetrics( new CountAggregatorFactory("cnt") ) - .withDimensionsSpec(PARSER_MIX) + .withDimensionsSpec(JSON_AND_SCALAR_MIX.getDimensionsSpec()) .withRollup(false) .build() ) - .rows(ROWS) + .rows(ROWS_MIX) .buildMMappedIndex(); final QueryableIndex indexMix21 = @@ -239,11 +235,11 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .withMetrics( new CountAggregatorFactory("cnt") ) - .withDimensionsSpec(PARSER_MIX) + .withDimensionsSpec(JSON_AND_SCALAR_MIX.getDimensionsSpec()) .withRollup(false) .build() ) - .rows(ROWS) + .rows(ROWS_MIX) .buildMMappedIndex(); final QueryableIndex indexMix22 = @@ -255,7 +251,7 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .withMetrics( new CountAggregatorFactory("cnt") ) - .withDimensionsSpec(PARSER) + 
.withDimensionsSpec(ALL_JSON_COLUMNS.getDimensionsSpec()) .withRollup(false) .build() ) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java index f4a84f216979..f8547d10c467 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java @@ -26,16 +26,15 @@ import com.google.common.collect.ImmutableSet; import com.google.inject.Injector; import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.MapBasedInputRow; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.DoubleDimensionSchema; import org.apache.druid.data.input.impl.FloatDimensionSchema; -import org.apache.druid.data.input.impl.InputRowParser; import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.MapInputRowParser; import org.apache.druid.data.input.impl.StringDimensionSchema; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; @@ -118,62 +117,60 @@ public Optional build( } }; - private static final InputRowParser> PARSER = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec(TIMESTAMP_COLUMN, "iso", null), - new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3")) - ) - ) + private static final InputRowSchema FOO_SCHEMA = new InputRowSchema( + new TimestampSpec(TIMESTAMP_COLUMN, "iso", null), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3")) + ), + null ); - private static final InputRowParser> 
PARSER_NUMERIC_DIMS = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec(TIMESTAMP_COLUMN, "iso", null), - new DimensionsSpec( - ImmutableList.builder() - .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of( - "dim1", - "dim2", - "dim3", - "dim4", - "dim5", - "dim6" - ))) - .add(new DoubleDimensionSchema("d1")) - .add(new DoubleDimensionSchema("d2")) - .add(new FloatDimensionSchema("f1")) - .add(new FloatDimensionSchema("f2")) - .add(new LongDimensionSchema("l1")) - .add(new LongDimensionSchema("l2")) - .build() - ) - ) + private static final InputRowSchema NUMFOO_SCHEMA = new InputRowSchema( + new TimestampSpec(TIMESTAMP_COLUMN, "iso", null), + new DimensionsSpec( + ImmutableList.builder() + .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of( + "dim1", + "dim2", + "dim3", + "dim4", + "dim5", + "dim6" + ))) + .add(new DoubleDimensionSchema("d1")) + .add(new DoubleDimensionSchema("d2")) + .add(new FloatDimensionSchema("f1")) + .add(new FloatDimensionSchema("f2")) + .add(new LongDimensionSchema("l1")) + .add(new LongDimensionSchema("l2")) + .build() + ), + null ); - private static final InputRowParser> PARSER_LOTS_OF_COLUMNS = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("timestamp", "millis", null), - new DimensionsSpec( - DimensionsSpec.getDefaultSchemas( - ImmutableList.builder().add("dimHyperUnique") - .add("dimMultivalEnumerated") - .add("dimMultivalEnumerated2") - .add("dimMultivalSequentialWithNulls") - .add("dimSequential") - .add("dimSequentialHalfNull") - .add("dimUniform") - .add("dimZipf") - .add("metFloatNormal") - .add("metFloatZipf") - .add("metLongSequential") - .add("metLongUniform") - .build() - ) + private static final InputRowSchema LOTS_OF_COLUMNS_SCHEMA = new InputRowSchema( + new TimestampSpec("timestamp", "millis", null), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas( + ImmutableList.builder().add("dimHyperUnique") + .add("dimMultivalEnumerated") + 
.add("dimMultivalEnumerated2") + .add("dimMultivalSequentialWithNulls") + .add("dimSequential") + .add("dimSequentialHalfNull") + .add("dimUniform") + .add("dimZipf") + .add("metFloatNormal") + .add("metFloatZipf") + .add("metLongSequential") + .add("metLongUniform") + .build() ) - ) + ), + null ); + private static final IncrementalIndexSchema INDEX_SCHEMA = new IncrementalIndexSchema.Builder() .withMetrics( new CountAggregatorFactory("cnt"), @@ -220,7 +217,7 @@ public Optional build( new DoubleSumAggregatorFactory("m2", "m2"), new HyperUniquesAggregatorFactory("unique_dim1", "dim1") ) - .withDimensionsSpec(PARSER_NUMERIC_DIMS) + .withDimensionsSpec(NUMFOO_SCHEMA.getDimensionsSpec()) .withRollup(false) .build(); @@ -228,7 +225,7 @@ public Optional build( .withMetrics( new CountAggregatorFactory("count") ) - .withDimensionsSpec(PARSER_LOTS_OF_COLUMNS) + .withDimensionsSpec(LOTS_OF_COLUMNS_SCHEMA.getDimensionsSpec()) .withRollup(false) .build(); @@ -436,7 +433,7 @@ public Optional build( .build() ); public static final List ROWS1_WITH_NUMERIC_DIMS = - RAW_ROWS1_WITH_NUMERIC_DIMS.stream().map(raw -> createRow(raw, PARSER_NUMERIC_DIMS)).collect(Collectors.toList()); + RAW_ROWS1_WITH_NUMERIC_DIMS.stream().map(raw -> createRow(raw, NUMFOO_SCHEMA)).collect(Collectors.toList()); public static final List> RAW_ROWS2 = ImmutableList.of( ImmutableMap.builder() @@ -509,7 +506,7 @@ public Optional build( .put("dimSequential", "0") .put("dimSequentialHalfNull", "0") .build(), - PARSER_LOTS_OF_COLUMNS + LOTS_OF_COLUMNS_SCHEMA ), createRow( ImmutableMap.builder() @@ -525,7 +522,7 @@ public Optional build( .put("dimHyperUnique", "8") .put("dimSequential", "8") .build(), - PARSER_LOTS_OF_COLUMNS + LOTS_OF_COLUMNS_SCHEMA ) ); @@ -928,23 +925,24 @@ private static MapBasedInputRow toRow(String time, List dimensions, Map< public static InputRow createRow(final ImmutableMap map) { - return PARSER.parseBatch((Map) map).get(0); + return MapInputRowParser.parse(FOO_SCHEMA, (Map) map); } - 
public static InputRow createRow(final ImmutableMap map, InputRowParser> parser) + public static InputRow createRow(final ImmutableMap map, InputRowSchema inputRowSchema) { - return parser.parseBatch((Map) map).get(0); + return MapInputRowParser.parse(inputRowSchema, (Map) map); } public static InputRow createRow(final Object t, final String dim1, final String dim2, final double m1) { - return PARSER.parseBatch( + return MapInputRowParser.parse( + FOO_SCHEMA, ImmutableMap.of( "t", new DateTime(t, ISOChronology.getInstanceUTC()).getMillis(), "dim1", dim1, "dim2", dim2, "m1", m1 ) - ).get(0); + ); } } From fb429eda1db4cc373b9de0fc5b6b7b40657419b3 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 3 Feb 2023 13:24:43 -0800 Subject: [PATCH 06/11] less parser --- .../sql/BloomFilterSqlAggregatorTest.java | 30 ++++++++-------- .../QueryableIndexColumnCapabilitiesTest.java | 36 +++++++++---------- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java index c1426048faf3..ce2fdcd08fd4 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java @@ -24,14 +24,12 @@ import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.DoubleDimensionSchema; import org.apache.druid.data.input.impl.FloatDimensionSchema; -import 
org.apache.druid.data.input.impl.InputRowParser; import org.apache.druid.data.input.impl.LongDimensionSchema; -import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.guice.BloomFilterExtensionModule; import org.apache.druid.guice.DruidInjectorBuilder; @@ -89,18 +87,18 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( final Injector injector ) throws IOException { - InputRowParser parser = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("t", "iso", null), - new DimensionsSpec( - ImmutableList.builder() - .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3"))) - .add(new DoubleDimensionSchema("d1")) - .add(new FloatDimensionSchema("f1")) - .add(new LongDimensionSchema("l1")) - .build() - ) - )); + InputRowSchema schema = new InputRowSchema( + new TimestampSpec("t", "iso", null), + new DimensionsSpec( + ImmutableList.builder() + .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3"))) + .add(new DoubleDimensionSchema("d1")) + .add(new FloatDimensionSchema("f1")) + .add(new LongDimensionSchema("l1")) + .build() + ), + null + ); final QueryableIndex index = IndexBuilder.create() @@ -112,7 +110,7 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( new CountAggregatorFactory("cnt"), new DoubleSumAggregatorFactory("m1", "m1") ) - .withDimensionsSpec(parser) + .withDimensionsSpec(TestDataBuilder.INDEX_SCHEMA_NUMERIC_DIMS.getDimensionsSpec()) .withRollup(false) .build() ) diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index 367623d6a8bc..f7b47f6ee5ba 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ 
b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -21,16 +21,15 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Iterables; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.DoubleDimensionSchema; import org.apache.druid.data.input.impl.FloatDimensionSchema; import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.query.aggregation.AggregatorFactory; @@ -73,18 +72,17 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin @BeforeClass public static void setup() throws IOException { - MapInputRowParser parser = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("time", "auto", null), - new DimensionsSpec( - ImmutableList.builder() - .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("d1", "d2"))) - .add(new DoubleDimensionSchema("d3")) - .add(new FloatDimensionSchema("d4")) - .add(new LongDimensionSchema("d5")) - .build() - ) - ) + InputRowSchema rowSchema = new InputRowSchema( + new TimestampSpec("time", "auto", null), + new DimensionsSpec( + ImmutableList.builder() + .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("d1", "d2"))) + .add(new DoubleDimensionSchema("d3")) + .add(new FloatDimensionSchema("d4")) + .add(new LongDimensionSchema("d5")) + .build() + ), + null ); AggregatorFactory[] metricsSpecs = new AggregatorFactory[] { new CountAggregatorFactory("cnt"), @@ -102,14 
+100,14 @@ public static void setup() throws IOException .put("d4", 1.234f) .put("d5", 10L) .build(); - rows.add(Iterables.getOnlyElement(parser.parseBatch(event))); + rows.add(MapInputRowParser.parse(rowSchema, event)); IndexBuilder builder = IndexBuilder.create() .rows(rows) .schema( new IncrementalIndexSchema.Builder() .withMetrics(metricsSpecs) - .withDimensionsSpec(parser) + .withDimensionsSpec(rowSchema.getDimensionsSpec()) .withRollup(false) .build() ) @@ -118,7 +116,7 @@ public static void setup() throws IOException MMAP_INDEX = builder.buildMMappedIndex(); List rowsWithNulls = new ArrayList<>(); - rowsWithNulls.add(Iterables.getOnlyElement(parser.parseBatch(event))); + rowsWithNulls.add(MapInputRowParser.parse(rowSchema, event)); Map eventWithNulls = new HashMap<>(); eventWithNulls.put("time", DateTimes.nowUtc().getMillis()); @@ -128,14 +126,14 @@ public static void setup() throws IOException eventWithNulls.put("d4", null); eventWithNulls.put("d5", null); - rowsWithNulls.add(Iterables.getOnlyElement(parser.parseBatch(eventWithNulls))); + rowsWithNulls.add(MapInputRowParser.parse(rowSchema, eventWithNulls)); IndexBuilder builderWithNulls = IndexBuilder.create() .rows(rowsWithNulls) .schema( new IncrementalIndexSchema.Builder() .withMetrics(metricsSpecs) - .withDimensionsSpec(parser) + .withDimensionsSpec(rowSchema.getDimensionsSpec()) .withRollup(false) .build() ) From 0bdd5a9833b89b606673bd19d6d3a94b6664aa5b Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 3 Feb 2023 15:51:50 -0800 Subject: [PATCH 07/11] remove unused --- .../sql/BloomFilterSqlAggregatorTest.java | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java index ce2fdcd08fd4..fcf7099f28b0 100644 --- 
a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java @@ -24,13 +24,6 @@ import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.InputRowSchema; -import org.apache.druid.data.input.impl.DimensionSchema; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.DoubleDimensionSchema; -import org.apache.druid.data.input.impl.FloatDimensionSchema; -import org.apache.druid.data.input.impl.LongDimensionSchema; -import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.guice.BloomFilterExtensionModule; import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.java.util.common.granularity.Granularities; @@ -87,19 +80,6 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( final Injector injector ) throws IOException { - InputRowSchema schema = new InputRowSchema( - new TimestampSpec("t", "iso", null), - new DimensionsSpec( - ImmutableList.builder() - .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3"))) - .add(new DoubleDimensionSchema("d1")) - .add(new FloatDimensionSchema("f1")) - .add(new LongDimensionSchema("l1")) - .build() - ), - null - ); - final QueryableIndex index = IndexBuilder.create() .tmpDir(temporaryFolder.newFolder()) From a6c02ec7c445d451f0342cd666fb58e5c79a8caf Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 3 Feb 2023 21:29:48 -0800 Subject: [PATCH 08/11] changes: * fix inconsistency between nested column indexer and serializer in handling values (coerce non primitive and non arrays of primitives using asString) * ExprEval best effort mode now handles byte[] as string * added test for ExprEval.bestEffortOf, 
and add missing conversion cases that tests uncovered --- .../org/apache/druid/math/expr/ExprEval.java | 25 +++- .../org/apache/druid/math/expr/EvalTest.java | 116 ++++++++++++++++++ .../nested/NestedDataColumnSerializer.java | 7 +- .../expression/TimestampShiftMacroTest.java | 2 +- 4 files changed, 147 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java index 33876c7be44e..2cfe6990887a 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -410,6 +410,22 @@ public static ExprEval bestEffortOf(@Nullable Object val) } return new ArrayExprEval(ExpressionType.LONG_ARRAY, array); } + if (val instanceof Integer[]) { + final Integer[] inputArray = (Integer[]) val; + final Object[] array = new Object[inputArray.length]; + for (int i = 0; i < inputArray.length; i++) { + array[i] = inputArray[i] == null ? 
null : inputArray[i].longValue(); + } + return new ArrayExprEval(ExpressionType.LONG_ARRAY, array); + } + if (val instanceof int[]) { + final int[] longArray = (int[]) val; + final Object[] array = new Object[longArray.length]; + for (int i = 0; i < longArray.length; i++) { + array[i] = (long)longArray[i]; + } + return new ArrayExprEval(ExpressionType.LONG_ARRAY, array); + } if (val instanceof Double[]) { final Double[] inputArray = (Double[]) val; final Object[] array = new Object[inputArray.length]; @@ -438,7 +454,7 @@ public static ExprEval bestEffortOf(@Nullable Object val) final float[] inputArray = (float[]) val; final Object[] array = new Object[inputArray.length]; for (int i = 0; i < inputArray.length; i++) { - array[i] = inputArray[i]; + array[i] = (double) inputArray[i]; } return new ArrayExprEval(ExpressionType.DOUBLE_ARRAY, array); } @@ -463,6 +479,13 @@ public static ExprEval bestEffortOf(@Nullable Object val) return ofArray(coerced.lhs, coerced.rhs); } + // in 'best effort' mode, we couldn't possibly use byte[] as a complex or anything else useful without type + // knowledge, so lets turn it into a base64 encoded string so at least something downstream can use it by decoding + // back into bytes + if (val instanceof byte[]) { + return new StringExprEval(StringUtils.encodeBase64String((byte[]) val)); + } + if (val != null) { // is this cool? 
return new ComplexExprEval(ExpressionType.UNKNOWN_COMPLEX, val); diff --git a/core/src/test/java/org/apache/druid/math/expr/EvalTest.java b/core/src/test/java/org/apache/druid/math/expr/EvalTest.java index c5d71d306a8f..8dc73af51fd6 100644 --- a/core/src/test/java/org/apache/druid/math/expr/EvalTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/EvalTest.java @@ -19,6 +19,7 @@ package org.apache.druid.math.expr; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.IAE; @@ -829,4 +830,119 @@ public void testEvalOfType() Assert.assertEquals(stringyComplexThing, eval.type()); Assert.assertEquals("notbase64", eval.value()); } + + @Test + public void testBestEffortOf() + { + // strings + ExprEval eval = ExprEval.bestEffortOf("stringy"); + Assert.assertEquals(ExpressionType.STRING, eval.type()); + Assert.assertEquals("stringy", eval.value()); + + // by default, booleans are handled as strings + eval = ExprEval.bestEffortOf(true); + Assert.assertEquals(ExpressionType.STRING, eval.type()); + Assert.assertEquals("true", eval.value()); + + eval = ExprEval.bestEffortOf(new byte[]{1, 2, 3, 4}); + Assert.assertEquals(ExpressionType.STRING, eval.type()); + Assert.assertEquals(StringUtils.encodeBase64String(new byte[]{1, 2, 3, 4}), eval.value()); + + // longs + eval = ExprEval.bestEffortOf(1L); + Assert.assertEquals(ExpressionType.LONG, eval.type()); + Assert.assertEquals(1L, eval.value()); + + eval = ExprEval.bestEffortOf(1); + Assert.assertEquals(ExpressionType.LONG, eval.type()); + Assert.assertEquals(1L, eval.value()); + + try { + // in strict boolean mode, they are longs + ExpressionProcessing.initializeForStrictBooleansTests(true); + eval = ExprEval.ofType(ExpressionType.LONG, true); + Assert.assertEquals(ExpressionType.LONG, eval.type()); + Assert.assertEquals(1L, eval.value()); + } + finally { + // reset + 
ExpressionProcessing.initializeForTests(null); + } + + // doubles + eval = ExprEval.bestEffortOf(1.0); + Assert.assertEquals(ExpressionType.DOUBLE, eval.type()); + Assert.assertEquals(1.0, eval.value()); + + eval = ExprEval.bestEffortOf(1.0f); + Assert.assertEquals(ExpressionType.DOUBLE, eval.type()); + Assert.assertEquals(1.0, eval.value()); + + // arrays + eval = ExprEval.bestEffortOf(new Object[] {1L, 2L, 3L}); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {1L, 2L, null, 3L}); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, null, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(ImmutableList.of(1L, 2L, 3L)); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new long[] {1L, 2L, 3L}); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {1, 2, 3}); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new int[] {1, 2, 3}); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {1.0, 2.0, 3.0}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1.0, 2.0, 3.0}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {null, 1.0, 2.0, 3.0}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {null, 1.0, 2.0, 3.0}, 
(Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new double[] {1.0, 2.0, 3.0}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1.0, 2.0, 3.0}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {1.0f, 2.0f, 3.0f}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1.0, 2.0, 3.0}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new float[] {1.0f, 2.0f, 3.0f}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1.0, 2.0, 3.0}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {"1", "2", "3"}); + Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {"1", "2", "3"}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new String[] {"1", "2", "3"}); + Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {"1", "2", "3"}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(ImmutableList.of("1", "2", "3")); + Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {"1", "2", "3"}, (Object[]) eval.value()); + + // arrays end up as the least restrictice type + eval = ExprEval.bestEffortOf(new Object[] {1.0, 2L, "3", true, false}); + Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {"1.0", "2", "3", "true", "false"}, (Object[]) eval.value()); + + // json type isn't defined in druid-core, what happens if we have some nested data? 
+ eval = ExprEval.bestEffortOf(ImmutableMap.of("x", 1L, "y", 2L)); + Assert.assertEquals(ExpressionType.UNKNOWN_COMPLEX, eval.type()); + Assert.assertEquals(ImmutableMap.of("x", 1L, "y", 2L), eval.value()); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java index 44d228e67c5a..f73b72209c85 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java @@ -89,7 +89,12 @@ public StructuredDataProcessor.ProcessedLiteral processLiteralField(ArrayList if (writer != null) { try { ExprEval eval = ExprEval.bestEffortOf(fieldValue); - writer.addValue(rowCount, eval.value()); + if (eval.type().isPrimitive() || (eval.type().isArray() && eval.type().getElementType().isPrimitive())) { + writer.addValue(rowCount, eval.value()); + } else { + // behave consistently with nested column indexer, which defaults to string + writer.addValue(rowCount, eval.asString()); + } // serializer doesn't use size estimate return StructuredDataProcessor.ProcessedLiteral.NULL_LITERAL; } diff --git a/processing/src/test/java/org/apache/druid/query/expression/TimestampShiftMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/TimestampShiftMacroTest.java index 1dc87451c8e6..2c39ae429bc7 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/TimestampShiftMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/TimestampShiftMacroTest.java @@ -257,7 +257,7 @@ private static class NotLiteralExpr extends ExprMacroTable.BaseScalarUnivariateM @Override public ExprEval eval(ObjectBinding bindings) { - return ExprEval.bestEffortOf(bindings.get(name)); + return ExprEval.ofType(bindings.getType(name), bindings.get(name)); } @Override From 
3f633546b36a926a95a978fb35b5921701a89303 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 3 Feb 2023 21:35:08 -0800 Subject: [PATCH 09/11] typo and more test --- .../src/test/java/org/apache/druid/math/expr/EvalTest.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/src/test/java/org/apache/druid/math/expr/EvalTest.java b/core/src/test/java/org/apache/druid/math/expr/EvalTest.java index 8dc73af51fd6..f1d3bee5b959 100644 --- a/core/src/test/java/org/apache/druid/math/expr/EvalTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/EvalTest.java @@ -935,7 +935,12 @@ public void testBestEffortOf() Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); Assert.assertArrayEquals(new Object[] {"1", "2", "3"}, (Object[]) eval.value()); - // arrays end up as the least restrictice type + // arrays end up as the least restrictive type + eval = ExprEval.bestEffortOf(new Object[] {1.0, 2L}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1.0, 2.0}, (Object[]) eval.value()); + + // arrays end up as the least restrictive type eval = ExprEval.bestEffortOf(new Object[] {1.0, 2L, "3", true, false}); Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); Assert.assertArrayEquals(new Object[] {"1.0", "2", "3", "true", "false"}, (Object[]) eval.value()); From f8880868c4da366e0f19e996867fb0eabf462d40 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sat, 4 Feb 2023 17:27:00 -0800 Subject: [PATCH 10/11] style --- core/src/main/java/org/apache/druid/math/expr/ExprEval.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java index 2cfe6990887a..1d5298ffc1d4 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -422,7 +422,7 @@ public static ExprEval 
bestEffortOf(@Nullable Object val) final int[] longArray = (int[]) val; final Object[] array = new Object[longArray.length]; for (int i = 0; i < longArray.length; i++) { - array[i] = (long)longArray[i]; + array[i] = (long) longArray[i]; } return new ArrayExprEval(ExpressionType.LONG_ARRAY, array); } From e0e6745a9af2302d57b546a48d007bc6da0f49b2 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sun, 5 Feb 2023 21:17:30 -0800 Subject: [PATCH 11/11] adjust --- .../apache/druid/segment/virtual/NestedFieldVirtualColumn.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index 1936b38b14b4..86e9c95a803e 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -622,8 +622,7 @@ public ColumnCapabilities capabilities(String columnName) // from here return ColumnCapabilitiesImpl.createDefault() .setType(expectedType != null ? expectedType : ColumnType.STRING) - .setHasNulls(expectedType == null || (expectedType.isNumeric() - && NullHandling.sqlCompatible())); + .setHasNulls(expectedType == null || !expectedType.isNumeric() || (expectedType.isNumeric() && NullHandling.sqlCompatible())); } @Override