diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java index 80c5b9731c99..1d5298ffc1d4 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -410,6 +410,22 @@ public static ExprEval bestEffortOf(@Nullable Object val) } return new ArrayExprEval(ExpressionType.LONG_ARRAY, array); } + if (val instanceof Integer[]) { + final Integer[] inputArray = (Integer[]) val; + final Object[] array = new Object[inputArray.length]; + for (int i = 0; i < inputArray.length; i++) { + array[i] = inputArray[i] == null ? null : inputArray[i].longValue(); + } + return new ArrayExprEval(ExpressionType.LONG_ARRAY, array); + } + if (val instanceof int[]) { + final int[] longArray = (int[]) val; + final Object[] array = new Object[longArray.length]; + for (int i = 0; i < longArray.length; i++) { + array[i] = (long) longArray[i]; + } + return new ArrayExprEval(ExpressionType.LONG_ARRAY, array); + } if (val instanceof Double[]) { final Double[] inputArray = (Double[]) val; final Object[] array = new Object[inputArray.length]; @@ -438,7 +454,7 @@ public static ExprEval bestEffortOf(@Nullable Object val) final float[] inputArray = (float[]) val; final Object[] array = new Object[inputArray.length]; for (int i = 0; i < inputArray.length; i++) { - array[i] = inputArray[i]; + array[i] = (double) inputArray[i]; } return new ArrayExprEval(ExpressionType.DOUBLE_ARRAY, array); } @@ -463,6 +479,13 @@ public static ExprEval bestEffortOf(@Nullable Object val) return ofArray(coerced.lhs, coerced.rhs); } + // in 'best effort' mode, we couldn't possibly use byte[] as a complex or anything else useful without type + // knowledge, so lets turn it into a base64 encoded string so at least something downstream can use it by decoding + // back into bytes + if (val instanceof byte[]) { + return new StringExprEval(StringUtils.encodeBase64String((byte[]) val)); + } + if (val != null) { // is this cool? return new ComplexExprEval(ExpressionType.UNKNOWN_COMPLEX, val); @@ -520,9 +543,18 @@ public static ExprEval ofType(@Nullable ExpressionType type, @Nullable Object va } return ofDouble(null); case COMPLEX: + // json isn't currently defined in druid-core, this can be reworked once + // https://github.com/apache/druid/pull/13698 is merged (or COMPLEX is promoted to a real built-in type(s) + if ("json".equals(type.getComplexTypeName())) { + return ofComplex(type, value); + } byte[] bytes = null; if (value instanceof String) { - bytes = StringUtils.decodeBase64String((String) value); + try { + bytes = StringUtils.decodeBase64String((String) value); + } + catch (IllegalArgumentException ignored) { + } } else if (value instanceof byte[]) { bytes = (byte[]) value; } diff --git a/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java b/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java index 01622a5b2687..63b5535f8445 100644 --- a/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java +++ b/core/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java @@ -21,11 +21,8 @@ import org.apache.commons.lang.StringEscapeUtils; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.math.expr.vector.ExprEvalDoubleVector; -import org.apache.druid.math.expr.vector.ExprEvalLongVector; -import org.apache.druid.math.expr.vector.ExprEvalObjectVector; -import org.apache.druid.math.expr.vector.ExprEvalVector; import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.VectorProcessors; import javax.annotation.Nullable; import java.util.Objects; @@ -152,51 +149,7 @@ public boolean canVectorize(InputBindingInspector inspector) @Override public ExprVectorProcessor buildVectorized(VectorInputBindingInspector inspector) { - ExpressionType inputType = inspector.getType(binding); - - if (inputType == null) { - // nil column, we can be anything, so be a string because it's the most flexible - // (numbers will be populated with default values in default mode and non-null) - return new IdentifierVectorProcessor(ExpressionType.STRING) - { - @Override - public ExprEvalVector evalVector(VectorInputBinding bindings) - { - return new ExprEvalObjectVector(bindings.getObjectVector(binding)); - } - }; - } - switch (inputType.getType()) { - case LONG: - return new IdentifierVectorProcessor(inputType) - { - @Override - public ExprEvalVector evalVector(VectorInputBinding bindings) - { - return new ExprEvalLongVector(bindings.getLongVector(binding), bindings.getNullVector(binding)); - } - }; - case DOUBLE: - return new IdentifierVectorProcessor(inputType) - { - @Override - public ExprEvalVector evalVector(VectorInputBinding bindings) - { - return new ExprEvalDoubleVector(bindings.getDoubleVector(binding), bindings.getNullVector(binding)); - } - }; - case STRING: - return new IdentifierVectorProcessor(inputType) - { - @Override - public ExprEvalVector evalVector(VectorInputBinding bindings) - { - return new ExprEvalObjectVector(bindings.getObjectVector(binding)); - } - }; - default: - throw Exprs.cannotVectorize(this); - } + return VectorProcessors.identifier(inspector, binding); } @Override @@ -218,20 +171,3 @@ public int hashCode() return Objects.hash(identifier); } } - -abstract class IdentifierVectorProcessor implements ExprVectorProcessor -{ - private final ExpressionType outputType; - - public IdentifierVectorProcessor(ExpressionType outputType) - { - this.outputType = outputType; - } - - @Override - public ExpressionType getOutputType() - { - return outputType; - } -} - diff --git a/core/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java b/core/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java index ee066065ad15..e23e03eef389 100644 --- a/core/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java +++ b/core/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java @@ -157,6 +157,55 @@ public ExpressionType getOutputType() }; } + public static ExprVectorProcessor identifier(Expr.VectorInputBindingInspector inspector, String binding) + { + ExpressionType inputType = inspector.getType(binding); + + if (inputType == null) { + // nil column, we can be anything, so be a string because it's the most flexible + // (numbers will be populated with default values in default mode and non-null) + return new IdentifierVectorProcessor(ExpressionType.STRING) + { + @Override + public ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + return new ExprEvalObjectVector(bindings.getObjectVector(binding)); + } + }; + } + switch (inputType.getType()) { + case LONG: + return new IdentifierVectorProcessor(inputType) + { + @Override + public ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + return new ExprEvalLongVector(bindings.getLongVector(binding), bindings.getNullVector(binding)); + } + }; + case DOUBLE: + return new IdentifierVectorProcessor(inputType) + { + @Override + public ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + return new ExprEvalDoubleVector(bindings.getDoubleVector(binding), bindings.getNullVector(binding)); + } + }; + case STRING: + return new IdentifierVectorProcessor(inputType) + { + @Override + public ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + return new ExprEvalObjectVector(bindings.getObjectVector(binding)); + } + }; + default: + throw Exprs.cannotVectorize("[" + binding + "]"); + } + } + public static ExprVectorProcessor parseLong(Expr.VectorInputBindingInspector inspector, Expr arg, int radix) { final ExprVectorProcessor processor = new LongOutObjectInFunctionVectorProcessor( @@ -889,4 +938,20 @@ private VectorProcessors() { // No instantiation } + + abstract static class IdentifierVectorProcessor implements ExprVectorProcessor + { + private final ExpressionType outputType; + + public IdentifierVectorProcessor(ExpressionType outputType) + { + this.outputType = outputType; + } + + @Override + public ExpressionType getOutputType() + { + return outputType; + } + } } diff --git a/core/src/test/java/org/apache/druid/math/expr/EvalTest.java b/core/src/test/java/org/apache/druid/math/expr/EvalTest.java index b15f5f810639..f1d3bee5b959 100644 --- a/core/src/test/java/org/apache/druid/math/expr/EvalTest.java +++ b/core/src/test/java/org/apache/druid/math/expr/EvalTest.java @@ -19,6 +19,7 @@ package org.apache.druid.math.expr; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.IAE; @@ -813,5 +814,140 @@ public void testEvalOfType() eval = ExprEval.ofType(ExpressionType.STRING_ARRAY, new Object[] {1.0, 2L, "3", true, false}); Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); Assert.assertArrayEquals(new Object[] {"1.0", "2", "3", "true", "false"}, (Object[]) eval.value()); + + // json type isn't defined in druid-core + ExpressionType json = ExpressionType.fromString("COMPLEX"); + eval = ExprEval.ofType(json, ImmutableMap.of("x", 1L, "y", 2L)); + Assert.assertEquals(json, eval.type()); + Assert.assertEquals(ImmutableMap.of("x", 1L, "y", 2L), eval.value()); + + eval = ExprEval.ofType(json, "hello"); + Assert.assertEquals(json, eval.type()); + Assert.assertEquals("hello", eval.value()); + + ExpressionType stringyComplexThing = ExpressionType.fromString("COMPLEX"); + eval = ExprEval.ofType(stringyComplexThing, "notbase64"); + Assert.assertEquals(stringyComplexThing, eval.type()); + Assert.assertEquals("notbase64", eval.value()); + } + + @Test + public void testBestEffortOf() + { + // strings + ExprEval eval = ExprEval.bestEffortOf("stringy"); + Assert.assertEquals(ExpressionType.STRING, eval.type()); + Assert.assertEquals("stringy", eval.value()); + + // by default, booleans are handled as strings + eval = ExprEval.bestEffortOf(true); + Assert.assertEquals(ExpressionType.STRING, eval.type()); + Assert.assertEquals("true", eval.value()); + + eval = ExprEval.bestEffortOf(new byte[]{1, 2, 3, 4}); + Assert.assertEquals(ExpressionType.STRING, eval.type()); + Assert.assertEquals(StringUtils.encodeBase64String(new byte[]{1, 2, 3, 4}), eval.value()); + + // longs + eval = ExprEval.bestEffortOf(1L); + Assert.assertEquals(ExpressionType.LONG, eval.type()); + Assert.assertEquals(1L, eval.value()); + + eval = ExprEval.bestEffortOf(1); + Assert.assertEquals(ExpressionType.LONG, eval.type()); + Assert.assertEquals(1L, eval.value()); + + try { + // in strict boolean mode, they are longs + ExpressionProcessing.initializeForStrictBooleansTests(true); + eval = ExprEval.ofType(ExpressionType.LONG, true); + Assert.assertEquals(ExpressionType.LONG, eval.type()); + Assert.assertEquals(1L, eval.value()); + } + finally { + // reset + ExpressionProcessing.initializeForTests(null); + } + + // doubles + eval = ExprEval.bestEffortOf(1.0); + Assert.assertEquals(ExpressionType.DOUBLE, eval.type()); + Assert.assertEquals(1.0, eval.value()); + + eval = ExprEval.bestEffortOf(1.0f); + Assert.assertEquals(ExpressionType.DOUBLE, eval.type()); + Assert.assertEquals(1.0, eval.value()); + + // arrays + eval = ExprEval.bestEffortOf(new Object[] {1L, 2L, 3L}); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {1L, 2L, null, 3L}); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, null, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(ImmutableList.of(1L, 2L, 3L)); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new long[] {1L, 2L, 3L}); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {1, 2, 3}); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new int[] {1, 2, 3}); + Assert.assertEquals(ExpressionType.LONG_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1L, 2L, 3L}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {1.0, 2.0, 3.0}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1.0, 2.0, 3.0}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {null, 1.0, 2.0, 3.0}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {null, 1.0, 2.0, 3.0}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new double[] {1.0, 2.0, 3.0}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1.0, 2.0, 3.0}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {1.0f, 2.0f, 3.0f}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1.0, 2.0, 3.0}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new float[] {1.0f, 2.0f, 3.0f}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1.0, 2.0, 3.0}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new Object[] {"1", "2", "3"}); + Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {"1", "2", "3"}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(new String[] {"1", "2", "3"}); + Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {"1", "2", "3"}, (Object[]) eval.value()); + + eval = ExprEval.bestEffortOf(ImmutableList.of("1", "2", "3")); + Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {"1", "2", "3"}, (Object[]) eval.value()); + + // arrays end up as the least restrictive type + eval = ExprEval.bestEffortOf(new Object[] {1.0, 2L}); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {1.0, 2.0}, (Object[]) eval.value()); + + // arrays end up as the least restrictive type + eval = ExprEval.bestEffortOf(new Object[] {1.0, 2L, "3", true, false}); + Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); + Assert.assertArrayEquals(new Object[] {"1.0", "2", "3", "true", "false"}, (Object[]) eval.value()); + + // json type isn't defined in druid-core, what happens if we have some nested data? + eval = ExprEval.bestEffortOf(ImmutableMap.of("x", 1L, "y", 2L)); + Assert.assertEquals(ExpressionType.UNKNOWN_COMPLEX, eval.type()); + Assert.assertEquals(ImmutableMap.of("x", 1L, "y", 2L), eval.value()); } } diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java index 5d1c67c2f38a..21dc9d9f0323 100644 --- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java +++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java @@ -25,10 +25,8 @@ import com.google.common.collect.ImmutableList; import com.google.inject.Injector; import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.java.util.common.StringUtils; @@ -55,22 +53,20 @@ import java.io.IOException; import java.util.Collections; import java.util.List; -import java.util.Map; import java.util.stream.Collectors; public abstract class CompressedBigDecimalSqlAggregatorTestBase extends BaseCalciteQueryTest { - private static final InputRowParser> PARSER = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec(TestDataBuilder.TIMESTAMP_COLUMN, "iso", null), - new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3", "m2")) - ) - ) + private static final InputRowSchema SCHEMA = new InputRowSchema( + new TimestampSpec(TestDataBuilder.TIMESTAMP_COLUMN, "iso", null), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3", "m2")) + ), + null ); private static final List ROWS1 = - TestDataBuilder.RAW_ROWS1.stream().map(m -> TestDataBuilder.createRow(m, PARSER)).collect(Collectors.toList()); + TestDataBuilder.RAW_ROWS1.stream().map(m -> TestDataBuilder.createRow(m, SCHEMA)).collect(Collectors.toList()); @Override public void configureGuice(DruidInjectorBuilder builder) diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java index c1426048faf3..fcf7099f28b0 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java @@ -24,15 +24,6 @@ import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.impl.DimensionSchema; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.DoubleDimensionSchema; -import org.apache.druid.data.input.impl.FloatDimensionSchema; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.LongDimensionSchema; -import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; -import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.guice.BloomFilterExtensionModule; import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.java.util.common.granularity.Granularities; @@ -89,19 +80,6 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( final Injector injector ) throws IOException { - InputRowParser parser = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("t", "iso", null), - new DimensionsSpec( - ImmutableList.builder() - .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3"))) - .add(new DoubleDimensionSchema("d1")) - .add(new FloatDimensionSchema("f1")) - .add(new LongDimensionSchema("l1")) - .build() - ) - )); - final QueryableIndex index = IndexBuilder.create() .tmpDir(temporaryFolder.newFolder()) @@ -112,7 +90,7 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( new CountAggregatorFactory("cnt"), new DoubleSumAggregatorFactory("m1", "m1") ) - .withDimensionsSpec(parser) + .withDimensionsSpec(TestDataBuilder.INDEX_SCHEMA_NUMERIC_DIMS.getDimensionsSpec()) .withRollup(false) .build() ) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index 88b8a5dd615b..77919c4bffd8 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -510,6 +510,7 @@ private ColumnHolder readNestedFieldColumn(String field) metadata.getBitmapSerdeFactory().getObjectStrategy(), columnBuilder.getFileMapper() ); + final boolean hasNull = localDictionarySupplier.get().get(0) == 0; Supplier> columnSupplier = () -> { FixedIndexed localDict = localDictionarySupplier.get(); return closer.register(new NestedFieldLiteralDictionaryEncodedColumn( @@ -521,13 +522,13 @@ private ColumnHolder readNestedFieldColumn(String field) longDictionarySupplier.get(), doubleDictionarySupplier.get(), localDict, - localDict.get(0) == 0 + hasNull ? rBitmaps.get(0) : metadata.getBitmapSerdeFactory().getBitmapFactory().makeEmptyImmutableBitmap() )); }; columnBuilder.setHasMultipleValues(false) - .setHasNulls(true) + .setHasNulls(hasNull) .setDictionaryEncodedColumnSupplier(columnSupplier); columnBuilder.setIndexSupplier( new NestedFieldLiteralColumnIndexSupplier( diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java index 44d228e67c5a..f73b72209c85 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java @@ -89,7 +89,12 @@ public StructuredDataProcessor.ProcessedLiteral processLiteralField(ArrayList if (writer != null) { try { ExprEval eval = ExprEval.bestEffortOf(fieldValue); - writer.addValue(rowCount, eval.value()); + if (eval.type().isPrimitive() || (eval.type().isArray() && eval.type().getElementType().isPrimitive())) { + writer.addValue(rowCount, eval.value()); + } else { + // behave consistently with nested column indexer, which defaults to string + writer.addValue(rowCount, eval.asString()); + } // serializer doesn't use size estimate return StructuredDataProcessor.ProcessedLiteral.NULL_LITERAL; } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java index 4e1ecbb994ed..507a531f4bf2 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java @@ -20,8 +20,6 @@ package org.apache.druid.segment.nested; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.base.Supplier; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.java.util.common.ISE; @@ -46,35 +44,24 @@ public class NestedDataColumnSupplier implements Supplier { - private final byte version; - private final NestedDataColumnMetadata metadata; - private final GenericIndexed fields; - private final NestedLiteralTypeInfo fieldInfo; - private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; - private final ImmutableBitmap nullValues; - private final GenericIndexed stringDictionary; - private final Supplier frontCodedStringDictionarySupplier; - private final Supplier> longDictionarySupplier; - private final Supplier> doubleDictionarySupplier; - private final ColumnConfig columnConfig; - private final SmooshedFileMapper fileMapper; - - @Nullable - private final ColumnType simpleType; - - public NestedDataColumnSupplier( + public static NestedDataColumnSupplier read( ByteBuffer bb, ColumnBuilder columnBuilder, ColumnConfig columnConfig, ObjectMapper jsonMapper ) { - this(bb, columnBuilder, columnConfig, jsonMapper, ColumnType.LONG.getStrategy(), ColumnType.DOUBLE.getStrategy()); + return read( + bb, + columnBuilder, + columnConfig, + jsonMapper, + ColumnType.LONG.getStrategy(), + ColumnType.DOUBLE.getStrategy() + ); } - // strictly for testing? - @VisibleForTesting - public NestedDataColumnSupplier( + public static NestedDataColumnSupplier read( ByteBuffer bb, ColumnBuilder columnBuilder, ColumnConfig columnConfig, @@ -83,11 +70,22 @@ public NestedDataColumnSupplier( TypeStrategy doubleTypeStrategy ) { - this.version = bb.get(); + final byte version = bb.get(); if (version == 0x03 || version == 0x04) { try { final SmooshedFileMapper mapper = columnBuilder.getFileMapper(); + final NestedDataColumnMetadata metadata; + final GenericIndexed fields; + final NestedLiteralTypeInfo fieldInfo; + final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; + final ImmutableBitmap nullValues; + final GenericIndexed stringDictionary; + final Supplier frontCodedStringDictionarySupplier; + final Supplier> longDictionarySupplier; + final Supplier> doubleDictionarySupplier; + ColumnType simpleType; + metadata = jsonMapper.readValue( IndexMerger.SERIALIZER_UTILS.readString(bb), NestedDataColumnMetadata.class @@ -95,9 +93,13 @@ public NestedDataColumnSupplier( fields = GenericIndexed.read(bb, GenericIndexed.STRING_STRATEGY, mapper); fieldInfo = NestedLiteralTypeInfo.read(bb, fields.size()); - if (fields.size() == 1 && - ((version == 0x03 && NestedPathFinder.JQ_PATH_ROOT.equals(fields.get(0))) || - (version == 0x04 && NestedPathFinder.JSON_PATH_ROOT.equals(fields.get(0)))) + if (fields.size() == 0) { + // all nulls, in the future we'll deal with this better... but for now lets just call it a string because + // it is the most permissive (besides json) + simpleType = ColumnType.STRING; + } else if (fields.size() == 1 && + ((version == 0x03 && NestedPathFinder.JQ_PATH_ROOT.equals(fields.get(0))) || + (version == 0x04 && NestedPathFinder.JSON_PATH_ROOT.equals(fields.get(0)))) ) { simpleType = fieldInfo.getTypes(0).getSingleType(); } else { @@ -106,6 +108,7 @@ public NestedDataColumnSupplier( final ByteBuffer stringDictionaryBuffer = loadInternalFile( mapper, + metadata, NestedDataColumnSerializer.STRING_DICTIONARY_FILE_NAME ); @@ -139,6 +142,7 @@ public NestedDataColumnSupplier( } final ByteBuffer longDictionaryBuffer = loadInternalFile( mapper, + metadata, NestedDataColumnSerializer.LONG_DICTIONARY_FILE_NAME ); longDictionarySupplier = FixedIndexed.read( @@ -149,6 +153,7 @@ public NestedDataColumnSupplier( ); final ByteBuffer doubleDictionaryBuffer = loadInternalFile( mapper, + metadata, NestedDataColumnSerializer.DOUBLE_DICTIONARY_FILE_NAME ); doubleDictionarySupplier = FixedIndexed.read( @@ -157,7 +162,7 @@ public NestedDataColumnSupplier( metadata.getByteOrder(), Double.BYTES ); - final ByteBuffer rawBuffer = loadInternalFile(mapper, NestedDataColumnSerializer.RAW_FILE_NAME); + final ByteBuffer rawBuffer = loadInternalFile(mapper, metadata, NestedDataColumnSerializer.RAW_FILE_NAME); compressedRawColumnSupplier = CompressedVariableSizedBlobColumnSupplier.fromByteBuffer( NestedDataColumnSerializer.getInternalFileName( metadata.getFileNameBase(), NestedDataColumnSerializer.RAW_FILE_NAME @@ -168,11 +173,31 @@ public NestedDataColumnSupplier( ); if (metadata.hasNulls()) { columnBuilder.setHasNulls(true); - final ByteBuffer nullIndexBuffer = loadInternalFile(mapper, NestedDataColumnSerializer.NULL_BITMAP_FILE_NAME); + final ByteBuffer nullIndexBuffer = loadInternalFile( + mapper, + metadata, + NestedDataColumnSerializer.NULL_BITMAP_FILE_NAME + ); nullValues = metadata.getBitmapSerdeFactory().getObjectStrategy().fromByteBufferWithSize(nullIndexBuffer); } else { nullValues = metadata.getBitmapSerdeFactory().getBitmapFactory().makeEmptyImmutableBitmap(); } + + return new NestedDataColumnSupplier( + version, + metadata, + fields, + fieldInfo, + compressedRawColumnSupplier, + nullValues, + stringDictionary, + frontCodedStringDictionarySupplier, + longDictionarySupplier, + doubleDictionarySupplier, + columnConfig, + mapper, + simpleType + ); } catch (IOException ex) { throw new RE(ex, "Failed to deserialize V%s column.", version); @@ -180,10 +205,53 @@ public NestedDataColumnSupplier( } else { throw new RE("Unknown version " + version); } + } + + private final byte version; + private final NestedDataColumnMetadata metadata; + private final GenericIndexed fields; + private final NestedLiteralTypeInfo fieldInfo; + private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; + private final ImmutableBitmap nullValues; + private final GenericIndexed stringDictionary; + private final Supplier frontCodedStringDictionarySupplier; + private final Supplier> longDictionarySupplier; + private final Supplier> doubleDictionarySupplier; + private final ColumnConfig columnConfig; + private final SmooshedFileMapper fileMapper; - fileMapper = Preconditions.checkNotNull(columnBuilder.getFileMapper(), "Null fileMapper"); + @Nullable + private final ColumnType simpleType; + private NestedDataColumnSupplier( + byte version, + NestedDataColumnMetadata metadata, + GenericIndexed fields, + NestedLiteralTypeInfo fieldInfo, + CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, + ImmutableBitmap nullValues, + GenericIndexed stringDictionary, + Supplier frontCodedStringDictionarySupplier, + Supplier> longDictionarySupplier, + Supplier> doubleDictionarySupplier, + ColumnConfig columnConfig, + SmooshedFileMapper fileMapper, + @Nullable ColumnType simpleType + ) + { + this.version = version; + this.metadata = metadata; + this.fields = fields; + this.fieldInfo = fieldInfo; + this.compressedRawColumnSupplier = compressedRawColumnSupplier; + this.nullValues = nullValues; + this.stringDictionary = stringDictionary; + this.frontCodedStringDictionarySupplier = frontCodedStringDictionarySupplier; + this.longDictionarySupplier = longDictionarySupplier; + this.doubleDictionarySupplier = doubleDictionarySupplier; this.columnConfig = columnConfig; + this.fileMapper = fileMapper; + this.simpleType = simpleType; } @Override @@ -261,7 +329,11 @@ private NestedDataColumnV4 makeV4() ); } - private ByteBuffer loadInternalFile(SmooshedFileMapper fileMapper, String internalFileName) throws IOException + private static ByteBuffer loadInternalFile( + SmooshedFileMapper fileMapper, + NestedDataColumnMetadata metadata, + String internalFileName + ) throws IOException { return fileMapper.mapFile( NestedDataColumnSerializer.getInternalFileName(metadata.getFileNameBase(), internalFileName) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java index fec5dcfeaa11..868b735e213f 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java @@ -83,7 +83,7 @@ public void deserializeColumn( ColumnConfig columnConfig ) { - NestedDataColumnSupplier supplier = new NestedDataColumnSupplier(buffer, builder, columnConfig, OBJECT_MAPPER); + NestedDataColumnSupplier supplier = NestedDataColumnSupplier.read(buffer, builder, columnConfig, OBJECT_MAPPER); ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); capabilitiesBuilder.setDictionaryEncoded(true); capabilitiesBuilder.setDictionaryValuesSorted(true); diff --git a/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java index 3e1bed6076cf..762f2e73fc77 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java @@ -24,6 +24,7 @@ import org.apache.druid.segment.QueryableIndexStorageAdapter; import javax.annotation.Nullable; +import java.util.Arrays; public class NilVectorSelector implements VectorValueSelector, VectorObjectSelector, SingleValueDimensionVectorSelector, IdLookup @@ -36,9 +37,7 @@ public class NilVectorSelector private static final Object[] DEFAULT_OBJECT_VECTOR = new Object[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; static { - for (int i = 0; i < DEFAULT_NULLS_VECTOR.length; i++) { - DEFAULT_NULLS_VECTOR[i] = NullHandling.sqlCompatible(); - } + Arrays.fill(DEFAULT_NULLS_VECTOR, NullHandling.sqlCompatible()); } private final VectorSizeInspector vectorSizeInspector; @@ -82,9 +81,13 @@ public static NilVectorSelector create(final VectorSizeInspector vectorSizeInspe DEFAULT_OBJECT_VECTOR ); } else { + final boolean[] nulls = new boolean[vectorSizeInspector.getMaxVectorSize()]; + if (NullHandling.sqlCompatible()) { + Arrays.fill(nulls, true); + } return new NilVectorSelector( vectorSizeInspector, - new boolean[vectorSizeInspector.getMaxVectorSize()], + nulls, new int[vectorSizeInspector.getMaxVectorSize()], new long[vectorSizeInspector.getMaxVectorSize()], new float[vectorSizeInspector.getMaxVectorSize()], diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java index 3ee12ff88564..77c40b551e10 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java @@ -24,10 +24,14 @@ import org.apache.druid.math.expr.ExprType; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor; import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.VectorProcessors; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.vector.ConstantVectorSelectors; +import org.apache.druid.segment.vector.ReadableVectorInspector; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -103,6 +107,45 @@ public static VectorObjectSelector makeVectorObjectSelector( return new ExpressionVectorObjectSelector(processor, bindings); } + public static VectorObjectSelector castValueSelectorToObject( + ReadableVectorInspector inspector, + String columnName, + VectorValueSelector selector, + ColumnType selectorType, + ColumnType castTo + ) + { + ExpressionVectorInputBinding binding = new ExpressionVectorInputBinding(inspector); + binding.addNumeric(columnName, ExpressionType.fromColumnType(selectorType), selector); + return new ExpressionVectorObjectSelector( + CastToTypeVectorProcessor.cast( + VectorProcessors.identifier(binding, columnName), + ExpressionType.fromColumnType(castTo) + ), + binding + ); + } + + public static VectorValueSelector castObjectSelectorToNumeric( + ReadableVectorInspector inspector, + String columnName, + VectorObjectSelector selector, + ColumnType selectorType, + ColumnType castTo + ) + { + Preconditions.checkArgument(castTo.isNumeric(), "Must cast to a numeric type to make a value selector"); + ExpressionVectorInputBinding binding = new ExpressionVectorInputBinding(inspector); + binding.addObjectSelector(columnName, ExpressionType.fromColumnType(selectorType), selector); + return new ExpressionVectorValueSelector( + CastToTypeVectorProcessor.cast( + VectorProcessors.identifier(binding, columnName), + ExpressionType.fromColumnType(castTo) + ), + binding + ); + } + private static Expr.VectorInputBinding createVectorBindings( Expr.BindingAnalysis bindingAnalysis, VectorColumnSelectorFactory vectorColumnSelectorFactory diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index df06eb44f321..86e9c95a803e 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -24,10 +24,15 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; import com.google.common.primitives.Doubles; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.guava.GuavaUtils; +import org.apache.druid.java.util.common.Numbers; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseSingleValueDimensionSelector; import org.apache.druid.segment.ColumnInspector; @@ -35,13 +40,19 @@ import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.NilColumnValueSelector; import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.column.ValueTypes; +import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.ReadableOffset; import org.apache.druid.segment.nested.NestedDataComplexColumn; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; @@ -59,6 +70,7 @@ import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; +import java.nio.ByteBuffer; import java.util.Collections; import java.util.List; import java.util.Objects; @@ -67,16 +79,16 @@ * Optimized virtual column that can make direct selectors into a {@link NestedDataComplexColumn} or any associated * nested fields ({@link org.apache.druid.segment.nested.NestedFieldLiteralDictionaryEncodedColumn}) including using * their indexes. - * + *

* This virtual column is used for the SQL operators JSON_VALUE (if {@link #processFromRaw} is set to false) or * JSON_QUERY (if it is true), and accepts 'JSONPath' or 'jq' syntax string representations of paths, or a parsed * list of {@link NestedPathPart} in order to determine what should be selected from the column. - * + *

* Type information for nested fields is completely absent in the SQL planner, so it guesses the best it can to set * {@link #expectedType} from the context of how something is being used, e.g. an aggregators default type or an * explicit cast, or, if using the 'RETURNING' syntax which explicitly specifies type. This might not be the same as * if it had actual type information, but, we try to stick with whatever we chose there to do the best we can for now. - * + *

* Since {@link #capabilities(ColumnInspector, String)} is determined by the {@link #expectedType}, the results will * be best effor cast to the expected type if the column is not natively the expected type so that this column can * fulfill the contract of the type of selector that is likely to be created to read this column. @@ -205,8 +217,8 @@ public DimensionSelector makeDimensionSelector( // this dimension selector is used for realtime queries, nested paths are not themselves dictionary encoded until // written to segment, so we fall back to processing the structured data from a column value selector on the // complex column - ColumnValueSelector valueSelector = makeColumnValueSelector(dimensionSpec.getOutputName(), factory); - return new FieldDimensionSelector(valueSelector); + ColumnValueSelector valueSelector = makeColumnValueSelector(dimensionSpec.getOutputName(), factory); + return dimensionSpec.decorate(new FieldDimensionSelector(valueSelector)); } @Override @@ -216,7 +228,7 @@ public ColumnValueSelector makeColumnValueSelector( ) { // this column value selector is used for realtime queries, so we always process StructuredData - final ColumnValueSelector baseSelector = factory.makeColumnValueSelector(this.columnName); + final ColumnValueSelector baseSelector = factory.makeColumnValueSelector(this.columnName); // processFromRaw is true that means JSON_QUERY, which can return partial results, otherwise this virtual column // is JSON_VALUE which only returns literals, so use the literal value selector instead @@ -233,20 +245,53 @@ public DimensionSelector makeDimensionSelector( ReadableOffset offset ) { - final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(columnSelector, columnName); - if (column == null) { - // complex column itself didn't exist - return DimensionSelector.constant(null); + ColumnHolder holder = columnSelector.getColumnHolder(columnName); + if (holder == null) { + // column doesn't exist + return dimensionSpec.decorate(DimensionSelector.constant(null, dimensionSpec.getExtractionFn())); } if (hasNegativeArrayIndex) { - return new FieldDimensionSelector( - new RawFieldLiteralColumnValueSelector( - column.makeColumnValueSelector(offset), - parts - ) + // negative array elements in a path expression mean that values should be fetched 'from the end' of the array + // if the path has negative array elements, then we have to use the 'raw' processing of the FieldDimensionSelector + // created with the column selector factory instead of using the optimized nested field column, return null + // to fall through + return null; + } + + return dimensionSpec.decorate(makeDimensionSelectorUndecorated(holder, offset, dimensionSpec.getExtractionFn())); + } + + private DimensionSelector makeDimensionSelectorUndecorated( + ColumnHolder holder, + ReadableOffset offset, + @Nullable ExtractionFn extractionFn + ) + { + BaseColumn theColumn = holder.getColumn(); + if (theColumn instanceof NestedDataComplexColumn) { + final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; + return column.makeDimensionSelector(parts, offset, extractionFn); + } + + // not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts + if (parts.isEmpty()) { + // dictionary encoded columns do not typically implement the value selector methods (getLong, getDouble, getFloat) + // nothing *should* be using a dimension selector to call the numeric getters, but just in case... wrap their + // selector in a "best effort" casting selector to implement them + if (theColumn instanceof DictionaryEncodedColumn) { + final DictionaryEncodedColumn column = (DictionaryEncodedColumn) theColumn; + return new BestEffortCastingValueSelector(column.makeDimensionSelector(offset, extractionFn)); + } + // for non-dictionary encoded columns, wrap a value selector to make it appear as a dimension selector + return ValueTypes.makeNumericWrappingDimensionSelector( + holder.getCapabilities().getType(), + theColumn.makeColumnValueSelector(offset), + extractionFn ); } - return column.makeDimensionSelector(parts, offset, dimensionSpec.getExtractionFn()); + + // we are not a nested column and are being asked for a path that will never exist, so we are nil selector + return DimensionSelector.constant(null, extractionFn); } @@ -258,18 +303,40 @@ public ColumnValueSelector makeColumnValueSelector( ReadableOffset offset ) { - final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(columnSelector, this.columnName); - if (column == null) { + ColumnHolder holder = columnSelector.getColumnHolder(this.columnName); + if (holder == null) { return NilColumnValueSelector.instance(); } + BaseColumn theColumn = holder.getColumn(); - // processFromRaw is true, that means JSON_QUERY, which can return partial results, otherwise this virtual column - // is JSON_VALUE which only returns literals, so we can use the nested columns value selector - return processFromRaw - ? new RawFieldColumnSelector(column.makeColumnValueSelector(offset), parts) - : hasNegativeArrayIndex - ? new RawFieldLiteralColumnValueSelector(column.makeColumnValueSelector(offset), parts) - : column.makeColumnValueSelector(parts, offset); + if (processFromRaw || hasNegativeArrayIndex) { + // if the path has negative array elements, or has set the flag to process 'raw' values explicitly (JSON_QUERY), + // then we use the 'raw' processing of the RawFieldColumnSelector/RawFieldLiteralColumnValueSelector created + // with the column selector factory instead of using the optimized nested field column + return null; + } + + // "JSON_VALUE", which only returns literals, on a NestedDataComplexColumn, so we can use the fields value selector + if (theColumn instanceof NestedDataComplexColumn) { + final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; + return column.makeColumnValueSelector(parts, offset); + } + + // not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts + if (parts.isEmpty()) { + // dictionary encoded columns do not typically implement the value selector methods (getLong, getDouble, getFloat) + // so we want to wrap their selector in a "best effort" casting selector to implement them + if (theColumn instanceof DictionaryEncodedColumn) { + final DictionaryEncodedColumn column = (DictionaryEncodedColumn) theColumn; + return new BestEffortCastingValueSelector(column.makeDimensionSelector(offset, null)); + } + // otherwise it is probably cool to pass through the value selector directly, if numbers make sense the selector + // very likely implemented them, and everyone implements getObject if not + return theColumn.makeColumnValueSelector(offset); + } + + // we are not a nested column and are being asked for a path that will never exist, so we are nil selector + return NilColumnValueSelector.instance(); } @Override @@ -286,14 +353,37 @@ public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector ReadableVectorOffset offset ) { - final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(columnSelector, columnName); - if (column == null) { - return NilVectorSelector.create(offset); + ColumnHolder holder = columnSelector.getColumnHolder(columnName); + if (holder == null) { + return dimensionSpec.decorate(NilVectorSelector.create(offset)); + } + + return dimensionSpec.decorate(makeSingleValueVectorDimensionSelectorUndecorated(holder, offset)); + } + + private SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelectorUndecorated( + ColumnHolder holder, + ReadableVectorOffset offset + ) + { + BaseColumn theColumn = holder.getColumn(); + if (theColumn instanceof NestedDataComplexColumn) { + final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; + return column.makeSingleValueDimensionVectorSelector(parts, offset); + } + + // not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts + if (parts.isEmpty()) { + // we will not end up here unless underlying column capabilities lied about something being dictionary encoded... + // so no need for magic casting like nonvectorized engine + return ((DictionaryEncodedColumn) theColumn).makeSingleValueDimensionVectorSelector(offset); } - return column.makeSingleValueDimensionVectorSelector(parts, offset); + // we are not a nested column and are being asked for a path that will never exist, so we are nil selector + return NilVectorSelector.create(offset); } + @Nullable @Override public VectorObjectSelector makeVectorObjectSelector( @@ -302,15 +392,38 @@ public VectorObjectSelector makeVectorObjectSelector( ReadableVectorOffset offset ) { - final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(columnSelector, this.columnName); - if (column == null) { + ColumnHolder holder = columnSelector.getColumnHolder(this.columnName); + if (holder == null) { return NilVectorSelector.create(offset); } + BaseColumn theColumn = holder.getColumn(); + // processFromRaw is true, that means JSON_QUERY, which can return partial results, otherwise this virtual column // is JSON_VALUE which only returns literals, so we can use the nested columns value selector - return processFromRaw - ? new RawFieldVectorObjectSelector(column.makeVectorObjectSelector(offset), parts) - : column.makeVectorObjectSelector(parts, offset); + if (theColumn instanceof NestedDataComplexColumn) { + final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; + if (processFromRaw) { + return new RawFieldVectorObjectSelector(column.makeVectorObjectSelector(offset), parts); + } + return column.makeVectorObjectSelector(parts, offset); + } + // not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts + if (parts.isEmpty()) { + ColumnCapabilities capabilities = holder.getCapabilities(); + if (capabilities.isNumeric()) { + return ExpressionVectorSelectors.castValueSelectorToObject( + offset, + this.columnName, + theColumn.makeVectorValueSelector(offset), + capabilities.toColumnType(), + expectedType + ); + } + return theColumn.makeVectorObjectSelector(offset); + } + + // we are not a nested column and are being asked for a path that will never exist, so we are nil selector + return NilVectorSelector.create(offset); } @Nullable @@ -321,11 +434,30 @@ public VectorValueSelector makeVectorValueSelector( ReadableVectorOffset offset ) { - final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(columnSelector, this.columnName); - if (column == null) { + ColumnHolder holder = columnSelector.getColumnHolder(this.columnName); + if (holder == null) { + return NilVectorSelector.create(offset); + } + BaseColumn theColumn = holder.getColumn(); + if (!(theColumn instanceof NestedDataComplexColumn)) { + + if (parts.isEmpty()) { + ColumnCapabilities capabilities = holder.getCapabilities(); + if (theColumn instanceof DictionaryEncodedColumn) { + return ExpressionVectorSelectors.castObjectSelectorToNumeric( + offset, + this.columnName, + theColumn.makeVectorObjectSelector(offset), + capabilities.toColumnType(), + expectedType + ); + } + return theColumn.makeVectorValueSelector(offset); + } return NilVectorSelector.create(offset); } + final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn; // if column is numeric, it has a vector value selector, so we can directly make a vector value selector // if we are missing an expectedType, then we've got nothing else to work with so try it anyway if (column.isNumeric(parts) || expectedType == null) { @@ -341,6 +473,7 @@ public VectorValueSelector makeVectorValueSelector( @Nullable private boolean[] nullVector = null; private int id = ReadableVectorInspector.NULL_ID; + @Override public long[] getLongVector() { @@ -482,12 +615,14 @@ public ColumnCapabilities capabilities(String columnName) // JSON_QUERY always returns a StructuredData return ColumnCapabilitiesImpl.createDefault() .setType(NestedDataComplexTypeSerde.TYPE) - .setHasMultipleValues(false); + .setHasMultipleValues(false) + .setHasNulls(true); } // this should only be used for 'realtime' queries, so don't indicate that we are dictionary encoded or have indexes // from here return ColumnCapabilitiesImpl.createDefault() - .setType(expectedType != null ? expectedType : ColumnType.STRING); + .setType(expectedType != null ? expectedType : ColumnType.STRING) + .setHasNulls(expectedType == null || !expectedType.isNumeric() || (expectedType.isNumeric() && NullHandling.sqlCompatible())); } @Override @@ -497,7 +632,8 @@ public ColumnCapabilities capabilities(ColumnInspector inspector, String columnN // JSON_QUERY always returns a StructuredData return ColumnCapabilitiesImpl.createDefault() .setType(NestedDataComplexTypeSerde.TYPE) - .setHasMultipleValues(false); + .setHasMultipleValues(false) + .setHasNulls(true); } // ColumnInspector isn't really enough... we need the ability to read the complex column itself to examine // the nested fields type information to really be accurate here, so we rely on the expectedType to guide us @@ -508,7 +644,9 @@ public ColumnCapabilities capabilities(ColumnInspector inspector, String columnN .setDictionaryEncoded(true) .setDictionaryValuesSorted(true) .setDictionaryValuesUnique(true) - .setHasBitmapIndexes(true); + .setHasBitmapIndexes(true) + .setHasNulls(expectedType == null || (expectedType.isNumeric() + && NullHandling.sqlCompatible())); } return capabilities(columnName); } @@ -563,7 +701,7 @@ public String toString() /** * Process the "raw" data to extract literals with {@link NestedPathFinder#findLiteral(Object, List)}. Like * {@link RawFieldColumnSelector} but only literals and does not wrap the results in {@link StructuredData}. - * + *

* This is used as a selector on realtime data when the native field columns are not available. */ public static class RawFieldLiteralColumnValueSelector extends RawFieldColumnSelector @@ -577,30 +715,21 @@ public RawFieldLiteralColumnValueSelector(ColumnValueSelector baseSelector, List public double getDouble() { Object o = getObject(); - if (o instanceof Number) { - return ((Number) o).doubleValue(); - } - return 0.0; + return Numbers.tryParseDouble(o, 0.0); } @Override public float getFloat() { Object o = getObject(); - if (o instanceof Number) { - return ((Number) o).floatValue(); - } - return 0f; + return Numbers.tryParseFloat(o, 0.0f); } @Override public long getLong() { Object o = getObject(); - if (o instanceof Number) { - return ((Number) o).longValue(); - } - return 0L; + return Numbers.tryParseLong(o, 0L); } @Override @@ -613,7 +742,8 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public boolean isNull() { - return !(getObject() instanceof Number); + Object o = getObject(); + return !(o instanceof Number || (o instanceof String && Doubles.tryParse((String) o) != null)); } @Nullable @@ -650,8 +780,8 @@ public RawFieldColumnSelector(ColumnValueSelector baseSelector, List valueSelector; - public FieldDimensionSelector(ColumnValueSelector valueSelector) + public FieldDimensionSelector(ColumnValueSelector valueSelector) { this.valueSelector = valueSelector; } @@ -780,4 +914,138 @@ protected String getValue() return String.valueOf(val); } } + + /** + * {@link DimensionSelector} that provides implicit numeric casting when used as a value selector, trying best effort + * to implement {@link #getLong()}, {@link #getDouble()}, {@link #getFloat()}, {@link #isNull()} on top of some + * other {@link DimensionSelector}. + *

+ * This is used as a fall-back when making a selector and the underlying column is NOT a + * {@link NestedDataComplexColumn}, whose field {@link DimensionSelector} natively implement this behavior. + */ + private static class BestEffortCastingValueSelector implements DimensionSelector + { + private final DimensionSelector baseSelector; + + public BestEffortCastingValueSelector(DimensionSelector baseSelector) + { + this.baseSelector = baseSelector; + } + + @Override + public IndexedInts getRow() + { + return baseSelector.getRow(); + } + + @Override + public ValueMatcher makeValueMatcher(@Nullable String value) + { + return baseSelector.makeValueMatcher(value); + } + + @Override + public ValueMatcher makeValueMatcher(Predicate predicate) + { + return baseSelector.makeValueMatcher(predicate); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + baseSelector.inspectRuntimeShape(inspector); + } + + @Nullable + @Override + public Object getObject() + { + return baseSelector.getObject(); + } + + @Override + public Class classOfObject() + { + return baseSelector.classOfObject(); + } + + @Override + public int getValueCardinality() + { + return baseSelector.getValueCardinality(); + } + + @Nullable + @Override + public String lookupName(int id) + { + return baseSelector.lookupName(id); + } + + @Nullable + @Override + public ByteBuffer lookupNameUtf8(int id) + { + return baseSelector.lookupNameUtf8(id); + } + + @Override + public boolean supportsLookupNameUtf8() + { + return baseSelector.supportsLookupNameUtf8(); + } + + @Override + public float getFloat() + { + final IndexedInts row = getRow(); + if (row.size() != 1) { + return 0f; + } + return Numbers.tryParseFloat(lookupName(row.get(0)), 0f); + } + + @Override + public double getDouble() + { + final IndexedInts row = getRow(); + if (row.size() != 1) { + return 0.0; + } + return Numbers.tryParseDouble(lookupName(row.get(0)), 0.0); + } + + @Override + public long getLong() + { + final IndexedInts row = getRow(); + if (row.size() != 1) { + return 0L; + } + return Numbers.tryParseLong(lookupName(row.get(0)), 0L); + } + + @Override + public boolean isNull() + { + final IndexedInts row = getRow(); + if (row.size() != 1) { + return true; + } + return Doubles.tryParse(lookupName(row.get(0))) == null; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return baseSelector.nameLookupPossibleInAdvance(); + } + + @Nullable + @Override + public IdLookup idLookup() + { + return baseSelector.idLookup(); + } + } } diff --git a/processing/src/test/java/org/apache/druid/query/expression/TimestampShiftMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/TimestampShiftMacroTest.java index 1dc87451c8e6..2c39ae429bc7 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/TimestampShiftMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/TimestampShiftMacroTest.java @@ -257,7 +257,7 @@ private static class NotLiteralExpr extends ExprMacroTable.BaseScalarUnivariateM @Override public ExprEval eval(ObjectBinding bindings) { - return ExprEval.bestEffortOf(bindings.get(name)); + return ExprEval.ofType(bindings.getType(name), bindings.get(name)); } @Override diff --git a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java index dac3295e3caa..9f33735c03da 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java @@ -50,7 +50,6 @@ import org.junit.Before; import org.junit.Rule; import org.junit.Test; -import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -65,8 +64,6 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest { private static final Logger LOG = new Logger(NestedDataGroupByQueryTest.class); - @Rule - public ExpectedException expectedException = ExpectedException.none(); @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); @@ -78,8 +75,6 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest private final TrinaryFn> segmentsGenerator; private final String segmentsName; - private boolean cannotVectorize = false; - public NestedDataGroupByQueryTest( GroupByQueryConfig config, TrinaryFn> segmentGenerator, @@ -103,7 +98,7 @@ public Map getContext() { return ImmutableMap.of( QueryContexts.VECTORIZE_KEY, vectorize.toString(), - QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, "true" + QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize.toString() ); } @@ -127,19 +122,6 @@ public static Collection constructorFeeder() @Before public void setup() { - if (!"segments".equals(segmentsName)) { - if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) { - expectedException.expect(RuntimeException.class); - expectedException.expectMessage( - "GroupBy v1 does not support dimension selectors with unknown cardinality." - ); - } else if (vectorize == QueryContexts.Vectorize.FORCE) { - expectedException.expect(RuntimeException.class); - expectedException.expectMessage( - "Cannot vectorize!" - ); - } - } } @After @@ -162,12 +144,8 @@ public void testGroupBySomeField() .build(); - Sequence seq = helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery); - - List results = seq.toList(); - verifyResults( - groupQuery.getResultRowSignature(), - results, + runResults( + groupQuery, ImmutableList.of( new Object[]{null, 8L}, new Object[]{"100", 2L}, @@ -177,6 +155,51 @@ public void testGroupBySomeField() ); } + @Test + public void testGroupByRegularColumns() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions( + DefaultDimensionSpec.of("v0"), + DefaultDimensionSpec.of("v1"), + new DefaultDimensionSpec("v2", "v2", ColumnType.LONG), + new DefaultDimensionSpec("v3", "v3", ColumnType.LONG), + new DefaultDimensionSpec("v4", "v4", ColumnType.STRING), + new DefaultDimensionSpec("v5", "v5", ColumnType.LONG) + ) + .setVirtualColumns( + new NestedFieldVirtualColumn("dim", "$", "v0", ColumnType.STRING), + new NestedFieldVirtualColumn("dim", "$.x", "v1", ColumnType.STRING), + new NestedFieldVirtualColumn("dim", "$", "v2", ColumnType.LONG), + new NestedFieldVirtualColumn("count", "$", "v3", ColumnType.LONG), + new NestedFieldVirtualColumn("count", "$", "v4", ColumnType.STRING), + new NestedFieldVirtualColumn("count", "$.x", "v5", ColumnType.LONG) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + runResults( + groupQuery, + NullHandling.replaceWithDefault() + ? ImmutableList.of( + new Object[]{"100", null, 100L, 1L, "1", 0L, 2L}, + new Object[]{"hello", null, 0L, 1L, "1", 0L, 12L}, + new Object[]{"world", null, 0L, 1L, "1", 0L, 2L} + ) + : ImmutableList.of( + new Object[]{"100", null, 100L, 1L, "1", null, 2L}, + new Object[]{"hello", null, null, 1L, "1", null, 12L}, + new Object[]{"world", null, null, 1L, "1", null, 2L} + ), + "incremental".equals(segmentsName), + true + ); + } + @Test public void testGroupBySomeFieldWithFilter() { @@ -197,12 +220,8 @@ public void testGroupBySomeFieldWithFilter() .build(); - Sequence seq = helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery); - - List results = seq.toList(); - verifyResults( - groupQuery.getResultRowSignature(), - results, + runResults( + groupQuery, ImmutableList.of( new Object[]{null, 8L}, new Object[]{"100", 2L}, @@ -232,14 +251,7 @@ public void testGroupByNoFieldWithFilter() .build(); - Sequence seq = helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery); - - List results = seq.toList(); - verifyResults( - groupQuery.getResultRowSignature(), - results, - ImmutableList.of(new Object[]{null, 16L}) - ); + runResults(groupQuery, ImmutableList.of(new Object[]{null, 16L})); } @Test @@ -266,26 +278,12 @@ public void testGroupBySomeFieldWithNonExistentAgg() .build(); - Sequence seq = helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery); - - List results = seq.toList(); - - verifyResults( - groupQuery.getResultRowSignature(), - results, - ImmutableList.of(new Object[]{null, NullHandling.defaultLongValue()}) - ); + runResults(groupQuery, ImmutableList.of(new Object[]{null, NullHandling.defaultLongValue()})); } @Test public void testGroupByNonExistentVirtualColumn() { - if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) { - expectedException.expect(RuntimeException.class); - expectedException.expectMessage( - "GroupBy v1 does not support dimension selectors with unknown cardinality." - ); - } GroupByQuery groupQuery = GroupByQuery.builder() .setDataSource("test_datasource") .setGranularity(Granularities.ALL) @@ -304,19 +302,86 @@ public void testGroupByNonExistentVirtualColumn() .setContext(getContext()) .build(); - - Sequence seq = helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery); - - List results = seq.toList(); - verifyResults( - groupQuery.getResultRowSignature(), - results, + runResults( + groupQuery, NullHandling.sqlCompatible() ? ImmutableList.of(new Object[]{null, 16L}) - : ImmutableList.of(new Object[]{"foo", 16L}) + : ImmutableList.of(new Object[]{"foo", 16L}), + true, + false ); } + private void runResults(GroupByQuery groupQuery, List expectedResults) + { + runResults(groupQuery, expectedResults, false, false); + } + + private void runResults(GroupByQuery groupQuery, List expectedResults, boolean hasUnknownCardinality, boolean hasNonStringOutput) + { + if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) { + if (hasUnknownCardinality) { + Throwable t = Assert.assertThrows( + RuntimeException.class, + () -> helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery).toList() + ); + Assert.assertEquals( + "java.lang.UnsupportedOperationException: GroupBy v1 does not support dimension selectors with unknown cardinality.", + t.getMessage() + ); + return; + } + if (hasNonStringOutput) { + Throwable t = Assert.assertThrows( + RuntimeException.class, + () -> helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery).toList() + ); + Assert.assertEquals( + "java.lang.UnsupportedOperationException: GroupBy v1 only supports dimensions with an outputType of STRING.", + t.getMessage() + ); + return; + } + } + if (!"segments".equals(segmentsName)) { + if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) { + Throwable t = Assert.assertThrows( + RuntimeException.class, + () -> helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery) + .toList() + ); + Assert.assertEquals( + "java.lang.UnsupportedOperationException: GroupBy v1 does not support dimension selectors with unknown cardinality.", + t.getMessage() + ); + } else if (vectorize == QueryContexts.Vectorize.FORCE) { + Throwable t = Assert.assertThrows( + RuntimeException.class, + () -> helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(helper, tempFolder, closer), groupQuery) + .toList() + ); + Assert.assertEquals( + "java.util.concurrent.ExecutionException: java.lang.RuntimeException: org.apache.druid.java.util.common.ISE: Cannot vectorize!", + t.getMessage() + ); + return; + } + } else { + + Sequence seq = helper.runQueryOnSegmentsObjs( + segmentsGenerator.apply(helper, tempFolder, closer), + groupQuery + ); + + List results = seq.toList(); + verifyResults( + groupQuery.getResultRowSignature(), + results, + expectedResults + ); + } + } + private static void verifyResults(RowSignature rowSignature, List results, List expected) { LOG.info("results:\n%s", results); diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index 9ac5a70b72f3..268c19bf9177 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -160,7 +160,11 @@ public void testIngestAndScanSegmentsRealtime() throws Exception new NestedFieldVirtualColumn("nest", "$.x", "x"), new NestedFieldVirtualColumn("nester", "$.x[0]", "x_0"), new NestedFieldVirtualColumn("nester", "$.y.c[1]", "y_c_1"), - new NestedFieldVirtualColumn("nester", "$.", "nester_root") + new NestedFieldVirtualColumn("nester", "$.", "nester_root"), + new NestedFieldVirtualColumn("dim", "$", "dim_root"), + new NestedFieldVirtualColumn("dim", "$.x", "dim_path"), + new NestedFieldVirtualColumn("count", "$", "count_root"), + new NestedFieldVirtualColumn("count", "$.x", "count_path") ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(100) diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index 367623d6a8bc..f7b47f6ee5ba 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -21,16 +21,15 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Iterables; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.DoubleDimensionSchema; import org.apache.druid.data.input.impl.FloatDimensionSchema; import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.query.aggregation.AggregatorFactory; @@ -73,18 +72,17 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin @BeforeClass public static void setup() throws IOException { - MapInputRowParser parser = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("time", "auto", null), - new DimensionsSpec( - ImmutableList.builder() - .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("d1", "d2"))) - .add(new DoubleDimensionSchema("d3")) - .add(new FloatDimensionSchema("d4")) - .add(new LongDimensionSchema("d5")) - .build() - ) - ) + InputRowSchema rowSchema = new InputRowSchema( + new TimestampSpec("time", "auto", null), + new DimensionsSpec( + ImmutableList.builder() + .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("d1", "d2"))) + .add(new DoubleDimensionSchema("d3")) + .add(new FloatDimensionSchema("d4")) + .add(new LongDimensionSchema("d5")) + .build() + ), + null ); AggregatorFactory[] metricsSpecs = new AggregatorFactory[] { new CountAggregatorFactory("cnt"), @@ -102,14 +100,14 @@ public static void setup() throws IOException .put("d4", 1.234f) .put("d5", 10L) .build(); - rows.add(Iterables.getOnlyElement(parser.parseBatch(event))); + rows.add(MapInputRowParser.parse(rowSchema, event)); IndexBuilder builder = IndexBuilder.create() .rows(rows) .schema( new IncrementalIndexSchema.Builder() .withMetrics(metricsSpecs) - .withDimensionsSpec(parser) + .withDimensionsSpec(rowSchema.getDimensionsSpec()) .withRollup(false) .build() ) @@ -118,7 +116,7 @@ public static void setup() throws IOException MMAP_INDEX = builder.buildMMappedIndex(); List rowsWithNulls = new ArrayList<>(); - rowsWithNulls.add(Iterables.getOnlyElement(parser.parseBatch(event))); + rowsWithNulls.add(MapInputRowParser.parse(rowSchema, event)); Map eventWithNulls = new HashMap<>(); eventWithNulls.put("time", DateTimes.nowUtc().getMillis()); @@ -128,14 +126,14 @@ public static void setup() throws IOException eventWithNulls.put("d4", null); eventWithNulls.put("d5", null); - rowsWithNulls.add(Iterables.getOnlyElement(parser.parseBatch(eventWithNulls))); + rowsWithNulls.add(MapInputRowParser.parse(rowSchema, eventWithNulls)); IndexBuilder builderWithNulls = IndexBuilder.create() .rows(rowsWithNulls) .schema( new IncrementalIndexSchema.Builder() .withMetrics(metricsSpecs) - .withDimensionsSpec(parser) + .withDimensionsSpec(rowSchema.getDimensionsSpec()) .withRollup(false) .build() ) diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index ce3e40e6556e..38764b093af4 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -172,7 +172,7 @@ public void testBasicFunctionality() throws IOException { ColumnBuilder bob = new ColumnBuilder(); bob.setFileMapper(fileMapper); - NestedDataColumnSupplier supplier = new NestedDataColumnSupplier( + NestedDataColumnSupplier supplier = NestedDataColumnSupplier.read( baseBuffer, bob, () -> 0, @@ -191,7 +191,7 @@ public void testConcurrency() throws ExecutionException, InterruptedException // if this test ever starts being to be a flake, there might be thread safety issues ColumnBuilder bob = new ColumnBuilder(); bob.setFileMapper(fileMapper); - NestedDataColumnSupplier supplier = new NestedDataColumnSupplier( + NestedDataColumnSupplier supplier = NestedDataColumnSupplier.read( baseBuffer, bob, () -> 0, diff --git a/processing/src/test/java/org/apache/druid/segment/vector/NilVectorSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/vector/NilVectorSelectorTest.java new file mode 100644 index 000000000000..2f4c93e862ed --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/vector/NilVectorSelectorTest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.collections.bitmap.WrappedRoaringBitmap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.segment.QueryableIndexStorageAdapter; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Test; + +import java.util.concurrent.ThreadLocalRandom; + +public class NilVectorSelectorTest extends InitializedNullHandlingTest +{ + private static final int NUM_ROWS = 10_000; + + @Test + public void testDefaultSizedVector() + { + testVectorSize(QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE); + } + + @Test + public void testCustomSizeVector() + { + testVectorSize(1024); + } + + private static void testVectorSize(int vectorSize) + { + NoFilterVectorOffset offset = new NoFilterVectorOffset(vectorSize, 0, NUM_ROWS); + testOffset(offset); + WrappedRoaringBitmap bitmap = new WrappedRoaringBitmap(); + int numSet = 0; + for (int i = 0; i < NUM_ROWS; i++) { + if (ThreadLocalRandom.current().nextDouble() > 0.2) { + bitmap.add(i); + numSet++; + } + } + BitmapVectorOffset bitmapOffset = new BitmapVectorOffset(vectorSize, bitmap.toImmutableBitmap(), 0, NUM_ROWS); + testOffset(bitmapOffset); + } + + private static void testOffset(VectorOffset offset) + { + NilVectorSelector nil = NilVectorSelector.create(offset); + while (!offset.isDone()) { + final int[] dict = nil.getRowVector(); + final long[] longs = nil.getLongVector(); + final double[] doubles = nil.getDoubleVector(); + final float[] floats = nil.getFloatVector(); + final boolean[] nulls = nil.getNullVector(); + final Object[] objects = nil.getObjectVector(); + + for (int i = 0; i < offset.getCurrentVectorSize(); i++) { + Assert.assertEquals(0, dict[i]); + Assert.assertEquals(0L, longs[i]); + Assert.assertEquals(0.0, doubles[i], 0.0); + Assert.assertEquals(0f, floats[i], 0.0); + Assert.assertEquals(NullHandling.sqlCompatible(), nulls[i]); + Assert.assertNull(objects[i]); + } + offset.advance(); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsCastTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsCastTest.java new file mode 100644 index 000000000000..6f733ee0b91c --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsCastTest.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.virtual; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexStorageAdapter; +import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.generator.GeneratorBasicSchemas; +import org.apache.druid.segment.generator.GeneratorSchemaInfo; +import org.apache.druid.segment.generator.SegmentGenerator; +import org.apache.druid.segment.vector.VectorCursor; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; + +public class ExpressionVectorSelectorsCastTest +{ + private static final int ROWS_PER_SEGMENT = 10_000; + + private static QueryableIndex INDEX; + private static Closer CLOSER; + + @BeforeClass + public static void setupClass() + { + CLOSER = Closer.create(); + + final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench"); + + final DataSegment dataSegment = DataSegment.builder() + .dataSource("foo") + .interval(schemaInfo.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(); + + final SegmentGenerator segmentGenerator = CLOSER.register(new SegmentGenerator()); + INDEX = CLOSER.register( + segmentGenerator.generate(dataSegment, schemaInfo, Granularities.HOUR, ROWS_PER_SEGMENT) + ); + } + + @AfterClass + public static void teardownClass() throws IOException + { + CLOSER.close(); + } + + @Test + public void testCastObjectSelectorToValueSelector() + { + testCast(INDEX, "string1", ColumnType.LONG, CLOSER); + testCast(INDEX, "string2", ColumnType.DOUBLE, CLOSER); + testCast(INDEX, "string3", ColumnType.FLOAT, CLOSER); + } + + @Test + public void testCastValueSelectorSelectorToObjectSelector() + { + testCast(INDEX, "long1", ColumnType.STRING, CLOSER); + testCast(INDEX, "long2", ColumnType.STRING, CLOSER); + testCast(INDEX, "double1", ColumnType.STRING, CLOSER); + testCast(INDEX, "double2", ColumnType.STRING, CLOSER); + testCast(INDEX, "float1", ColumnType.STRING, CLOSER); + testCast(INDEX, "float2", ColumnType.STRING, CLOSER); + } + + public static void testCast( + QueryableIndex index, + String column, + ColumnType castTo, + Closer closer + ) + { + final VirtualColumns virtualColumns = VirtualColumns.create( + ImmutableList.of( + new ExpressionVirtualColumn( + "v", + "cast(" + column + ", '" + ExpressionType.fromColumnType(castTo) + "')", + castTo, + TestExprMacroTable.INSTANCE + ) + ) + ); + final QueryableIndexStorageAdapter storageAdapter = new QueryableIndexStorageAdapter(index); + VectorCursor cursor = storageAdapter.makeVectorCursor( + null, + index.getDataInterval(), + virtualColumns, + false, + 512, + null + ); + closer.register(cursor); + + ColumnCapabilities capabilities = INDEX.getColumnCapabilities(column); + + if (capabilities.isNumeric() && castTo.is(ValueType.STRING)) { + // numeric -> string + verifyNumericToString(column, castTo, cursor); + } else { + // string -> numeric + verifyStringToNumeric(column, castTo, cursor); + } + } + + private static void verifyStringToNumeric(String column, ColumnType castTo, VectorCursor cursor) + { + VectorValueSelector selector = cursor.getColumnSelectorFactory().makeValueSelector("v"); + VectorValueSelector castSelector = ExpressionVectorSelectors.castObjectSelectorToNumeric( + cursor.getColumnSelectorFactory().getReadableVectorInspector(), + column, + cursor.getColumnSelectorFactory().makeObjectSelector(column), + ColumnType.STRING, + castTo + ); + while (!cursor.isDone()) { + boolean[] nulls; + boolean[] castNulls; + switch (castTo.getType()) { + case LONG: + nulls = selector.getNullVector(); + castNulls = castSelector.getNullVector(); + long[] longs = selector.getLongVector(); + long[] castLongs = castSelector.getLongVector(); + for (int i = 0; i < selector.getCurrentVectorSize(); i++) { + if (nulls != null) { + Assert.assertEquals(nulls[i], castNulls[i]); + } + Assert.assertEquals(longs[i], castLongs[i]); + } + break; + case DOUBLE: + nulls = selector.getNullVector(); + castNulls = selector.getNullVector(); + double[] doubles = selector.getDoubleVector(); + double[] castDoubles = castSelector.getDoubleVector(); + for (int i = 0; i < selector.getCurrentVectorSize(); i++) { + if (nulls != null) { + Assert.assertEquals(nulls[i], castNulls[i]); + } + Assert.assertEquals(doubles[i], castDoubles[i], 0.0); + } + break; + + case FLOAT: + nulls = selector.getNullVector(); + castNulls = selector.getNullVector(); + float[] floats = selector.getFloatVector(); + float[] castFloats = castSelector.getFloatVector(); + for (int i = 0; i < selector.getCurrentVectorSize(); i++) { + if (nulls != null) { + Assert.assertEquals(nulls[i], castNulls[i]); + } + Assert.assertEquals(floats[i], castFloats[i], 0.0); + } + break; + default: + Assert.fail("this shouldn't happen"); + return; + } + + cursor.advance(); + } + } + + private static void verifyNumericToString(String column, ColumnType castTo, VectorCursor cursor) + { + VectorObjectSelector objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v"); + VectorObjectSelector castSelector = ExpressionVectorSelectors.castValueSelectorToObject( + cursor.getColumnSelectorFactory().getReadableVectorInspector(), + column, + cursor.getColumnSelectorFactory().makeValueSelector(column), + cursor.getColumnSelectorFactory().getColumnCapabilities(column).toColumnType(), + castTo + ); + while (!cursor.isDone()) { + switch (castTo.getType()) { + case STRING: + Object[] objects = objectSelector.getObjectVector(); + Object[] otherObjects = castSelector.getObjectVector(); + Assert.assertEquals(objectSelector.getCurrentVectorSize(), castSelector.getCurrentVectorSize()); + for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++) { + Assert.assertEquals(objects[i], otherObjects[i]); + } + break; + default: + Assert.fail("this shouldn't happen"); + return; + } + + cursor.advance(); + } + } +} diff --git a/processing/src/test/resources/simple-nested-test-data.json b/processing/src/test/resources/simple-nested-test-data.json index 17e16ce27361..3def4206ac9c 100644 --- a/processing/src/test/resources/simple-nested-test-data.json +++ b/processing/src/test/resources/simple-nested-test-data.json @@ -1,7 +1,7 @@ {"timestamp": "2021-01-01", "dim": "hello", "nest": {"x": 100, "y": 200, "z": 300}, "nester":{ "x": ["a", "b", "c"], "y": {"a": "a", "b": "b", "c": [1, 2, 3]}}, "variant": {"a": ["hello", "world"], "b": {"x": "hello", "y": "world"}}, "list":[{"x": 5, "y": 10}, {"x": 15, "y": 22}]} {"timestamp": "2021-01-01", "dim": "hello", "nester":{ "x": ["x", "y", "z"]}, "list":[{"x": 35, "y": 310}, {"x": 315, "y": 322}]} {"timestamp": "2021-01-01", "dim": "hello", "nest":{ "x": 300, "y": 800}, "nester": "hello"} -{"timestamp": "2021-01-01", "dim": "hello", "nest":{ "y": 500}, "list":[{"x": 115, "y": 410}, {"x": 415, "y": 422}]} +{"timestamp": "2021-01-01", "dim": "100", "nest":{ "y": 500}, "list":[{"x": 115, "y": 410}, {"x": 415, "y": 422}]} {"timestamp": "2021-01-02", "dim": "world", "nest": {"x": 200, "y": 100, "z": 101}, "nester":{ "x": ["x", "y", "z"], "y": {"a": "b", "b": "c", "c": [4, 5, 6]}}, "variant": {"b": ["hello", "world"], "c": {"x": ["hello"], "y": "world"}}} {"timestamp": "2021-01-02", "dim": "hello", "nester":{ "x": ["x", "y", "z"]}} {"timestamp": "2021-01-02", "dim": "hello", "nest":{ "x": 300, "y": 800}, "nester": 1} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java index f9181e0914a8..2e5aa9f58bfb 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java @@ -54,6 +54,7 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.join.JoinableFactory; +import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.apache.druid.server.QueryLifecycleFactory; import org.apache.druid.server.SegmentManager; import org.apache.druid.server.coordination.DruidServerMetadata; @@ -103,6 +104,7 @@ public class SegmentMetadataCache private static final EmittingLogger log = new EmittingLogger(SegmentMetadataCache.class); private static final int MAX_SEGMENTS_PER_QUERY = 15000; private static final long DEFAULT_NUM_ROWS = 0; + private static final Interner ROW_SIGNATURE_INTERNER = Interners.newWeakInterner(); private final QueryLifecycleFactory queryLifecycleFactory; private final SegmentMetadataCacheConfig config; @@ -119,8 +121,6 @@ public class SegmentMetadataCache */ private final ConcurrentMap tables = new ConcurrentHashMap<>(); - private static final Interner ROW_SIGNATURE_INTERNER = Interners.newWeakInterner(); - /** * DataSource -> Segment -> AvailableSegmentMetadata(contains RowSignature) for that segment. * Use SortedMap for segments so they are merged in deterministic order, from older to newer. @@ -798,7 +798,20 @@ DatasourceTable.PhysicalDatasourceMetadata buildDruidTable(final String dataSour rowSignature.getColumnType(column) .orElseThrow(() -> new ISE("Encountered null type for column [%s]", column)); - columnTypes.putIfAbsent(column, columnType); + columnTypes.compute(column, (c, existingType) -> { + if (existingType == null) { + return columnType; + } + if (columnType == null) { + return existingType; + } + // if any are json, are all json + if (NestedDataComplexTypeSerde.TYPE.equals(columnType) || NestedDataComplexTypeSerde.TYPE.equals(existingType)) { + return NestedDataComplexTypeSerde.TYPE; + } + // "existing type" is the 'newest' type, since we iterate the segments list by newest start time + return existingType; + }); } } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index a8d0c94df73e..f11a7c2717d0 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -25,11 +25,11 @@ import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.InputRowParser; -import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; +import org.apache.druid.data.input.impl.LongDimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.guice.NestedDataModule; @@ -70,17 +70,19 @@ import java.io.IOException; import java.util.List; -import java.util.Map; import java.util.stream.Collectors; public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest { private static final String DATA_SOURCE = "nested"; + private static final String DATA_SOURCE_MIXED = "nested_mix"; + private static final String DATA_SOURCE_MIXED_2 = "nested_mix_2"; private static final List> RAW_ROWS = ImmutableList.of( ImmutableMap.builder() .put("t", "2000-01-01") .put("string", "aaa") + .put("string_sparse", "zzz") .put("nest", ImmutableMap.of("x", 100L, "y", 2.02, "z", "300", "mixed", 1L, "mixed2", "1")) .put( "nester", @@ -103,6 +105,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ImmutableMap.builder() .put("t", "2000-01-01") .put("string", "ddd") + .put("string_sparse", "yyy") .put("long", 2L) .build(), ImmutableMap.builder() @@ -126,21 +129,38 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .build() ); - private static final InputRowParser> PARSER = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("t", "iso", null), - DimensionsSpec.builder().setDimensions( - ImmutableList.builder() - .add(new NestedDataDimensionSchema("string")) - .add(new NestedDataDimensionSchema("nest")) - .add(new NestedDataDimensionSchema("nester")) - .add(new NestedDataDimensionSchema("long")) - .build() - ).build() - )); + private static final InputRowSchema ALL_JSON_COLUMNS = new InputRowSchema( + new TimestampSpec("t", "iso", null), + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new NestedDataDimensionSchema("string")) + .add(new NestedDataDimensionSchema("nest")) + .add(new NestedDataDimensionSchema("nester")) + .add(new NestedDataDimensionSchema("long")) + .add(new NestedDataDimensionSchema("string_sparse")) + .build() + ).build(), + null + ); + private static final InputRowSchema JSON_AND_SCALAR_MIX = new InputRowSchema( + new TimestampSpec("t", "iso", null), + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new StringDimensionSchema("string")) + .add(new NestedDataDimensionSchema("nest")) + .add(new NestedDataDimensionSchema("nester")) + .add(new LongDimensionSchema("long")) + .add(new StringDimensionSchema("string_sparse")) + .build() + ).build(), + null + ); private static final List ROWS = - RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, PARSER)).collect(Collectors.toList()); + RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, ALL_JSON_COLUMNS)).collect(Collectors.toList()); + + private static final List ROWS_MIX = + RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, JSON_AND_SCALAR_MIX)).collect(Collectors.toList()); @Override public void configureGuice(DruidInjectorBuilder builder) @@ -167,14 +187,79 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .withMetrics( new CountAggregatorFactory("cnt") ) - .withDimensionsSpec(PARSER) + .withDimensionsSpec(ALL_JSON_COLUMNS.getDimensionsSpec()) .withRollup(false) .build() ) .rows(ROWS) .buildMMappedIndex(); - return new SpecificSegmentsQuerySegmentWalker(conglomerate).add( + final QueryableIndex indexMix11 = + IndexBuilder.create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withMetrics( + new CountAggregatorFactory("cnt") + ) + .withDimensionsSpec(ALL_JSON_COLUMNS.getDimensionsSpec()) + .withRollup(false) + .build() + ) + .rows(ROWS) + .buildMMappedIndex(); + + final QueryableIndex indexMix12 = + IndexBuilder.create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withMetrics( + new CountAggregatorFactory("cnt") + ) + .withDimensionsSpec(JSON_AND_SCALAR_MIX.getDimensionsSpec()) + .withRollup(false) + .build() + ) + .rows(ROWS_MIX) + .buildMMappedIndex(); + + final QueryableIndex indexMix21 = + IndexBuilder.create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withMetrics( + new CountAggregatorFactory("cnt") + ) + .withDimensionsSpec(JSON_AND_SCALAR_MIX.getDimensionsSpec()) + .withRollup(false) + .build() + ) + .rows(ROWS_MIX) + .buildMMappedIndex(); + + final QueryableIndex indexMix22 = + IndexBuilder.create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withMetrics( + new CountAggregatorFactory("cnt") + ) + .withDimensionsSpec(ALL_JSON_COLUMNS.getDimensionsSpec()) + .withRollup(false) + .build() + ) + .rows(ROWS) + .buildMMappedIndex(); + + SpecificSegmentsQuerySegmentWalker walker = new SpecificSegmentsQuerySegmentWalker(conglomerate); + walker.add( DataSegment.builder() .dataSource(DATA_SOURCE) .interval(index.getDataInterval()) @@ -183,7 +268,45 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .size(0) .build(), index + ).add( + DataSegment.builder() + .dataSource(DATA_SOURCE_MIXED) + .interval(index.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(), + indexMix11 + ).add( + DataSegment.builder() + .dataSource(DATA_SOURCE_MIXED) + .interval(index.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(1)) + .size(0) + .build(), + indexMix12 + ).add( + DataSegment.builder() + .dataSource(DATA_SOURCE_MIXED_2) + .interval(index.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(), + indexMix21 + ).add( + DataSegment.builder() + .dataSource(DATA_SOURCE_MIXED_2) + .interval(index.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(1)) + .size(0) + .build(), + indexMix22 ); + + return walker; } @Test @@ -443,6 +566,218 @@ public void testGroupByRootSingleTypeString() ); } + @Test + public void testGroupByRootSingleTypeLongMixed1() + { + testQuery( + "SELECT " + + "long, " + + "SUM(cnt) " + + "FROM druid.nested_mix GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("long", "d0", ColumnType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{1L, 2L}, + new Object[]{2L, 4L}, + new Object[]{3L, 2L}, + new Object[]{4L, 2L}, + new Object[]{5L, 4L} + ), + RowSignature.builder() + .add("long", ColumnType.LONG) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeStringMixed1() + { + testQuery( + "SELECT " + + "string, " + + "SUM(cnt) " + + "FROM druid.nested_mix GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("string", "d0") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"aaa", 4L}, + new Object[]{"bbb", 2L}, + new Object[]{"ccc", 2L}, + new Object[]{"ddd", 4L}, + new Object[]{"eee", 2L} + ), + RowSignature.builder() + .add("string", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeStringMixed1Sparse() + { + testQuery( + "SELECT " + + "string_sparse, " + + "SUM(cnt) " + + "FROM druid.nested_mix GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("string_sparse", "d0") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.defaultStringValue(), 10L}, + new Object[]{"yyy", 2L}, + new Object[]{"zzz", 2L} + ), + RowSignature.builder() + .add("string_sparse", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeLongMixed2() + { + testQuery( + "SELECT " + + "long, " + + "SUM(cnt) " + + "FROM druid.nested_mix_2 GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED_2) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("long", "d0", ColumnType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{1L, 2L}, + new Object[]{2L, 4L}, + new Object[]{3L, 2L}, + new Object[]{4L, 2L}, + new Object[]{5L, 4L} + ), + RowSignature.builder() + .add("long", ColumnType.LONG) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeStringMixed2() + { + testQuery( + "SELECT " + + "string, " + + "SUM(cnt) " + + "FROM druid.nested_mix_2 GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED_2) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("string", "d0") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"aaa", 4L}, + new Object[]{"bbb", 2L}, + new Object[]{"ccc", 2L}, + new Object[]{"ddd", 4L}, + new Object[]{"eee", 2L} + ), + RowSignature.builder() + .add("string", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeStringMixed2Sparse() + { + testQuery( + "SELECT " + + "string_sparse, " + + "SUM(cnt) " + + "FROM druid.nested_mix_2 GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED_2) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("string_sparse", "d0") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.defaultStringValue(), 10L}, + new Object[]{"yyy", 2L}, + new Object[]{"zzz", 2L} + ), + RowSignature.builder() + .add("string_sparse", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + @Test public void testGroupByJsonValues() { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java index f4a84f216979..f8547d10c467 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java @@ -26,16 +26,15 @@ import com.google.common.collect.ImmutableSet; import com.google.inject.Injector; import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.MapBasedInputRow; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.DoubleDimensionSchema; import org.apache.druid.data.input.impl.FloatDimensionSchema; -import org.apache.druid.data.input.impl.InputRowParser; import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.MapInputRowParser; import org.apache.druid.data.input.impl.StringDimensionSchema; -import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; @@ -118,62 +117,60 @@ public Optional build( } }; - private static final InputRowParser> PARSER = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec(TIMESTAMP_COLUMN, "iso", null), - new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3")) - ) - ) + private static final InputRowSchema FOO_SCHEMA = new InputRowSchema( + new TimestampSpec(TIMESTAMP_COLUMN, "iso", null), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3")) + ), + null ); - private static final InputRowParser> PARSER_NUMERIC_DIMS = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec(TIMESTAMP_COLUMN, "iso", null), - new DimensionsSpec( - ImmutableList.builder() - .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of( - "dim1", - "dim2", - "dim3", - "dim4", - "dim5", - "dim6" - ))) - .add(new DoubleDimensionSchema("d1")) - .add(new DoubleDimensionSchema("d2")) - .add(new FloatDimensionSchema("f1")) - .add(new FloatDimensionSchema("f2")) - .add(new LongDimensionSchema("l1")) - .add(new LongDimensionSchema("l2")) - .build() - ) - ) + private static final InputRowSchema NUMFOO_SCHEMA = new InputRowSchema( + new TimestampSpec(TIMESTAMP_COLUMN, "iso", null), + new DimensionsSpec( + ImmutableList.builder() + .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of( + "dim1", + "dim2", + "dim3", + "dim4", + "dim5", + "dim6" + ))) + .add(new DoubleDimensionSchema("d1")) + .add(new DoubleDimensionSchema("d2")) + .add(new FloatDimensionSchema("f1")) + .add(new FloatDimensionSchema("f2")) + .add(new LongDimensionSchema("l1")) + .add(new LongDimensionSchema("l2")) + .build() + ), + null ); - private static final InputRowParser> PARSER_LOTS_OF_COLUMNS = new MapInputRowParser( - new TimeAndDimsParseSpec( - new TimestampSpec("timestamp", "millis", null), - new DimensionsSpec( - DimensionsSpec.getDefaultSchemas( - ImmutableList.builder().add("dimHyperUnique") - .add("dimMultivalEnumerated") - .add("dimMultivalEnumerated2") - .add("dimMultivalSequentialWithNulls") - .add("dimSequential") - .add("dimSequentialHalfNull") - .add("dimUniform") - .add("dimZipf") - .add("metFloatNormal") - .add("metFloatZipf") - .add("metLongSequential") - .add("metLongUniform") - .build() - ) + private static final InputRowSchema LOTS_OF_COLUMNS_SCHEMA = new InputRowSchema( + new TimestampSpec("timestamp", "millis", null), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas( + ImmutableList.builder().add("dimHyperUnique") + .add("dimMultivalEnumerated") + .add("dimMultivalEnumerated2") + .add("dimMultivalSequentialWithNulls") + .add("dimSequential") + .add("dimSequentialHalfNull") + .add("dimUniform") + .add("dimZipf") + .add("metFloatNormal") + .add("metFloatZipf") + .add("metLongSequential") + .add("metLongUniform") + .build() ) - ) + ), + null ); + private static final IncrementalIndexSchema INDEX_SCHEMA = new IncrementalIndexSchema.Builder() .withMetrics( new CountAggregatorFactory("cnt"), @@ -220,7 +217,7 @@ public Optional build( new DoubleSumAggregatorFactory("m2", "m2"), new HyperUniquesAggregatorFactory("unique_dim1", "dim1") ) - .withDimensionsSpec(PARSER_NUMERIC_DIMS) + .withDimensionsSpec(NUMFOO_SCHEMA.getDimensionsSpec()) .withRollup(false) .build(); @@ -228,7 +225,7 @@ public Optional build( .withMetrics( new CountAggregatorFactory("count") ) - .withDimensionsSpec(PARSER_LOTS_OF_COLUMNS) + .withDimensionsSpec(LOTS_OF_COLUMNS_SCHEMA.getDimensionsSpec()) .withRollup(false) .build(); @@ -436,7 +433,7 @@ public Optional build( .build() ); public static final List ROWS1_WITH_NUMERIC_DIMS = - RAW_ROWS1_WITH_NUMERIC_DIMS.stream().map(raw -> createRow(raw, PARSER_NUMERIC_DIMS)).collect(Collectors.toList()); + RAW_ROWS1_WITH_NUMERIC_DIMS.stream().map(raw -> createRow(raw, NUMFOO_SCHEMA)).collect(Collectors.toList()); public static final List> RAW_ROWS2 = ImmutableList.of( ImmutableMap.builder() @@ -509,7 +506,7 @@ public Optional build( .put("dimSequential", "0") .put("dimSequentialHalfNull", "0") .build(), - PARSER_LOTS_OF_COLUMNS + LOTS_OF_COLUMNS_SCHEMA ), createRow( ImmutableMap.builder() @@ -525,7 +522,7 @@ public Optional build( .put("dimHyperUnique", "8") .put("dimSequential", "8") .build(), - PARSER_LOTS_OF_COLUMNS + LOTS_OF_COLUMNS_SCHEMA ) ); @@ -928,23 +925,24 @@ private static MapBasedInputRow toRow(String time, List dimensions, Map< public static InputRow createRow(final ImmutableMap map) { - return PARSER.parseBatch((Map) map).get(0); + return MapInputRowParser.parse(FOO_SCHEMA, (Map) map); } - public static InputRow createRow(final ImmutableMap map, InputRowParser> parser) + public static InputRow createRow(final ImmutableMap map, InputRowSchema inputRowSchema) { - return parser.parseBatch((Map) map).get(0); + return MapInputRowParser.parse(inputRowSchema, (Map) map); } public static InputRow createRow(final Object t, final String dim1, final String dim2, final double m1) { - return PARSER.parseBatch( + return MapInputRowParser.parse( + FOO_SCHEMA, ImmutableMap.of( "t", new DateTime(t, ISOChronology.getInstanceUTC()).getMillis(), "dim1", dim1, "dim2", dim2, "m1", m1 ) - ).get(0); + ); } }