From f1c5b2debae73c92a6c0b19bcfda91a58584ffd6 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 16 Jul 2020 23:32:07 -0700 Subject: [PATCH 01/12] add isNullable to ColumnCapabilities, ColumnAnalysis --- .../druid/query/metadata/SegmentAnalyzer.java | 20 ++++-- .../metadata/metadata/ColumnAnalysis.java | 25 ++++++- .../druid/segment/column/ColumnBuilder.java | 11 ++- .../segment/column/ColumnCapabilities.java | 1 + .../column/ColumnCapabilitiesImpl.java | 17 +++++ .../segment/incremental/IncrementalIndex.java | 6 ++ .../DictionaryEncodedColumnPartSerde.java | 3 + .../serde/DoubleNumericColumnPartSerdeV2.java | 4 ++ .../serde/FloatNumericColumnPartSerdeV2.java | 4 ++ .../serde/LongNumericColumnPartSerdeV2.java | 4 ++ .../apache/druid/query/DoubleStorageTest.java | 6 ++ ...egmentMetadataQueryQueryToolChestTest.java | 1 + .../metadata/SegmentMetadataQueryTest.java | 69 +++++++++++++++++++ .../SegmentMetadataUnionQueryTest.java | 1 + .../metadata/metadata/ColumnAnalysisTest.java | 27 ++++---- 15 files changed, 175 insertions(+), 24 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 659b55b1680d..663ed13e4637 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -100,6 +100,11 @@ public Map analyze(Segment segment) Map columns = new TreeMap<>(); + Function adapterCapabilitesFn = + storageAdapter instanceof IncrementalIndexStorageAdapter + ? ((IncrementalIndexStorageAdapter) storageAdapter)::getSnapshotColumnCapabilities + : storageAdapter::getColumnCapabilities; + for (String columnName : columnNames) { final ColumnHolder columnHolder = index == null ? null : index.getColumnHolder(columnName); final ColumnCapabilities capabilities; @@ -108,11 +113,7 @@ public Map analyze(Segment segment) } else { // this can be removed if we get to the point where IncrementalIndexStorageAdapter.getColumnCapabilities // accurately reports the capabilities - if (storageAdapter instanceof IncrementalIndexStorageAdapter) { - capabilities = ((IncrementalIndexStorageAdapter) storageAdapter).getSnapshotColumnCapabilities(columnName); - } else { - capabilities = storageAdapter.getColumnCapabilities(columnName); - } + capabilities = adapterCapabilitesFn.apply(columnName); } final ColumnAnalysis analysis; @@ -146,7 +147,7 @@ public Map analyze(Segment segment) } // Add time column too - ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(ColumnHolder.TIME_COLUMN_NAME); + ColumnCapabilities timeCapabilities = adapterCapabilitesFn.apply(ColumnHolder.TIME_COLUMN_NAME); if (timeCapabilities == null) { timeCapabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); } @@ -192,6 +193,7 @@ private ColumnAnalysis analyzeNumericColumn( return new ColumnAnalysis( capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), + capabilities.isNullable().isTrue(), size, null, null, @@ -242,6 +244,7 @@ private ColumnAnalysis analyzeStringColumn( return new ColumnAnalysis( capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), + capabilities.isNullable().isTrue(), size, analyzingCardinality() ? cardinality : 0, min, @@ -319,6 +322,7 @@ public Long accumulate(Long accumulated, Cursor cursor) return new ColumnAnalysis( capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), + capabilities.isNullable().isTrue(), size, cardinality, min, @@ -335,6 +339,7 @@ private ColumnAnalysis analyzeComplexColumn( { try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) { final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue(); + final boolean nullable = capabilities != null && capabilities.isNullable().isTrue(); long size = 0; if (analyzingSize() && complexColumn != null) { @@ -345,7 +350,7 @@ private ColumnAnalysis analyzeComplexColumn( final Function inputSizeFn = serde.inputSizeFn(); if (inputSizeFn == null) { - return new ColumnAnalysis(typeName, hasMultipleValues, 0, null, null, null, null); + return new ColumnAnalysis(typeName, hasMultipleValues, nullable, 0, null, null, null, null); } final int length = complexColumn.getLength(); @@ -357,6 +362,7 @@ private ColumnAnalysis analyzeComplexColumn( return new ColumnAnalysis( typeName, hasMultipleValues, + nullable, size, null, null, diff --git a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java index aa9da8d40af4..bcc0d7074568 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java @@ -33,11 +33,12 @@ public class ColumnAnalysis public static ColumnAnalysis error(String reason) { - return new ColumnAnalysis("STRING", false, -1, null, null, null, ERROR_PREFIX + reason); + return new ColumnAnalysis("STRING", false, false, -1, null, null, null, ERROR_PREFIX + reason); } private final String type; private final boolean hasMultipleValues; + private final boolean nullable; private final long size; private final Integer cardinality; private final Comparable minValue; @@ -48,6 +49,7 @@ public static ColumnAnalysis error(String reason) public ColumnAnalysis( @JsonProperty("type") String type, @JsonProperty("hasMultipleValues") boolean hasMultipleValues, + @JsonProperty("isNullable") boolean isNullable, @JsonProperty("size") long size, @JsonProperty("cardinality") Integer cardinality, @JsonProperty("minValue") Comparable minValue, @@ -57,6 +59,7 @@ public ColumnAnalysis( { this.type = type; this.hasMultipleValues = hasMultipleValues; + this.nullable = isNullable; this.size = size; this.cardinality = cardinality; this.minValue = minValue; @@ -108,6 +111,11 @@ public String getErrorMessage() return errorMessage; } + @JsonProperty + public boolean isNullable() + { + return nullable; + } public boolean isError() { return (errorMessage != null && !errorMessage.isEmpty()); @@ -144,7 +152,16 @@ public ColumnAnalysis fold(ColumnAnalysis rhs) Comparable newMin = choose(minValue, rhs.minValue, false); Comparable newMax = choose(maxValue, rhs.maxValue, true); - return new ColumnAnalysis(type, multipleValues, size + rhs.getSize(), cardinality, newMin, newMax, null); + return new ColumnAnalysis( + type, + multipleValues, + nullable || rhs.nullable, + size + rhs.getSize(), + cardinality, + newMin, + newMax, + null + ); } private T choose(T obj1, T obj2, boolean max) @@ -165,6 +182,7 @@ public String toString() return "ColumnAnalysis{" + "type='" + type + '\'' + ", hasMultipleValues=" + hasMultipleValues + + ", isNullable=" + nullable + ", size=" + size + ", cardinality=" + cardinality + ", minValue=" + minValue + @@ -184,6 +202,7 @@ public boolean equals(Object o) } ColumnAnalysis that = (ColumnAnalysis) o; return hasMultipleValues == that.hasMultipleValues && + nullable == that.nullable && size == that.size && Objects.equals(type, that.type) && Objects.equals(cardinality, that.cardinality) && @@ -195,6 +214,6 @@ public boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(type, hasMultipleValues, size, cardinality, minValue, maxValue, errorMessage); + return Objects.hash(type, hasMultipleValues, nullable, size, cardinality, minValue, maxValue, errorMessage); } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java index cde454b70524..6dc6621100fb 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java @@ -34,6 +34,7 @@ public class ColumnBuilder private boolean hasMultipleValues = false; private boolean filterable = false; private boolean dictionaryEncoded = false; + private boolean nullable = false; @Nullable private Supplier columnSupplier = null; @@ -44,6 +45,7 @@ public class ColumnBuilder @Nullable private SmooshedFileMapper fileMapper = null; + public ColumnBuilder setFileMapper(SmooshedFileMapper fileMapper) { this.fileMapper = fileMapper; @@ -105,6 +107,12 @@ public ColumnBuilder setSpatialIndex(Supplier spatialIndex) return this; } + public ColumnBuilder setNullable(boolean nullable) + { + this.nullable = nullable; + return this; + } + public ColumnHolder build() { Preconditions.checkState(type != null, "Type must be set."); @@ -118,7 +126,8 @@ public ColumnHolder build() .setDictionaryValuesUnique(dictionaryEncoded) .setHasSpatialIndexes(spatialIndex != null) .setHasMultipleValues(hasMultipleValues) - .setFilterable(filterable), + .setFilterable(filterable) + .setIsNullable(nullable), columnSupplier, bitmapIndex, spatialIndex diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index a9af25b4602c..4852629b4dba 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -38,6 +38,7 @@ public interface ColumnCapabilities boolean hasSpatialIndexes(); Capable hasMultipleValues(); boolean isFilterable(); + Capable isNullable(); enum Capable { diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 9ddbd04a372f..6d2cd1692136 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -43,6 +43,7 @@ public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities o capabilities.hasMultipleValues = other.hasMultipleValues(); capabilities.dictionaryValuesSorted = other.areDictionaryValuesSorted(); capabilities.dictionaryValuesUnique = other.areDictionaryValuesUnique(); + capabilities.nullable = other.isNullable(); capabilities.filterable = other.isFilterable(); } return capabilities; @@ -72,6 +73,7 @@ public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(unknownIsTrue); copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(unknownIsTrue); copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(unknownIsTrue); + copy.nullable = copy.nullable.coerceUnknownToBoolean(unknownIsTrue); return copy; } @@ -106,6 +108,8 @@ public static ColumnCapabilitiesImpl createSimpleNumericColumnCapabilities(Value private Capable dictionaryValuesUnique = Capable.UNKNOWN; @JsonIgnore private boolean filterable; + @JsonIgnore + private Capable nullable = Capable.UNKNOWN; @Override @JsonProperty @@ -203,6 +207,18 @@ public ColumnCapabilitiesImpl setHasMultipleValues(boolean hasMultipleValues) return this; } + @Override + public Capable isNullable() + { + return nullable; + } + + public ColumnCapabilitiesImpl setIsNullable(boolean isNullable) + { + nullable = Capable.of(isNullable); + return this; + } + @Override public boolean isFilterable() { @@ -241,6 +257,7 @@ public ColumnCapabilities merge(@Nullable ColumnCapabilities other) this.hasMultipleValues = this.hasMultipleValues.or(other.hasMultipleValues()); this.dictionaryValuesSorted = this.dictionaryValuesSorted.and(other.areDictionaryValuesSorted()); this.dictionaryValuesUnique = this.dictionaryValuesUnique.and(other.areDictionaryValuesUnique()); + this.nullable = this.nullable.or(other.isNullable()); return this; } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index 649aea9b6997..ba1ca1883aad 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -685,6 +685,9 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) } dimsKeySize += indexer.estimateEncodedKeyComponentSize(dimsKey); // Set column capabilities as data is coming in + if (dimsKey == null) { + capabilities.setIsNullable(NullHandling.sqlCompatible()); + } if (!capabilities.hasMultipleValues().isTrue() && dimsKey != null && handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) { @@ -695,6 +698,7 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) // unless this is the first row we are processing, all newly discovered columns will be sparse if (maxIngestedEventTime != null) { indexer.setSparseIndexed(); + capabilities.setIsNullable(NullHandling.sqlCompatible()); } if (overflow == null) { overflow = new ArrayList<>(); @@ -719,6 +723,8 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) // process any dimensions with missing values in the row for (String missing : absentDimensions) { dimensionDescs.get(missing).getIndexer().setSparseIndexed(); + ColumnCapabilitiesImpl capabilities = columnCapabilities.get(missing); + capabilities.setIsNullable(NullHandling.sqlCompatible()); } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java index a0a733ec6f62..27be3e16eb9b 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java @@ -321,6 +321,8 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo rMultiValuedColumn = null; } + final String firstDictionaryEntry = rDictionary.get(0); + DictionaryEncodedColumnSupplier dictionaryEncodedColumnSupplier = new DictionaryEncodedColumnSupplier( rDictionary, rSingleValuedColumn, @@ -329,6 +331,7 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo ); builder .setHasMultipleValues(hasMultipleValues) + .setNullable(firstDictionaryEntry == null) .setDictionaryEncodedColumnSupplier(dictionaryEncodedColumnSupplier); if (!Feature.NO_BITMAP_INDEX.isSet(rFlags)) { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java index e2a5cd213528..52e1e3c17a5a 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java @@ -152,13 +152,17 @@ public Deserializer getDeserializer() buffer.position(initialPos + offset); final ImmutableBitmap bitmap; + final boolean isNullable; if (buffer.hasRemaining()) { bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + isNullable = bitmap.size() > 0; } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + isNullable = false; } builder.setType(ValueType.DOUBLE) .setHasMultipleValues(false) + .setNullable(isNullable) .setNumericColumnSupplier(new DoubleNumericColumnSupplier(column, bitmap)); }; } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java index 77d900dbe2ab..283a868df3c2 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java @@ -149,13 +149,17 @@ public Deserializer getDeserializer() ); buffer.position(initialPos + offset); final ImmutableBitmap bitmap; + final boolean isNullable; if (buffer.hasRemaining()) { bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + isNullable = bitmap.size() > 0; } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + isNullable = false; } builder.setType(ValueType.FLOAT) .setHasMultipleValues(false) + .setNullable(isNullable) .setNumericColumnSupplier(new FloatNumericColumnSupplier(column, bitmap)); }; } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java index 75344ec9340c..6538cc8289ad 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java @@ -151,13 +151,17 @@ public Deserializer getDeserializer() ); buffer.position(initialPos + offset); final ImmutableBitmap bitmap; + final boolean isNullable; if (buffer.hasRemaining()) { bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + isNullable = bitmap.size() > 0; } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + isNullable = false; } builder.setType(ValueType.LONG) .setHasMultipleValues(false) + .setNullable(isNullable) .setNumericColumnSupplier(new LongNumericColumnSupplier(column, bitmap)); }; } diff --git a/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java b/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java index cb93b2cf8874..d6d27b88332a 100644 --- a/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java +++ b/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java @@ -156,6 +156,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.LONG.toString(), false, + false, 100, null, null, @@ -166,6 +167,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 120, 1, DIM_VALUE, @@ -176,6 +178,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.DOUBLE.toString(), false, + false, 80, null, null, @@ -198,6 +201,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.LONG.toString(), false, + false, 100, null, null, @@ -208,6 +212,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 120, 1, DIM_VALUE, @@ -218,6 +223,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.FLOAT.toString(), false, + false, 80, null, null, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java index f823937ef03b..752db63d9832 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java @@ -81,6 +81,7 @@ public void testCacheStrategy() throws Exception new ColumnAnalysis( ValueType.STRING.toString(), true, + false, 10881, 1, "preferred", diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java index 9da06719d9c2..062639be2b27 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java @@ -208,6 +208,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.LONG.toString(), false, + false, 12090, null, null, @@ -218,6 +219,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.DOUBLE.toString(), false, + false, 9672, null, null, @@ -228,6 +230,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.STRING.toString(), false, + false, preferedSize1, 1, "preferred", @@ -249,6 +252,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.LONG.toString(), false, + false, 12090, null, null, @@ -259,6 +263,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.DOUBLE.toString(), false, + false, 9672, null, null, @@ -269,6 +274,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.STRING.toString(), false, + false, placementSize2, 1, null, @@ -305,6 +311,7 @@ public void testSegmentMetadataQueryWithRollupMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 0, null, @@ -315,6 +322,7 @@ public void testSegmentMetadataQueryWithRollupMerge() new ColumnAnalysis( ValueType.STRING.toString(), true, + false, 0, 0, null, @@ -373,6 +381,7 @@ public void testSegmentMetadataQueryWithHasMultipleValuesMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 1, null, @@ -383,6 +392,7 @@ public void testSegmentMetadataQueryWithHasMultipleValuesMerge() new ColumnAnalysis( ValueType.STRING.toString(), true, + false, 0, 9, null, @@ -441,6 +451,7 @@ public void testSegmentMetadataQueryWithComplexColumnMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 1, null, @@ -451,6 +462,7 @@ public void testSegmentMetadataQueryWithComplexColumnMerge() new ColumnAnalysis( "hyperUnique", false, + false, 0, null, null, @@ -510,6 +522,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge() ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), false, + false, size1 + size2, 1, "preferred", @@ -531,6 +544,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge2() ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), false, + false, size1 + size2, 3, "spot", @@ -552,6 +566,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge3() ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), false, + false, size1 + size2, 9, "automotive", @@ -574,6 +589,7 @@ private void testSegmentMetadataQueryWithDefaultAnalysisMerge( new ColumnAnalysis( ValueType.LONG.toString(), false, + false, 12090 * 2, null, null, @@ -584,6 +600,7 @@ private void testSegmentMetadataQueryWithDefaultAnalysisMerge( new ColumnAnalysis( ValueType.DOUBLE.toString(), false, + false, 9672 * 2, null, null, @@ -638,6 +655,7 @@ public void testSegmentMetadataQueryWithNoAnalysisTypesMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 0, null, @@ -700,6 +718,7 @@ public void testSegmentMetadataQueryWithAggregatorsMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 0, null, @@ -758,6 +777,7 @@ public void testSegmentMetadataQueryWithTimestampSpecMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 0, null, @@ -816,6 +836,7 @@ public void testSegmentMetadataQueryWithQueryGranularityMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 0, null, @@ -1273,4 +1294,52 @@ public void testAnanlysisTypesBeingSet() Assert.assertEquals(analysisWCfg2, expectedAnalysisWCfg2); } + @Test + public void testLongNullableColumn() + { + ColumnAnalysis analysis = new ColumnAnalysis( + ValueType.LONG.toString(), + false, + NullHandling.replaceWithDefault() ? false : true, + 19344, + null, + null, + null, + null + ); + testSegmentMetadataQueryWithDefaultAnalysisMerge("longNumericNull", analysis); + } + + @Test + public void testDoubleNullableColumn() + { + ColumnAnalysis analysis = new ColumnAnalysis( + ValueType.DOUBLE.toString(), + false, + NullHandling.replaceWithDefault() ? false : true, + 19344, + null, + null, + null, + null + ); + testSegmentMetadataQueryWithDefaultAnalysisMerge("doubleNumericNull", analysis); + } + + + @Test + public void testFloatNullableColumn() + { + ColumnAnalysis analysis = new ColumnAnalysis( + ValueType.FLOAT.toString(), + false, + NullHandling.replaceWithDefault() ? false : true, + 19344, + null, + null, + null, + null + ); + testSegmentMetadataQueryWithDefaultAnalysisMerge("floatNumericNull", analysis); + } } diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java index 55d8b18babd3..2680d4b30e25 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java @@ -103,6 +103,7 @@ public void testSegmentMetadataUnionQuery() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, mmap ? 43524 : 43056, 1, "preferred", diff --git a/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java b/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java index ccf3363737aa..798d67a06fb1 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java @@ -36,13 +36,13 @@ private void assertSerDe(ColumnAnalysis analysis) throws Exception @Test public void testFoldStringColumns() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, 1L, 2, "aaA", "Zzz", null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", true, 3L, 4, "aAA", "ZZz", null); + final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, false, 1L, 2, "aaA", "Zzz", null); + final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", true, false, 3L, 4, "aAA", "ZZz", null); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", true, 4L, 4, "aAA", "Zzz", null); + final ColumnAnalysis expected = new ColumnAnalysis("STRING", true, false, 4L, 4, "aAA", "Zzz", null); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); @@ -56,7 +56,7 @@ public void testFoldStringColumns() throws Exception @Test public void testFoldWithNull() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, 1L, 2, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, false, 1L, 2, null, null, null); Assert.assertEquals(analysis1, analysis1.fold(null)); assertSerDe(analysis1); } @@ -64,13 +64,13 @@ public void testFoldWithNull() throws Exception @Test public void testFoldComplexColumns() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, 0L, null, null, null, null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("hyperUnique", false, 0L, null, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); + final ColumnAnalysis analysis2 = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("hyperUnique", false, 0L, null, null, null, null); + final ColumnAnalysis expected = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); @@ -84,8 +84,8 @@ public void testFoldComplexColumns() throws Exception @Test public void testFoldDifferentTypes() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, 1L, 1, null, null, null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("COMPLEX", false, 2L, 2, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, false, 1L, 1, null, null, null); + final ColumnAnalysis analysis2 = new ColumnAnalysis("COMPLEX", false, false, 2L, 2, null, null, null); assertSerDe(analysis1); assertSerDe(analysis2); @@ -93,6 +93,7 @@ public void testFoldDifferentTypes() throws Exception final ColumnAnalysis expected = new ColumnAnalysis( "STRING", false, + false, -1L, null, null, @@ -117,7 +118,7 @@ public void testFoldSameErrors() throws Exception assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, -1L, null, null, null, "error:foo"); + final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:foo"); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); @@ -131,12 +132,12 @@ public void testFoldSameErrors() throws Exception public void testFoldErrorAndNoError() throws Exception { final ColumnAnalysis analysis1 = ColumnAnalysis.error("foo"); - final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", false, 2L, 2, "a", "z", null); + final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", false, false, 2L, 2, "a", "z", null); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, -1L, null, null, null, "error:foo"); + final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:foo"); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); @@ -155,7 +156,7 @@ public void testFoldDifferentErrors() throws Exception assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, -1L, null, null, null, "error:multiple_errors"); + final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:multiple_errors"); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); From 69e772e2998375dccad0966a08e84ff4472ddf82 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 21 Jul 2020 04:09:01 -0700 Subject: [PATCH 02/12] better builder --- .../druid/segment/column/ColumnBuilder.java | 46 ++--- .../column/ColumnCapabilitiesImpl.java | 33 ++- .../segment/incremental/IncrementalIndex.java | 1 + .../segment/serde/ComplexColumnPartSerde.java | 6 + .../QueryableIndexColumnCapabilitiesTest.java | 193 +++++++++++++++++- .../column/ColumnCapabilitiesImplTest.java | 6 + 6 files changed, 240 insertions(+), 45 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java index 6dc6621100fb..d8df29138315 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java @@ -29,12 +29,7 @@ */ public class ColumnBuilder { - @Nullable - private ValueType type = null; - private boolean hasMultipleValues = false; - private boolean filterable = false; - private boolean dictionaryEncoded = false; - private boolean nullable = false; + private final ColumnCapabilitiesImpl capabilitiesBuilder = ColumnCapabilitiesImpl.createDefault(false); @Nullable private Supplier columnSupplier = null; @@ -59,27 +54,29 @@ public SmooshedFileMapper getFileMapper() public ColumnBuilder setType(ValueType type) { - this.type = type; + this.capabilitiesBuilder.setType(type); return this; } public ColumnBuilder setHasMultipleValues(boolean hasMultipleValues) { - this.hasMultipleValues = hasMultipleValues; + this.capabilitiesBuilder.setHasMultipleValues(hasMultipleValues); return this; } public ColumnBuilder setDictionaryEncodedColumnSupplier(Supplier> columnSupplier) { this.columnSupplier = columnSupplier; - this.dictionaryEncoded = true; + this.capabilitiesBuilder.setDictionaryEncoded(true); + this.capabilitiesBuilder.setDictionaryValuesSorted(true); + this.capabilitiesBuilder.setDictionaryValuesUnique(true); return this; } @SuppressWarnings("unused") public ColumnBuilder setFilterable(boolean filterable) { - this.filterable = filterable; + this.capabilitiesBuilder.setFilterable(filterable); return this; } @@ -98,39 +95,32 @@ public ColumnBuilder setNumericColumnSupplier(Supplier public ColumnBuilder setBitmapIndex(Supplier bitmapIndex) { this.bitmapIndex = bitmapIndex; + this.capabilitiesBuilder.setHasBitmapIndexes(true); return this; } public ColumnBuilder setSpatialIndex(Supplier spatialIndex) { this.spatialIndex = spatialIndex; + this.capabilitiesBuilder.setHasSpatialIndexes(true); return this; } public ColumnBuilder setNullable(boolean nullable) { - this.nullable = nullable; + this.capabilitiesBuilder.setIsNullable(nullable); + return this; + } + public ColumnBuilder setNullable(ColumnCapabilities.Capable nullable) + { + this.capabilitiesBuilder.setIsNullable(nullable); return this; } public ColumnHolder build() { - Preconditions.checkState(type != null, "Type must be set."); - - return new SimpleColumnHolder( - new ColumnCapabilitiesImpl() - .setType(type) - .setDictionaryEncoded(dictionaryEncoded) - .setHasBitmapIndexes(bitmapIndex != null) - .setDictionaryValuesSorted(dictionaryEncoded) - .setDictionaryValuesUnique(dictionaryEncoded) - .setHasSpatialIndexes(spatialIndex != null) - .setHasMultipleValues(hasMultipleValues) - .setFilterable(filterable) - .setIsNullable(nullable), - columnSupplier, - bitmapIndex, - spatialIndex - ); + Preconditions.checkState(capabilitiesBuilder.getType() != null, "Type must be set."); + + return new SimpleColumnHolder(capabilitiesBuilder, columnSupplier, bitmapIndex, spatialIndex); } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 6d2cd1692136..14f75ed7fa98 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.ISE; import javax.annotation.Nullable; @@ -77,19 +78,31 @@ public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities return copy; } + /** + * Creates a {@link ColumnCapabilitiesImpl} where all {@link ColumnCapabilities.Capable} that default to unknown + * instead are coerced to true or false + */ + public static ColumnCapabilitiesImpl createDefault(boolean unknownIsTrue) + { + return ColumnCapabilitiesImpl.snapshot(new ColumnCapabilitiesImpl(), unknownIsTrue); + } /** * Create a no frills, simple column with {@link ValueType} set and everything else false */ public static ColumnCapabilitiesImpl createSimpleNumericColumnCapabilities(ValueType valueType) { - return new ColumnCapabilitiesImpl().setType(valueType) - .setHasMultipleValues(false) - .setHasBitmapIndexes(false) - .setDictionaryEncoded(false) - .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(false) - .setHasSpatialIndexes(false); + ColumnCapabilitiesImpl builder = new ColumnCapabilitiesImpl().setType(valueType) + .setHasMultipleValues(false) + .setHasBitmapIndexes(false) + .setDictionaryEncoded(false) + .setDictionaryValuesSorted(false) + .setDictionaryValuesUnique(false) + .setHasSpatialIndexes(false); + if (NullHandling.replaceWithDefault()) { + builder.setIsNullable(false); + } + return builder; } @Nullable @@ -219,6 +232,12 @@ public ColumnCapabilitiesImpl setIsNullable(boolean isNullable) return this; } + public ColumnCapabilitiesImpl setIsNullable(Capable isNullable) + { + nullable = isNullable; + return this; + } + @Override public boolean isFilterable() { diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index ba1ca1883aad..b2b919c89947 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -1150,6 +1150,7 @@ public MetricDesc(int index, AggregatorFactory factory) } else { // in an ideal world complex type reports its actual column capabilities... capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.COMPLEX); + capabilities.setIsNullable(ColumnCapabilities.Capable.UNKNOWN); this.type = ComplexMetrics.getSerdeForType(typeInfo).getTypeName(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java index dc0278baaed0..35cb0b1d7cef 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.GenericColumnSerializer; +import org.apache.druid.segment.column.ColumnCapabilities; import javax.annotation.Nullable; @@ -78,6 +79,11 @@ public Deserializer getDeserializer() { return (buffer, builder, columnConfig) -> { if (serde != null) { + // we don't currently know if complex column can have nulls (or can be multi-valued, but not making that change + // since it isn't supported anywhere in the query engines) + // longer term this needs to be captured by making the serde provide this information, and then this should + // no longer be set to unknown but rather the actual values + builder.setNullable(ColumnCapabilities.Capable.UNKNOWN); serde.deserializeColumn(buffer, builder); } }; diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index c7783e992197..2041b650c86a 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; @@ -32,6 +33,7 @@ import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; import org.apache.druid.query.aggregation.FloatSumAggregatorFactory; @@ -53,6 +55,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -63,11 +66,12 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin private static IncrementalIndex INC_INDEX; private static QueryableIndex MMAP_INDEX; + private static IncrementalIndex INC_INDEX_WITH_NULLS; + private static QueryableIndex MMAP_INDEX_WITH_NULLS; @BeforeClass public static void setup() throws IOException { - List rows = new ArrayList<>(); MapInputRowParser parser = new MapInputRowParser( new TimeAndDimsParseSpec( new TimestampSpec("time", "auto", null), @@ -83,6 +87,14 @@ public static void setup() throws IOException ) ) ); + AggregatorFactory[] metricsSpecs = new AggregatorFactory[] { + new CountAggregatorFactory("cnt"), + new DoubleSumAggregatorFactory("m1", "d3"), + new FloatSumAggregatorFactory("m2", "d4"), + new LongSumAggregatorFactory("m3", "d5"), + new HyperUniquesAggregatorFactory("m4", "d1") + }; + List rows = new ArrayList<>(); Map event = ImmutableMap.builder().put("time", DateTimes.nowUtc().getMillis()) .put("d1", "some string") @@ -92,17 +104,12 @@ public static void setup() throws IOException .put("d5", 10L) .build(); rows.add(Iterables.getOnlyElement(parser.parseBatch(event))); + IndexBuilder builder = IndexBuilder.create() .rows(rows) .schema( new IncrementalIndexSchema.Builder() - .withMetrics( - new CountAggregatorFactory("cnt"), - new DoubleSumAggregatorFactory("m1", "d3"), - new FloatSumAggregatorFactory("m2", "d4"), - new LongSumAggregatorFactory("m3", "d5"), - new HyperUniquesAggregatorFactory("m4", "d1") - ) + .withMetrics(metricsSpecs) .withDimensionsSpec(parser) .withRollup(false) .build() @@ -110,6 +117,30 @@ public static void setup() throws IOException .tmpDir(temporaryFolder.newFolder()); INC_INDEX = builder.buildIncrementalIndex(); MMAP_INDEX = builder.buildMMappedIndex(); + + List rowsWithNulls = new ArrayList<>(); + rowsWithNulls.add(Iterables.getOnlyElement(parser.parseBatch(event))); + + Map eventWithNulls = new HashMap<>(); + eventWithNulls.put("time", DateTimes.nowUtc().getMillis()); + eventWithNulls.put("d1", null); + eventWithNulls.put("d2", null); + eventWithNulls.put("d3", null); + eventWithNulls.put("d4", null); + eventWithNulls.put("d5", null); + rowsWithNulls.add(Iterables.getOnlyElement(parser.parseBatch(eventWithNulls))); + IndexBuilder builderWithNulls = IndexBuilder.create() + .rows(rowsWithNulls) + .schema( + new IncrementalIndexSchema.Builder() + .withMetrics(metricsSpecs) + .withDimensionsSpec(parser) + .withRollup(false) + .build() + ) + .tmpDir(temporaryFolder.newFolder()); + INC_INDEX_WITH_NULLS = builderWithNulls.buildIncrementalIndex(); + MMAP_INDEX_WITH_NULLS = builderWithNulls.buildMMappedIndex(); } @AfterClass @@ -117,6 +148,8 @@ public static void teardown() { INC_INDEX.close(); MMAP_INDEX.close(); + INC_INDEX_WITH_NULLS.close(); + MMAP_INDEX_WITH_NULLS.close(); } @Test @@ -144,6 +177,53 @@ public void testNumericColumns() assertNonStringColumnCapabilities(MMAP_INDEX.getColumnHolder("m3").getCapabilities(), ValueType.LONG); } + @Test + public void testNumericColumnsWithNulls() + { + // incremental index + // time does not have nulls + assertNonStringColumnCapabilities( + INC_INDEX_WITH_NULLS.getCapabilities(ColumnHolder.TIME_COLUMN_NAME), + ValueType.LONG + ); + assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("d3"), ValueType.DOUBLE); + assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("d4"), ValueType.FLOAT); + assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("d5"), ValueType.LONG); + assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("m1"), ValueType.DOUBLE); + assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("m2"), ValueType.FLOAT); + assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("m3"), ValueType.LONG); + + // segment index + assertNonStringColumnCapabilities( + MMAP_INDEX_WITH_NULLS.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getCapabilities(), + ValueType.LONG + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("d3").getCapabilities(), + ValueType.DOUBLE + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("d4").getCapabilities(), + ValueType.FLOAT + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("d5").getCapabilities(), + ValueType.LONG + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("m1").getCapabilities(), + ValueType.DOUBLE + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("m2").getCapabilities(), + ValueType.FLOAT + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("m3").getCapabilities(), + ValueType.LONG + ); + } + @Test public void testStringColumn() { @@ -159,6 +239,7 @@ public void testStringColumn() // coercing any 'UNKNOWN' values to false Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.isNullable().isUnknown()); caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); @@ -168,6 +249,36 @@ public void testStringColumn() Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertFalse(caps.isNullable().isMaybeTrue()); + } + + + @Test + public void testStringColumnWithNulls() + { + ColumnCapabilities caps = INC_INDEX_WITH_NULLS.getCapabilities("d1"); + Assert.assertEquals(ValueType.STRING, caps.getType()); + Assert.assertTrue(caps.hasBitmapIndexes()); + Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); + // multi-value is unknown unless explicitly set to 'true' + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); + // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, + // coercing any 'UNKNOWN' values to false + Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.isNullable().isUnknown()); + + caps = MMAP_INDEX_WITH_NULLS.getColumnHolder("d1").getCapabilities(); + Assert.assertEquals(ValueType.STRING, caps.getType()); + Assert.assertTrue(caps.hasBitmapIndexes()); + Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.isNullable().isTrue()); } @Test @@ -181,6 +292,7 @@ public void testMultiStringColumn() Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.isNullable().isUnknown()); caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); @@ -190,15 +302,62 @@ public void testMultiStringColumn() Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertFalse(caps.isNullable().isMaybeTrue()); + } + + + @Test + public void testMultiStringColumnWithNulls() + { + ColumnCapabilities caps = INC_INDEX_WITH_NULLS.getCapabilities("d2"); + Assert.assertEquals(ValueType.STRING, caps.getType()); + Assert.assertTrue(caps.hasBitmapIndexes()); + Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); + Assert.assertTrue(caps.hasMultipleValues().isTrue()); + Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.isNullable().isUnknown()); + + caps = MMAP_INDEX_WITH_NULLS.getColumnHolder("d2").getCapabilities(); + Assert.assertEquals(ValueType.STRING, caps.getType()); + Assert.assertTrue(caps.hasBitmapIndexes()); + Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); + Assert.assertTrue(caps.hasMultipleValues().isTrue()); + Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.isNullable().isTrue()); } @Test public void testComplexColumn() { - assertNonStringColumnCapabilities(INC_INDEX.getCapabilities("m4"), ValueType.COMPLEX); - assertNonStringColumnCapabilities(MMAP_INDEX.getColumnHolder("m4").getCapabilities(), ValueType.COMPLEX); + assertComplexColumnCapabilites(INC_INDEX.getCapabilities("m4")); + assertComplexColumnCapabilites(MMAP_INDEX.getColumnHolder("m4").getCapabilities()); + // results for this complex aren't different, we only know that nullability is unknown + assertComplexColumnCapabilites(INC_INDEX_WITH_NULLS.getCapabilities("m4")); + assertComplexColumnCapabilites(MMAP_INDEX_WITH_NULLS.getColumnHolder("m4").getCapabilities()); } + @Test + public void testComplexColumnWithNulls() + { + + } + + private void assertComplexColumnCapabilites(ColumnCapabilities caps) + { + Assert.assertEquals(ValueType.COMPLEX, caps.getType()); + Assert.assertFalse(caps.hasBitmapIndexes()); + Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); + Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertFalse(caps.hasMultipleValues().isUnknown()); + // we don't know + Assert.assertTrue(caps.isNullable().isUnknown()); + } private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueType valueType) { @@ -209,5 +368,19 @@ private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueTyp Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertFalse(caps.isNullable().isTrue()); + } + + private void assertNonStringColumnCapabilitiesWithNulls(ColumnCapabilities caps, ValueType valueType) + { + Assert.assertEquals(valueType, caps.getType()); + Assert.assertFalse(caps.hasBitmapIndexes()); + Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(caps.hasSpatialIndexes()); + // check isMaybeTrue because incremental index uses Unknown + Assert.assertEquals(NullHandling.sqlCompatible(), caps.isNullable().isMaybeTrue()); } } diff --git a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java index e221edd9c73f..2de2ab859146 100644 --- a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java +++ b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java @@ -37,6 +37,7 @@ public void testSerde() throws Exception .setHasMultipleValues(true) .setHasSpatialIndexes(true) .setType(ValueType.COMPLEX) + .setIsNullable(true) .setFilterable(true)); Assert.assertFalse(json.contains("filterable")); @@ -49,6 +50,8 @@ public void testSerde() throws Exception Assert.assertTrue(cc.hasSpatialIndexes()); Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); + // isNullable and isFilterable are computed, these should not be set + Assert.assertFalse(cc.isNullable().isTrue()); Assert.assertFalse(cc.isFilterable()); } @@ -62,6 +65,7 @@ public void testDeserialization() throws Exception + " \"hasSpatialIndexes\":true,\n" + " \"hasMultipleValues\":true,\n" + " \"hasBitmapIndexes\":true,\n" + + " \"isNullable\":true,\n" + " \"filterable\":true\n" + "}"; @@ -74,6 +78,8 @@ public void testDeserialization() throws Exception Assert.assertTrue(cc.hasSpatialIndexes()); Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); + // isNullable and isFilterable are computed, these should not be set + Assert.assertFalse(cc.isNullable().isTrue()); Assert.assertFalse(cc.isFilterable()); } } From 1126f1579f4d284778aa385b0e9379df34d581d3 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 30 Jul 2020 13:00:54 -0700 Subject: [PATCH 03/12] fix segment metadata queries in integration tests --- .../queries/twitterstream_queries.json | 85 ++++++++++--------- .../queries/wikipedia_editstream_queries.json | 42 ++++----- 2 files changed, 66 insertions(+), 61 deletions(-) diff --git a/integration-tests/src/test/resources/queries/twitterstream_queries.json b/integration-tests/src/test/resources/queries/twitterstream_queries.json index efb024d1d781..fa85e6513907 100644 --- a/integration-tests/src/test/resources/queries/twitterstream_queries.json +++ b/integration-tests/src/test/resources/queries/twitterstream_queries.json @@ -592,66 +592,69 @@ }, "expectedResults": [ { - "id": "twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9", - "intervals": ["2013-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z"], - "columns": { - "has_links": { - "type": "STRING", - "hasMultipleValues": false, - "size": 0, - "cardinality": 2, + "id":"twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9", + "intervals":["2013-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z"], + "columns":{ + "has_links":{ + "type":"STRING", + "hasMultipleValues":false, + "size":0, + "cardinality":2, "minValue":"No", "maxValue":"Yes", - "errorMessage": null + "errorMessage":null, + "nullable":false } }, - "size": 0, - "numRows": 3702583, - "aggregators": null, - "timestampSpec": null, - "queryGranularity": null, + "size":0, + "numRows":3702583, + "aggregators":null, + "timestampSpec":null, + "queryGranularity":null, "rollup":null }, { - "id": "twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9", - "intervals": ["2013-01-02T00:00:00.000Z/2013-01-03T00:00:00.000Z"], - "columns": { - "has_links": { - "type": "STRING", - "hasMultipleValues": false, - "size": 0, - "cardinality": 2, + "id":"twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9", + "intervals":["2013-01-02T00:00:00.000Z/2013-01-03T00:00:00.000Z"], + "columns":{ + "has_links":{ + "type":"STRING", + "hasMultipleValues":false, + "size":0, + "cardinality":2, "minValue":"No", "maxValue":"Yes", - "errorMessage": null + "errorMessage":null, + "nullable":false } }, - "size": 0, - "numRows": 3743002, - "aggregators": null, - "timestampSpec": null, - "queryGranularity": null, + "size":0, + "numRows":3743002, + "aggregators":null, + "timestampSpec":null, + "queryGranularity":null, "rollup":null }, { - "id": "twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9", - "intervals": ["2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z"], - "columns": { - "has_links": { - "type": "STRING", - "hasMultipleValues": false, - "size": 0, - "cardinality": 2, + "id":"twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9", + "intervals":["2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z"], + "columns":{ + "has_links":{ + "type":"STRING", + "hasMultipleValues":false, + "size":0, + "cardinality":2, "minValue":"No", "maxValue":"Yes", - "errorMessage": null + "errorMessage":null, + "nullable":false } }, - "size": 0, + "size":0, "numRows":3502959, - "aggregators": null, - "timestampSpec": null, - "queryGranularity": null, + "aggregators":null, + "timestampSpec":null, + "queryGranularity":null, "rollup":null } ] diff --git a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json index 92dc582e2ec5..f18820729318 100644 --- a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json +++ b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json @@ -1398,33 +1398,35 @@ }, "expectedResults": [ { - "id": "wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9", - "intervals": ["2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z"], - "columns": { - "country_name": { - "type": "STRING", - "hasMultipleValues": false, - "size": 0, - "cardinality": 208, + "id":"wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9", + "intervals":["2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z"], + "columns":{ + "country_name":{ + "type":"STRING", + "hasMultipleValues":false, + "size":0, + "cardinality":208, "minValue":"", "maxValue":"mmx._unknown", - "errorMessage": null + "errorMessage":null, + "nullable":false }, - "language": { - "type": "STRING", - "hasMultipleValues": false, - "size": 0, - "cardinality": 36, + "language":{ + "type":"STRING", + "hasMultipleValues":false, + "size":0, + "cardinality":36, "minValue":"ar", "maxValue":"zh", - "errorMessage": null + "errorMessage":null, + "nullable":false } }, - "size": 0, - "numRows": 4462111, - "aggregators": null, - "timestampSpec": null, - "queryGranularity": null, + "size":0, + "numRows":4462111, + "aggregators":null, + "timestampSpec":null, + "queryGranularity":null, "rollup":null } ] From 0455c2f518d005991f2ad00ed1aed3552872662c Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Aug 2020 04:42:54 -0700 Subject: [PATCH 04/12] adjustments --- .../druid/segment/DoubleDimensionIndexer.java | 2 +- .../druid/segment/FloatDimensionIndexer.java | 2 +- .../druid/segment/LongDimensionIndexer.java | 2 +- .../druid/segment/column/ColumnBuilder.java | 2 +- .../column/ColumnCapabilitiesImpl.java | 37 ++++++++++++++++++- .../QueryableIndexColumnCapabilitiesTest.java | 30 +++++++++------ 6 files changed, 57 insertions(+), 18 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index 7de36161ab65..185ee491a5e2 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -132,7 +132,7 @@ class IndexerDoubleColumnSelector implements DoubleColumnSelector public boolean isNull() { final Object[] dims = currEntry.get().getDims(); - return dimIndex >= dims.length || dims[dimIndex] == null; + return hasNulls && (dimIndex >= dims.length || dims[dimIndex] == null); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java index 113d5e525a72..ee67332edeea 100644 --- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java @@ -133,7 +133,7 @@ class IndexerFloatColumnSelector implements FloatColumnSelector public boolean isNull() { final Object[] dims = currEntry.get().getDims(); - return dimIndex >= dims.length || dims[dimIndex] == null; + return hasNulls && (dimIndex >= dims.length || dims[dimIndex] == null); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java index 00a2f21df2d9..35835b73a433 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java @@ -133,7 +133,7 @@ class IndexerLongColumnSelector implements LongColumnSelector public boolean isNull() { final Object[] dims = currEntry.get().getDims(); - return dimIndex >= dims.length || dims[dimIndex] == null; + return hasNulls && (dimIndex >= dims.length || dims[dimIndex] == null); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java index 40181d3a080d..f20d99f9a85a 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java @@ -29,7 +29,7 @@ */ public class ColumnBuilder { - private final ColumnCapabilitiesImpl capabilitiesBuilder = ColumnCapabilitiesImpl.createDefault(false); + private final ColumnCapabilitiesImpl capabilitiesBuilder = ColumnCapabilitiesImpl.createDefault(); @Nullable private Supplier columnSupplier = null; diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 645904dcdf89..f389adb02176 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -33,6 +33,39 @@ */ public class ColumnCapabilitiesImpl implements ColumnCapabilities { + private static final CoercionLogic ALL_FALSE = new CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return false; + } + + @Override + public boolean dictionaryValuesSorted() + { + return false; + } + + @Override + public boolean dictionaryValuesUnique() + { + return false; + } + + @Override + public boolean multipleValues() + { + return false; + } + + @Override + public boolean hasNulls() + { + return false; + } + }; + public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities other) { final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); @@ -112,9 +145,9 @@ public static ColumnCapabilitiesImpl merge( * Creates a {@link ColumnCapabilitiesImpl} where all {@link ColumnCapabilities.Capable} that default to unknown * instead are coerced to true or false */ - public static ColumnCapabilitiesImpl createDefault(boolean unknownIsTrue) + public static ColumnCapabilitiesImpl createDefault() { - return ColumnCapabilitiesImpl.snapshot(new ColumnCapabilitiesImpl(), unknownIsTrue); + return ColumnCapabilitiesImpl.snapshot(new ColumnCapabilitiesImpl(), ALL_FALSE); } /** diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index 98f8bc5ead27..a3005b2a6221 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -239,9 +239,10 @@ public void testStringColumn() // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, // coercing any 'UNKNOWN' values to false Assert.assertFalse( - ColumnCapabilitiesImpl.snapshot(caps, IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC) - .hasMultipleValues() - .isMaybeTrue() + ColumnCapabilitiesImpl.snapshot( + caps, + IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC + ).hasMultipleValues().isMaybeTrue() ); Assert.assertFalse(caps.hasSpatialIndexes()); Assert.assertTrue(caps.hasNulls().isUnknown()); @@ -264,21 +265,26 @@ public void testStringColumnWithNulls() ColumnCapabilities caps = INC_INDEX_WITH_NULLS.getCapabilities("d1"); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); // multi-value is unknown unless explicitly set to 'true' Assert.assertTrue(caps.hasMultipleValues().isUnknown()); // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, // coercing any 'UNKNOWN' values to false - Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue()); + Assert.assertFalse( + ColumnCapabilitiesImpl.snapshot( + caps, + IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC + ).hasMultipleValues().isMaybeTrue() + ); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.hasNulls().isUnknown()); + Assert.assertTrue(caps.hasNulls().isTrue()); caps = MMAP_INDEX_WITH_NULLS.getColumnHolder("d1").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -317,17 +323,17 @@ public void testMultiStringColumnWithNulls() ColumnCapabilities caps = INC_INDEX_WITH_NULLS.getCapabilities("d2"); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.hasNulls().isUnknown()); + Assert.assertTrue(caps.hasNulls().isTrue()); caps = MMAP_INDEX_WITH_NULLS.getColumnHolder("d2").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); @@ -355,7 +361,7 @@ private void assertComplexColumnCapabilites(ColumnCapabilities caps) { Assert.assertEquals(ValueType.COMPLEX, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); @@ -380,7 +386,7 @@ private void assertNonStringColumnCapabilitiesWithNulls(ColumnCapabilities caps, { Assert.assertEquals(valueType, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); From 5d8908542fd501ba9b5467563de974e2ff5309ec Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Aug 2020 05:09:23 -0700 Subject: [PATCH 05/12] cleanup --- .../segment/QueryableIndexColumnCapabilitiesTest.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index a3005b2a6221..12da540d561f 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -124,11 +124,13 @@ public static void setup() throws IOException Map eventWithNulls = new HashMap<>(); eventWithNulls.put("time", DateTimes.nowUtc().getMillis()); eventWithNulls.put("d1", null); - eventWithNulls.put("d2", null); + eventWithNulls.put("d2", ImmutableList.of()); eventWithNulls.put("d3", null); eventWithNulls.put("d4", null); eventWithNulls.put("d5", null); + rowsWithNulls.add(Iterables.getOnlyElement(parser.parseBatch(eventWithNulls))); + IndexBuilder builderWithNulls = IndexBuilder.create() .rows(rowsWithNulls) .schema( @@ -351,12 +353,6 @@ public void testComplexColumn() assertComplexColumnCapabilites(MMAP_INDEX_WITH_NULLS.getColumnHolder("m4").getCapabilities()); } - @Test - public void testComplexColumnWithNulls() - { - - } - private void assertComplexColumnCapabilites(ColumnCapabilities caps) { Assert.assertEquals(ValueType.COMPLEX, caps.getType()); From 9087a738c5b1306ae7fe942e1ce80f04fd9a1046 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Aug 2020 13:02:52 -0700 Subject: [PATCH 06/12] fix spotbugs --- .../org/apache/druid/segment/DimensionIndexer.java | 1 + .../druid/segment/DoubleDimensionIndexer.java | 1 + .../druid/segment/FloatDimensionIndexer.java | 1 + .../apache/druid/segment/LongDimensionIndexer.java | 2 ++ .../segment/incremental/IncrementalIndex.java | 14 ++++++++------ 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java index 277deb94e9bb..00ae11da956f 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java @@ -126,6 +126,7 @@ public interface DimensionIndexer * @param reportParseExceptions * @return An array containing an encoded representation of the input row value. */ + @Nullable EncodedKeyComponentType processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions); /** diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index 185ee491a5e2..beb58df6208b 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -43,6 +43,7 @@ public class DoubleDimensionIndexer implements DimensionIndexer private volatile boolean hasNulls = false; + + @Nullable @Override public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index 44d4b3d66c76..29c62991263e 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -104,8 +104,6 @@ public abstract class IncrementalIndex extends AbstractIndex implements Iterable, Closeable { - private volatile DateTime maxIngestedEventTime; - // Used to discover ValueType based on the class of values in a row // Also used to convert between the duplicate ValueType enums in DimensionSchema (druid-api) and main druid. public static final Map TYPE_MAP = ImmutableMap.builder() @@ -259,6 +257,9 @@ public ColumnCapabilities getColumnCapabilities(String columnName) private final ThreadLocal in = new ThreadLocal<>(); private final Supplier rowSupplier = in::get; + private volatile DateTime maxIngestedEventTime; + + /** * Setting deserializeComplexMetrics to false is necessary for intermediate aggregation such as groupBy that * should not deserialize input columns using ComplexMetricSerde for aggregators that return complex metrics. @@ -474,10 +475,6 @@ public IncrementalIndex buildOffheap(final NonBlockingPool bufferPoo } } - public boolean isRollup() - { - return rollup; - } public abstract FactsHolder getFacts(); @@ -572,6 +569,11 @@ public List getParseExceptionMessages() } } + public boolean isRollup() + { + return rollup; + } + @Override public void close() { From 82ff3e452eae3ae1e77ce854e22f563d874bb055 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Aug 2020 14:04:31 -0700 Subject: [PATCH 07/12] treat unknown as true in segmentmetadata --- .../org/apache/druid/query/metadata/SegmentAnalyzer.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 34fde0ccf9b9..a84b95b0f8ab 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -193,7 +193,7 @@ private ColumnAnalysis analyzeNumericColumn( return new ColumnAnalysis( capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), - capabilities.hasNulls().isTrue(), + capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls size, null, null, @@ -244,7 +244,7 @@ private ColumnAnalysis analyzeStringColumn( return new ColumnAnalysis( capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), - capabilities.hasNulls().isTrue(), + capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls size, analyzingCardinality() ? cardinality : 0, min, @@ -322,7 +322,7 @@ public Long accumulate(Long accumulated, Cursor cursor) return new ColumnAnalysis( capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), - capabilities.hasNulls().isTrue(), + capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls size, cardinality, min, @@ -339,7 +339,8 @@ private ColumnAnalysis analyzeComplexColumn( { try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) { final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue(); - final boolean nullable = capabilities != null && capabilities.hasNulls().isTrue(); + // if we don't know for sure, then we should plan to check for nulls + final boolean nullable = capabilities != null && capabilities.hasNulls().isMaybeTrue(); long size = 0; if (analyzingSize() && complexColumn != null) { From bdd6b07d2c635fbe52f9ce044ad508fba96e0a55 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Aug 2020 16:51:43 -0700 Subject: [PATCH 08/12] rename to hasNulls, add docs --- docs/querying/segmentmetadataquery.md | 29 ++++++++++--------- .../druid/query/metadata/SegmentAnalyzer.java | 7 ++--- .../metadata/metadata/ColumnAnalysis.java | 18 ++++++------ 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/docs/querying/segmentmetadataquery.md b/docs/querying/segmentmetadataquery.md index 6f59f1f6090f..424ace5bdce2 100644 --- a/docs/querying/segmentmetadataquery.md +++ b/docs/querying/segmentmetadataquery.md @@ -30,15 +30,18 @@ sidebar_label: "SegmentMetadata" Segment metadata queries return per-segment information about: -* Cardinality of all columns in the segment -* Min/max values of string type columns in the segment -* Estimated byte size for the segment columns if they were stored in a flat format * Number of rows stored inside the segment * Interval the segment covers -* Column type of all the columns in the segment -* Estimated total segment byte size in if it was stored in a flat format -* Is the segment rolled up +* Estimated total segment byte size in if it was stored in a 'flat format' (e.g. a csv file) * Segment id +* Is the segment rolled up +* Detailed per column information such as: + - type + - cardinality + - min/max values + - presence of null values + - estimated 'flat format' byte size + ```json { @@ -68,10 +71,10 @@ The format of the result is: "id" : "some_id", "intervals" : [ "2013-05-13T00:00:00.000Z/2013-05-14T00:00:00.000Z" ], "columns" : { - "__time" : { "type" : "LONG", "hasMultipleValues" : false, "size" : 407240380, "cardinality" : null, "errorMessage" : null }, - "dim1" : { "type" : "STRING", "hasMultipleValues" : false, "size" : 100000, "cardinality" : 1944, "errorMessage" : null }, - "dim2" : { "type" : "STRING", "hasMultipleValues" : true, "size" : 100000, "cardinality" : 1504, "errorMessage" : null }, - "metric1" : { "type" : "FLOAT", "hasMultipleValues" : false, "size" : 100000, "cardinality" : null, "errorMessage" : null } + "__time" : { "type" : "LONG", "hasMultipleValues" : false, "hasNulls": false, "size" : 407240380, "cardinality" : null, "errorMessage" : null }, + "dim1" : { "type" : "STRING", "hasMultipleValues" : false, "hasNulls": false, "size" : 100000, "cardinality" : 1944, "errorMessage" : null }, + "dim2" : { "type" : "STRING", "hasMultipleValues" : true, "hasNulls": true, "size" : 100000, "cardinality" : 1504, "errorMessage" : null }, + "metric1" : { "type" : "FLOAT", "hasMultipleValues" : false, "hasNulls": false, "size" : 100000, "cardinality" : null, "errorMessage" : null } }, "aggregators" : { "metric1" : { "type" : "longSum", "name" : "metric1", "fieldName" : "metric1" } @@ -84,14 +87,14 @@ The format of the result is: } ] ``` -Dimension columns will have type `STRING`. -Metric columns will have type `FLOAT` or `LONG` or name of the underlying complex type such as `hyperUnique` in case of COMPLEX metric. +Dimension columns will have type `STRING`, `FLOAT`, `DOUBLE`, or `LONG`. +Metric columns will have type `FLOAT`, `DOUBLE`, or `LONG`, or the name of the underlying complex type such as `hyperUnique` in case of COMPLEX metric. Timestamp column will have type `LONG`. If the `errorMessage` field is non-null, you should not trust the other fields in the response. Their contents are undefined. -Only columns which are dimensions (i.e., have type `STRING`) will have any cardinality. Rest of the columns (timestamp and metric columns) will show cardinality as `null`. +Only columns which are dictionary encoded (i.e., have type `STRING`) will have any cardinality. Rest of the columns (timestamp and metric columns) will show cardinality as `null`. ## intervals diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index a84b95b0f8ab..2388004d629a 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -339,8 +339,7 @@ private ColumnAnalysis analyzeComplexColumn( { try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) { final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue(); - // if we don't know for sure, then we should plan to check for nulls - final boolean nullable = capabilities != null && capabilities.hasNulls().isMaybeTrue(); + final boolean hasNulls = capabilities != null && capabilities.hasNulls().isMaybeTrue(); long size = 0; if (analyzingSize() && complexColumn != null) { @@ -351,7 +350,7 @@ private ColumnAnalysis analyzeComplexColumn( final Function inputSizeFn = serde.inputSizeFn(); if (inputSizeFn == null) { - return new ColumnAnalysis(typeName, hasMultipleValues, nullable, 0, null, null, null, null); + return new ColumnAnalysis(typeName, hasMultipleValues, hasNulls, 0, null, null, null, null); } final int length = complexColumn.getLength(); @@ -363,7 +362,7 @@ private ColumnAnalysis analyzeComplexColumn( return new ColumnAnalysis( typeName, hasMultipleValues, - nullable, + hasNulls, size, null, null, diff --git a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java index bcc0d7074568..4edb4b7517dc 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java @@ -38,7 +38,7 @@ public static ColumnAnalysis error(String reason) private final String type; private final boolean hasMultipleValues; - private final boolean nullable; + private final boolean hasNulls; private final long size; private final Integer cardinality; private final Comparable minValue; @@ -49,7 +49,7 @@ public static ColumnAnalysis error(String reason) public ColumnAnalysis( @JsonProperty("type") String type, @JsonProperty("hasMultipleValues") boolean hasMultipleValues, - @JsonProperty("isNullable") boolean isNullable, + @JsonProperty("hasNulls") boolean hasNulls, @JsonProperty("size") long size, @JsonProperty("cardinality") Integer cardinality, @JsonProperty("minValue") Comparable minValue, @@ -59,7 +59,7 @@ public ColumnAnalysis( { this.type = type; this.hasMultipleValues = hasMultipleValues; - this.nullable = isNullable; + this.hasNulls = hasNulls; this.size = size; this.cardinality = cardinality; this.minValue = minValue; @@ -112,9 +112,9 @@ public String getErrorMessage() } @JsonProperty - public boolean isNullable() + public boolean isHasNulls() { - return nullable; + return hasNulls; } public boolean isError() { @@ -155,7 +155,7 @@ public ColumnAnalysis fold(ColumnAnalysis rhs) return new ColumnAnalysis( type, multipleValues, - nullable || rhs.nullable, + hasNulls || rhs.hasNulls, size + rhs.getSize(), cardinality, newMin, @@ -182,7 +182,7 @@ public String toString() return "ColumnAnalysis{" + "type='" + type + '\'' + ", hasMultipleValues=" + hasMultipleValues + - ", isNullable=" + nullable + + ", hasNulls=" + hasNulls + ", size=" + size + ", cardinality=" + cardinality + ", minValue=" + minValue + @@ -202,7 +202,7 @@ public boolean equals(Object o) } ColumnAnalysis that = (ColumnAnalysis) o; return hasMultipleValues == that.hasMultipleValues && - nullable == that.nullable && + hasNulls == that.hasNulls && size == that.size && Objects.equals(type, that.type) && Objects.equals(cardinality, that.cardinality) && @@ -214,6 +214,6 @@ public boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(type, hasMultipleValues, nullable, size, cardinality, minValue, maxValue, errorMessage); + return Objects.hash(type, hasMultipleValues, hasNulls, size, cardinality, minValue, maxValue, errorMessage); } } From 09d452b82efed4cde8cc62957c72bedf439d61fc Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Aug 2020 20:40:16 -0700 Subject: [PATCH 09/12] fixup --- .../queries/twitterstream_queries.json | 6 ++--- .../queries/wikipedia_editstream_queries.json | 4 ++-- .../segment/incremental/IncrementalIndex.java | 23 +++++++++++-------- .../metadata/SegmentMetadataQueryTest.java | 2 +- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/integration-tests/src/test/resources/queries/twitterstream_queries.json b/integration-tests/src/test/resources/queries/twitterstream_queries.json index fa85e6513907..a0796fd4f9b8 100644 --- a/integration-tests/src/test/resources/queries/twitterstream_queries.json +++ b/integration-tests/src/test/resources/queries/twitterstream_queries.json @@ -603,7 +603,7 @@ "minValue":"No", "maxValue":"Yes", "errorMessage":null, - "nullable":false + "hasNulls":false } }, "size":0, @@ -625,7 +625,7 @@ "minValue":"No", "maxValue":"Yes", "errorMessage":null, - "nullable":false + "hasNulls":false } }, "size":0, @@ -647,7 +647,7 @@ "minValue":"No", "maxValue":"Yes", "errorMessage":null, - "nullable":false + "hasNulls":false } }, "size":0, diff --git a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json index f18820729318..43c25e1f4edd 100644 --- a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json +++ b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json @@ -1409,7 +1409,7 @@ "minValue":"", "maxValue":"mmx._unknown", "errorMessage":null, - "nullable":false + "hasNulls":false }, "language":{ "type":"STRING", @@ -1419,7 +1419,7 @@ "minValue":"ar", "maxValue":"zh", "errorMessage":null, - "nullable":false + "hasNulls":false } }, "size":0, diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index 29c62991263e..baffbf40abeb 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -928,15 +928,18 @@ public List getDimensionOrder() private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType type) { - if (type == ValueType.STRING) { - // we start out as not having multiple values, but this might change as we encounter them - return new ColumnCapabilitiesImpl().setType(type) - .setHasBitmapIndexes(true) - .setDictionaryEncoded(true) - .setDictionaryValuesUnique(true) - .setDictionaryValuesSorted(false); - } else { - return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type); + switch (type) { + case STRING: + // we start out as not having multiple values, but this might change as we encounter them + return new ColumnCapabilitiesImpl().setType(type) + .setHasBitmapIndexes(true) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setDictionaryValuesSorted(false); + case COMPLEX: + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type).setHasNulls(true); + default: + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type); } } @@ -1145,7 +1148,7 @@ public MetricDesc(int index, AggregatorFactory factory) } else { // in an ideal world complex type reports its actual column capabilities... capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.COMPLEX) - .setHasNulls(ColumnCapabilities.Capable.UNKNOWN); + .setHasNulls(ColumnCapabilities.Capable.TRUE); this.type = ComplexMetrics.getSerdeForType(typeInfo).getTypeName(); } } diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java index 062639be2b27..00846ebb5db5 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java @@ -462,7 +462,7 @@ public void testSegmentMetadataQueryWithComplexColumnMerge() new ColumnAnalysis( "hyperUnique", false, - false, + true, 0, null, null, From 336dbbb630563f9081e548a45e7d979935f53451 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Aug 2020 22:32:38 -0700 Subject: [PATCH 10/12] test the dim indexer selector isNull fix for numeric columns --- .../druid/segment/filter/BaseFilterTest.java | 55 +++++++++++++------ .../druid/segment/filter/BoundFilterTest.java | 8 ++- .../segment/filter/JavaScriptFilterTest.java | 2 +- .../segment/filter/SelectorFilterTest.java | 2 +- 4 files changed, 45 insertions(+), 22 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index b5472fd870ff..86233773ee39 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -136,6 +136,18 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest ) ); + // missing 'dim3' because makeDefaultSchemaRow does not expect to set it... + static final RowSignature DEFAULT_ROW_SIGNATURE = + RowSignature.builder() + .add("dim0", ValueType.STRING) + .add("dim1", ValueType.STRING) + .add("dim2", ValueType.STRING) + .add("timeDim", ValueType.STRING) + .add("d0", ValueType.DOUBLE) + .add("f0", ValueType.FLOAT) + .add("l0", ValueType.LONG) + .build(); + static final List DEFAULT_ROWS = ImmutableList.of( makeDefaultSchemaRow("0", "", ImmutableList.of("a", "b"), "2017-07-25", 0.0, 0.0f, 0L), makeDefaultSchemaRow("1", "10", ImmutableList.of(), "2017-07-25", 10.1, 10.1f, 100L), @@ -151,25 +163,27 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest .build(); static InputRow makeDefaultSchemaRow( - @Nullable String dim0, - @Nullable String dim1, - @Nullable List dim2, - @Nullable String timeDim, - @Nullable Double d0, - @Nullable Float f0, - @Nullable Long l0 + @Nullable Object ... elements + ) + { + return makeSchemaRow(DEFAULT_PARSER, DEFAULT_ROW_SIGNATURE, elements); + } + + + static InputRow makeSchemaRow( + final InputRowParser> parser, + final RowSignature signature, + @Nullable Object ... elements ) { - // for row selector to work correctly as part of the test matrix, default value coercion needs to happen to columns - Map mapRow = Maps.newHashMapWithExpectedSize(6); - mapRow.put("dim0", NullHandling.nullToEmptyIfNeeded(dim0)); - mapRow.put("dim1", NullHandling.nullToEmptyIfNeeded(dim1)); - mapRow.put("dim2", dim2 != null ? dim2 : NullHandling.defaultStringValue()); - mapRow.put("timeDim", NullHandling.nullToEmptyIfNeeded(timeDim)); - mapRow.put("d0", d0 != null ? d0 : NullHandling.defaultDoubleValue()); - mapRow.put("f0", f0 != null ? f0 : NullHandling.defaultFloatValue()); - mapRow.put("l0", l0 != null ? l0 : NullHandling.defaultLongValue()); - return DEFAULT_PARSER.parseBatch(mapRow).get(0); + Preconditions.checkArgument(signature.size() == elements.length); + Map mapRow = Maps.newHashMapWithExpectedSize(signature.size()); + for (int i = 0; i < signature.size(); i++) { + final String columnName = signature.getColumnName(i); + final Object value = elements[i]; + mapRow.put(columnName, value); + } + return parser.parseBatch(mapRow).get(0); } @@ -184,6 +198,11 @@ static InputRow makeDefaultSchemaRow( protected final boolean optimize; protected final String testName; + // 'rowBasedWithoutTypeSignature' does not handle numeric null default values correctly, is equivalent to + // druid.generic.useDefaultValueForNull being set to false, regardless of how it is actually set. + // In other words, numeric null values will be treated as nulls instead of the default value + protected final boolean canTestNumericNullsAsDefaultValues; + protected StorageAdapter adapter; // JUnit creates a new test instance for every test method call. @@ -208,6 +227,8 @@ public BaseFilterTest( this.finisher = finisher; this.cnf = cnf; this.optimize = optimize; + this.canTestNumericNullsAsDefaultValues = + NullHandling.replaceWithDefault() && !testName.contains("finisher[rowBasedWithoutTypeSignature]"); } @Before diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java index 12e79cfe8794..8263538e0386 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java @@ -656,7 +656,7 @@ public void testNumericNullsAndZeros() null, StringComparators.NUMERIC ), - NullHandling.replaceWithDefault() ? ImmutableList.of("0", "2", "7") : ImmutableList.of("0") + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "2", "7") : ImmutableList.of("0") ); assertFilterMatches( @@ -670,7 +670,7 @@ public void testNumericNullsAndZeros() null, StringComparators.NUMERIC ), - NullHandling.replaceWithDefault() ? ImmutableList.of("0", "4", "6") : ImmutableList.of("0") + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "4", "6") : ImmutableList.of("0") ); assertFilterMatches( @@ -684,7 +684,9 @@ public void testNumericNullsAndZeros() null, StringComparators.NUMERIC ), - NullHandling.replaceWithDefault() ? ImmutableList.of("0", "3", "7") : ImmutableList.of("0") + NullHandling.replaceWithDefault() && canTestNumericNullsAsDefaultValues + ? ImmutableList.of("0", "3", "7") + : ImmutableList.of("0") ); } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java index dba2b6bb0751..40ccaa64dbb4 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java @@ -215,7 +215,7 @@ public void testJavascriptFilterWithLookupExtractionFn() @Test public void testNumericNull() { - if (NullHandling.replaceWithDefault()) { + if (canTestNumericNullsAsDefaultValues) { assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("f0", jsNullFilter, null), ImmutableList.of()); assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("d0", jsNullFilter, null), ImmutableList.of()); assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("l0", jsNullFilter, null), ImmutableList.of()); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java index 2b343fc79174..aa104cedd791 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java @@ -312,7 +312,7 @@ public void testSelectorWithLookupExtractionFn() @Test public void testNumericColumnNullsAndDefaults() { - if (NullHandling.replaceWithDefault()) { + if (canTestNumericNullsAsDefaultValues) { assertFilterMatches(new SelectorDimFilter("f0", "0", null), ImmutableList.of("0", "4")); assertFilterMatches(new SelectorDimFilter("d0", "0", null), ImmutableList.of("0", "2")); assertFilterMatches(new SelectorDimFilter("l0", "0", null), ImmutableList.of("0", "3")); From b8c1145feb382ee50c1c7e0db84a16beb0093629 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 13 Aug 2020 03:07:14 -0700 Subject: [PATCH 11/12] fixes --- integration-tests/docker/druid.sh | 2 +- .../resources/queries/wikipedia_editstream_queries.json | 2 +- .../druid/segment/QueryableIndexColumnCapabilitiesTest.java | 3 +-- .../org/apache/druid/segment/filter/BaseFilterTest.java | 6 +++--- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/integration-tests/docker/druid.sh b/integration-tests/docker/druid.sh index 489610268137..f3269ee79abc 100755 --- a/integration-tests/docker/druid.sh +++ b/integration-tests/docker/druid.sh @@ -90,7 +90,7 @@ setupData() # below s3 credentials needed to access the pre-existing s3 bucket setKey $DRUID_SERVICE druid.s3.accessKey AKIAJI7DG7CDECGBQ6NA setKey $DRUID_SERVICE druid.s3.secretKey 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 - if [[ "$DRUID_INTEGRATION_TEST_GROUP" = "query-retry" ]]; then + if [ "$DRUID_INTEGRATION_TEST_GROUP" = "query-retry" ]; then setKey $DRUID_SERVICE druid.extensions.loadList [\"druid-s3-extensions\",\"druid-integration-tests\"] else setKey $DRUID_SERVICE druid.extensions.loadList [\"druid-s3-extensions\"] diff --git a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json index 43c25e1f4edd..d28fe8c445f7 100644 --- a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json +++ b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json @@ -1409,7 +1409,7 @@ "minValue":"", "maxValue":"mmx._unknown", "errorMessage":null, - "hasNulls":false + "hasNulls":true }, "language":{ "type":"STRING", diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index 12da540d561f..cdfc1b149781 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -362,8 +362,7 @@ private void assertComplexColumnCapabilites(ColumnCapabilities caps) Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); Assert.assertFalse(caps.hasMultipleValues().isUnknown()); - // we don't know - Assert.assertTrue(caps.hasNulls().isUnknown()); + Assert.assertTrue(caps.hasNulls().isTrue()); } private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueType valueType) diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index 86233773ee39..931a794dbdae 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -163,7 +163,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest .build(); static InputRow makeDefaultSchemaRow( - @Nullable Object ... elements + @Nullable Object... elements ) { return makeSchemaRow(DEFAULT_PARSER, DEFAULT_ROW_SIGNATURE, elements); @@ -173,7 +173,7 @@ static InputRow makeDefaultSchemaRow( static InputRow makeSchemaRow( final InputRowParser> parser, final RowSignature signature, - @Nullable Object ... elements + @Nullable Object... elements ) { Preconditions.checkArgument(signature.size() == elements.length); @@ -181,7 +181,7 @@ static InputRow makeSchemaRow( for (int i = 0; i < signature.size(); i++) { final String columnName = signature.getColumnName(i); final Object value = elements[i]; - mapRow.put(columnName, value); + mapRow.put(columnName, value); } return parser.parseBatch(mapRow).get(0); } From 2a21abb884ef9be7453840206752fcde5ec80a8e Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 13 Aug 2020 04:44:04 -0700 Subject: [PATCH 12/12] oof --- .../apache/druid/segment/serde/ComplexColumnPartSerde.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java index 7024c37dcff1..5b0341340496 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java @@ -82,8 +82,8 @@ public Deserializer getDeserializer() // we don't currently know if complex column can have nulls (or can be multi-valued, but not making that change // since it isn't supported anywhere in the query engines) // longer term this needs to be captured by making the serde provide this information, and then this should - // no longer be set to unknown but rather the actual values - builder.setHasNulls(ColumnCapabilities.Capable.UNKNOWN); + // no longer be set to true but rather the actual values + builder.setHasNulls(ColumnCapabilities.Capable.TRUE); serde.deserializeColumn(buffer, builder); } };