diff --git a/docs/querying/segmentmetadataquery.md b/docs/querying/segmentmetadataquery.md index 6f59f1f6090f..424ace5bdce2 100644 --- a/docs/querying/segmentmetadataquery.md +++ b/docs/querying/segmentmetadataquery.md @@ -30,15 +30,18 @@ sidebar_label: "SegmentMetadata" Segment metadata queries return per-segment information about: -* Cardinality of all columns in the segment -* Min/max values of string type columns in the segment -* Estimated byte size for the segment columns if they were stored in a flat format * Number of rows stored inside the segment * Interval the segment covers -* Column type of all the columns in the segment -* Estimated total segment byte size in if it was stored in a flat format -* Is the segment rolled up +* Estimated total segment byte size if it was stored in a 'flat format' (e.g. a csv file) * Segment id +* Is the segment rolled up +* Detailed per column information such as: + - type + - cardinality + - min/max values + - presence of null values + - estimated 'flat format' byte size + ```json { @@ -68,10 +71,10 @@ The format of the result is: "id" : "some_id", "intervals" : [ "2013-05-13T00:00:00.000Z/2013-05-14T00:00:00.000Z" ], "columns" : { - "__time" : { "type" : "LONG", "hasMultipleValues" : false, "size" : 407240380, "cardinality" : null, "errorMessage" : null }, - "dim1" : { "type" : "STRING", "hasMultipleValues" : false, "size" : 100000, "cardinality" : 1944, "errorMessage" : null }, - "dim2" : { "type" : "STRING", "hasMultipleValues" : true, "size" : 100000, "cardinality" : 1504, "errorMessage" : null }, - "metric1" : { "type" : "FLOAT", "hasMultipleValues" : false, "size" : 100000, "cardinality" : null, "errorMessage" : null } + "__time" : { "type" : "LONG", "hasMultipleValues" : false, "hasNulls": false, "size" : 407240380, "cardinality" : null, "errorMessage" : null }, + "dim1" : { "type" : "STRING", "hasMultipleValues" : false, "hasNulls": false, "size" : 100000, "cardinality" : 1944, "errorMessage" : null }, + "dim2" 
: { "type" : "STRING", "hasMultipleValues" : true, "hasNulls": true, "size" : 100000, "cardinality" : 1504, "errorMessage" : null }, + "metric1" : { "type" : "FLOAT", "hasMultipleValues" : false, "hasNulls": false, "size" : 100000, "cardinality" : null, "errorMessage" : null } }, "aggregators" : { "metric1" : { "type" : "longSum", "name" : "metric1", "fieldName" : "metric1" } @@ -84,14 +87,14 @@ The format of the result is: } ] ``` -Dimension columns will have type `STRING`. -Metric columns will have type `FLOAT` or `LONG` or name of the underlying complex type such as `hyperUnique` in case of COMPLEX metric. +Dimension columns will have type `STRING`, `FLOAT`, `DOUBLE`, or `LONG`. +Metric columns will have type `FLOAT`, `DOUBLE`, or `LONG`, or the name of the underlying complex type such as `hyperUnique` in case of COMPLEX metric. Timestamp column will have type `LONG`. If the `errorMessage` field is non-null, you should not trust the other fields in the response. Their contents are undefined. -Only columns which are dimensions (i.e., have type `STRING`) will have any cardinality. Rest of the columns (timestamp and metric columns) will show cardinality as `null`. +Only columns which are dictionary encoded (i.e., have type `STRING`) will have any cardinality. Rest of the columns (timestamp and metric columns) will show cardinality as `null`. 
## intervals diff --git a/integration-tests/docker/druid.sh b/integration-tests/docker/druid.sh index 489610268137..f3269ee79abc 100755 --- a/integration-tests/docker/druid.sh +++ b/integration-tests/docker/druid.sh @@ -90,7 +90,7 @@ setupData() # below s3 credentials needed to access the pre-existing s3 bucket setKey $DRUID_SERVICE druid.s3.accessKey AKIAJI7DG7CDECGBQ6NA setKey $DRUID_SERVICE druid.s3.secretKey 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 - if [[ "$DRUID_INTEGRATION_TEST_GROUP" = "query-retry" ]]; then + if [ "$DRUID_INTEGRATION_TEST_GROUP" = "query-retry" ]; then setKey $DRUID_SERVICE druid.extensions.loadList [\"druid-s3-extensions\",\"druid-integration-tests\"] else setKey $DRUID_SERVICE druid.extensions.loadList [\"druid-s3-extensions\"] diff --git a/integration-tests/src/test/resources/queries/twitterstream_queries.json b/integration-tests/src/test/resources/queries/twitterstream_queries.json index efb024d1d781..a0796fd4f9b8 100644 --- a/integration-tests/src/test/resources/queries/twitterstream_queries.json +++ b/integration-tests/src/test/resources/queries/twitterstream_queries.json @@ -592,66 +592,69 @@ }, "expectedResults": [ { - "id": "twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9", - "intervals": ["2013-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z"], - "columns": { - "has_links": { - "type": "STRING", - "hasMultipleValues": false, - "size": 0, - "cardinality": 2, + "id":"twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9", + "intervals":["2013-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z"], + "columns":{ + "has_links":{ + "type":"STRING", + "hasMultipleValues":false, + "size":0, + "cardinality":2, "minValue":"No", "maxValue":"Yes", - "errorMessage": null + "errorMessage":null, + "hasNulls":false } }, - "size": 0, - "numRows": 3702583, - "aggregators": null, - "timestampSpec": null, - "queryGranularity": null, + "size":0, + "numRows":3702583, + 
"aggregators":null, + "timestampSpec":null, + "queryGranularity":null, "rollup":null }, { - "id": "twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9", - "intervals": ["2013-01-02T00:00:00.000Z/2013-01-03T00:00:00.000Z"], - "columns": { - "has_links": { - "type": "STRING", - "hasMultipleValues": false, - "size": 0, - "cardinality": 2, + "id":"twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9", + "intervals":["2013-01-02T00:00:00.000Z/2013-01-03T00:00:00.000Z"], + "columns":{ + "has_links":{ + "type":"STRING", + "hasMultipleValues":false, + "size":0, + "cardinality":2, "minValue":"No", "maxValue":"Yes", - "errorMessage": null + "errorMessage":null, + "hasNulls":false } }, - "size": 0, - "numRows": 3743002, - "aggregators": null, - "timestampSpec": null, - "queryGranularity": null, + "size":0, + "numRows":3743002, + "aggregators":null, + "timestampSpec":null, + "queryGranularity":null, "rollup":null }, { - "id": "twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9", - "intervals": ["2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z"], - "columns": { - "has_links": { - "type": "STRING", - "hasMultipleValues": false, - "size": 0, - "cardinality": 2, + "id":"twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9", + "intervals":["2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z"], + "columns":{ + "has_links":{ + "type":"STRING", + "hasMultipleValues":false, + "size":0, + "cardinality":2, "minValue":"No", "maxValue":"Yes", - "errorMessage": null + "errorMessage":null, + "hasNulls":false } }, - "size": 0, + "size":0, "numRows":3502959, - "aggregators": null, - "timestampSpec": null, - "queryGranularity": null, + "aggregators":null, + "timestampSpec":null, + "queryGranularity":null, "rollup":null } ] diff --git a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json 
b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json index 92dc582e2ec5..d28fe8c445f7 100644 --- a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json +++ b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json @@ -1398,33 +1398,35 @@ }, "expectedResults": [ { - "id": "wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9", - "intervals": ["2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z"], - "columns": { - "country_name": { - "type": "STRING", - "hasMultipleValues": false, - "size": 0, - "cardinality": 208, + "id":"wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9", + "intervals":["2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z"], + "columns":{ + "country_name":{ + "type":"STRING", + "hasMultipleValues":false, + "size":0, + "cardinality":208, "minValue":"", "maxValue":"mmx._unknown", - "errorMessage": null + "errorMessage":null, + "hasNulls":true }, - "language": { - "type": "STRING", - "hasMultipleValues": false, - "size": 0, - "cardinality": 36, + "language":{ + "type":"STRING", + "hasMultipleValues":false, + "size":0, + "cardinality":36, "minValue":"ar", "maxValue":"zh", - "errorMessage": null + "errorMessage":null, + "hasNulls":false } }, - "size": 0, - "numRows": 4462111, - "aggregators": null, - "timestampSpec": null, - "queryGranularity": null, + "size":0, + "numRows":4462111, + "aggregators":null, + "timestampSpec":null, + "queryGranularity":null, "rollup":null } ] diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 15a081d8fcad..2388004d629a 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -100,6 +100,11 @@ public Map 
analyze(Segment segment) Map columns = new TreeMap<>(); + Function adapterCapabilitiesFn = + storageAdapter instanceof IncrementalIndexStorageAdapter + ? ((IncrementalIndexStorageAdapter) storageAdapter)::getSnapshotColumnCapabilities + : storageAdapter::getColumnCapabilities; + for (String columnName : columnNames) { final ColumnHolder columnHolder = index == null ? null : index.getColumnHolder(columnName); final ColumnCapabilities capabilities; @@ -108,11 +113,7 @@ public Map analyze(Segment segment) } else { // this can be removed if we get to the point where IncrementalIndexStorageAdapter.getColumnCapabilities // accurately reports the capabilities - if (storageAdapter instanceof IncrementalIndexStorageAdapter) { - capabilities = ((IncrementalIndexStorageAdapter) storageAdapter).getSnapshotColumnCapabilities(columnName); - } else { - capabilities = storageAdapter.getColumnCapabilities(columnName); - } + capabilities = adapterCapabilitiesFn.apply(columnName); } final ColumnAnalysis analysis; @@ -146,7 +147,7 @@ public Map analyze(Segment segment) } // Add time column too - ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(ColumnHolder.TIME_COLUMN_NAME); + ColumnCapabilities timeCapabilities = adapterCapabilitiesFn.apply(ColumnHolder.TIME_COLUMN_NAME); if (timeCapabilities == null) { timeCapabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); } @@ -192,6 +193,7 @@ private ColumnAnalysis analyzeNumericColumn( return new ColumnAnalysis( capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), + capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls size, null, null, @@ -242,6 +244,7 @@ private ColumnAnalysis analyzeStringColumn( return new ColumnAnalysis( capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), + capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls
size, analyzingCardinality() ? cardinality : 0, min, @@ -319,6 +322,7 @@ public Long accumulate(Long accumulated, Cursor cursor) return new ColumnAnalysis( capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), + capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls size, cardinality, min, @@ -335,6 +339,7 @@ private ColumnAnalysis analyzeComplexColumn( { try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) { final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue(); + final boolean hasNulls = capabilities != null && capabilities.hasNulls().isMaybeTrue(); long size = 0; if (analyzingSize() && complexColumn != null) { @@ -345,7 +350,7 @@ private ColumnAnalysis analyzeComplexColumn( final Function inputSizeFn = serde.inputSizeFn(); if (inputSizeFn == null) { - return new ColumnAnalysis(typeName, hasMultipleValues, 0, null, null, null, null); + return new ColumnAnalysis(typeName, hasMultipleValues, hasNulls, 0, null, null, null, null); } final int length = complexColumn.getLength(); @@ -357,6 +362,7 @@ private ColumnAnalysis analyzeComplexColumn( return new ColumnAnalysis( typeName, hasMultipleValues, + hasNulls, size, null, null, diff --git a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java index aa9da8d40af4..4edb4b7517dc 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java @@ -33,11 +33,12 @@ public class ColumnAnalysis public static ColumnAnalysis error(String reason) { - return new ColumnAnalysis("STRING", false, -1, null, null, null, ERROR_PREFIX + reason); + return new ColumnAnalysis("STRING", false, false, -1, null, null, null, 
ERROR_PREFIX + reason); } private final String type; private final boolean hasMultipleValues; + private final boolean hasNulls; private final long size; private final Integer cardinality; private final Comparable minValue; @@ -48,6 +49,7 @@ public static ColumnAnalysis error(String reason) public ColumnAnalysis( @JsonProperty("type") String type, @JsonProperty("hasMultipleValues") boolean hasMultipleValues, + @JsonProperty("hasNulls") boolean hasNulls, @JsonProperty("size") long size, @JsonProperty("cardinality") Integer cardinality, @JsonProperty("minValue") Comparable minValue, @@ -57,6 +59,7 @@ public ColumnAnalysis( { this.type = type; this.hasMultipleValues = hasMultipleValues; + this.hasNulls = hasNulls; this.size = size; this.cardinality = cardinality; this.minValue = minValue; @@ -108,6 +111,11 @@ public String getErrorMessage() return errorMessage; } + @JsonProperty + public boolean isHasNulls() + { + return hasNulls; + } public boolean isError() { return (errorMessage != null && !errorMessage.isEmpty()); @@ -144,7 +152,16 @@ public ColumnAnalysis fold(ColumnAnalysis rhs) Comparable newMin = choose(minValue, rhs.minValue, false); Comparable newMax = choose(maxValue, rhs.maxValue, true); - return new ColumnAnalysis(type, multipleValues, size + rhs.getSize(), cardinality, newMin, newMax, null); + return new ColumnAnalysis( + type, + multipleValues, + hasNulls || rhs.hasNulls, + size + rhs.getSize(), + cardinality, + newMin, + newMax, + null + ); } private T choose(T obj1, T obj2, boolean max) @@ -165,6 +182,7 @@ public String toString() return "ColumnAnalysis{" + "type='" + type + '\'' + ", hasMultipleValues=" + hasMultipleValues + + ", hasNulls=" + hasNulls + ", size=" + size + ", cardinality=" + cardinality + ", minValue=" + minValue + @@ -184,6 +202,7 @@ public boolean equals(Object o) } ColumnAnalysis that = (ColumnAnalysis) o; return hasMultipleValues == that.hasMultipleValues && + hasNulls == that.hasNulls && size == that.size && Objects.equals(type, 
that.type) && Objects.equals(cardinality, that.cardinality) && @@ -195,6 +214,6 @@ public boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(type, hasMultipleValues, size, cardinality, minValue, maxValue, errorMessage); + return Objects.hash(type, hasMultipleValues, hasNulls, size, cardinality, minValue, maxValue, errorMessage); } } diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java index 277deb94e9bb..00ae11da956f 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java @@ -126,6 +126,7 @@ public interface DimensionIndexer * @param reportParseExceptions * @return An array containing an encoded representation of the input row value. */ + @Nullable EncodedKeyComponentType processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions); /** diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index 677ed41ba239..beb58df6208b 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -41,22 +41,26 @@ public class DoubleDimensionIndexer implements DimensionIndexer DOUBLE_COMPARATOR = Comparators.naturalNullsFirst(); - private final ColumnCapabilitiesImpl capabilities = - ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE); + private volatile boolean hasNulls = false; + @Nullable @Override public Double processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { if (dimValues instanceof List) { throw new UnsupportedOperationException("Numeric columns do not support multivalue rows."); } - 
return DimensionHandlerUtils.convertObjectToDouble(dimValues, reportParseExceptions); + Double d = DimensionHandlerUtils.convertObjectToDouble(dimValues, reportParseExceptions); + if (d == null) { + hasNulls = NullHandling.sqlCompatible(); + } + return d; } @Override public void setSparseIndexed() { - // no-op, double columns do not have a dictionary to track null values + hasNulls = NullHandling.sqlCompatible(); } @Override @@ -98,7 +102,11 @@ public int getCardinality() @Override public ColumnCapabilities getColumnCapabilities() { - return capabilities; + ColumnCapabilitiesImpl builder = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE); + if (hasNulls) { + builder.setHasNulls(hasNulls); + } + return builder; } @Override @@ -125,7 +133,7 @@ class IndexerDoubleColumnSelector implements DoubleColumnSelector public boolean isNull() { final Object[] dims = currEntry.get().getDims(); - return dimIndex >= dims.length || dims[dimIndex] == null; + return hasNulls && (dimIndex >= dims.length || dims[dimIndex] == null); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java index 2db5b59a668d..8d86315d7358 100644 --- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java @@ -41,9 +41,9 @@ public class FloatDimensionIndexer implements DimensionIndexer FLOAT_COMPARATOR = Comparators.naturalNullsFirst(); - private final ColumnCapabilitiesImpl capabilities = - ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT); + private volatile boolean hasNulls = false; + @Nullable @Override public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { @@ -51,13 +51,17 @@ public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimVal 
throw new UnsupportedOperationException("Numeric columns do not support multivalue rows."); } - return DimensionHandlerUtils.convertObjectToFloat(dimValues, reportParseExceptions); + Float f = DimensionHandlerUtils.convertObjectToFloat(dimValues, reportParseExceptions); + if (f == null) { + hasNulls = NullHandling.sqlCompatible(); + } + return f; } @Override public void setSparseIndexed() { - // no-op, float columns do not have a dictionary to track null values + hasNulls = NullHandling.sqlCompatible(); } @Override @@ -99,7 +103,11 @@ public int getCardinality() @Override public ColumnCapabilities getColumnCapabilities() { - return capabilities; + ColumnCapabilitiesImpl builder = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT); + if (hasNulls) { + builder.setHasNulls(hasNulls); + } + return builder; } @Override @@ -126,7 +134,7 @@ class IndexerFloatColumnSelector implements FloatColumnSelector public boolean isNull() { final Object[] dims = currEntry.get().getDims(); - return dimIndex >= dims.length || dims[dimIndex] == null; + return hasNulls && (dimIndex >= dims.length || dims[dimIndex] == null); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java index 19ce74574be7..caf9f3c3f76d 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java @@ -115,6 +115,12 @@ public boolean multipleValues() { return false; } + + @Override + public boolean hasNulls() + { + return false; + } }; public static final ColumnCapabilities.CoercionLogic METRIC_CAPABILITY_MERGE_LOGIC = @@ -143,6 +149,12 @@ public boolean multipleValues() { return false; } + + @Override + public boolean hasNulls() + { + return false; + } }; private final ObjectMapper mapper; diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java 
b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java index 266405504b4f..d9534bf77a5b 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java @@ -41,10 +41,10 @@ public class LongDimensionIndexer implements DimensionIndexer { public static final Comparator LONG_COMPARATOR = Comparators.naturalNullsFirst(); - private final ColumnCapabilitiesImpl capabilities = - ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); + private volatile boolean hasNulls = false; + @Nullable @Override public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { @@ -52,13 +52,17 @@ public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValu throw new UnsupportedOperationException("Numeric columns do not support multivalue rows."); } - return DimensionHandlerUtils.convertObjectToLong(dimValues, reportParseExceptions); + Long l = DimensionHandlerUtils.convertObjectToLong(dimValues, reportParseExceptions); + if (l == null) { + hasNulls = NullHandling.sqlCompatible(); + } + return l; } @Override public void setSparseIndexed() { - // no-op, long columns do not have a dictionary to track null values + hasNulls = NullHandling.sqlCompatible(); } @Override @@ -100,7 +104,11 @@ public int getCardinality() @Override public ColumnCapabilities getColumnCapabilities() { - return capabilities; + ColumnCapabilitiesImpl builder = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); + if (hasNulls) { + builder.setHasNulls(hasNulls); + } + return builder; } @Override @@ -127,7 +135,7 @@ class IndexerLongColumnSelector implements LongColumnSelector public boolean isNull() { final Object[] dims = currEntry.get().getDims(); - return dimIndex >= dims.length || dims[dimIndex] == null; + return hasNulls && (dimIndex >= dims.length || dims[dimIndex] == 
null); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index bca0a5c72a9c..4f9fbfdb417a 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -500,6 +500,10 @@ public ColumnCapabilities getColumnCapabilities() if (allValuesEncoded) { capabilites.setDictionaryEncoded(true); } + + if (isSparse || dimLookup.idForNull != ABSENT_VALUE_ID) { + capabilites.setHasNulls(true); + } return capabilites; } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java index cde454b70524..f20d99f9a85a 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java @@ -29,11 +29,7 @@ */ public class ColumnBuilder { - @Nullable - private ValueType type = null; - private boolean hasMultipleValues = false; - private boolean filterable = false; - private boolean dictionaryEncoded = false; + private final ColumnCapabilitiesImpl capabilitiesBuilder = ColumnCapabilitiesImpl.createDefault(); @Nullable private Supplier columnSupplier = null; @@ -44,6 +40,7 @@ public class ColumnBuilder @Nullable private SmooshedFileMapper fileMapper = null; + public ColumnBuilder setFileMapper(SmooshedFileMapper fileMapper) { this.fileMapper = fileMapper; @@ -57,27 +54,29 @@ public SmooshedFileMapper getFileMapper() public ColumnBuilder setType(ValueType type) { - this.type = type; + this.capabilitiesBuilder.setType(type); return this; } public ColumnBuilder setHasMultipleValues(boolean hasMultipleValues) { - this.hasMultipleValues = hasMultipleValues; + this.capabilitiesBuilder.setHasMultipleValues(hasMultipleValues); return this; } public 
ColumnBuilder setDictionaryEncodedColumnSupplier(Supplier> columnSupplier) { this.columnSupplier = columnSupplier; - this.dictionaryEncoded = true; + this.capabilitiesBuilder.setDictionaryEncoded(true); + this.capabilitiesBuilder.setDictionaryValuesSorted(true); + this.capabilitiesBuilder.setDictionaryValuesUnique(true); return this; } @SuppressWarnings("unused") public ColumnBuilder setFilterable(boolean filterable) { - this.filterable = filterable; + this.capabilitiesBuilder.setFilterable(filterable); return this; } @@ -96,32 +95,32 @@ public ColumnBuilder setNumericColumnSupplier(Supplier public ColumnBuilder setBitmapIndex(Supplier bitmapIndex) { this.bitmapIndex = bitmapIndex; + this.capabilitiesBuilder.setHasBitmapIndexes(true); return this; } public ColumnBuilder setSpatialIndex(Supplier spatialIndex) { this.spatialIndex = spatialIndex; + this.capabilitiesBuilder.setHasSpatialIndexes(true); + return this; + } + + public ColumnBuilder setHasNulls(boolean nullable) + { + this.capabilitiesBuilder.setHasNulls(nullable); + return this; + } + public ColumnBuilder setHasNulls(ColumnCapabilities.Capable nullable) + { + this.capabilitiesBuilder.setHasNulls(nullable); return this; } public ColumnHolder build() { - Preconditions.checkState(type != null, "Type must be set."); - - return new SimpleColumnHolder( - new ColumnCapabilitiesImpl() - .setType(type) - .setDictionaryEncoded(dictionaryEncoded) - .setHasBitmapIndexes(bitmapIndex != null) - .setDictionaryValuesSorted(dictionaryEncoded) - .setDictionaryValuesUnique(dictionaryEncoded) - .setHasSpatialIndexes(spatialIndex != null) - .setHasMultipleValues(hasMultipleValues) - .setFilterable(filterable), - columnSupplier, - bitmapIndex, - spatialIndex - ); + Preconditions.checkState(capabilitiesBuilder.getType() != null, "Type must be set."); + + return new SimpleColumnHolder(capabilitiesBuilder, columnSupplier, bitmapIndex, spatialIndex); } } diff --git 
a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index ee252aac93cc..86e88107e92c 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -81,6 +81,12 @@ public interface ColumnCapabilities */ boolean isFilterable(); + /** + * Does this column contain null values? If so, callers, especially for primitive numeric columns, will need to check + * for null value rows and act accordingly + */ + Capable hasNulls(); + enum Capable { FALSE, @@ -167,14 +173,14 @@ interface CoercionLogic boolean dictionaryEncoded(); /** - * If {@link ColumnCapabilities#areDictionaryValuesSorted()} ()} is {@link Capable#UNKNOWN}, define if it should be treated - * as true or false. + * If {@link ColumnCapabilities#areDictionaryValuesSorted()} ()} is {@link Capable#UNKNOWN}, define if it should be + * treated as true or false. */ boolean dictionaryValuesSorted(); /** - * If {@link ColumnCapabilities#areDictionaryValuesUnique()} ()} is {@link Capable#UNKNOWN}, define if it should be treated - * as true or false. + * If {@link ColumnCapabilities#areDictionaryValuesUnique()} ()} is {@link Capable#UNKNOWN}, define if it should be + * treated as true or false. */ boolean dictionaryValuesUnique(); @@ -183,5 +189,11 @@ interface CoercionLogic * as true or false. 
*/ boolean multipleValues(); + + /** + * If {@link ColumnCapabilities#hasNulls()} is {@link Capable#UNKNOWN}, define if it should be treated as true + * or false + */ + boolean hasNulls(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 9473efde0e70..f389adb02176 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonSetter; import com.google.common.base.Preconditions; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.ISE; import javax.annotation.Nullable; @@ -32,6 +33,39 @@ */ public class ColumnCapabilitiesImpl implements ColumnCapabilities { + private static final CoercionLogic ALL_FALSE = new CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return false; + } + + @Override + public boolean dictionaryValuesSorted() + { + return false; + } + + @Override + public boolean dictionaryValuesUnique() + { + return false; + } + + @Override + public boolean multipleValues() + { + return false; + } + + @Override + public boolean hasNulls() + { + return false; + } + }; + public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities other) { final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); @@ -43,6 +77,7 @@ public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities o capabilities.hasMultipleValues = other.hasMultipleValues(); capabilities.dictionaryValuesSorted = other.areDictionaryValuesSorted(); capabilities.dictionaryValuesUnique = other.areDictionaryValuesUnique(); + capabilities.hasNulls = other.hasNulls(); capabilities.filterable = 
other.isFilterable(); } return capabilities; @@ -64,6 +99,7 @@ public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(coerce.dictionaryValuesSorted()); copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(coerce.dictionaryValuesUnique()); copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(coerce.multipleValues()); + copy.hasNulls = copy.hasNulls.coerceUnknownToBoolean(coerce.hasNulls()); return copy; } @@ -97,6 +133,7 @@ public static ColumnCapabilitiesImpl merge( merged.hasMultipleValues = merged.hasMultipleValues.or(otherSnapshot.hasMultipleValues()); merged.dictionaryValuesSorted = merged.dictionaryValuesSorted.and(otherSnapshot.areDictionaryValuesSorted()); merged.dictionaryValuesUnique = merged.dictionaryValuesUnique.and(otherSnapshot.areDictionaryValuesUnique()); + merged.hasNulls = merged.hasNulls.or(other.hasNulls()); merged.hasInvertedIndexes |= otherSnapshot.hasBitmapIndexes(); merged.hasSpatialIndexes |= otherSnapshot.hasSpatialIndexes(); merged.filterable &= otherSnapshot.isFilterable(); @@ -104,19 +141,31 @@ public static ColumnCapabilitiesImpl merge( return merged; } + /** + * Creates a {@link ColumnCapabilitiesImpl} where all {@link ColumnCapabilities.Capable} that default to unknown + * instead are coerced to true or false + */ + public static ColumnCapabilitiesImpl createDefault() + { + return ColumnCapabilitiesImpl.snapshot(new ColumnCapabilitiesImpl(), ALL_FALSE); + } /** * Create a no frills, simple column with {@link ValueType} set and everything else false */ public static ColumnCapabilitiesImpl createSimpleNumericColumnCapabilities(ValueType valueType) { - return new ColumnCapabilitiesImpl().setType(valueType) - .setHasMultipleValues(false) - .setHasBitmapIndexes(false) - .setDictionaryEncoded(false) - .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(false) - 
.setHasSpatialIndexes(false); + ColumnCapabilitiesImpl builder = new ColumnCapabilitiesImpl().setType(valueType) + .setHasMultipleValues(false) + .setHasBitmapIndexes(false) + .setDictionaryEncoded(false) + .setDictionaryValuesSorted(false) + .setDictionaryValuesUnique(false) + .setHasSpatialIndexes(false); + if (NullHandling.replaceWithDefault()) { + builder.setHasNulls(false); + } + return builder; } @Nullable @@ -134,6 +183,8 @@ public static ColumnCapabilitiesImpl createSimpleNumericColumnCapabilities(Value private Capable dictionaryValuesUnique = Capable.UNKNOWN; @JsonIgnore private boolean filterable; + @JsonIgnore + private Capable hasNulls = Capable.UNKNOWN; @Override @JsonProperty @@ -225,6 +276,24 @@ public ColumnCapabilitiesImpl setHasMultipleValues(boolean hasMultipleValues) return this; } + @Override + public Capable hasNulls() + { + return hasNulls; + } + + public ColumnCapabilitiesImpl setHasNulls(boolean hasNulls) + { + this.hasNulls = Capable.of(hasNulls); + return this; + } + + public ColumnCapabilitiesImpl setHasNulls(Capable hasNulls) + { + this.hasNulls = hasNulls; + return this; + } + @Override public boolean isFilterable() { diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index bb4377f4e396..baffbf40abeb 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -104,8 +104,6 @@ public abstract class IncrementalIndex extends AbstractIndex implements Iterable, Closeable { - private volatile DateTime maxIngestedEventTime; - // Used to discover ValueType based on the class of values in a row // Also used to convert between the duplicate ValueType enums in DimensionSchema (druid-api) and main druid. 
public static final Map TYPE_MAP = ImmutableMap.builder() @@ -259,6 +257,9 @@ public ColumnCapabilities getColumnCapabilities(String columnName) private final ThreadLocal in = new ThreadLocal<>(); private final Supplier rowSupplier = in::get; + private volatile DateTime maxIngestedEventTime; + + /** * Setting deserializeComplexMetrics to false is necessary for intermediate aggregation such as groupBy that * should not deserialize input columns using ComplexMetricSerde for aggregators that return complex metrics. @@ -474,10 +475,6 @@ public IncrementalIndex buildOffheap(final NonBlockingPool bufferPoo } } - public boolean isRollup() - { - return rollup; - } public abstract FactsHolder getFacts(); @@ -572,6 +569,11 @@ public List getParseExceptionMessages() } } + public boolean isRollup() + { + return rollup; + } + @Override public void close() { @@ -926,15 +928,18 @@ public List getDimensionOrder() private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType type) { - if (type == ValueType.STRING) { - // we start out as not having multiple values, but this might change as we encounter them - return new ColumnCapabilitiesImpl().setType(type) - .setHasBitmapIndexes(true) - .setDictionaryEncoded(true) - .setDictionaryValuesUnique(true) - .setDictionaryValuesSorted(false); - } else { - return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type); + switch (type) { + case STRING: + // we start out as not having multiple values, but this might change as we encounter them + return new ColumnCapabilitiesImpl().setType(type) + .setHasBitmapIndexes(true) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setDictionaryValuesSorted(false); + case COMPLEX: + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type).setHasNulls(true); + default: + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type); } } @@ -1142,7 +1147,8 @@ public MetricDesc(int index, AggregatorFactory factory) this.type = 
typeInfo; } else { // in an ideal world complex type reports its actual column capabilities... - capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.COMPLEX); + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.COMPLEX) + .setHasNulls(ColumnCapabilities.Capable.TRUE); this.type = ComplexMetrics.getSerdeForType(typeInfo).getTypeName(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 9beb16820abc..036e18e7a1c0 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -78,6 +78,12 @@ public boolean multipleValues() { return true; } + + @Override + public boolean hasNulls() + { + return true; + } }; private static final ColumnCapabilities.CoercionLogic SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC = @@ -106,6 +112,12 @@ public boolean multipleValues() { return false; } + + @Override + public boolean hasNulls() + { + return false; + } }; final IncrementalIndex index; diff --git a/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java index dc0278baaed0..5b0341340496 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.GenericColumnSerializer; +import org.apache.druid.segment.column.ColumnCapabilities; import javax.annotation.Nullable; @@ -78,6 +79,11 @@ public 
Deserializer getDeserializer() { return (buffer, builder, columnConfig) -> { if (serde != null) { + // we don't currently know if complex column can have nulls (or can be multi-valued, but not making that change + // since it isn't supported anywhere in the query engines) + // longer term this needs to be captured by making the serde provide this information, and then this should + // no longer be set to true but rather the actual values + builder.setHasNulls(ColumnCapabilities.Capable.TRUE); serde.deserializeColumn(buffer, builder); } }; diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java index a0a733ec6f62..1005c9a6eff5 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java @@ -321,6 +321,8 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo rMultiValuedColumn = null; } + final String firstDictionaryEntry = rDictionary.get(0); + DictionaryEncodedColumnSupplier dictionaryEncodedColumnSupplier = new DictionaryEncodedColumnSupplier( rDictionary, rSingleValuedColumn, @@ -329,6 +331,7 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo ); builder .setHasMultipleValues(hasMultipleValues) + .setHasNulls(firstDictionaryEntry == null) .setDictionaryEncodedColumnSupplier(dictionaryEncodedColumnSupplier); if (!Feature.NO_BITMAP_INDEX.isSet(rFlags)) { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java index e2a5cd213528..53d43f4b841e 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java +++ 
b/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java @@ -152,13 +152,17 @@ public Deserializer getDeserializer() buffer.position(initialPos + offset); final ImmutableBitmap bitmap; + final boolean hasNulls; if (buffer.hasRemaining()) { bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + hasNulls = !bitmap.isEmpty(); } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + hasNulls = false; } builder.setType(ValueType.DOUBLE) .setHasMultipleValues(false) + .setHasNulls(hasNulls) .setNumericColumnSupplier(new DoubleNumericColumnSupplier(column, bitmap)); }; } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java index 77d900dbe2ab..6e33f73cad66 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java @@ -149,13 +149,17 @@ public Deserializer getDeserializer() ); buffer.position(initialPos + offset); final ImmutableBitmap bitmap; + final boolean hasNulls; if (buffer.hasRemaining()) { bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + hasNulls = !bitmap.isEmpty(); } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + hasNulls = false; } builder.setType(ValueType.FLOAT) .setHasMultipleValues(false) + .setHasNulls(hasNulls) .setNumericColumnSupplier(new FloatNumericColumnSupplier(column, bitmap)); }; } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java index 75344ec9340c..ed7ecbf6a9c1 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java 
+++ b/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java @@ -151,13 +151,17 @@ public Deserializer getDeserializer() ); buffer.position(initialPos + offset); final ImmutableBitmap bitmap; + final boolean hasNulls; if (buffer.hasRemaining()) { bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + hasNulls = !bitmap.isEmpty(); } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + hasNulls = false; } builder.setType(ValueType.LONG) .setHasMultipleValues(false) + .setHasNulls(hasNulls) .setNumericColumnSupplier(new LongNumericColumnSupplier(column, bitmap)); }; } diff --git a/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java b/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java index cb93b2cf8874..d6d27b88332a 100644 --- a/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java +++ b/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java @@ -156,6 +156,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.LONG.toString(), false, + false, 100, null, null, @@ -166,6 +167,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 120, 1, DIM_VALUE, @@ -176,6 +178,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.DOUBLE.toString(), false, + false, 80, null, null, @@ -198,6 +201,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.LONG.toString(), false, + false, 100, null, null, @@ -208,6 +212,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 120, 1, DIM_VALUE, @@ -218,6 +223,7 @@ public static Collection dataFeeder() new ColumnAnalysis( ValueType.FLOAT.toString(), false, + false, 80, null, null, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java 
b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java index f823937ef03b..752db63d9832 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java @@ -81,6 +81,7 @@ public void testCacheStrategy() throws Exception new ColumnAnalysis( ValueType.STRING.toString(), true, + false, 10881, 1, "preferred", diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java index 9da06719d9c2..00846ebb5db5 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java @@ -208,6 +208,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.LONG.toString(), false, + false, 12090, null, null, @@ -218,6 +219,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.DOUBLE.toString(), false, + false, 9672, null, null, @@ -228,6 +230,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.STRING.toString(), false, + false, preferedSize1, 1, "preferred", @@ -249,6 +252,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.LONG.toString(), false, + false, 12090, null, null, @@ -259,6 +263,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.DOUBLE.toString(), false, + false, 9672, null, null, @@ -269,6 +274,7 @@ public SegmentMetadataQueryTest( new ColumnAnalysis( ValueType.STRING.toString(), false, + false, placementSize2, 1, null, @@ -305,6 +311,7 @@ public void testSegmentMetadataQueryWithRollupMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 0, null, @@ -315,6 +322,7 @@ public void testSegmentMetadataQueryWithRollupMerge() new 
ColumnAnalysis( ValueType.STRING.toString(), true, + false, 0, 0, null, @@ -373,6 +381,7 @@ public void testSegmentMetadataQueryWithHasMultipleValuesMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 1, null, @@ -383,6 +392,7 @@ public void testSegmentMetadataQueryWithHasMultipleValuesMerge() new ColumnAnalysis( ValueType.STRING.toString(), true, + false, 0, 9, null, @@ -441,6 +451,7 @@ public void testSegmentMetadataQueryWithComplexColumnMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 1, null, @@ -451,6 +462,7 @@ public void testSegmentMetadataQueryWithComplexColumnMerge() new ColumnAnalysis( "hyperUnique", false, + true, 0, null, null, @@ -510,6 +522,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge() ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), false, + false, size1 + size2, 1, "preferred", @@ -531,6 +544,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge2() ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), false, + false, size1 + size2, 3, "spot", @@ -552,6 +566,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge3() ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), false, + false, size1 + size2, 9, "automotive", @@ -574,6 +589,7 @@ private void testSegmentMetadataQueryWithDefaultAnalysisMerge( new ColumnAnalysis( ValueType.LONG.toString(), false, + false, 12090 * 2, null, null, @@ -584,6 +600,7 @@ private void testSegmentMetadataQueryWithDefaultAnalysisMerge( new ColumnAnalysis( ValueType.DOUBLE.toString(), false, + false, 9672 * 2, null, null, @@ -638,6 +655,7 @@ public void testSegmentMetadataQueryWithNoAnalysisTypesMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 0, null, @@ -700,6 +718,7 @@ public void testSegmentMetadataQueryWithAggregatorsMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 0, null, @@ -758,6 +777,7 @@ public void 
testSegmentMetadataQueryWithTimestampSpecMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 0, null, @@ -816,6 +836,7 @@ public void testSegmentMetadataQueryWithQueryGranularityMerge() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, 0, 0, null, @@ -1273,4 +1294,52 @@ public void testAnanlysisTypesBeingSet() Assert.assertEquals(analysisWCfg2, expectedAnalysisWCfg2); } + @Test + public void testLongNullableColumn() + { + ColumnAnalysis analysis = new ColumnAnalysis( + ValueType.LONG.toString(), + false, + NullHandling.replaceWithDefault() ? false : true, + 19344, + null, + null, + null, + null + ); + testSegmentMetadataQueryWithDefaultAnalysisMerge("longNumericNull", analysis); + } + + @Test + public void testDoubleNullableColumn() + { + ColumnAnalysis analysis = new ColumnAnalysis( + ValueType.DOUBLE.toString(), + false, + NullHandling.replaceWithDefault() ? false : true, + 19344, + null, + null, + null, + null + ); + testSegmentMetadataQueryWithDefaultAnalysisMerge("doubleNumericNull", analysis); + } + + + @Test + public void testFloatNullableColumn() + { + ColumnAnalysis analysis = new ColumnAnalysis( + ValueType.FLOAT.toString(), + false, + NullHandling.replaceWithDefault() ? false : true, + 19344, + null, + null, + null, + null + ); + testSegmentMetadataQueryWithDefaultAnalysisMerge("floatNumericNull", analysis); + } } diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java index 55d8b18babd3..2680d4b30e25 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java @@ -103,6 +103,7 @@ public void testSegmentMetadataUnionQuery() new ColumnAnalysis( ValueType.STRING.toString(), false, + false, mmap ? 
43524 : 43056, 1, "preferred", diff --git a/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java b/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java index ccf3363737aa..798d67a06fb1 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java @@ -36,13 +36,13 @@ private void assertSerDe(ColumnAnalysis analysis) throws Exception @Test public void testFoldStringColumns() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, 1L, 2, "aaA", "Zzz", null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", true, 3L, 4, "aAA", "ZZz", null); + final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, false, 1L, 2, "aaA", "Zzz", null); + final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", true, false, 3L, 4, "aAA", "ZZz", null); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", true, 4L, 4, "aAA", "Zzz", null); + final ColumnAnalysis expected = new ColumnAnalysis("STRING", true, false, 4L, 4, "aAA", "Zzz", null); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); @@ -56,7 +56,7 @@ public void testFoldStringColumns() throws Exception @Test public void testFoldWithNull() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, 1L, 2, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, false, 1L, 2, null, null, null); Assert.assertEquals(analysis1, analysis1.fold(null)); assertSerDe(analysis1); } @@ -64,13 +64,13 @@ public void testFoldWithNull() throws Exception @Test public void testFoldComplexColumns() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, 0L, null, null, null, 
null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("hyperUnique", false, 0L, null, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); + final ColumnAnalysis analysis2 = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("hyperUnique", false, 0L, null, null, null, null); + final ColumnAnalysis expected = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); @@ -84,8 +84,8 @@ public void testFoldComplexColumns() throws Exception @Test public void testFoldDifferentTypes() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, 1L, 1, null, null, null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("COMPLEX", false, 2L, 2, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, false, 1L, 1, null, null, null); + final ColumnAnalysis analysis2 = new ColumnAnalysis("COMPLEX", false, false, 2L, 2, null, null, null); assertSerDe(analysis1); assertSerDe(analysis2); @@ -93,6 +93,7 @@ public void testFoldDifferentTypes() throws Exception final ColumnAnalysis expected = new ColumnAnalysis( "STRING", false, + false, -1L, null, null, @@ -117,7 +118,7 @@ public void testFoldSameErrors() throws Exception assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, -1L, null, null, null, "error:foo"); + final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:foo"); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); @@ -131,12 +132,12 @@ public void testFoldSameErrors() 
throws Exception public void testFoldErrorAndNoError() throws Exception { final ColumnAnalysis analysis1 = ColumnAnalysis.error("foo"); - final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", false, 2L, 2, "a", "z", null); + final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", false, false, 2L, 2, "a", "z", null); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, -1L, null, null, null, "error:foo"); + final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:foo"); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); @@ -155,7 +156,7 @@ public void testFoldDifferentErrors() throws Exception assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, -1L, null, null, null, "error:multiple_errors"); + final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:multiple_errors"); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index 57689a46cb0a..cdfc1b149781 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.impl.DimensionSchema; import 
org.apache.druid.data.input.impl.DimensionsSpec; @@ -32,6 +33,7 @@ import org.apache.druid.data.input.impl.TimeAndDimsParseSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; import org.apache.druid.query.aggregation.FloatSumAggregatorFactory; @@ -53,6 +55,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -63,11 +66,12 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin private static IncrementalIndex INC_INDEX; private static QueryableIndex MMAP_INDEX; + private static IncrementalIndex INC_INDEX_WITH_NULLS; + private static QueryableIndex MMAP_INDEX_WITH_NULLS; @BeforeClass public static void setup() throws IOException { - List rows = new ArrayList<>(); MapInputRowParser parser = new MapInputRowParser( new TimeAndDimsParseSpec( new TimestampSpec("time", "auto", null), @@ -83,6 +87,14 @@ public static void setup() throws IOException ) ) ); + AggregatorFactory[] metricsSpecs = new AggregatorFactory[] { + new CountAggregatorFactory("cnt"), + new DoubleSumAggregatorFactory("m1", "d3"), + new FloatSumAggregatorFactory("m2", "d4"), + new LongSumAggregatorFactory("m3", "d5"), + new HyperUniquesAggregatorFactory("m4", "d1") + }; + List rows = new ArrayList<>(); Map event = ImmutableMap.builder().put("time", DateTimes.nowUtc().getMillis()) .put("d1", "some string") @@ -92,17 +104,12 @@ public static void setup() throws IOException .put("d5", 10L) .build(); rows.add(Iterables.getOnlyElement(parser.parseBatch(event))); + IndexBuilder builder = IndexBuilder.create() .rows(rows) .schema( new IncrementalIndexSchema.Builder() - .withMetrics( - new CountAggregatorFactory("cnt"), - new 
DoubleSumAggregatorFactory("m1", "d3"), - new FloatSumAggregatorFactory("m2", "d4"), - new LongSumAggregatorFactory("m3", "d5"), - new HyperUniquesAggregatorFactory("m4", "d1") - ) + .withMetrics(metricsSpecs) .withDimensionsSpec(parser) .withRollup(false) .build() @@ -110,6 +117,32 @@ public static void setup() throws IOException .tmpDir(temporaryFolder.newFolder()); INC_INDEX = builder.buildIncrementalIndex(); MMAP_INDEX = builder.buildMMappedIndex(); + + List rowsWithNulls = new ArrayList<>(); + rowsWithNulls.add(Iterables.getOnlyElement(parser.parseBatch(event))); + + Map eventWithNulls = new HashMap<>(); + eventWithNulls.put("time", DateTimes.nowUtc().getMillis()); + eventWithNulls.put("d1", null); + eventWithNulls.put("d2", ImmutableList.of()); + eventWithNulls.put("d3", null); + eventWithNulls.put("d4", null); + eventWithNulls.put("d5", null); + + rowsWithNulls.add(Iterables.getOnlyElement(parser.parseBatch(eventWithNulls))); + + IndexBuilder builderWithNulls = IndexBuilder.create() + .rows(rowsWithNulls) + .schema( + new IncrementalIndexSchema.Builder() + .withMetrics(metricsSpecs) + .withDimensionsSpec(parser) + .withRollup(false) + .build() + ) + .tmpDir(temporaryFolder.newFolder()); + INC_INDEX_WITH_NULLS = builderWithNulls.buildIncrementalIndex(); + MMAP_INDEX_WITH_NULLS = builderWithNulls.buildMMappedIndex(); } @AfterClass @@ -117,6 +150,8 @@ public static void teardown() { INC_INDEX.close(); MMAP_INDEX.close(); + INC_INDEX_WITH_NULLS.close(); + MMAP_INDEX_WITH_NULLS.close(); } @Test @@ -144,6 +179,53 @@ public void testNumericColumns() assertNonStringColumnCapabilities(MMAP_INDEX.getColumnHolder("m3").getCapabilities(), ValueType.LONG); } + @Test + public void testNumericColumnsWithNulls() + { + // incremental index + // time does not have nulls + assertNonStringColumnCapabilities( + INC_INDEX_WITH_NULLS.getCapabilities(ColumnHolder.TIME_COLUMN_NAME), + ValueType.LONG + ); + 
assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("d3"), ValueType.DOUBLE); + assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("d4"), ValueType.FLOAT); + assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("d5"), ValueType.LONG); + assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("m1"), ValueType.DOUBLE); + assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("m2"), ValueType.FLOAT); + assertNonStringColumnCapabilitiesWithNulls(INC_INDEX_WITH_NULLS.getCapabilities("m3"), ValueType.LONG); + + // segment index + assertNonStringColumnCapabilities( + MMAP_INDEX_WITH_NULLS.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getCapabilities(), + ValueType.LONG + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("d3").getCapabilities(), + ValueType.DOUBLE + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("d4").getCapabilities(), + ValueType.FLOAT + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("d5").getCapabilities(), + ValueType.LONG + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("m1").getCapabilities(), + ValueType.DOUBLE + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("m2").getCapabilities(), + ValueType.FLOAT + ); + assertNonStringColumnCapabilitiesWithNulls( + MMAP_INDEX_WITH_NULLS.getColumnHolder("m3").getCapabilities(), + ValueType.LONG + ); + } + @Test public void testStringColumn() { @@ -159,11 +241,13 @@ public void testStringColumn() // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, // coercing any 'UNKNOWN' values to false Assert.assertFalse( - ColumnCapabilitiesImpl.snapshot(caps, IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC) - .hasMultipleValues() - 
.isMaybeTrue() + ColumnCapabilitiesImpl.snapshot( + caps, + IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC + ).hasMultipleValues().isMaybeTrue() ); Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.hasNulls().isUnknown()); caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); @@ -173,6 +257,41 @@ public void testStringColumn() Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertFalse(caps.hasNulls().isMaybeTrue()); + } + + + @Test + public void testStringColumnWithNulls() + { + ColumnCapabilities caps = INC_INDEX_WITH_NULLS.getCapabilities("d1"); + Assert.assertEquals(ValueType.STRING, caps.getType()); + Assert.assertTrue(caps.hasBitmapIndexes()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); + Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); + // multi-value is unknown unless explicitly set to 'true' + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); + // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, + // coercing any 'UNKNOWN' values to false + Assert.assertFalse( + ColumnCapabilitiesImpl.snapshot( + caps, + IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC + ).hasMultipleValues().isMaybeTrue() + ); + Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.hasNulls().isTrue()); + + caps = MMAP_INDEX_WITH_NULLS.getColumnHolder("d1").getCapabilities(); + Assert.assertEquals(ValueType.STRING, caps.getType()); + Assert.assertTrue(caps.hasBitmapIndexes()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); + Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); + 
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.hasNulls().isTrue()); } @Test @@ -186,6 +305,7 @@ public void testMultiStringColumn() Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.hasNulls().isUnknown()); caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); @@ -195,13 +315,54 @@ public void testMultiStringColumn() Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertFalse(caps.hasNulls().isMaybeTrue()); + } + + + @Test + public void testMultiStringColumnWithNulls() + { + ColumnCapabilities caps = INC_INDEX_WITH_NULLS.getCapabilities("d2"); + Assert.assertEquals(ValueType.STRING, caps.getType()); + Assert.assertTrue(caps.hasBitmapIndexes()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); + Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); + Assert.assertTrue(caps.hasMultipleValues().isTrue()); + Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.hasNulls().isTrue()); + + caps = MMAP_INDEX_WITH_NULLS.getColumnHolder("d2").getCapabilities(); + Assert.assertEquals(ValueType.STRING, caps.getType()); + Assert.assertTrue(caps.hasBitmapIndexes()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); + Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); + Assert.assertTrue(caps.hasMultipleValues().isTrue()); + Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertTrue(caps.hasNulls().isTrue()); } @Test public void testComplexColumn() { - 
assertNonStringColumnCapabilities(INC_INDEX.getCapabilities("m4"), ValueType.COMPLEX); - assertNonStringColumnCapabilities(MMAP_INDEX.getColumnHolder("m4").getCapabilities(), ValueType.COMPLEX); + assertComplexColumnCapabilites(INC_INDEX.getCapabilities("m4")); + assertComplexColumnCapabilites(MMAP_INDEX.getColumnHolder("m4").getCapabilities()); + // results for this complex aren't different, we only know that nullability is unknown + assertComplexColumnCapabilites(INC_INDEX_WITH_NULLS.getCapabilities("m4")); + assertComplexColumnCapabilites(MMAP_INDEX_WITH_NULLS.getColumnHolder("m4").getCapabilities()); + } + + private void assertComplexColumnCapabilites(ColumnCapabilities caps) + { + Assert.assertEquals(ValueType.COMPLEX, caps.getType()); + Assert.assertFalse(caps.hasBitmapIndexes()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); + Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); + Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertFalse(caps.hasMultipleValues().isUnknown()); + Assert.assertTrue(caps.hasNulls().isTrue()); } private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueType valueType) @@ -213,5 +374,19 @@ private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueTyp Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); + Assert.assertFalse(caps.hasNulls().isTrue()); + } + + private void assertNonStringColumnCapabilitiesWithNulls(ColumnCapabilities caps, ValueType valueType) + { + Assert.assertEquals(valueType, caps.getType()); + Assert.assertFalse(caps.hasBitmapIndexes()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); + Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); + Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); + 
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(caps.hasSpatialIndexes()); + // check isMaybeTrue because incremental index uses Unknown + Assert.assertEquals(NullHandling.sqlCompatible(), caps.hasNulls().isMaybeTrue()); } } diff --git a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java index ce98506dc7be..30af23fc80f8 100644 --- a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java +++ b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java @@ -37,6 +37,7 @@ public void testSerde() throws Exception .setHasMultipleValues(true) .setHasSpatialIndexes(true) .setType(ValueType.COMPLEX) + .setHasNulls(true) .setFilterable(true)); Assert.assertFalse(json.contains("filterable")); @@ -48,6 +49,8 @@ public void testSerde() throws Exception Assert.assertTrue(cc.hasSpatialIndexes()); Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); + // hasNulls and isFilterable are computed, these should not be set + Assert.assertFalse(cc.hasNulls().isTrue()); Assert.assertFalse(cc.isFilterable()); } @@ -61,6 +64,7 @@ public void testDeserialization() throws Exception + " \"hasSpatialIndexes\":true,\n" + " \"hasMultipleValues\":true,\n" + " \"hasBitmapIndexes\":true,\n" + + " \"hasNulls\":true,\n" + " \"filterable\":true\n" + "}"; @@ -72,6 +76,8 @@ public void testDeserialization() throws Exception Assert.assertTrue(cc.hasSpatialIndexes()); Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); + // hasNulls and isFilterable are computed, these should not be set + Assert.assertFalse(cc.hasNulls().isTrue()); Assert.assertFalse(cc.isFilterable()); } } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java 
b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index b5472fd870ff..931a794dbdae 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -136,6 +136,18 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest ) ); + // missing 'dim3' because makeDefaultSchemaRow does not expect to set it... + static final RowSignature DEFAULT_ROW_SIGNATURE = + RowSignature.builder() + .add("dim0", ValueType.STRING) + .add("dim1", ValueType.STRING) + .add("dim2", ValueType.STRING) + .add("timeDim", ValueType.STRING) + .add("d0", ValueType.DOUBLE) + .add("f0", ValueType.FLOAT) + .add("l0", ValueType.LONG) + .build(); + static final List DEFAULT_ROWS = ImmutableList.of( makeDefaultSchemaRow("0", "", ImmutableList.of("a", "b"), "2017-07-25", 0.0, 0.0f, 0L), makeDefaultSchemaRow("1", "10", ImmutableList.of(), "2017-07-25", 10.1, 10.1f, 100L), @@ -151,25 +163,27 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest .build(); static InputRow makeDefaultSchemaRow( - @Nullable String dim0, - @Nullable String dim1, - @Nullable List dim2, - @Nullable String timeDim, - @Nullable Double d0, - @Nullable Float f0, - @Nullable Long l0 + @Nullable Object... elements + ) + { + return makeSchemaRow(DEFAULT_PARSER, DEFAULT_ROW_SIGNATURE, elements); + } + + + static InputRow makeSchemaRow( + final InputRowParser> parser, + final RowSignature signature, + @Nullable Object... elements ) { - // for row selector to work correctly as part of the test matrix, default value coercion needs to happen to columns - Map mapRow = Maps.newHashMapWithExpectedSize(6); - mapRow.put("dim0", NullHandling.nullToEmptyIfNeeded(dim0)); - mapRow.put("dim1", NullHandling.nullToEmptyIfNeeded(dim1)); - mapRow.put("dim2", dim2 != null ? 
dim2 : NullHandling.defaultStringValue()); - mapRow.put("timeDim", NullHandling.nullToEmptyIfNeeded(timeDim)); - mapRow.put("d0", d0 != null ? d0 : NullHandling.defaultDoubleValue()); - mapRow.put("f0", f0 != null ? f0 : NullHandling.defaultFloatValue()); - mapRow.put("l0", l0 != null ? l0 : NullHandling.defaultLongValue()); - return DEFAULT_PARSER.parseBatch(mapRow).get(0); + Preconditions.checkArgument(signature.size() == elements.length); + Map mapRow = Maps.newHashMapWithExpectedSize(signature.size()); + for (int i = 0; i < signature.size(); i++) { + final String columnName = signature.getColumnName(i); + final Object value = elements[i]; + mapRow.put(columnName, value); + } + return parser.parseBatch(mapRow).get(0); } @@ -184,6 +198,11 @@ static InputRow makeDefaultSchemaRow( protected final boolean optimize; protected final String testName; + // 'rowBasedWithoutTypeSignature' does not handle numeric null default values correctly, is equivalent to + // druid.generic.useDefaultValueForNull being set to false, regardless of how it is actually set. + // In other words, numeric null values will be treated as nulls instead of the default value + protected final boolean canTestNumericNullsAsDefaultValues; + protected StorageAdapter adapter; // JUnit creates a new test instance for every test method call. 
@@ -208,6 +227,8 @@ public BaseFilterTest( this.finisher = finisher; this.cnf = cnf; this.optimize = optimize; + this.canTestNumericNullsAsDefaultValues = + NullHandling.replaceWithDefault() && !testName.contains("finisher[rowBasedWithoutTypeSignature]"); } @Before diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java index 12e79cfe8794..8263538e0386 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java @@ -656,7 +656,7 @@ public void testNumericNullsAndZeros() null, StringComparators.NUMERIC ), - NullHandling.replaceWithDefault() ? ImmutableList.of("0", "2", "7") : ImmutableList.of("0") + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "2", "7") : ImmutableList.of("0") ); assertFilterMatches( @@ -670,7 +670,7 @@ public void testNumericNullsAndZeros() null, StringComparators.NUMERIC ), - NullHandling.replaceWithDefault() ? ImmutableList.of("0", "4", "6") : ImmutableList.of("0") + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "4", "6") : ImmutableList.of("0") ); assertFilterMatches( @@ -684,7 +684,9 @@ public void testNumericNullsAndZeros() null, StringComparators.NUMERIC ), - NullHandling.replaceWithDefault() ? ImmutableList.of("0", "3", "7") : ImmutableList.of("0") + NullHandling.replaceWithDefault() && canTestNumericNullsAsDefaultValues + ? 
ImmutableList.of("0", "3", "7") + : ImmutableList.of("0") ); } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java index dba2b6bb0751..40ccaa64dbb4 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java @@ -215,7 +215,7 @@ public void testJavascriptFilterWithLookupExtractionFn() @Test public void testNumericNull() { - if (NullHandling.replaceWithDefault()) { + if (canTestNumericNullsAsDefaultValues) { assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("f0", jsNullFilter, null), ImmutableList.of()); assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("d0", jsNullFilter, null), ImmutableList.of()); assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("l0", jsNullFilter, null), ImmutableList.of()); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java index 2b343fc79174..aa104cedd791 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java @@ -312,7 +312,7 @@ public void testSelectorWithLookupExtractionFn() @Test public void testNumericColumnNullsAndDefaults() { - if (NullHandling.replaceWithDefault()) { + if (canTestNumericNullsAsDefaultValues) { assertFilterMatches(new SelectorDimFilter("f0", "0", null), ImmutableList.of("0", "4")); assertFilterMatches(new SelectorDimFilter("d0", "0", null), ImmutableList.of("0", "2")); assertFilterMatches(new SelectorDimFilter("l0", "0", null), ImmutableList.of("0", "3"));