From eec2767fbacdb004c7c0c3849cf3df301d83747b Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 21 Apr 2020 00:32:50 -0700 Subject: [PATCH 01/11] transition ColumnCapabilities.hasMultipleValues to Capable enum, remove ColumnCapabilities.isComplete --- .../epinephelinae/GroupByQueryEngineV2.java | 2 +- .../druid/query/metadata/SegmentAnalyzer.java | 12 +-- .../druid/segment/ColumnProcessors.java | 2 +- .../ColumnSelectorBitmapIndexSelector.java | 4 +- .../druid/segment/DimensionHandlerUtils.java | 5 +- .../apache/druid/segment/IndexMergerV9.java | 12 +-- .../RowBasedColumnSelectorFactory.java | 9 +- .../segment/StringDimensionMergerV9.java | 4 +- .../druid/segment/column/ColumnBuilder.java | 1 - .../segment/column/ColumnCapabilities.java | 55 ++++++++---- .../column/ColumnCapabilitiesImpl.java | 84 +++++++++---------- .../apache/druid/segment/filter/Filters.java | 2 +- .../segment/incremental/IncrementalIndex.java | 44 +++++----- .../IndexedTableColumnSelectorFactory.java | 3 +- ...yableIndexVectorColumnSelectorFactory.java | 4 +- .../segment/virtual/ExpressionSelectors.java | 11 ++- .../virtual/ExpressionVirtualColumn.java | 8 +- .../GroupByQueryEngineV2Test.java | 18 ++-- .../druid/query/lookup/LookupSegmentTest.java | 6 +- .../QueryableIndexColumnCapabilitiesTest.java | 15 ++-- .../RowBasedColumnSelectorFactoryTest.java | 18 ++-- .../segment/RowBasedStorageAdapterTest.java | 15 ++-- .../column/ColumnCapabilitiesImplTest.java | 4 +- .../join/table/IndexedTableJoinableTest.java | 6 +- .../virtual/ExpressionVirtualColumnTest.java | 11 ++- .../segment/virtual/VirtualColumnsTest.java | 2 +- 26 files changed, 173 insertions(+), 184 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 823f5f250a82..7abe9505ba0c 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -333,7 +333,7 @@ public static boolean isAllSingleValueDims( // Now check column capabilities. final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); - return columnCapabilities == null || !columnCapabilities.hasMultipleValues(); + return columnCapabilities == null || !columnCapabilities.hasMultipleValues().isMaybeTrue(); }); } diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index c5f1800acd15..9f8954dc5098 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -138,7 +138,7 @@ public Map analyze(Segment segment) // Add time column too ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(ColumnHolder.TIME_COLUMN_NAME); if (timeCapabilities == null) { - timeCapabilities = new ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false); + timeCapabilities = ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.LONG); } columns.put( ColumnHolder.TIME_COLUMN_NAME, @@ -172,7 +172,7 @@ private ColumnAnalysis analyzeNumericColumn( long size = 0; if (analyzingSize()) { - if (capabilities.hasMultipleValues()) { + if (capabilities.hasMultipleValues().isTrue()) { return ColumnAnalysis.error("multi_value"); } @@ -181,7 +181,7 @@ private ColumnAnalysis analyzeNumericColumn( return new ColumnAnalysis( capabilities.getType().name(), - capabilities.hasMultipleValues(), + capabilities.hasMultipleValues().isTrue(), size, null, null, @@ -231,7 +231,7 @@ private ColumnAnalysis analyzeStringColumn( return new ColumnAnalysis( capabilities.getType().name(), - capabilities.hasMultipleValues(), + capabilities.hasMultipleValues().isTrue(), size, analyzingCardinality() ? cardinality : 0, min, @@ -308,7 +308,7 @@ public Long accumulate(Long accumulated, Cursor cursor) return new ColumnAnalysis( capabilities.getType().name(), - capabilities.hasMultipleValues(), + capabilities.hasMultipleValues().isTrue(), size, cardinality, min, @@ -324,7 +324,7 @@ private ColumnAnalysis analyzeComplexColumn( ) { try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) { - final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues(); + final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue(); long size = 0; if (analyzingSize() && complexColumn != null) { diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java index 5fb698a484e4..05e85fdd4c24 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java @@ -197,6 +197,6 @@ private static T makeProcessorInternal( */ private static boolean mayBeMultiValue(@Nullable final ColumnCapabilities capabilities) { - return capabilities == null || !capabilities.isComplete() || capabilities.hasMultipleValues(); + return capabilities == null || capabilities.hasMultipleValues().isMaybeTrue(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java index 79b6e8900745..d0d5f3ed5180 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java @@ -160,11 +160,11 @@ public void close() throws IOException public boolean hasMultipleValues(final String dimension) { if (isVirtualColumn(dimension)) { - return virtualColumns.getVirtualColumn(dimension).capabilities(dimension).hasMultipleValues(); + return virtualColumns.getVirtualColumn(dimension).capabilities(dimension).hasMultipleValues().isMaybeTrue(); } final ColumnHolder columnHolder = index.getColumnHolder(dimension); - return columnHolder != null && columnHolder.getCapabilities().hasMultipleValues(); + return columnHolder != null && columnHolder.getCapabilities().hasMultipleValues().isMaybeTrue(); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 0c8b3c280c6e..4409e3046c73 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -60,7 +60,8 @@ public final class DimensionHandlerUtils .setDictionaryEncoded(false) .setDictionaryValuesUnique(false) .setDictionaryValuesSorted(false) - .setHasBitmapIndexes(false); + .setHasBitmapIndexes(false) + .setHasMultipleValues(false); private DimensionHandlerUtils() { @@ -297,7 +298,7 @@ public static T makeVectorProcessor( final ValueType type = capabilities.getType(); if (type == ValueType.STRING) { - if (capabilities.hasMultipleValues()) { + if (capabilities.hasMultipleValues().isMaybeTrue()) { return strategyFactory.makeMultiValueDimensionProcessor( selectorFactory.makeMultiValueDimensionSelector(dimensionSpec) ); diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java index e1a92f7d1a36..82fbc67c2a92 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java @@ -159,7 +159,7 @@ private File makeIndexFiles( progress.progress(); final Map metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst()); final Map metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst()); - final List dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size()); + final List dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size()); mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities); final Map handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities); @@ -710,18 +710,18 @@ private void mergeCapabilities( final List mergedDimensions, final Map metricsValueTypes, final Map metricTypeNames, - final List dimCapabilities + final List dimCapabilities ) { - final Map capabilitiesMap = new HashMap<>(); + final Map capabilitiesMap = new HashMap<>(); for (IndexableAdapter adapter : adapters) { for (String dimension : adapter.getDimensionNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(dimension); - capabilitiesMap.computeIfAbsent(dimension, d -> new ColumnCapabilitiesImpl().setIsComplete(true)).merge(capabilities); + capabilitiesMap.computeIfAbsent(dimension, d -> ColumnCapabilitiesImpl.complete(capabilities, false)); } for (String metric : adapter.getMetricNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(metric); - capabilitiesMap.computeIfAbsent(metric, m -> new ColumnCapabilitiesImpl().setIsComplete(true)).merge(capabilities); + capabilitiesMap.computeIfAbsent(metric, m -> ColumnCapabilitiesImpl.complete(capabilities, false)); metricsValueTypes.put(metric, capabilities.getType()); metricTypeNames.put(metric, adapter.getMetricType(metric)); } @@ -996,7 +996,7 @@ public File append( private Map makeDimensionHandlers( final List mergedDimensions, - final List dimCapabilities + final List dimCapabilities ) { Map handlers = new LinkedHashMap<>(); diff --git a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java index 7b21d1f0186c..2b20b0d2709e 100644 --- a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java @@ -96,7 +96,7 @@ static ColumnCapabilities getColumnCapabilities( { if (ColumnHolder.TIME_COLUMN_NAME.equals(columnName)) { // TIME_COLUMN_NAME is handled specially; override the provided rowSignature. - return new ColumnCapabilitiesImpl().setType(ValueType.LONG).setIsComplete(true); + return ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.LONG); } else { final ValueType valueType = rowSignature.getColumnType(columnName).orElse(null); @@ -105,12 +105,13 @@ static ColumnCapabilities getColumnCapabilities( // causes expression selectors to always treat us as arrays. If we might have multiple values (i.e. if our type // is nonnumeric), set isComplete false to compensate. if (valueType != null) { + if (valueType.isNumeric()) { + return ColumnCapabilitiesImpl.createSimpleNumericColumn(valueType); + } return new ColumnCapabilitiesImpl() .setType(valueType) .setDictionaryValuesUnique(false) - .setDictionaryValuesSorted(false) - // Numeric types should be reported as complete, but not STRING or COMPLEX (because we don't have full info) - .setIsComplete(valueType.isNumeric()); + .setDictionaryValuesSorted(false); } else { return null; } diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java index cff7ef99f805..abb4637dc7b8 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java @@ -221,7 +221,7 @@ protected void setupEncodedValueWriter() throws IOException final CompressionStrategy compressionStrategy = indexSpec.getDimensionCompression(); String filenameBase = StringUtils.format("%s.forward_dim", dimensionName); - if (capabilities.hasMultipleValues()) { + if (capabilities.hasMultipleValues().isTrue()) { if (compressionStrategy != CompressionStrategy.UNCOMPRESSED) { encodedValueSerializer = V3CompressedVSizeColumnarMultiIntsSerializer.create( dimensionName, @@ -533,7 +533,7 @@ public boolean canSkip() public ColumnDescriptor makeColumnDescriptor() { // Now write everything - boolean hasMultiValue = capabilities.hasMultipleValues(); + boolean hasMultiValue = capabilities.hasMultipleValues().isTrue(); final CompressionStrategy compressionStrategy = indexSpec.getDimensionCompression(); final BitmapSerdeFactory bitmapSerdeFactory = indexSpec.getBitmapSerdeFactory(); diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java index 1b7163e0829b..cde454b70524 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java @@ -118,7 +118,6 @@ public ColumnHolder build() .setDictionaryValuesUnique(dictionaryEncoded) .setHasSpatialIndexes(spatialIndex != null) .setHasMultipleValues(hasMultipleValues) - .setIsComplete(true) .setFilterable(filterable), columnSupplier, bitmapIndex, diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index 53f7440e87ef..79abdafa306f 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -19,33 +19,26 @@ package org.apache.druid.segment.column; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; +import org.apache.druid.java.util.common.StringUtils; + +import javax.annotation.Nullable; + /** */ public interface ColumnCapabilities { ValueType getType(); - boolean isDictionaryEncoded(); Capable areDictionaryValuesSorted(); Capable areDictionaryValuesUnique(); boolean isRunLengthEncoded(); boolean hasBitmapIndexes(); boolean hasSpatialIndexes(); - boolean hasMultipleValues(); + Capable hasMultipleValues(); boolean isFilterable(); - /** - * This property indicates that this {@link ColumnCapabilities} is "complete" in that all properties can be expected - * to supply valid responses. This is mostly a hack to work around {@link ColumnCapabilities} generators that - * fail to set {@link #hasMultipleValues()} even when the associated column really could have multiple values. - * Until this situation is sorted out, if this method returns false, callers are encouraged to ignore - * {@link #hasMultipleValues()} and treat that property as if it were unknown. - * - * todo: replace all booleans with {@link Capable} and this method can be dropped - */ - boolean isComplete(); - - enum Capable { FALSE, @@ -57,6 +50,21 @@ public boolean isTrue() return this == TRUE; } + public boolean isMaybeTrue() + { + return isTrue() || isUnknown(); + } + + public boolean isUnknown() + { + return this == UNKNOWN; + } + + public Capable complete(boolean convertUnknownToTrue) + { + return this == UNKNOWN ? convertUnknownToTrue ? TRUE : FALSE : this; + } + public Capable and(Capable other) { if (this == UNKNOWN || other == UNKNOWN) { @@ -69,5 +77,24 @@ public static Capable of(boolean bool) { return bool ? TRUE : FALSE; } + + @JsonCreator + public static Capable ofNullable(@Nullable Boolean bool) + { + return bool == null ? Capable.UNKNOWN : of(bool); + } + + @JsonValue + @Nullable + public Boolean toJson() + { + return this == UNKNOWN ? null : isTrue(); + } + + @Override + public String toString() + { + return StringUtils.toLowerCase(super.toString()); + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index bee28ebdf832..ebf2bc509921 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; -import org.apache.druid.java.util.common.ISE; import javax.annotation.Nullable; @@ -34,12 +33,44 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities public static ColumnCapabilitiesImpl copyOf(final ColumnCapabilities other) { final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); - capabilities.merge(other); - capabilities.setFilterable(other.isFilterable()); - capabilities.setIsComplete(other.isComplete()); + if (other != null) { + capabilities.type = other.getType(); + capabilities.dictionaryEncoded = other.isDictionaryEncoded(); + capabilities.runLengthEncoded = other.isRunLengthEncoded(); + capabilities.hasInvertedIndexes = other.hasBitmapIndexes(); + capabilities.hasSpatialIndexes = other.hasSpatialIndexes(); + capabilities.hasMultipleValues = other.hasMultipleValues(); + capabilities.dictionaryValuesSorted = other.areDictionaryValuesSorted(); + capabilities.dictionaryValuesUnique = other.areDictionaryValuesUnique(); + capabilities.filterable = other.isFilterable(); + } return capabilities; } + public static ColumnCapabilitiesImpl complete(final ColumnCapabilities capabilities, boolean convertUnknownToTrue) + { + ColumnCapabilitiesImpl copy = copyOf(capabilities); + copy.hasMultipleValues = copy.hasMultipleValues.complete(convertUnknownToTrue); + copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.complete(convertUnknownToTrue); + copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.complete(convertUnknownToTrue); + return copy; + } + + + /** + * Create a no frills, simple column with {@link ValueType} set and everything else false + */ + public static ColumnCapabilitiesImpl createSimpleNumericColumn(ValueType valueType) + { + return new ColumnCapabilitiesImpl().setType(valueType) + .setHasMultipleValues(false) + .setHasBitmapIndexes(false) + .setDictionaryEncoded(false) + .setDictionaryValuesSorted(false) + .setDictionaryValuesUnique(false) + .setHasSpatialIndexes(false); + } + @Nullable private ValueType type = null; @@ -47,7 +78,7 @@ public static ColumnCapabilitiesImpl copyOf(final ColumnCapabilities other) private boolean runLengthEncoded = false; private boolean hasInvertedIndexes = false; private boolean hasSpatialIndexes = false; - private boolean hasMultipleValues = false; + private Capable hasMultipleValues = Capable.UNKNOWN; // These capabilities are computed at query time and not persisted in the segment files. @JsonIgnore @@ -56,8 +87,6 @@ public static ColumnCapabilitiesImpl copyOf(final ColumnCapabilities other) private Capable dictionaryValuesUnique = Capable.UNKNOWN; @JsonIgnore private boolean filterable; - @JsonIgnore - private boolean complete = false; @Override @JsonProperty @@ -144,14 +173,14 @@ public ColumnCapabilitiesImpl setHasSpatialIndexes(boolean hasSpatialIndexes) @Override @JsonProperty("hasMultipleValues") - public boolean hasMultipleValues() + public Capable hasMultipleValues() { return hasMultipleValues; } public ColumnCapabilitiesImpl setHasMultipleValues(boolean hasMultipleValues) { - this.hasMultipleValues = hasMultipleValues; + this.hasMultipleValues = Capable.of(hasMultipleValues); return this; } @@ -170,41 +199,4 @@ public ColumnCapabilitiesImpl setFilterable(boolean filterable) this.filterable = filterable; return this; } - - @Override - public boolean isComplete() - { - return complete; - } - - public ColumnCapabilitiesImpl setIsComplete(boolean complete) - { - this.complete = complete; - return this; - } - - public void merge(ColumnCapabilities other) - { - if (other == null) { - return; - } - - if (type == null) { - type = other.getType(); - } - - if (!type.equals(other.getType())) { - throw new ISE("Cannot merge columns of type[%s] and [%s]", type, other.getType()); - } - - this.dictionaryEncoded |= other.isDictionaryEncoded(); - this.runLengthEncoded |= other.isRunLengthEncoded(); - this.hasInvertedIndexes |= other.hasBitmapIndexes(); - this.hasSpatialIndexes |= other.hasSpatialIndexes(); - this.hasMultipleValues |= other.hasMultipleValues(); - this.complete &= other.isComplete(); // these should always be the same? - this.filterable &= other.isFilterable(); - this.dictionaryValuesSorted = this.dictionaryValuesSorted.and(other.areDictionaryValuesSorted()); - this.dictionaryValuesUnique = this.dictionaryValuesUnique.and(other.areDictionaryValuesUnique()); - } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 72e343ce93fb..6445b1796afe 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -411,7 +411,7 @@ static boolean supportsSelectivityEstimation( if (filter.supportsBitmapIndex(indexSelector)) { final ColumnHolder columnHolder = columnSelector.getColumnHolder(dimension); if (columnHolder != null) { - return !columnHolder.getCapabilities().hasMultipleValues(); + return !columnHolder.getCapabilities().hasMultipleValues().isMaybeTrue(); } } return false; diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index c169d15f2230..aa3ebd19a1a1 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -327,9 +327,10 @@ protected IncrementalIndex( } //__time capabilities - ColumnCapabilitiesImpl timeCapabilities = new ColumnCapabilitiesImpl().setIsComplete(true); - timeCapabilities.setType(ValueType.LONG); - columnCapabilities.put(ColumnHolder.TIME_COLUMN_NAME, timeCapabilities); + columnCapabilities.put( + ColumnHolder.TIME_COLUMN_NAME, + ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.LONG) + ); // This should really be more generic List spatialDimensions = dimensionsSpec.getSpatialDimensions(); @@ -660,14 +661,8 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) wasNewDim = true; capabilities = columnCapabilities.get(dimension); if (capabilities == null) { - capabilities = new ColumnCapabilitiesImpl(); // For schemaless type discovery, assume everything is a String for now, can change later. - capabilities.setType(ValueType.STRING); - capabilities.setDictionaryEncoded(true); - capabilities.setHasBitmapIndexes(true); - capabilities.setDictionaryValuesSorted(false); - capabilities.setDictionaryValuesUnique(true); - capabilities.setIsComplete(true); + capabilities = makeCapabilitiesFromValueType(ValueType.STRING); columnCapabilities.put(dimension, capabilities); } DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null); @@ -687,7 +682,7 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) } dimsKeySize += indexer.estimateEncodedKeyComponentSize(dimsKey); // Set column capabilities as data is coming in - if (!capabilities.hasMultipleValues() && + if (!capabilities.hasMultipleValues().isTrue() && dimsKey != null && handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) { capabilities.setHasMultipleValues(true); @@ -923,16 +918,17 @@ public List getDimensionOrder() private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type) { - ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); - capabilities.setDictionaryEncoded(type == ValueType.STRING); - capabilities.setHasBitmapIndexes(type == ValueType.STRING); if (type == ValueType.STRING) { - capabilities.setDictionaryValuesUnique(true); - capabilities.setDictionaryValuesSorted(false); + // we start out as not having multiple values, but this might change as we encounter them + return new ColumnCapabilitiesImpl().setType(type) + .setHasBitmapIndexes(true) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setDictionaryValuesSorted(false) + .setHasMultipleValues(false); + } else { + return ColumnCapabilitiesImpl.createSimpleNumericColumn(type); } - capabilities.setType(type); - capabilities.setIsComplete(true); - return capabilities; } /** @@ -1124,18 +1120,18 @@ public MetricDesc(int index, AggregatorFactory factory) this.name = factory.getName(); String typeInfo = factory.getTypeName(); - this.capabilities = new ColumnCapabilitiesImpl().setIsComplete(true); if ("float".equalsIgnoreCase(typeInfo)) { - capabilities.setType(ValueType.FLOAT); + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.FLOAT); this.type = typeInfo; } else if ("long".equalsIgnoreCase(typeInfo)) { - capabilities.setType(ValueType.LONG); + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.LONG); this.type = typeInfo; } else if ("double".equalsIgnoreCase(typeInfo)) { - capabilities.setType(ValueType.DOUBLE); + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.DOUBLE); this.type = typeInfo; } else { - capabilities.setType(ValueType.COMPLEX); + // in an ideal world complex type reports its actual column capabilities... + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.COMPLEX); this.type = ComplexMetrics.getSerdeForType(typeInfo).getTypeName(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java index 7e6466d78288..00cb51ffffff 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java @@ -58,8 +58,9 @@ static ColumnCapabilities columnCapabilities(final IndexedTable table, final Str capabilities.setDictionaryValuesSorted(false); capabilities.setDictionaryValuesUnique(false); + capabilities.setHasMultipleValues(false); - return capabilities.setIsComplete(true); + return capabilities; } else { return null; } diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java index 83110499862f..c8f7b396eb82 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -85,7 +85,7 @@ public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final D if (holder == null || !holder.getCapabilities().isDictionaryEncoded() || holder.getCapabilities().getType() != ValueType.STRING - || !holder.getCapabilities().hasMultipleValues()) { + || !holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { throw new ISE( "Column[%s] is not a multi-value string column, do not ask for a multi-value selector", spec.getDimension() @@ -123,7 +123,7 @@ public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(final return NilVectorSelector.create(offset); } - if (holder.getCapabilities().hasMultipleValues()) { + if (holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { // Asking for a single-value dimension selector on a multi-value column gets you an error. throw new ISE("Column[%s] is multi-value, do not ask for a single-value selector", spec.getDimension()); } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 345274aa6e74..fdd8ff73ce85 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -154,8 +154,7 @@ public static ColumnValueSelector makeExprEvalSelector( } else if (capabilities != null && capabilities.getType() == ValueType.STRING && capabilities.isDictionaryEncoded() - && capabilities.isComplete() - && !capabilities.hasMultipleValues() + && !capabilities.hasMultipleValues().isMaybeTrue() && exprDetails.getArrayBindings().isEmpty()) { // Optimization for expressions that hit one scalar string column and nothing else. return new SingleStringInputCachingExpressionColumnValueSelector( @@ -227,7 +226,7 @@ public static DimensionSelector makeDimensionSelector( if (capabilities != null && capabilities.getType() == ValueType.STRING && capabilities.isDictionaryEncoded() - && capabilities.isComplete() + && !capabilities.hasMultipleValues().isUnknown() && !exprDetails.hasInputArrays() && !exprDetails.isOutputArray() ) { @@ -357,7 +356,7 @@ private static Expr.ObjectBinding createBindings( final ColumnCapabilities columnCapabilities = columnSelectorFactory .getColumnCapabilities(columnName); final ValueType nativeType = columnCapabilities != null ? columnCapabilities.getType() : null; - final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues(); + final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues().isTrue(); final Supplier supplier; if (nativeType == ValueType.FLOAT) { @@ -598,11 +597,11 @@ private static Pair, Set> examineColumnSelectorFactoryArrays for (String column : columns) { final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column); if (capabilities != null) { - if (capabilities.hasMultipleValues()) { + if (capabilities.hasMultipleValues().isTrue()) { actualArrays.add(column); } else if ( - !capabilities.isComplete() && capabilities.getType().equals(ValueType.STRING) && + capabilities.hasMultipleValues().isMaybeTrue() && !exprDetails.getArrayBindings().contains(column) ) { unknownIfArrays.add(column); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java index 482365152931..59b846fa9a99 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java @@ -131,10 +131,10 @@ public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnS @Override public ColumnCapabilities capabilities(String columnName) { - // Note: Ideally we would only "setHasMultipleValues(true)" if the expression in question could potentially return - // multiple values. However, we don't currently have a good way of determining this, so to be safe we always - // set the flag. - return new ColumnCapabilitiesImpl().setType(outputType).setHasMultipleValues(true); + // Note: Ideally we would fill out additional information instead of leaving capabilities as 'unknown', e.g. examine + // if the expression in question could potentially return multiple values and anything else. However, we don't + // currently have a good way of determining this, so fill this out more once we do + return new ColumnCapabilitiesImpl().setType(outputType); } @Override diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java index d71d70a1aac9..75a12a90cb6c 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java @@ -47,8 +47,7 @@ public void testCanPushDownLimitForSegmentStringSelector() .setHasMultipleValues(false) .setDictionaryEncoded(true) .setDictionaryValuesSorted(true) - .setDictionaryValuesUnique(true) - .setIsComplete(true); + .setDictionaryValuesUnique(true); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once(); EasyMock.replay(factory); Assert.assertTrue(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); @@ -63,8 +62,7 @@ public void testCanPushDownLimitForIncrementalStringSelector() .setHasMultipleValues(false) .setDictionaryEncoded(false) .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(true) - .setIsComplete(true); + .setDictionaryValuesUnique(true); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once(); EasyMock.replay(factory); Assert.assertFalse(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); @@ -79,8 +77,7 @@ public void testCanPushDownLimitForExpressionStringSelector() .setHasMultipleValues(false) .setDictionaryEncoded(false) .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(false) - .setIsComplete(true); + .setDictionaryValuesUnique(false); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once(); EasyMock.replay(factory); Assert.assertFalse(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); @@ -95,8 +92,7 @@ public void testCanPushDownLimitForJoinStringSelector() .setHasMultipleValues(false) .setDictionaryEncoded(true) .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(false) - .setIsComplete(true); + .setDictionaryValuesUnique(false); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once(); EasyMock.replay(factory); Assert.assertFalse(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); @@ -111,8 +107,7 @@ public void testCanPushDownLimitForNumericSelector() .setHasMultipleValues(false) .setDictionaryEncoded(false) .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(false) - .setIsComplete(true); + .setDictionaryValuesUnique(false); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).anyTimes(); EasyMock.replay(factory); Assert.assertTrue(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); @@ -131,8 +126,7 @@ public void testCanPushDownLimitForComplexSelector() .setHasMultipleValues(false) .setDictionaryEncoded(false) .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(false) - .setIsComplete(true); + .setDictionaryValuesUnique(false); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once(); EasyMock.replay(factory); Assert.assertTrue(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); diff --git a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java index ce6035142374..3ca72aa5a698 100644 --- a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java @@ -137,9 +137,8 @@ public void test_asStorageAdapter_getColumnCapabilitiesK() // Note: the "k" column does not actually have multiple values, but the RowBasedStorageAdapter doesn't allow // reporting complete single-valued capabilities. It would be good to change this in the future, so query engines // running on top of lookups can take advantage of singly-valued optimizations. - Assert.assertFalse(capabilities.hasMultipleValues()); + Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); Assert.assertFalse(capabilities.isDictionaryEncoded()); - Assert.assertFalse(capabilities.isComplete()); } @Test @@ -151,9 +150,8 @@ public void test_asStorageAdapter_getColumnCapabilitiesV() // reporting complete single-valued capabilities. It would be good to change this in the future, so query engines // running on top of lookups can take advantage of singly-valued optimizations. Assert.assertEquals(ValueType.STRING, capabilities.getType()); - Assert.assertFalse(capabilities.hasMultipleValues()); + Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); Assert.assertFalse(capabilities.isDictionaryEncoded()); - Assert.assertFalse(capabilities.isComplete()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index dc102e688164..dc4a7f53146d 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -152,9 +152,8 @@ public void testStringColumn() Assert.assertTrue(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); @@ -162,9 +161,8 @@ public void testStringColumn() Assert.assertTrue(caps.isDictionaryEncoded()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -176,9 +174,8 @@ public void testMultiStringColumn() Assert.assertTrue(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertTrue(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); @@ -186,9 +183,8 @@ public void testMultiStringColumn() Assert.assertTrue(caps.isDictionaryEncoded()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertTrue(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -206,8 +202,7 @@ private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueTyp Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } } diff --git a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java index 8886c7d2d8f1..e12dac4743cd 100644 --- a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java @@ -54,9 +54,8 @@ public void testCapabilitiesTime() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -69,9 +68,8 @@ public void testCapabilitiesString() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertFalse(caps.isComplete()); } @Test @@ -84,9 +82,8 @@ public void testCapabilitiesLong() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -99,9 +96,8 @@ public void testCapabilitiesFloat() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -114,9 +110,8 @@ public void testCapabilitiesDouble() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -129,9 +124,8 @@ public void testCapabilitiesComplex() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertFalse(caps.isComplete()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java index a4fc2f254b78..652a59033fab 100644 --- a/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java @@ -332,8 +332,7 @@ public void test_getColumnCapabilities_float() final ColumnCapabilities capabilities = adapter.getColumnCapabilities(ValueType.FLOAT.name()); Assert.assertEquals(ValueType.FLOAT, capabilities.getType()); - Assert.assertFalse(capabilities.hasMultipleValues()); - Assert.assertTrue(capabilities.isComplete()); + Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); } @Test @@ -343,8 +342,7 @@ public void test_getColumnCapabilities_double() final ColumnCapabilities capabilities = adapter.getColumnCapabilities(ValueType.DOUBLE.name()); Assert.assertEquals(ValueType.DOUBLE, capabilities.getType()); - Assert.assertFalse(capabilities.hasMultipleValues()); - Assert.assertTrue(capabilities.isComplete()); + Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); } @Test @@ -354,8 +352,7 @@ public void test_getColumnCapabilities_long() final ColumnCapabilities capabilities = adapter.getColumnCapabilities(ValueType.LONG.name()); Assert.assertEquals(ValueType.LONG, capabilities.getType()); - Assert.assertFalse(capabilities.hasMultipleValues()); - Assert.assertTrue(capabilities.isComplete()); + Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); } @Test @@ -369,8 +366,7 @@ public void test_getColumnCapabilities_string() // Note: unlike numeric types, STRING-typed columns might have multiple values, so they report as incomplete. It // would be good in the future to support some way of changing this, when it is known ahead of time that // multi-valuedness is definitely happening or is definitely impossible. - Assert.assertFalse(capabilities.hasMultipleValues()); - Assert.assertFalse(capabilities.isComplete()); + Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); } @Test @@ -382,8 +378,7 @@ public void test_getColumnCapabilities_complex() // Note: unlike numeric types, COMPLEX-typed columns report that they are incomplete. Assert.assertEquals(ValueType.COMPLEX, capabilities.getType()); - Assert.assertFalse(capabilities.hasMultipleValues()); - Assert.assertFalse(capabilities.isComplete()); + Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java index 53e93c7a3b54..e221edd9c73f 100644 --- a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java +++ b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java @@ -47,7 +47,7 @@ public void testSerde() throws Exception Assert.assertTrue(cc.isDictionaryEncoded()); Assert.assertFalse(cc.isRunLengthEncoded()); Assert.assertTrue(cc.hasSpatialIndexes()); - Assert.assertTrue(cc.hasMultipleValues()); + Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); Assert.assertFalse(cc.isFilterable()); } @@ -72,7 +72,7 @@ public void testDeserialization() throws Exception Assert.assertTrue(cc.isDictionaryEncoded()); Assert.assertTrue(cc.isRunLengthEncoded()); Assert.assertTrue(cc.hasSpatialIndexes()); - Assert.assertTrue(cc.hasMultipleValues()); + Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); Assert.assertFalse(cc.isFilterable()); } diff --git a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java index e8e3be4e5d00..29046801cdcf 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java @@ -118,9 +118,8 @@ public void test_getColumnCapabilities_string() Assert.assertEquals(ValueType.STRING, capabilities.getType()); Assert.assertTrue(capabilities.isDictionaryEncoded()); Assert.assertFalse(capabilities.hasBitmapIndexes()); - Assert.assertFalse(capabilities.hasMultipleValues()); + Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(capabilities.hasSpatialIndexes()); - Assert.assertTrue(capabilities.isComplete()); } @Test @@ -131,9 +130,8 @@ public void test_getColumnCapabilities_long() Assert.assertEquals(ValueType.LONG, capabilities.getType()); Assert.assertFalse(capabilities.isDictionaryEncoded()); Assert.assertFalse(capabilities.hasBitmapIndexes()); - Assert.assertFalse(capabilities.hasMultipleValues()); + Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(capabilities.hasSpatialIndexes()); - Assert.assertTrue(capabilities.isComplete()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java index 526c63c60c17..16e090dc3d53 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java @@ -357,8 +357,7 @@ public ColumnCapabilities getColumnCapabilities(String column) { return new ColumnCapabilitiesImpl().setType(ValueType.STRING) .setHasMultipleValues(true) - .setDictionaryEncoded(true) - .setIsComplete(true); + .setDictionaryEncoded(true); } }; final BaseObjectColumnValueSelector selectorImplicit = @@ -814,9 +813,9 @@ public void testCapabilities() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertTrue(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); + Assert.assertTrue(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertFalse(caps.isComplete()); caps = Z_CONCAT_X.capabilities("expr"); Assert.assertEquals(ValueType.STRING, caps.getType()); @@ -824,8 +823,8 @@ public void testCapabilities() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertTrue(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); + Assert.assertTrue(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertFalse(caps.isComplete()); } } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java index b1a9eeeab14d..d31bbe7bc30c 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java @@ -417,7 +417,7 @@ public boolean isNull() @Override public ColumnCapabilities capabilities(String columnName) { - return new ColumnCapabilitiesImpl().setType(ValueType.LONG); + return ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.LONG); } @Override From 825bca7a3514caab1c1e4b9661622fa9ac3ea1cd Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 30 Apr 2020 11:57:20 -0700 Subject: [PATCH 02/11] remove artifical, always multi-value capabilities from IncrementalIndexStorageAdapter and fix up fallout from that, fix ColumnCapabilities merge in index merger --- .../druid/segment/DimensionIndexer.java | 13 ++++++ .../apache/druid/segment/IndexMergerV9.java | 8 +++- .../druid/segment/StringDimensionIndexer.java | 11 ++++- .../segment/column/ColumnCapabilities.java | 10 ++++- .../column/ColumnCapabilitiesImpl.java | 40 ++++++++++++++++++- .../segment/incremental/IncrementalIndex.java | 26 ++++++++---- .../IncrementalIndexStorageAdapter.java | 21 +--------- .../metadata/SegmentMetadataQueryTest.java | 24 +++++------ .../SegmentMetadataUnionQueryTest.java | 2 +- .../IndexMergerV9WithSpatialIndexTest.java | 3 +- .../QueryableIndexColumnCapabilitiesTest.java | 7 +++- 11 files changed, 118 insertions(+), 47 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java index 99921eb79c08..8f5a7fee1c26 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java @@ -127,6 +127,19 @@ public interface DimensionIndexer */ EncodedKeyComponentType processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions); + /** + * This method will be called whenever a known dimension is absent in a row to allow an indexer + * to account for any missing rows. Useful so that a {@link DimensionSelector} built on top of an + * {@link IncrementalIndex} may accurately report {@link DimensionSelector#nameLookupPossibleInAdvance()}. + * + * At index persist/merge time all missing columns for a row will be explicitly replaced with the value appropriate + * null or default value. + */ + default void setSparseIndexed() + { + // no-op, only implement this if the dimension needs to perform any special handling for absent rows + } + /** * Gives the estimated size in bytes for the given key * diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java index 82fbc67c2a92..29b53dc7cc77 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java @@ -717,11 +717,15 @@ private void mergeCapabilities( for (IndexableAdapter adapter : adapters) { for (String dimension : adapter.getDimensionNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(dimension); - capabilitiesMap.computeIfAbsent(dimension, d -> ColumnCapabilitiesImpl.complete(capabilities, false)); + capabilitiesMap.compute(dimension, (d, existingCapabilities) -> + ColumnCapabilitiesImpl.complete(capabilities, false) + .merge(ColumnCapabilitiesImpl.complete(existingCapabilities, false))); } for (String metric : adapter.getMetricNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(metric); - capabilitiesMap.computeIfAbsent(metric, m -> ColumnCapabilitiesImpl.complete(capabilities, false)); + capabilitiesMap.compute(metric, (m, existingCapabilities) -> + ColumnCapabilitiesImpl.complete(capabilities, false) + .merge(ColumnCapabilitiesImpl.complete(existingCapabilities, false))); metricsValueTypes.put(metric, capabilities.getType()); metricTypeNames.put(metric, adapter.getMetricType(metric)); } diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index ada146d56d41..dd1cef8a4f1d 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -234,6 +234,7 @@ public String getValueFromSortedId(int index) private final MultiValueHandling multiValueHandling; private final boolean hasBitmapIndexes; private boolean hasMultipleValues = false; + private boolean isSparse = false; @Nullable private SortedDimensionDictionary sortedLookup; @@ -301,6 +302,12 @@ public int[] processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimVal return encodedDimensionValues; } + @Override + public void setSparseIndexed() + { + isSparse = true; + } + @Override public long estimateEncodedKeyComponentSize(int[] key) { @@ -623,7 +630,9 @@ public String lookupName(int id) @Override public boolean nameLookupPossibleInAdvance() { - return true; + // name lookup is possible in advance if we got a value for every row (setSparseIndexed was not called on this column) + // or we've encountered an actual null value and it is present in our dictionary + return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID; } @Nullable diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index 79abdafa306f..308a73ec197c 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -62,7 +62,7 @@ public boolean isUnknown() public Capable complete(boolean convertUnknownToTrue) { - return this == UNKNOWN ? convertUnknownToTrue ? TRUE : FALSE : this; + return this == UNKNOWN ? Capable.of(convertUnknownToTrue) : this; } public Capable and(Capable other) @@ -73,6 +73,14 @@ public Capable and(Capable other) return this == TRUE && other == TRUE ? TRUE : FALSE; } + public Capable or(Capable other) + { + if (this == TRUE) { + return TRUE; + } + return other; + } + public static Capable of(boolean bool) { return bool ? TRUE : FALSE; diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index ebf2bc509921..9c07a2c9c32d 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.ISE; import javax.annotation.Nullable; @@ -30,7 +31,7 @@ */ public class ColumnCapabilitiesImpl implements ColumnCapabilities { - public static ColumnCapabilitiesImpl copyOf(final ColumnCapabilities other) + public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities other) { final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); if (other != null) { @@ -47,8 +48,12 @@ public static ColumnCapabilitiesImpl copyOf(final ColumnCapabilities other) return capabilities; } - public static ColumnCapabilitiesImpl complete(final ColumnCapabilities capabilities, boolean convertUnknownToTrue) + @Nullable + public static ColumnCapabilitiesImpl complete(@Nullable final ColumnCapabilities capabilities, boolean convertUnknownToTrue) { + if (capabilities == null) { + return null; + } ColumnCapabilitiesImpl copy = copyOf(capabilities); copy.hasMultipleValues = copy.hasMultipleValues.complete(convertUnknownToTrue); copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.complete(convertUnknownToTrue); @@ -199,4 +204,35 @@ public ColumnCapabilitiesImpl setFilterable(boolean filterable) this.filterable = filterable; return this; } + + public ColumnCapabilities merge(@Nullable ColumnCapabilities other) + { + if (other == null) { + return this; + } + + if (type == null) { + type = other.getType(); + } + + if (!type.equals(other.getType())) { + throw new ISE("Cannot merge columns of type[%s] and [%s]", type, other.getType()); + } + + this.dictionaryEncoded |= other.isDictionaryEncoded(); + this.runLengthEncoded |= other.isRunLengthEncoded(); + this.hasInvertedIndexes |= other.hasBitmapIndexes(); + this.hasSpatialIndexes |= other.hasSpatialIndexes(); + this.filterable &= other.isFilterable(); + this.hasMultipleValues = this.hasMultipleValues.or(other.hasMultipleValues()); + this.dictionaryValuesSorted = this.dictionaryValuesSorted.and(other.areDictionaryValuesSorted()); + this.dictionaryValuesUnique = this.dictionaryValuesUnique.and(other.areDictionaryValuesUnique()); + + return this; + } + + public ColumnCapabilitiesImpl complete(boolean unknownToTrue) + { + return ColumnCapabilitiesImpl.complete(this, unknownToTrue); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index aa3ebd19a1a1..b1ec1f13f0dc 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -27,6 +27,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterators; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; import com.google.errorprone.annotations.concurrent.GuardedBy; @@ -89,6 +90,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.ConcurrentMap; @@ -641,12 +643,15 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) } final List rowDimensions = row.getDimensions(); - Object[] dims; List overflow = null; long dimsKeySize = 0; List parseExceptionMessages = new ArrayList<>(); synchronized (dimensionDescs) { + // all known dimensions are assumed missing until we encounter in the rowDimensions + Set absentDimensions = Sets.newHashSet(dimensionDescs.keySet()); + + // first, process dimension values present in the row dims = new Object[dimensionDescs.size()]; for (String dimension : rowDimensions) { if (Strings.isNullOrEmpty(dimension)) { @@ -657,6 +662,7 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) DimensionDesc desc = dimensionDescs.get(dimension); if (desc != null) { capabilities = desc.getCapabilities(); + absentDimensions.remove(dimension); } else { wasNewDim = true; capabilities = columnCapabilities.get(dimension); @@ -672,10 +678,7 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) DimensionIndexer indexer = desc.getIndexer(); Object dimsKey = null; try { - dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent( - row.getRaw(dimension), - true - ); + dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent(row.getRaw(dimension), true); } catch (ParseException pe) { parseExceptionMessages.add(pe.getMessage()); @@ -689,6 +692,10 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) } if (wasNewDim) { + // unless this is the first row we are processing, all newly discovered columns will be sparse + if (maxIngestedEventTime != null) { + indexer.setSparseIndexed(); + } if (overflow == null) { overflow = new ArrayList<>(); } @@ -708,6 +715,11 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) dims[desc.getIndex()] = dimsKey; } } + + // process any dimensions with missing values in the row + for (String missing : absentDimensions) { + dimensionDescs.get(missing).getIndexer().setSparseIndexed(); + } } if (overflow != null) { @@ -924,8 +936,7 @@ private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type) .setHasBitmapIndexes(true) .setDictionaryEncoded(true) .setDictionaryValuesUnique(true) - .setDictionaryValuesSorted(false) - .setHasMultipleValues(false); + .setDictionaryValuesSorted(false); } else { return ColumnCapabilitiesImpl.createSimpleNumericColumn(type); } @@ -984,6 +995,7 @@ public StorageAdapter toStorageAdapter() return new IncrementalIndexStorageAdapter(this); } + @Nullable public ColumnCapabilities getCapabilities(String column) { return columnCapabilities.get(column); diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index cc792a5314c6..9e284553c639 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -39,7 +39,6 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; -import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.segment.filter.BooleanValueMatcher; @@ -142,24 +141,8 @@ public Comparable getMaxValue(String column) @Override public ColumnCapabilities getColumnCapabilities(String column) { - // Different from index.getCapabilities because, in a way, IncrementalIndex's string-typed dimensions - // are always potentially multi-valued at query time. (Missing / null values for a row can potentially be - // represented by an empty array; see StringDimensionIndexer.IndexerDimensionSelector's getRow method.) - // - // We don't want to represent this as having-multiple-values in index.getCapabilities, because that's used - // at index-persisting time to determine if we need a multi-value column or not. However, that means we - // need to tweak the capabilities here in the StorageAdapter (a query-time construct), so at query time - // they appear multi-valued. - - final ColumnCapabilities capabilitiesFromIndex = index.getCapabilities(column); - final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(column); - if (dimensionDesc != null && dimensionDesc.getCapabilities().getType() == ValueType.STRING) { - final ColumnCapabilitiesImpl retVal = ColumnCapabilitiesImpl.copyOf(capabilitiesFromIndex); - retVal.setHasMultipleValues(true); - return retVal; - } else { - return capabilitiesFromIndex; - } + // snapshot the current state + return ColumnCapabilitiesImpl.complete(index.getCapabilities(column), false); } @Override diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java index 5f044ec339f6..9da06719d9c2 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java @@ -227,7 +227,7 @@ public SegmentMetadataQueryTest( "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1, + false, preferedSize1, 1, "preferred", @@ -268,7 +268,7 @@ public SegmentMetadataQueryTest( "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap2, + false, placementSize2, 1, null, @@ -304,7 +304,7 @@ public void testSegmentMetadataQueryWithRollupMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 0, null, @@ -372,7 +372,7 @@ public void testSegmentMetadataQueryWithHasMultipleValuesMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 1, null, @@ -440,7 +440,7 @@ public void testSegmentMetadataQueryWithComplexColumnMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 1, null, @@ -509,7 +509,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge() } ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, size1 + size2, 1, "preferred", @@ -530,7 +530,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge2() } ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, size1 + size2, 3, "spot", @@ -551,7 +551,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge3() } ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, size1 + size2, 9, "automotive", @@ -637,7 +637,7 @@ public void testSegmentMetadataQueryWithNoAnalysisTypesMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 0, null, @@ -699,7 +699,7 @@ public void testSegmentMetadataQueryWithAggregatorsMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 0, null, @@ -757,7 +757,7 @@ public void testSegmentMetadataQueryWithTimestampSpecMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 0, null, @@ -815,7 +815,7 @@ public void testSegmentMetadataQueryWithQueryGranularityMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 0, null, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java index ccfcce7e563f..55d8b18babd3 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java @@ -102,7 +102,7 @@ public void testSegmentMetadataUnionQuery() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap, + false, mmap ? 43524 : 43056, 1, "preferred", diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java index 1c68a21568d3..b4525c018eee 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java @@ -48,6 +48,7 @@ import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; +import org.apache.druid.testing.InitializedNullHandlingTest; import org.joda.time.Interval; import org.junit.Test; import org.junit.runner.RunWith; @@ -66,7 +67,7 @@ /** */ @RunWith(Parameterized.class) -public class IndexMergerV9WithSpatialIndexTest +public class IndexMergerV9WithSpatialIndexTest extends InitializedNullHandlingTest { public static final int NUM_POINTS = 5000; diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index dc4a7f53146d..66a59e5b4df5 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -38,6 +38,7 @@ import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.incremental.IncrementalIndex; @@ -152,7 +153,11 @@ public void testStringColumn() Assert.assertTrue(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); + // multi-value is unknown unless explicitly set to 'true' + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); + // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, + // coercing any 'UNKNOWN' values to false + Assert.assertFalse(ColumnCapabilitiesImpl.complete(caps, false).hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities(); From 223b3e8a23af04b261186bd5db19402aff3bbed4 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 30 Apr 2020 13:08:32 -0700 Subject: [PATCH 03/11] fix typo --- .../query/groupby/epinephelinae/GroupByQueryEngineV2.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 58a838be1aab..ea810b99346d 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -315,13 +315,13 @@ public static int getCardinalityForArrayAggregation( /** * Checks whether all "dimensions" are either single-valued, or if allowed, nonexistent. Since non-existent column * selectors will show up as full of nulls they are effectively single valued, however they can also be null during - * broker merge, for example with an 'inline' datasource subquery. 'missingMeansNonexistent' is sort of a hack to let + * broker merge, for example with an 'inline' datasource subquery. 'missingMeansNonExistent' is sort of a hack to let * the vectorized engine, which only operates on actual segments, to still work in this case for non-existent columns. */ public static boolean isAllSingleValueDims( final Function capabilitiesFunction, final List dimensions, - final boolean missingMeansNonexistent + final boolean missingMeansNonExistent ) { return dimensions @@ -337,7 +337,7 @@ public static boolean isAllSingleValueDims( // Now check column capabilities. final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); return (columnCapabilities != null && !columnCapabilities.hasMultipleValues().isMaybeTrue()) || - (missingMeansNonexistent && columnCapabilities == null); + (missingMeansNonExistent && columnCapabilities == null); }); } From 8660879fc86b76c3a23cedec0287487f74357c19 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 30 Apr 2020 14:20:21 -0700 Subject: [PATCH 04/11] remove unused method --- .../segment/column/ColumnCapabilitiesImpl.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 9c07a2c9c32d..231305b4f035 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -48,8 +48,15 @@ public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities o return capabilities; } + /** + * Used at indexing time to finalize all {@link ColumnCapabilities.Capable#UNKNOWN} values to + * {@link ColumnCapabilities.Capable#FALSE}, in order to present a snapshot of the state of the this column + */ @Nullable - public static ColumnCapabilitiesImpl complete(@Nullable final ColumnCapabilities capabilities, boolean convertUnknownToTrue) + public static ColumnCapabilitiesImpl complete( + @Nullable final ColumnCapabilities capabilities, + boolean convertUnknownToTrue + ) { if (capabilities == null) { return null; @@ -230,9 +237,4 @@ public ColumnCapabilities merge(@Nullable ColumnCapabilities other) return this; } - - public ColumnCapabilitiesImpl complete(boolean unknownToTrue) - { - return ColumnCapabilitiesImpl.complete(this, unknownToTrue); - } } From 20effb45b9ee7a86ba9877e634404119dc35b4ae Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 26 May 2020 16:07:41 -0700 Subject: [PATCH 05/11] review stuffs, revert IncrementalIndexStorageAdapater capabilities change, plumb lame workaround to SegmentAnalyzer --- .../druid/query/metadata/SegmentAnalyzer.java | 17 +++++++++--- .../druid/segment/DimensionIndexer.java | 14 +++++----- .../druid/segment/DoubleDimensionIndexer.java | 6 +++++ .../druid/segment/FloatDimensionIndexer.java | 6 +++++ .../apache/druid/segment/IndexMergerV9.java | 8 +++--- .../druid/segment/LongDimensionIndexer.java | 6 +++++ .../RowBasedColumnSelectorFactory.java | 4 +-- .../segment/column/ColumnCapabilities.java | 4 +-- .../column/ColumnCapabilitiesImpl.java | 27 ++++++++++++------- .../segment/incremental/IncrementalIndex.java | 12 ++++----- .../IncrementalIndexStorageAdapter.java | 20 ++++++++++++-- .../QueryableIndexColumnCapabilitiesTest.java | 2 +- .../segment/virtual/VirtualColumnsTest.java | 2 +- 13 files changed, 89 insertions(+), 39 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 9f8954dc5098..425b86b9de0c 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -45,6 +45,7 @@ import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; import org.apache.druid.segment.serde.ComplexMetricSerde; import org.apache.druid.segment.serde.ComplexMetrics; import org.joda.time.DateTime; @@ -101,9 +102,17 @@ public Map analyze(Segment segment) for (String columnName : columnNames) { final ColumnHolder columnHolder = index == null ? null : index.getColumnHolder(columnName); - final ColumnCapabilities capabilities = columnHolder != null - ? columnHolder.getCapabilities() - : storageAdapter.getColumnCapabilities(columnName); + final ColumnCapabilities capabilities; + if (columnHolder != null) { + capabilities = columnHolder.getCapabilities(); + } else { + // if + if (storageAdapter instanceof IncrementalIndexStorageAdapter) { + capabilities = ((IncrementalIndexStorageAdapter) storageAdapter).getSnapshotColumnCapabilities(columnName); + } else { + capabilities = storageAdapter.getColumnCapabilities(columnName); + } + } final ColumnAnalysis analysis; final ValueType type = capabilities.getType(); @@ -138,7 +147,7 @@ public Map analyze(Segment segment) // Add time column too ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(ColumnHolder.TIME_COLUMN_NAME); if (timeCapabilities == null) { - timeCapabilities = ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.LONG); + timeCapabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); } columns.put( ColumnHolder.TIME_COLUMN_NAME, diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java index 8f5a7fee1c26..cf7631db08bb 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java @@ -128,17 +128,17 @@ public interface DimensionIndexer EncodedKeyComponentType processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions); /** - * This method will be called whenever a known dimension is absent in a row to allow an indexer - * to account for any missing rows. Useful so that a {@link DimensionSelector} built on top of an - * {@link IncrementalIndex} may accurately report {@link DimensionSelector#nameLookupPossibleInAdvance()}. + * This method will be called while building an {@link IncrementalIndex} whenever a known dimension column (either + * through an explicit schema on the ingestion spec, or auto-discovered while processing rows) is absent in any row + * that is processed, to allow an indexer to account for any missing rows if necessary. Useful so that a string + * {@link DimensionSelector} built on top of an {@link IncrementalIndex} may accurately report + * {@link DimensionSelector#nameLookupPossibleInAdvance()} by allowing it to track if it has any implicit null valued + * rows. * * At index persist/merge time all missing columns for a row will be explicitly replaced with the value appropriate * null or default value. */ - default void setSparseIndexed() - { - // no-op, only implement this if the dimension needs to perform any special handling for absent rows - } + void setSparseIndexed(); /** * Gives the estimated size in bytes for the given key diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index 6645e1fc7328..b802f7555135 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -47,6 +47,12 @@ public Double processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimVa return DimensionHandlerUtils.convertObjectToDouble(dimValues, reportParseExceptions); } + @Override + public void setSparseIndexed() + { + // no-op, double columns do not have a dictionary to track null values + } + @Override public long estimateEncodedKeyComponentSize(Double key) { diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java index 41328828de2b..dce58a23b23f 100644 --- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java @@ -48,6 +48,12 @@ public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimVal return DimensionHandlerUtils.convertObjectToFloat(dimValues, reportParseExceptions); } + @Override + public void setSparseIndexed() + { + // no-op, float columns do not have a dictionary to track null values + } + @Override public long estimateEncodedKeyComponentSize(Float key) { diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java index 29b53dc7cc77..b70588ecbf73 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java @@ -718,14 +718,14 @@ private void mergeCapabilities( for (String dimension : adapter.getDimensionNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(dimension); capabilitiesMap.compute(dimension, (d, existingCapabilities) -> - ColumnCapabilitiesImpl.complete(capabilities, false) - .merge(ColumnCapabilitiesImpl.complete(existingCapabilities, false))); + ColumnCapabilitiesImpl.snapshot(capabilities) + .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities))); } for (String metric : adapter.getMetricNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(metric); capabilitiesMap.compute(metric, (m, existingCapabilities) -> - ColumnCapabilitiesImpl.complete(capabilities, false) - .merge(ColumnCapabilitiesImpl.complete(existingCapabilities, false))); + ColumnCapabilitiesImpl.snapshot(capabilities) + .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities))); metricsValueTypes.put(metric, capabilities.getType()); metricTypeNames.put(metric, adapter.getMetricType(metric)); } diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java index d35724398f9a..f2a91278f6bb 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java @@ -48,6 +48,12 @@ public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValu return DimensionHandlerUtils.convertObjectToLong(dimValues, reportParseExceptions); } + @Override + public void setSparseIndexed() + { + // no-op, long columns do not have a dictionary to track null values + } + @Override public long estimateEncodedKeyComponentSize(Long key) { diff --git a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java index 2b20b0d2709e..77e8978e2bee 100644 --- a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java @@ -96,7 +96,7 @@ static ColumnCapabilities getColumnCapabilities( { if (ColumnHolder.TIME_COLUMN_NAME.equals(columnName)) { // TIME_COLUMN_NAME is handled specially; override the provided rowSignature. - return ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.LONG); + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); } else { final ValueType valueType = rowSignature.getColumnType(columnName).orElse(null); @@ -106,7 +106,7 @@ static ColumnCapabilities getColumnCapabilities( // is nonnumeric), set isComplete false to compensate. if (valueType != null) { if (valueType.isNumeric()) { - return ColumnCapabilitiesImpl.createSimpleNumericColumn(valueType); + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(valueType); } return new ColumnCapabilitiesImpl() .setType(valueType) diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index 308a73ec197c..a9af25b4602c 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -60,9 +60,9 @@ public boolean isUnknown() return this == UNKNOWN; } - public Capable complete(boolean convertUnknownToTrue) + public Capable coerceUnknownToBoolean(boolean unknownIsTrue) { - return this == UNKNOWN ? Capable.of(convertUnknownToTrue) : this; + return this == UNKNOWN ? Capable.of(unknownIsTrue) : this; } public Capable and(Capable other) diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 231305b4f035..9ddbd04a372f 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -49,22 +49,29 @@ public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities o } /** - * Used at indexing time to finalize all {@link ColumnCapabilities.Capable#UNKNOWN} values to - * {@link ColumnCapabilities.Capable#FALSE}, in order to present a snapshot of the state of the this column + * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to + * {@link Capable#FALSE}, in order to present a snapshot of the state of the this column */ @Nullable - public static ColumnCapabilitiesImpl complete( - @Nullable final ColumnCapabilities capabilities, - boolean convertUnknownToTrue - ) + public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities) + { + return snapshot(capabilities, false); + } + + /** + * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to + * {@link Capable#FALSE} or {@link Capable#TRUE}, in order to present a snapshot of the state of the this column + */ + @Nullable + public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, boolean unknownIsTrue) { if (capabilities == null) { return null; } ColumnCapabilitiesImpl copy = copyOf(capabilities); - copy.hasMultipleValues = copy.hasMultipleValues.complete(convertUnknownToTrue); - copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.complete(convertUnknownToTrue); - copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.complete(convertUnknownToTrue); + copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(unknownIsTrue); + copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(unknownIsTrue); + copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(unknownIsTrue); return copy; } @@ -72,7 +79,7 @@ public static ColumnCapabilitiesImpl complete( /** * Create a no frills, simple column with {@link ValueType} set and everything else false */ - public static ColumnCapabilitiesImpl createSimpleNumericColumn(ValueType valueType) + public static ColumnCapabilitiesImpl createSimpleNumericColumnCapabilities(ValueType valueType) { return new ColumnCapabilitiesImpl().setType(valueType) .setHasMultipleValues(false) diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index b1ec1f13f0dc..649aea9b6997 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -331,7 +331,7 @@ protected IncrementalIndex( //__time capabilities columnCapabilities.put( ColumnHolder.TIME_COLUMN_NAME, - ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.LONG) + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG) ); // This should really be more generic @@ -938,7 +938,7 @@ private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type) .setDictionaryValuesUnique(true) .setDictionaryValuesSorted(false); } else { - return ColumnCapabilitiesImpl.createSimpleNumericColumn(type); + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type); } } @@ -1133,17 +1133,17 @@ public MetricDesc(int index, AggregatorFactory factory) String typeInfo = factory.getTypeName(); if ("float".equalsIgnoreCase(typeInfo)) { - capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.FLOAT); + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT); this.type = typeInfo; } else if ("long".equalsIgnoreCase(typeInfo)) { - capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.LONG); + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); this.type = typeInfo; } else if ("double".equalsIgnoreCase(typeInfo)) { - capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.DOUBLE); + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE); this.type = typeInfo; } else { // in an ideal world complex type reports its actual column capabilities... - capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.COMPLEX); + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.COMPLEX); this.type = ComplexMetrics.getSerdeForType(typeInfo).getTypeName(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 9e284553c639..1ca5879564c2 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -141,8 +141,24 @@ public Comparable getMaxValue(String column) @Override public ColumnCapabilities getColumnCapabilities(String column) { - // snapshot the current state - return ColumnCapabilitiesImpl.complete(index.getCapabilities(column), false); + // Different from index.getCapabilities because, in a way, IncrementalIndex's string-typed dimensions + // are always potentially multi-valued at query time. (Missing / null values for a row can potentially be + // represented by an empty array; see StringDimensionIndexer.IndexerDimensionSelector's getRow method.) + // + // We don't want to represent this as having-multiple-values in index.getCapabilities, because that's used + // at index-persisting time to determine if we need a multi-value column or not. However, that means we + // need to tweak the capabilities here in the StorageAdapter (a query-time construct), so at query time + // they appear multi-valued. + + // Note that this could be improved if we snapshot the capabilities at cursor creation time and feed those through + // to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of + // multi-valuedness instead of the latest state. + return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true); + } + + public ColumnCapabilities getSnapshotColumnCapabilities(String column) + { + return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column)); } @Override diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index 66a59e5b4df5..c7783e992197 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -157,7 +157,7 @@ public void testStringColumn() Assert.assertTrue(caps.hasMultipleValues().isUnknown()); // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, // coercing any 'UNKNOWN' values to false - Assert.assertFalse(ColumnCapabilitiesImpl.complete(caps, false).hasMultipleValues().isMaybeTrue()); + Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities(); diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java index d31bbe7bc30c..597dbb6ee4e4 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java @@ -417,7 +417,7 @@ public boolean isNull() @Override public ColumnCapabilities capabilities(String columnName) { - return ColumnCapabilitiesImpl.createSimpleNumericColumn(ValueType.LONG); + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); } @Override From 382c9ded5f9b53fb899f2660a2a638a1d3b0ed22 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 26 May 2020 16:19:02 -0700 Subject: [PATCH 06/11] more comment --- .../apache/druid/query/metadata/SegmentAnalyzer.java | 3 ++- .../incremental/IncrementalIndexStorageAdapter.java | 11 +++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 425b86b9de0c..659b55b1680d 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -106,7 +106,8 @@ public Map analyze(Segment segment) if (columnHolder != null) { capabilities = columnHolder.getCapabilities(); } else { - // if + // this can be removed if we get to the point where IncrementalIndexStorageAdapter.getColumnCapabilities + // accurately reports the capabilities if (storageAdapter instanceof IncrementalIndexStorageAdapter) { capabilities = ((IncrementalIndexStorageAdapter) storageAdapter).getSnapshotColumnCapabilities(columnName); } else { diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 1ca5879564c2..8e8520d458b7 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -149,13 +149,20 @@ public ColumnCapabilities getColumnCapabilities(String column) // at index-persisting time to determine if we need a multi-value column or not. However, that means we // need to tweak the capabilities here in the StorageAdapter (a query-time construct), so at query time // they appear multi-valued. - + // // Note that this could be improved if we snapshot the capabilities at cursor creation time and feed those through // to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of - // multi-valuedness instead of the latest state. + // multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could + // be removed. return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true); } + /** + * Sad workaround for {@link org.apache.druid.query.metadata.SegmentAnalyzer} to deal with the fact that the + * response from {@link #getColumnCapabilities} is not accurate for string columns, in that it reports all string + * string columns as having multiple values. This method returns the actual capabilities of the underlying + * {@link IncrementalIndex}at the time this method is called. + */ public ColumnCapabilities getSnapshotColumnCapabilities(String column) { return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column)); From b583134171df6dd7b401bbf8d1b1b37dbe43bf66 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 26 May 2020 16:43:38 -0700 Subject: [PATCH 07/11] use volatile booleans --- .../java/org/apache/druid/segment/StringDimensionIndexer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index dd1cef8a4f1d..cd638e8bf237 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -233,8 +233,8 @@ public String getValueFromSortedId(int index) private final DimensionDictionary dimLookup; private final MultiValueHandling multiValueHandling; private final boolean hasBitmapIndexes; - private boolean hasMultipleValues = false; - private boolean isSparse = false; + private volatile boolean hasMultipleValues = false; + private volatile boolean isSparse = false; @Nullable private SortedDimensionDictionary sortedLookup; From a11d95afbf749976dfe2c163e976e81c04fba1d9 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 26 May 2020 16:45:55 -0700 Subject: [PATCH 08/11] fix line length --- .../java/org/apache/druid/segment/StringDimensionIndexer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index cd638e8bf237..c0200e1e2eff 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -630,8 +630,8 @@ public String lookupName(int id) @Override public boolean nameLookupPossibleInAdvance() { - // name lookup is possible in advance if we got a value for every row (setSparseIndexed was not called on this column) - // or we've encountered an actual null value and it is present in our dictionary + // name lookup is possible in advance if we got a value for every row (setSparseIndexed was not called on this + // column) or we've encountered an actual null value and it is present in our dictionary return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID; } From f639c2c860e57827ed85740fd242036c73e4dd43 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 4 Jun 2020 14:06:16 -0700 Subject: [PATCH 09/11] correctly handle missing columns for vector processors --- .../druid/segment/DimensionHandlerUtils.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 4409e3046c73..63fdb693dfc5 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -60,8 +60,7 @@ public final class DimensionHandlerUtils .setDictionaryEncoded(false) .setDictionaryValuesUnique(false) .setDictionaryValuesSorted(false) - .setHasBitmapIndexes(false) - .setHasMultipleValues(false); + .setHasBitmapIndexes(false); private DimensionHandlerUtils() { @@ -290,15 +289,19 @@ public static T makeVectorProcessor( final VectorColumnSelectorFactory selectorFactory ) { - final ColumnCapabilities capabilities = getEffectiveCapabilities( + final ColumnCapabilities originalCapabilities = + selectorFactory.getColumnCapabilities(dimensionSpec.getDimension()); + + final ColumnCapabilities effectiveCapabilites = getEffectiveCapabilities( dimensionSpec, - selectorFactory.getColumnCapabilities(dimensionSpec.getDimension()) + originalCapabilities ); - final ValueType type = capabilities.getType(); + final ValueType type = effectiveCapabilites.getType(); if (type == ValueType.STRING) { - if (capabilities.hasMultipleValues().isMaybeTrue()) { + // vector selectors should never have null column capabilities, these signify a non-existent column + if (originalCapabilities != null && effectiveCapabilites.hasMultipleValues().isMaybeTrue()) { return strategyFactory.makeMultiValueDimensionProcessor( selectorFactory.makeMultiValueDimensionSelector(dimensionSpec) ); @@ -329,7 +332,7 @@ public static T makeVectorProcessor( selectorFactory.makeValueSelector(dimensionSpec.getDimension()) ); } else { - throw new ISE("Unsupported type[%s]", capabilities.getType()); + throw new ISE("Unsupported type[%s]", effectiveCapabilites.getType()); } } } From 7dc8754d4363e276b5502f355357a039a5b769b6 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 4 Jun 2020 15:25:42 -0700 Subject: [PATCH 10/11] return ColumnCapabilities.Capable for BitmapIndexSelector.hasMultipleValues, fix vector processor selection for complex --- .../org/apache/druid/benchmark/BoundFilterBenchmark.java | 3 ++- .../benchmark/DimensionPredicateFilterBenchmark.java | 3 ++- .../org/apache/druid/benchmark/LikeFilterBenchmark.java | 3 ++- .../apache/druid/query/filter/BitmapIndexSelector.java | 3 ++- .../druid/segment/ColumnSelectorBitmapIndexSelector.java | 9 ++++++--- .../org/apache/druid/segment/DimensionHandlerUtils.java | 8 ++++++-- .../apache/druid/segment/filter/ExpressionFilter.java | 2 +- .../druid/segment/filter/ExtractionDimFilterTest.java | 5 +++-- 8 files changed, 24 insertions(+), 12 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java index 6c0b6d940ed4..f9a920a3d39f 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java @@ -33,6 +33,7 @@ import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.GenericIndexed; @@ -195,7 +196,7 @@ public CloseableIndexed getDimensionValues(String dimension) } @Override - public boolean hasMultipleValues(final String dimension) + public ColumnCapabilities.Capable hasMultipleValues(final String dimension) { throw new UnsupportedOperationException(); } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java index 8dd177f1e831..a845bb1a2233 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java @@ -35,6 +35,7 @@ import org.apache.druid.query.filter.DruidLongPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.GenericIndexed; @@ -166,7 +167,7 @@ public CloseableIndexed getDimensionValues(String dimension) } @Override - public boolean hasMultipleValues(final String dimension) + public ColumnCapabilities.Capable hasMultipleValues(final String dimension) { throw new UnsupportedOperationException(); } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java index 83551146eb24..5c4a0f35a60e 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java @@ -35,6 +35,7 @@ import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.GenericIndexed; @@ -166,7 +167,7 @@ public CloseableIndexed getDimensionValues(String dimension) } @Override - public boolean hasMultipleValues(final String dimension) + public ColumnCapabilities.Capable hasMultipleValues(final String dimension) { throw new UnsupportedOperationException(); } diff --git a/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java b/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java index 90307eb6380a..fd90e7412b68 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java @@ -24,6 +24,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.collections.spatial.ImmutableRTree; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.CloseableIndexed; import javax.annotation.Nullable; @@ -35,7 +36,7 @@ public interface BitmapIndexSelector @MustBeClosed @Nullable CloseableIndexed getDimensionValues(String dimension); - boolean hasMultipleValues(String dimension); + ColumnCapabilities.Capable hasMultipleValues(String dimension); int getNumRows(); BitmapFactory getBitmapFactory(); @Nullable diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java index d0d5f3ed5180..8254e34566a0 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java @@ -27,6 +27,7 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.NumericColumn; @@ -157,14 +158,16 @@ public void close() throws IOException } @Override - public boolean hasMultipleValues(final String dimension) + public ColumnCapabilities.Capable hasMultipleValues(final String dimension) { if (isVirtualColumn(dimension)) { - return virtualColumns.getVirtualColumn(dimension).capabilities(dimension).hasMultipleValues().isMaybeTrue(); + return virtualColumns.getVirtualColumn(dimension).capabilities(dimension).hasMultipleValues(); } final ColumnHolder columnHolder = index.getColumnHolder(dimension); - return columnHolder != null && columnHolder.getCapabilities().hasMultipleValues().isMaybeTrue(); + return columnHolder != null + ? columnHolder.getCapabilities().hasMultipleValues() + : ColumnCapabilities.Capable.UNKNOWN; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 63fdb693dfc5..9e5a12921c7c 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -299,9 +299,13 @@ public static T makeVectorProcessor( final ValueType type = effectiveCapabilites.getType(); + // vector selectors should never have null column capabilities, these signify a non-existent column, and complex + // columns should never be treated as a multi-value column, so always use single value string processor + final boolean forceSingleValue = + originalCapabilities == null || ValueType.COMPLEX.equals(originalCapabilities.getType()); + if (type == ValueType.STRING) { - // vector selectors should never have null column capabilities, these signify a non-existent column - if (originalCapabilities != null && effectiveCapabilites.hasMultipleValues().isMaybeTrue()) { + if (!forceSingleValue && effectiveCapabilites.hasMultipleValues().isMaybeTrue()) { return strategyFactory.makeMultiValueDimensionProcessor( selectorFactory.makeMultiValueDimensionSelector(dimensionSpec) ); diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index ef7613c6137c..0baa59490712 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -115,7 +115,7 @@ public boolean supportsBitmapIndex(final BitmapIndexSelector selector) // multiple values. The lack of multiple values is important because expression filters treat multi-value // arrays as nulls, which doesn't permit index based filtering. final String column = Iterables.getOnlyElement(requiredBindings.get()); - return selector.getBitmapIndex(column) != null && !selector.hasMultipleValues(column); + return selector.getBitmapIndex(column) != null && !selector.hasMultipleValues(column).isMaybeTrue(); } else { // Multi-column expression. return false; diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java index c7a560c19616..3ed33b8e3fc6 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java @@ -36,6 +36,7 @@ import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.ConciseBitmapSerdeFactory; @@ -146,9 +147,9 @@ public Iterator iterator() } @Override - public boolean hasMultipleValues(final String dimension) + public ColumnCapabilities.Capable hasMultipleValues(final String dimension) { - return true; + return ColumnCapabilities.Capable.TRUE; } @Override From 33b8313a4c6538ab158cf636a6025e05c4df1885 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 4 Jun 2020 20:48:50 -0700 Subject: [PATCH 11/11] false on non-existent --- .../druid/segment/ColumnSelectorBitmapIndexSelector.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java index 8254e34566a0..bd6de7a02d57 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java @@ -165,9 +165,11 @@ public ColumnCapabilities.Capable hasMultipleValues(final String dimension) } final ColumnHolder columnHolder = index.getColumnHolder(dimension); + // if ColumnHolder is null, the column doesn't exist, but report as not having multiple values so that + // the empty bitmap will be used return columnHolder != null ? columnHolder.getCapabilities().hasMultipleValues() - : ColumnCapabilities.Capable.UNKNOWN; + : ColumnCapabilities.Capable.FALSE; } @Override