diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java index 6c0b6d940ed4..f9a920a3d39f 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java @@ -33,6 +33,7 @@ import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.GenericIndexed; @@ -195,7 +196,7 @@ public CloseableIndexed getDimensionValues(String dimension) } @Override - public boolean hasMultipleValues(final String dimension) + public ColumnCapabilities.Capable hasMultipleValues(final String dimension) { throw new UnsupportedOperationException(); } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java index 8dd177f1e831..a845bb1a2233 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java @@ -35,6 +35,7 @@ import org.apache.druid.query.filter.DruidLongPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.GenericIndexed; @@ -166,7 +167,7 @@ public CloseableIndexed getDimensionValues(String dimension) } @Override - public boolean hasMultipleValues(final String dimension) + public ColumnCapabilities.Capable hasMultipleValues(final String dimension) { throw new UnsupportedOperationException(); } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java index 83551146eb24..5c4a0f35a60e 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java @@ -35,6 +35,7 @@ import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.GenericIndexed; @@ -166,7 +167,7 @@ public CloseableIndexed getDimensionValues(String dimension) } @Override - public boolean hasMultipleValues(final String dimension) + public ColumnCapabilities.Capable hasMultipleValues(final String dimension) { throw new UnsupportedOperationException(); } diff --git a/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java b/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java index 90307eb6380a..fd90e7412b68 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java @@ -24,6 +24,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.collections.spatial.ImmutableRTree; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.CloseableIndexed; import javax.annotation.Nullable; @@ -35,7 +36,7 @@ public interface BitmapIndexSelector @MustBeClosed @Nullable CloseableIndexed getDimensionValues(String dimension); - boolean hasMultipleValues(String dimension); + ColumnCapabilities.Capable hasMultipleValues(String dimension); int getNumRows(); BitmapFactory getBitmapFactory(); @Nullable diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 9a417c9d6290..f19ae25035ae 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -321,13 +321,13 @@ public static int getCardinalityForArrayAggregation( /** * Checks whether all "dimensions" are either single-valued, or if allowed, nonexistent. Since non-existent column * selectors will show up as full of nulls they are effectively single valued, however they can also be null during - * broker merge, for example with an 'inline' datasource subquery. 'missingMeansNonexistent' is sort of a hack to let + * broker merge, for example with an 'inline' datasource subquery. 'missingMeansNonExistent' is sort of a hack to let * the vectorized engine, which only operates on actual segments, to still work in this case for non-existent columns. */ public static boolean isAllSingleValueDims( final Function capabilitiesFunction, final List dimensions, - final boolean missingMeansNonexistent + final boolean missingMeansNonExistent ) { return dimensions @@ -342,8 +342,8 @@ public static boolean isAllSingleValueDims( // Now check column capabilities. final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); - return (columnCapabilities != null && !columnCapabilities.hasMultipleValues()) || - (missingMeansNonexistent && columnCapabilities == null); + return (columnCapabilities != null && !columnCapabilities.hasMultipleValues().isMaybeTrue()) || + (missingMeansNonExistent && columnCapabilities == null); }); } diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index c5f1800acd15..659b55b1680d 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -45,6 +45,7 @@ import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; import org.apache.druid.segment.serde.ComplexMetricSerde; import org.apache.druid.segment.serde.ComplexMetrics; import org.joda.time.DateTime; @@ -101,9 +102,18 @@ public Map analyze(Segment segment) for (String columnName : columnNames) { final ColumnHolder columnHolder = index == null ? null : index.getColumnHolder(columnName); - final ColumnCapabilities capabilities = columnHolder != null - ? columnHolder.getCapabilities() - : storageAdapter.getColumnCapabilities(columnName); + final ColumnCapabilities capabilities; + if (columnHolder != null) { + capabilities = columnHolder.getCapabilities(); + } else { + // this can be removed if we get to the point where IncrementalIndexStorageAdapter.getColumnCapabilities + // accurately reports the capabilities + if (storageAdapter instanceof IncrementalIndexStorageAdapter) { + capabilities = ((IncrementalIndexStorageAdapter) storageAdapter).getSnapshotColumnCapabilities(columnName); + } else { + capabilities = storageAdapter.getColumnCapabilities(columnName); + } + } final ColumnAnalysis analysis; final ValueType type = capabilities.getType(); @@ -138,7 +148,7 @@ public Map analyze(Segment segment) // Add time column too ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(ColumnHolder.TIME_COLUMN_NAME); if (timeCapabilities == null) { - timeCapabilities = new ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false); + timeCapabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); } columns.put( ColumnHolder.TIME_COLUMN_NAME, @@ -172,7 +182,7 @@ private ColumnAnalysis analyzeNumericColumn( long size = 0; if (analyzingSize()) { - if (capabilities.hasMultipleValues()) { + if (capabilities.hasMultipleValues().isTrue()) { return ColumnAnalysis.error("multi_value"); } @@ -181,7 +191,7 @@ private ColumnAnalysis analyzeNumericColumn( return new ColumnAnalysis( capabilities.getType().name(), - capabilities.hasMultipleValues(), + capabilities.hasMultipleValues().isTrue(), size, null, null, @@ -231,7 +241,7 @@ private ColumnAnalysis analyzeStringColumn( return new ColumnAnalysis( capabilities.getType().name(), - capabilities.hasMultipleValues(), + capabilities.hasMultipleValues().isTrue(), size, analyzingCardinality() ? cardinality : 0, min, @@ -308,7 +318,7 @@ public Long accumulate(Long accumulated, Cursor cursor) return new ColumnAnalysis( capabilities.getType().name(), - capabilities.hasMultipleValues(), + capabilities.hasMultipleValues().isTrue(), size, cardinality, min, @@ -324,7 +334,7 @@ private ColumnAnalysis analyzeComplexColumn( ) { try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) { - final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues(); + final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue(); long size = 0; if (analyzingSize() && complexColumn != null) { diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java index 5fb698a484e4..05e85fdd4c24 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java @@ -197,6 +197,6 @@ private static T makeProcessorInternal( */ private static boolean mayBeMultiValue(@Nullable final ColumnCapabilities capabilities) { - return capabilities == null || !capabilities.isComplete() || capabilities.hasMultipleValues(); + return capabilities == null || capabilities.hasMultipleValues().isMaybeTrue(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java index 79b6e8900745..bd6de7a02d57 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java @@ -27,6 +27,7 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.NumericColumn; @@ -157,14 +158,18 @@ public void close() throws IOException } @Override - public boolean hasMultipleValues(final String dimension) + public ColumnCapabilities.Capable hasMultipleValues(final String dimension) { if (isVirtualColumn(dimension)) { return virtualColumns.getVirtualColumn(dimension).capabilities(dimension).hasMultipleValues(); } final ColumnHolder columnHolder = index.getColumnHolder(dimension); - return columnHolder != null && columnHolder.getCapabilities().hasMultipleValues(); + // if ColumnHolder is null, the column doesn't exist, but report as not having multiple values so that + // the empty bitmap will be used + return columnHolder != null + ? columnHolder.getCapabilities().hasMultipleValues() + : ColumnCapabilities.Capable.FALSE; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 0c8b3c280c6e..9e5a12921c7c 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -289,15 +289,23 @@ public static T makeVectorProcessor( final VectorColumnSelectorFactory selectorFactory ) { - final ColumnCapabilities capabilities = getEffectiveCapabilities( + final ColumnCapabilities originalCapabilities = + selectorFactory.getColumnCapabilities(dimensionSpec.getDimension()); + + final ColumnCapabilities effectiveCapabilites = getEffectiveCapabilities( dimensionSpec, - selectorFactory.getColumnCapabilities(dimensionSpec.getDimension()) + originalCapabilities ); - final ValueType type = capabilities.getType(); + final ValueType type = effectiveCapabilites.getType(); + + // vector selectors should never have null column capabilities, these signify a non-existent column, and complex + // columns should never be treated as a multi-value column, so always use single value string processor + final boolean forceSingleValue = + originalCapabilities == null || ValueType.COMPLEX.equals(originalCapabilities.getType()); if (type == ValueType.STRING) { - if (capabilities.hasMultipleValues()) { + if (!forceSingleValue && effectiveCapabilites.hasMultipleValues().isMaybeTrue()) { return strategyFactory.makeMultiValueDimensionProcessor( selectorFactory.makeMultiValueDimensionSelector(dimensionSpec) ); @@ -328,7 +336,7 @@ public static T makeVectorProcessor( selectorFactory.makeValueSelector(dimensionSpec.getDimension()) ); } else { - throw new ISE("Unsupported type[%s]", capabilities.getType()); + throw new ISE("Unsupported type[%s]", effectiveCapabilites.getType()); } } } diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java index 99921eb79c08..cf7631db08bb 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java @@ -127,6 +127,19 @@ public interface DimensionIndexer */ EncodedKeyComponentType processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions); + /** + * This method will be called while building an {@link IncrementalIndex} whenever a known dimension column (either + * through an explicit schema on the ingestion spec, or auto-discovered while processing rows) is absent in any row + * that is processed, to allow an indexer to account for any missing rows if necessary. Useful so that a string + * {@link DimensionSelector} built on top of an {@link IncrementalIndex} may accurately report + * {@link DimensionSelector#nameLookupPossibleInAdvance()} by allowing it to track if it has any implicit null valued + * rows. + * + * At index persist/merge time all missing columns for a row will be explicitly replaced with the value appropriate + * null or default value. + */ + void setSparseIndexed(); + /** * Gives the estimated size in bytes for the given key * diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index 6645e1fc7328..b802f7555135 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -47,6 +47,12 @@ public Double processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimVa return DimensionHandlerUtils.convertObjectToDouble(dimValues, reportParseExceptions); } + @Override + public void setSparseIndexed() + { + // no-op, double columns do not have a dictionary to track null values + } + @Override public long estimateEncodedKeyComponentSize(Double key) { diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java index 41328828de2b..dce58a23b23f 100644 --- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java @@ -48,6 +48,12 @@ public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimVal return DimensionHandlerUtils.convertObjectToFloat(dimValues, reportParseExceptions); } + @Override + public void setSparseIndexed() + { + // no-op, float columns do not have a dictionary to track null values + } + @Override public long estimateEncodedKeyComponentSize(Float key) { diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java index bdf0bccf672a..066b8dc41ea6 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java @@ -165,7 +165,7 @@ private File makeIndexFiles( progress.progress(); final Map metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst()); final Map metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst()); - final List dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size()); + final List dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size()); mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities); final Map handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities); @@ -716,18 +716,22 @@ private void mergeCapabilities( final List mergedDimensions, final Map metricsValueTypes, final Map metricTypeNames, - final List dimCapabilities + final List dimCapabilities ) { - final Map capabilitiesMap = new HashMap<>(); + final Map capabilitiesMap = new HashMap<>(); for (IndexableAdapter adapter : adapters) { for (String dimension : adapter.getDimensionNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(dimension); - capabilitiesMap.computeIfAbsent(dimension, d -> new ColumnCapabilitiesImpl().setIsComplete(true)).merge(capabilities); + capabilitiesMap.compute(dimension, (d, existingCapabilities) -> + ColumnCapabilitiesImpl.snapshot(capabilities) + .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities))); } for (String metric : adapter.getMetricNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(metric); - capabilitiesMap.computeIfAbsent(metric, m -> new ColumnCapabilitiesImpl().setIsComplete(true)).merge(capabilities); + capabilitiesMap.compute(metric, (m, existingCapabilities) -> + ColumnCapabilitiesImpl.snapshot(capabilities) + .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities))); metricsValueTypes.put(metric, capabilities.getType()); metricTypeNames.put(metric, adapter.getMetricType(metric)); } @@ -1002,7 +1006,7 @@ public File append( private Map makeDimensionHandlers( final List mergedDimensions, - final List dimCapabilities + final List dimCapabilities ) { Map handlers = new LinkedHashMap<>(); diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java index d35724398f9a..f2a91278f6bb 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java @@ -48,6 +48,12 @@ public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValu return DimensionHandlerUtils.convertObjectToLong(dimValues, reportParseExceptions); } + @Override + public void setSparseIndexed() + { + // no-op, long columns do not have a dictionary to track null values + } + @Override public long estimateEncodedKeyComponentSize(Long key) { diff --git a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java index 7b21d1f0186c..77e8978e2bee 100644 --- a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java @@ -96,7 +96,7 @@ static ColumnCapabilities getColumnCapabilities( { if (ColumnHolder.TIME_COLUMN_NAME.equals(columnName)) { // TIME_COLUMN_NAME is handled specially; override the provided rowSignature. - return new ColumnCapabilitiesImpl().setType(ValueType.LONG).setIsComplete(true); + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); } else { final ValueType valueType = rowSignature.getColumnType(columnName).orElse(null); @@ -105,12 +105,13 @@ static ColumnCapabilities getColumnCapabilities( // causes expression selectors to always treat us as arrays. If we might have multiple values (i.e. if our type // is nonnumeric), set isComplete false to compensate. if (valueType != null) { + if (valueType.isNumeric()) { + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(valueType); + } return new ColumnCapabilitiesImpl() .setType(valueType) .setDictionaryValuesUnique(false) - .setDictionaryValuesSorted(false) - // Numeric types should be reported as complete, but not STRING or COMPLEX (because we don't have full info) - .setIsComplete(valueType.isNumeric()); + .setDictionaryValuesSorted(false); } else { return null; } diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index ada146d56d41..c0200e1e2eff 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -233,7 +233,8 @@ public String getValueFromSortedId(int index) private final DimensionDictionary dimLookup; private final MultiValueHandling multiValueHandling; private final boolean hasBitmapIndexes; - private boolean hasMultipleValues = false; + private volatile boolean hasMultipleValues = false; + private volatile boolean isSparse = false; @Nullable private SortedDimensionDictionary sortedLookup; @@ -301,6 +302,12 @@ public int[] processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimVal return encodedDimensionValues; } + @Override + public void setSparseIndexed() + { + isSparse = true; + } + @Override public long estimateEncodedKeyComponentSize(int[] key) { @@ -623,7 +630,9 @@ public String lookupName(int id) @Override public boolean nameLookupPossibleInAdvance() { - return true; + // name lookup is possible in advance if we got a value for every row (setSparseIndexed was not called on this + // column) or we've encountered an actual null value and it is present in our dictionary + return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID; } @Nullable diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java index cff7ef99f805..abb4637dc7b8 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java @@ -221,7 +221,7 @@ protected void setupEncodedValueWriter() throws IOException final CompressionStrategy compressionStrategy = indexSpec.getDimensionCompression(); String filenameBase = StringUtils.format("%s.forward_dim", dimensionName); - if (capabilities.hasMultipleValues()) { + if (capabilities.hasMultipleValues().isTrue()) { if (compressionStrategy != CompressionStrategy.UNCOMPRESSED) { encodedValueSerializer = V3CompressedVSizeColumnarMultiIntsSerializer.create( dimensionName, @@ -533,7 +533,7 @@ public boolean canSkip() public ColumnDescriptor makeColumnDescriptor() { // Now write everything - boolean hasMultiValue = capabilities.hasMultipleValues(); + boolean hasMultiValue = capabilities.hasMultipleValues().isTrue(); final CompressionStrategy compressionStrategy = indexSpec.getDimensionCompression(); final BitmapSerdeFactory bitmapSerdeFactory = indexSpec.getBitmapSerdeFactory(); diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java index 1b7163e0829b..cde454b70524 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java @@ -118,7 +118,6 @@ public ColumnHolder build() .setDictionaryValuesUnique(dictionaryEncoded) .setHasSpatialIndexes(spatialIndex != null) .setHasMultipleValues(hasMultipleValues) - .setIsComplete(true) .setFilterable(filterable), columnSupplier, bitmapIndex, diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index 53f7440e87ef..a9af25b4602c 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -19,33 +19,26 @@ package org.apache.druid.segment.column; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; +import org.apache.druid.java.util.common.StringUtils; + +import javax.annotation.Nullable; + /** */ public interface ColumnCapabilities { ValueType getType(); - boolean isDictionaryEncoded(); Capable areDictionaryValuesSorted(); Capable areDictionaryValuesUnique(); boolean isRunLengthEncoded(); boolean hasBitmapIndexes(); boolean hasSpatialIndexes(); - boolean hasMultipleValues(); + Capable hasMultipleValues(); boolean isFilterable(); - /** - * This property indicates that this {@link ColumnCapabilities} is "complete" in that all properties can be expected - * to supply valid responses. This is mostly a hack to work around {@link ColumnCapabilities} generators that - * fail to set {@link #hasMultipleValues()} even when the associated column really could have multiple values. - * Until this situation is sorted out, if this method returns false, callers are encouraged to ignore - * {@link #hasMultipleValues()} and treat that property as if it were unknown. - * - * todo: replace all booleans with {@link Capable} and this method can be dropped - */ - boolean isComplete(); - - enum Capable { FALSE, @@ -57,6 +50,21 @@ public boolean isTrue() return this == TRUE; } + public boolean isMaybeTrue() + { + return isTrue() || isUnknown(); + } + + public boolean isUnknown() + { + return this == UNKNOWN; + } + + public Capable coerceUnknownToBoolean(boolean unknownIsTrue) + { + return this == UNKNOWN ? Capable.of(unknownIsTrue) : this; + } + public Capable and(Capable other) { if (this == UNKNOWN || other == UNKNOWN) { @@ -65,9 +73,36 @@ public Capable and(Capable other) return this == TRUE && other == TRUE ? TRUE : FALSE; } + public Capable or(Capable other) + { + if (this == TRUE) { + return TRUE; + } + return other; + } + public static Capable of(boolean bool) { return bool ? TRUE : FALSE; } + + @JsonCreator + public static Capable ofNullable(@Nullable Boolean bool) + { + return bool == null ? Capable.UNKNOWN : of(bool); + } + + @JsonValue + @Nullable + public Boolean toJson() + { + return this == UNKNOWN ? null : isTrue(); + } + + @Override + public String toString() + { + return StringUtils.toLowerCase(super.toString()); + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index bee28ebdf832..9ddbd04a372f 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -31,15 +31,65 @@ */ public class ColumnCapabilitiesImpl implements ColumnCapabilities { - public static ColumnCapabilitiesImpl copyOf(final ColumnCapabilities other) + public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities other) { final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); - capabilities.merge(other); - capabilities.setFilterable(other.isFilterable()); - capabilities.setIsComplete(other.isComplete()); + if (other != null) { + capabilities.type = other.getType(); + capabilities.dictionaryEncoded = other.isDictionaryEncoded(); + capabilities.runLengthEncoded = other.isRunLengthEncoded(); + capabilities.hasInvertedIndexes = other.hasBitmapIndexes(); + capabilities.hasSpatialIndexes = other.hasSpatialIndexes(); + capabilities.hasMultipleValues = other.hasMultipleValues(); + capabilities.dictionaryValuesSorted = other.areDictionaryValuesSorted(); + capabilities.dictionaryValuesUnique = other.areDictionaryValuesUnique(); + capabilities.filterable = other.isFilterable(); + } return capabilities; } + /** + * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to + * {@link Capable#FALSE}, in order to present a snapshot of the state of the this column + */ + @Nullable + public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities) + { + return snapshot(capabilities, false); + } + + /** + * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to + * {@link Capable#FALSE} or {@link Capable#TRUE}, in order to present a snapshot of the state of the this column + */ + @Nullable + public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, boolean unknownIsTrue) + { + if (capabilities == null) { + return null; + } + ColumnCapabilitiesImpl copy = copyOf(capabilities); + copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(unknownIsTrue); + copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(unknownIsTrue); + copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(unknownIsTrue); + return copy; + } + + + /** + * Create a no frills, simple column with {@link ValueType} set and everything else false + */ + public static ColumnCapabilitiesImpl createSimpleNumericColumnCapabilities(ValueType valueType) + { + return new ColumnCapabilitiesImpl().setType(valueType) + .setHasMultipleValues(false) + .setHasBitmapIndexes(false) + .setDictionaryEncoded(false) + .setDictionaryValuesSorted(false) + .setDictionaryValuesUnique(false) + .setHasSpatialIndexes(false); + } + @Nullable private ValueType type = null; @@ -47,7 +97,7 @@ public static ColumnCapabilitiesImpl copyOf(final ColumnCapabilities other) private boolean runLengthEncoded = false; private boolean hasInvertedIndexes = false; private boolean hasSpatialIndexes = false; - private boolean hasMultipleValues = false; + private Capable hasMultipleValues = Capable.UNKNOWN; // These capabilities are computed at query time and not persisted in the segment files. @JsonIgnore @@ -56,8 +106,6 @@ public static ColumnCapabilitiesImpl copyOf(final ColumnCapabilities other) private Capable dictionaryValuesUnique = Capable.UNKNOWN; @JsonIgnore private boolean filterable; - @JsonIgnore - private boolean complete = false; @Override @JsonProperty @@ -144,14 +192,14 @@ public ColumnCapabilitiesImpl setHasSpatialIndexes(boolean hasSpatialIndexes) @Override @JsonProperty("hasMultipleValues") - public boolean hasMultipleValues() + public Capable hasMultipleValues() { return hasMultipleValues; } public ColumnCapabilitiesImpl setHasMultipleValues(boolean hasMultipleValues) { - this.hasMultipleValues = hasMultipleValues; + this.hasMultipleValues = Capable.of(hasMultipleValues); return this; } @@ -171,22 +219,10 @@ public ColumnCapabilitiesImpl setFilterable(boolean filterable) return this; } - @Override - public boolean isComplete() - { - return complete; - } - - public ColumnCapabilitiesImpl setIsComplete(boolean complete) - { - this.complete = complete; - return this; - } - - public void merge(ColumnCapabilities other) + public ColumnCapabilities merge(@Nullable ColumnCapabilities other) { if (other == null) { - return; + return this; } if (type == null) { @@ -201,10 +237,11 @@ public void merge(ColumnCapabilities other) this.runLengthEncoded |= other.isRunLengthEncoded(); this.hasInvertedIndexes |= other.hasBitmapIndexes(); this.hasSpatialIndexes |= other.hasSpatialIndexes(); - this.hasMultipleValues |= other.hasMultipleValues(); - this.complete &= other.isComplete(); // these should always be the same? this.filterable &= other.isFilterable(); + this.hasMultipleValues = this.hasMultipleValues.or(other.hasMultipleValues()); this.dictionaryValuesSorted = this.dictionaryValuesSorted.and(other.areDictionaryValuesSorted()); this.dictionaryValuesUnique = this.dictionaryValuesUnique.and(other.areDictionaryValuesUnique()); + + return this; } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index ef7613c6137c..0baa59490712 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -115,7 +115,7 @@ public boolean supportsBitmapIndex(final BitmapIndexSelector selector) // multiple values. The lack of multiple values is important because expression filters treat multi-value // arrays as nulls, which doesn't permit index based filtering. final String column = Iterables.getOnlyElement(requiredBindings.get()); - return selector.getBitmapIndex(column) != null && !selector.hasMultipleValues(column); + return selector.getBitmapIndex(column) != null && !selector.hasMultipleValues(column).isMaybeTrue(); } else { // Multi-column expression. return false; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index b6a717bff1b1..990127f99f04 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -414,7 +414,7 @@ static boolean supportsSelectivityEstimation( if (filter.supportsBitmapIndex(indexSelector)) { final ColumnHolder columnHolder = columnSelector.getColumnHolder(dimension); if (columnHolder != null) { - return !columnHolder.getCapabilities().hasMultipleValues(); + return !columnHolder.getCapabilities().hasMultipleValues().isMaybeTrue(); } } return false; diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index c169d15f2230..649aea9b6997 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -27,6 +27,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterators; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; import com.google.errorprone.annotations.concurrent.GuardedBy; @@ -89,6 +90,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.ConcurrentMap; @@ -327,9 +329,10 @@ protected IncrementalIndex( } //__time capabilities - ColumnCapabilitiesImpl timeCapabilities = new ColumnCapabilitiesImpl().setIsComplete(true); - timeCapabilities.setType(ValueType.LONG); - columnCapabilities.put(ColumnHolder.TIME_COLUMN_NAME, timeCapabilities); + columnCapabilities.put( + ColumnHolder.TIME_COLUMN_NAME, + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG) + ); // This should really be more generic List spatialDimensions = dimensionsSpec.getSpatialDimensions(); @@ -640,12 +643,15 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) } final List rowDimensions = row.getDimensions(); - Object[] dims; List overflow = null; long dimsKeySize = 0; List parseExceptionMessages = new ArrayList<>(); synchronized (dimensionDescs) { + // all known dimensions are assumed missing until we encounter in the rowDimensions + Set absentDimensions = Sets.newHashSet(dimensionDescs.keySet()); + + // first, process dimension values present in the row dims = new Object[dimensionDescs.size()]; for (String dimension : rowDimensions) { if (Strings.isNullOrEmpty(dimension)) { @@ -656,18 +662,13 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) DimensionDesc desc = dimensionDescs.get(dimension); if (desc != null) { capabilities = desc.getCapabilities(); + absentDimensions.remove(dimension); } else { wasNewDim = true; capabilities = columnCapabilities.get(dimension); if (capabilities == null) { - capabilities = new ColumnCapabilitiesImpl(); // For schemaless type discovery, assume everything is a String for now, can change later. - capabilities.setType(ValueType.STRING); - capabilities.setDictionaryEncoded(true); - capabilities.setHasBitmapIndexes(true); - capabilities.setDictionaryValuesSorted(false); - capabilities.setDictionaryValuesUnique(true); - capabilities.setIsComplete(true); + capabilities = makeCapabilitiesFromValueType(ValueType.STRING); columnCapabilities.put(dimension, capabilities); } DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null); @@ -677,23 +678,24 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) DimensionIndexer indexer = desc.getIndexer(); Object dimsKey = null; try { - dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent( - row.getRaw(dimension), - true - ); + dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent(row.getRaw(dimension), true); } catch (ParseException pe) { parseExceptionMessages.add(pe.getMessage()); } dimsKeySize += indexer.estimateEncodedKeyComponentSize(dimsKey); // Set column capabilities as data is coming in - if (!capabilities.hasMultipleValues() && + if (!capabilities.hasMultipleValues().isTrue() && dimsKey != null && handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) { capabilities.setHasMultipleValues(true); } if (wasNewDim) { + // unless this is the first row we are processing, all newly discovered columns will be sparse + if (maxIngestedEventTime != null) { + indexer.setSparseIndexed(); + } if (overflow == null) { overflow = new ArrayList<>(); } @@ -713,6 +715,11 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) dims[desc.getIndex()] = dimsKey; } } + + // process any dimensions with missing values in the row + for (String missing : absentDimensions) { + dimensionDescs.get(missing).getIndexer().setSparseIndexed(); + } } if (overflow != null) { @@ -923,16 +930,16 @@ public List getDimensionOrder() private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type) { - ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); - capabilities.setDictionaryEncoded(type == ValueType.STRING); - capabilities.setHasBitmapIndexes(type == ValueType.STRING); if (type == ValueType.STRING) { - capabilities.setDictionaryValuesUnique(true); - capabilities.setDictionaryValuesSorted(false); + // we start out as not having multiple values, but this might change as we encounter them + return new ColumnCapabilitiesImpl().setType(type) + .setHasBitmapIndexes(true) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setDictionaryValuesSorted(false); + } else { + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type); } - capabilities.setType(type); - capabilities.setIsComplete(true); - return capabilities; } /** @@ -988,6 +995,7 @@ public StorageAdapter toStorageAdapter() return new IncrementalIndexStorageAdapter(this); } + @Nullable public ColumnCapabilities getCapabilities(String column) { return columnCapabilities.get(column); @@ -1124,18 +1132,18 @@ public MetricDesc(int index, AggregatorFactory factory) this.name = factory.getName(); String typeInfo = factory.getTypeName(); - this.capabilities = new ColumnCapabilitiesImpl().setIsComplete(true); if ("float".equalsIgnoreCase(typeInfo)) { - capabilities.setType(ValueType.FLOAT); + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT); this.type = typeInfo; } else if ("long".equalsIgnoreCase(typeInfo)) { - capabilities.setType(ValueType.LONG); + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); this.type = typeInfo; } else if ("double".equalsIgnoreCase(typeInfo)) { - capabilities.setType(ValueType.DOUBLE); + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE); this.type = typeInfo; } else { - capabilities.setType(ValueType.COMPLEX); + // in an ideal world complex type reports its actual column capabilities... + capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.COMPLEX); this.type = ComplexMetrics.getSerdeForType(typeInfo).getTypeName(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index cc792a5314c6..8e8520d458b7 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -39,7 +39,6 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; -import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.segment.filter.BooleanValueMatcher; @@ -150,16 +149,23 @@ public ColumnCapabilities getColumnCapabilities(String column) // at index-persisting time to determine if we need a multi-value column or not. However, that means we // need to tweak the capabilities here in the StorageAdapter (a query-time construct), so at query time // they appear multi-valued. + // + // Note that this could be improved if we snapshot the capabilities at cursor creation time and feed those through + // to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of + // multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could + // be removed. + return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true); + } - final ColumnCapabilities capabilitiesFromIndex = index.getCapabilities(column); - final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(column); - if (dimensionDesc != null && dimensionDesc.getCapabilities().getType() == ValueType.STRING) { - final ColumnCapabilitiesImpl retVal = ColumnCapabilitiesImpl.copyOf(capabilitiesFromIndex); - retVal.setHasMultipleValues(true); - return retVal; - } else { - return capabilitiesFromIndex; - } + /** + * Sad workaround for {@link org.apache.druid.query.metadata.SegmentAnalyzer} to deal with the fact that the + * response from {@link #getColumnCapabilities} is not accurate for string columns, in that it reports all string + * string columns as having multiple values. This method returns the actual capabilities of the underlying + * {@link IncrementalIndex}at the time this method is called. + */ + public ColumnCapabilities getSnapshotColumnCapabilities(String column) + { + return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column)); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java index 7e6466d78288..00cb51ffffff 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java @@ -58,8 +58,9 @@ static ColumnCapabilities columnCapabilities(final IndexedTable table, final Str capabilities.setDictionaryValuesSorted(false); capabilities.setDictionaryValuesUnique(false); + capabilities.setHasMultipleValues(false); - return capabilities.setIsComplete(true); + return capabilities; } else { return null; } diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java index b9cfe4f03d69..269ac38429bb 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -85,7 +85,7 @@ public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final D if (holder == null || !holder.getCapabilities().isDictionaryEncoded() || holder.getCapabilities().getType() != ValueType.STRING - || !holder.getCapabilities().hasMultipleValues()) { + || !holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { throw new ISE( "Column[%s] is not a multi-value string column, do not ask for a multi-value selector", spec.getDimension() @@ -125,7 +125,7 @@ public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(final return NilVectorSelector.create(offset); } - if (holder.getCapabilities().hasMultipleValues()) { + if (holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { // Asking for a single-value dimension selector on a multi-value column gets you an error. throw new ISE("Column[%s] is multi-value, do not ask for a single-value selector", spec.getDimension()); } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 5ae6987df88a..5ab4e4694a2b 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -154,8 +154,7 @@ public static ColumnValueSelector makeExprEvalSelector( } else if (capabilities != null && capabilities.getType() == ValueType.STRING && capabilities.isDictionaryEncoded() - && capabilities.isComplete() - && !capabilities.hasMultipleValues() + && !capabilities.hasMultipleValues().isMaybeTrue() && exprDetails.getArrayBindings().isEmpty()) { // Optimization for expressions that hit one scalar string column and nothing else. return new SingleStringInputCachingExpressionColumnValueSelector( @@ -227,7 +226,7 @@ public static DimensionSelector makeDimensionSelector( if (capabilities != null && capabilities.getType() == ValueType.STRING && capabilities.isDictionaryEncoded() - && capabilities.isComplete() + && !capabilities.hasMultipleValues().isUnknown() && !exprDetails.hasInputArrays() && !exprDetails.isOutputArray() ) { @@ -356,7 +355,7 @@ private static Expr.ObjectBinding createBindings( final ColumnCapabilities columnCapabilities = columnSelectorFactory .getColumnCapabilities(columnName); final ValueType nativeType = columnCapabilities != null ? columnCapabilities.getType() : null; - final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues(); + final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues().isTrue(); final Supplier supplier; if (nativeType == ValueType.FLOAT) { @@ -597,11 +596,11 @@ private static Pair, Set> examineColumnSelectorFactoryArrays for (String column : columns) { final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column); if (capabilities != null) { - if (capabilities.hasMultipleValues()) { + if (capabilities.hasMultipleValues().isTrue()) { actualArrays.add(column); } else if ( - !capabilities.isComplete() && capabilities.getType().equals(ValueType.STRING) && + capabilities.hasMultipleValues().isMaybeTrue() && !exprDetails.getArrayBindings().contains(column) ) { unknownIfArrays.add(column); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java index 482365152931..59b846fa9a99 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java @@ -131,10 +131,10 @@ public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnS @Override public ColumnCapabilities capabilities(String columnName) { - // Note: Ideally we would only "setHasMultipleValues(true)" if the expression in question could potentially return - // multiple values. However, we don't currently have a good way of determining this, so to be safe we always - // set the flag. - return new ColumnCapabilitiesImpl().setType(outputType).setHasMultipleValues(true); + // Note: Ideally we would fill out additional information instead of leaving capabilities as 'unknown', e.g. examine + // if the expression in question could potentially return multiple values and anything else. However, we don't + // currently have a good way of determining this, so fill this out more once we do + return new ColumnCapabilitiesImpl().setType(outputType); } @Override diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java index d71d70a1aac9..75a12a90cb6c 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java @@ -47,8 +47,7 @@ public void testCanPushDownLimitForSegmentStringSelector() .setHasMultipleValues(false) .setDictionaryEncoded(true) .setDictionaryValuesSorted(true) - .setDictionaryValuesUnique(true) - .setIsComplete(true); + .setDictionaryValuesUnique(true); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once(); EasyMock.replay(factory); Assert.assertTrue(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); @@ -63,8 +62,7 @@ public void testCanPushDownLimitForIncrementalStringSelector() .setHasMultipleValues(false) .setDictionaryEncoded(false) .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(true) - .setIsComplete(true); + .setDictionaryValuesUnique(true); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once(); EasyMock.replay(factory); Assert.assertFalse(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); @@ -79,8 +77,7 @@ public void testCanPushDownLimitForExpressionStringSelector() .setHasMultipleValues(false) .setDictionaryEncoded(false) .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(false) - .setIsComplete(true); + .setDictionaryValuesUnique(false); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once(); EasyMock.replay(factory); Assert.assertFalse(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); @@ -95,8 +92,7 @@ public void testCanPushDownLimitForJoinStringSelector() .setHasMultipleValues(false) .setDictionaryEncoded(true) .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(false) - .setIsComplete(true); + .setDictionaryValuesUnique(false); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once(); EasyMock.replay(factory); Assert.assertFalse(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); @@ -111,8 +107,7 @@ public void testCanPushDownLimitForNumericSelector() .setHasMultipleValues(false) .setDictionaryEncoded(false) .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(false) - .setIsComplete(true); + .setDictionaryValuesUnique(false); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).anyTimes(); EasyMock.replay(factory); Assert.assertTrue(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); @@ -131,8 +126,7 @@ public void testCanPushDownLimitForComplexSelector() .setHasMultipleValues(false) .setDictionaryEncoded(false) .setDictionaryValuesSorted(false) - .setDictionaryValuesUnique(false) - .setIsComplete(true); + .setDictionaryValuesUnique(false); EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once(); EasyMock.replay(factory); Assert.assertTrue(GroupByQueryEngineV2.canPushDownLimit(factory, DIM)); diff --git a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java index ce6035142374..3ca72aa5a698 100644 --- a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java @@ -137,9 +137,8 @@ public void test_asStorageAdapter_getColumnCapabilitiesK() // Note: the "k" column does not actually have multiple values, but the RowBasedStorageAdapter doesn't allow // reporting complete single-valued capabilities. It would be good to change this in the future, so query engines // running on top of lookups can take advantage of singly-valued optimizations. - Assert.assertFalse(capabilities.hasMultipleValues()); + Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); Assert.assertFalse(capabilities.isDictionaryEncoded()); - Assert.assertFalse(capabilities.isComplete()); } @Test @@ -151,9 +150,8 @@ public void test_asStorageAdapter_getColumnCapabilitiesV() // reporting complete single-valued capabilities. It would be good to change this in the future, so query engines // running on top of lookups can take advantage of singly-valued optimizations. Assert.assertEquals(ValueType.STRING, capabilities.getType()); - Assert.assertFalse(capabilities.hasMultipleValues()); + Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); Assert.assertFalse(capabilities.isDictionaryEncoded()); - Assert.assertFalse(capabilities.isComplete()); } @Test diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java index 5f044ec339f6..9da06719d9c2 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java @@ -227,7 +227,7 @@ public SegmentMetadataQueryTest( "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1, + false, preferedSize1, 1, "preferred", @@ -268,7 +268,7 @@ public SegmentMetadataQueryTest( "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap2, + false, placementSize2, 1, null, @@ -304,7 +304,7 @@ public void testSegmentMetadataQueryWithRollupMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 0, null, @@ -372,7 +372,7 @@ public void testSegmentMetadataQueryWithHasMultipleValuesMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 1, null, @@ -440,7 +440,7 @@ public void testSegmentMetadataQueryWithComplexColumnMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 1, null, @@ -509,7 +509,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge() } ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, size1 + size2, 1, "preferred", @@ -530,7 +530,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge2() } ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, size1 + size2, 3, "spot", @@ -551,7 +551,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge3() } ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, size1 + size2, 9, "automotive", @@ -637,7 +637,7 @@ public void testSegmentMetadataQueryWithNoAnalysisTypesMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 0, null, @@ -699,7 +699,7 @@ public void testSegmentMetadataQueryWithAggregatorsMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 0, null, @@ -757,7 +757,7 @@ public void testSegmentMetadataQueryWithTimestampSpecMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 0, null, @@ -815,7 +815,7 @@ public void testSegmentMetadataQueryWithQueryGranularityMerge() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap1 || !mmap2, + false, 0, 0, null, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java index ccfcce7e563f..55d8b18babd3 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java @@ -102,7 +102,7 @@ public void testSegmentMetadataUnionQuery() "placement", new ColumnAnalysis( ValueType.STRING.toString(), - !mmap, + false, mmap ? 43524 : 43056, 1, "preferred", diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java index 1c68a21568d3..b4525c018eee 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java @@ -48,6 +48,7 @@ import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; +import org.apache.druid.testing.InitializedNullHandlingTest; import org.joda.time.Interval; import org.junit.Test; import org.junit.runner.RunWith; @@ -66,7 +67,7 @@ /** */ @RunWith(Parameterized.class) -public class IndexMergerV9WithSpatialIndexTest +public class IndexMergerV9WithSpatialIndexTest extends InitializedNullHandlingTest { public static final int NUM_POINTS = 5000; diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index dc102e688164..c7783e992197 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -38,6 +38,7 @@ import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.incremental.IncrementalIndex; @@ -152,9 +153,12 @@ public void testStringColumn() Assert.assertTrue(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + // multi-value is unknown unless explicitly set to 'true' + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); + // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, + // coercing any 'UNKNOWN' values to false + Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); @@ -162,9 +166,8 @@ public void testStringColumn() Assert.assertTrue(caps.isDictionaryEncoded()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -176,9 +179,8 @@ public void testMultiStringColumn() Assert.assertTrue(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertTrue(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); @@ -186,9 +188,8 @@ public void testMultiStringColumn() Assert.assertTrue(caps.isDictionaryEncoded()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertTrue(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -206,8 +207,7 @@ private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueTyp Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } } diff --git a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java index 8886c7d2d8f1..e12dac4743cd 100644 --- a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java @@ -54,9 +54,8 @@ public void testCapabilitiesTime() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -69,9 +68,8 @@ public void testCapabilitiesString() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertFalse(caps.isComplete()); } @Test @@ -84,9 +82,8 @@ public void testCapabilitiesLong() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -99,9 +96,8 @@ public void testCapabilitiesFloat() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -114,9 +110,8 @@ public void testCapabilitiesDouble() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertTrue(caps.isComplete()); } @Test @@ -129,9 +124,8 @@ public void testCapabilitiesComplex() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertFalse(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertFalse(caps.isComplete()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java index a4fc2f254b78..652a59033fab 100644 --- a/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java @@ -332,8 +332,7 @@ public void test_getColumnCapabilities_float() final ColumnCapabilities capabilities = adapter.getColumnCapabilities(ValueType.FLOAT.name()); Assert.assertEquals(ValueType.FLOAT, capabilities.getType()); - Assert.assertFalse(capabilities.hasMultipleValues()); - Assert.assertTrue(capabilities.isComplete()); + Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); } @Test @@ -343,8 +342,7 @@ public void test_getColumnCapabilities_double() final ColumnCapabilities capabilities = adapter.getColumnCapabilities(ValueType.DOUBLE.name()); Assert.assertEquals(ValueType.DOUBLE, capabilities.getType()); - Assert.assertFalse(capabilities.hasMultipleValues()); - Assert.assertTrue(capabilities.isComplete()); + Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); } @Test @@ -354,8 +352,7 @@ public void test_getColumnCapabilities_long() final ColumnCapabilities capabilities = adapter.getColumnCapabilities(ValueType.LONG.name()); Assert.assertEquals(ValueType.LONG, capabilities.getType()); - Assert.assertFalse(capabilities.hasMultipleValues()); - Assert.assertTrue(capabilities.isComplete()); + Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); } @Test @@ -369,8 +366,7 @@ public void test_getColumnCapabilities_string() // Note: unlike numeric types, STRING-typed columns might have multiple values, so they report as incomplete. It // would be good in the future to support some way of changing this, when it is known ahead of time that // multi-valuedness is definitely happening or is definitely impossible. - Assert.assertFalse(capabilities.hasMultipleValues()); - Assert.assertFalse(capabilities.isComplete()); + Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); } @Test @@ -382,8 +378,7 @@ public void test_getColumnCapabilities_complex() // Note: unlike numeric types, COMPLEX-typed columns report that they are incomplete. Assert.assertEquals(ValueType.COMPLEX, capabilities.getType()); - Assert.assertFalse(capabilities.hasMultipleValues()); - Assert.assertFalse(capabilities.isComplete()); + Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java index 53e93c7a3b54..e221edd9c73f 100644 --- a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java +++ b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java @@ -47,7 +47,7 @@ public void testSerde() throws Exception Assert.assertTrue(cc.isDictionaryEncoded()); Assert.assertFalse(cc.isRunLengthEncoded()); Assert.assertTrue(cc.hasSpatialIndexes()); - Assert.assertTrue(cc.hasMultipleValues()); + Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); Assert.assertFalse(cc.isFilterable()); } @@ -72,7 +72,7 @@ public void testDeserialization() throws Exception Assert.assertTrue(cc.isDictionaryEncoded()); Assert.assertTrue(cc.isRunLengthEncoded()); Assert.assertTrue(cc.hasSpatialIndexes()); - Assert.assertTrue(cc.hasMultipleValues()); + Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); Assert.assertFalse(cc.isFilterable()); } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java index c7a560c19616..3ed33b8e3fc6 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java @@ -36,6 +36,7 @@ import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.ConciseBitmapSerdeFactory; @@ -146,9 +147,9 @@ public Iterator iterator() } @Override - public boolean hasMultipleValues(final String dimension) + public ColumnCapabilities.Capable hasMultipleValues(final String dimension) { - return true; + return ColumnCapabilities.Capable.TRUE; } @Override diff --git a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java index 20cfdb786fb0..13cc45fa590d 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java @@ -137,9 +137,8 @@ public void getColumnCapabilitiesForStringColumn() Assert.assertEquals(ValueType.STRING, capabilities.getType()); Assert.assertTrue(capabilities.isDictionaryEncoded()); Assert.assertFalse(capabilities.hasBitmapIndexes()); - Assert.assertFalse(capabilities.hasMultipleValues()); + Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(capabilities.hasSpatialIndexes()); - Assert.assertTrue(capabilities.isComplete()); } @Test @@ -149,9 +148,8 @@ public void getColumnCapabilitiesForLongColumn() Assert.assertEquals(ValueType.LONG, capabilities.getType()); Assert.assertFalse(capabilities.isDictionaryEncoded()); Assert.assertFalse(capabilities.hasBitmapIndexes()); - Assert.assertFalse(capabilities.hasMultipleValues()); + Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(capabilities.hasSpatialIndexes()); - Assert.assertTrue(capabilities.isComplete()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java index 526c63c60c17..16e090dc3d53 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java @@ -357,8 +357,7 @@ public ColumnCapabilities getColumnCapabilities(String column) { return new ColumnCapabilitiesImpl().setType(ValueType.STRING) .setHasMultipleValues(true) - .setDictionaryEncoded(true) - .setIsComplete(true); + .setDictionaryEncoded(true); } }; final BaseObjectColumnValueSelector selectorImplicit = @@ -814,9 +813,9 @@ public void testCapabilities() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertTrue(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); + Assert.assertTrue(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertFalse(caps.isComplete()); caps = Z_CONCAT_X.capabilities("expr"); Assert.assertEquals(ValueType.STRING, caps.getType()); @@ -824,8 +823,8 @@ public void testCapabilities() Assert.assertFalse(caps.isDictionaryEncoded()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); - Assert.assertTrue(caps.hasMultipleValues()); + Assert.assertTrue(caps.hasMultipleValues().isUnknown()); + Assert.assertTrue(caps.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(caps.hasSpatialIndexes()); - Assert.assertFalse(caps.isComplete()); } } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java index b1a9eeeab14d..597dbb6ee4e4 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java @@ -417,7 +417,7 @@ public boolean isNull() @Override public ColumnCapabilities capabilities(String columnName) { - return new ColumnCapabilitiesImpl().setType(ValueType.LONG); + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); } @Override