diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java index 52fc85194e7c..2e4490bd26c3 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java @@ -59,7 +59,7 @@ public class StringDimensionIndexerBenchmark @Setup public void setup() { - indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true); + indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true, false); for (int i = 0; i < cardinality; i++) { indexer.processRowValsToUnsortedEncodedKeyComponent("abcd-" + i, true); diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java index 5dfdcac9c521..72bbaee335ad 100644 --- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java +++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java @@ -47,7 +47,7 @@ import org.apache.druid.segment.BaseProgressIndicator; import org.apache.druid.segment.ProgressIndicator; import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.indexing.TuningConfigs; @@ -289,7 +289,7 @@ private static IncrementalIndex makeIncrementalIndex( AggregatorFactory[] aggs, HadoopDruidIndexerConfig config, Iterable oldDimOrder, - Map oldCapabilities + Map oldCapabilities ) { final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig(); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index d3aaa4fd3ee1..b4cda9b54d93 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -340,7 +340,7 @@ public static boolean isAllSingleValueDims( // Now check column capabilities. final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); - return (columnCapabilities != null && !columnCapabilities.hasMultipleValues().isMaybeTrue()) || + return (columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse()) || (missingMeansNonExistent && columnCapabilities == null); }); } diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 659b55b1680d..15a081d8fcad 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -227,7 +227,7 @@ private ColumnAnalysis analyzeStringColumn( min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0)); max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1)); } - } else if (capabilities.isDictionaryEncoded()) { + } else if (capabilities.isDictionaryEncoded().isTrue()) { // fallback if no bitmap index DictionaryEncodedColumn theColumn = (DictionaryEncodedColumn) columnHolder.getColumn(); cardinality = theColumn.getCardinality(); diff --git a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java index eb22dc9b4ae6..daad69fc7a6c 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java @@ -194,7 +194,7 @@ private static boolean canUsePooledAlgorithm( } if (capabilities != null && capabilities.getType() == ValueType.STRING) { // string columns must use the on heap algorithm unless they have the following capabilites - return capabilities.isDictionaryEncoded() && capabilities.areDictionaryValuesUnique().isTrue(); + return capabilities.isDictionaryEncoded().isTrue() && capabilities.areDictionaryValuesUnique().isTrue(); } else { // non-strings are not eligible to use the pooled algorithm, and should use a heap algorithm return false; diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 9e5a12921c7c..f5b7e9feb346 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -73,16 +73,16 @@ private DimensionHandlerUtils() ) { if (capabilities == null) { - return new StringDimensionHandler(dimensionName, multiValueHandling, true); + return new StringDimensionHandler(dimensionName, multiValueHandling, true, false); } multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; if (capabilities.getType() == ValueType.STRING) { - if (!capabilities.isDictionaryEncoded()) { + if (!capabilities.isDictionaryEncoded().isTrue()) { throw new IAE("String column must have dictionary encoding."); } - return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes()); + return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes(), capabilities.hasSpatialIndexes()); } if (capabilities.getType() == ValueType.LONG) { @@ -98,7 +98,7 @@ private DimensionHandlerUtils() } // Return a StringDimensionHandler by default (null columns will be treated as String typed) - return new StringDimensionHandler(dimensionName, multiValueHandling, true); + return new StringDimensionHandler(dimensionName, multiValueHandling, true, false); } public static List getValueTypesFromDimensionSpecs(List dimSpecs) @@ -226,11 +226,11 @@ private static ColumnCapabilities getEffectiveCapabilities( capabilities = ColumnCapabilitiesImpl.copyOf(capabilities) .setType(ValueType.STRING) .setDictionaryValuesUnique( - capabilities.isDictionaryEncoded() && + capabilities.isDictionaryEncoded().isTrue() && fn.getExtractionType() == ExtractionFn.ExtractionType.ONE_TO_ONE ) .setDictionaryValuesSorted( - capabilities.isDictionaryEncoded() && fn.preservesOrdering() + capabilities.isDictionaryEncoded().isTrue() && fn.preservesOrdering() ); } diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java index cf7631db08bb..277deb94e9bb 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java @@ -22,6 +22,7 @@ import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.MutableBitmap; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -236,6 +237,7 @@ ColumnValueSelector makeColumnValueSelector( IncrementalIndex.DimensionDesc desc ); + ColumnCapabilities getColumnCapabilities(); /** * Compares the row values for this DimensionIndexer's dimension from a Row key. * diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index b802f7555135..677ed41ba239 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -25,6 +25,9 @@ import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -38,6 +41,9 @@ public class DoubleDimensionIndexer implements DimensionIndexer DOUBLE_COMPARATOR = Comparators.naturalNullsFirst(); + private final ColumnCapabilitiesImpl capabilities = + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE); + @Override public Double processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { @@ -89,6 +95,12 @@ public int getCardinality() return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } + @Override + public ColumnCapabilities getColumnCapabilities() + { + return capabilities; + } + @Override public DimensionSelector makeDimensionSelector( DimensionSpec spec, diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java index dce58a23b23f..2db5b59a668d 100644 --- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java @@ -25,6 +25,9 @@ import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -38,6 +41,9 @@ public class FloatDimensionIndexer implements DimensionIndexer FLOAT_COMPARATOR = Comparators.naturalNullsFirst(); + private final ColumnCapabilitiesImpl capabilities = + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT); + @Override public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { @@ -90,6 +96,12 @@ public int getCardinality() return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } + @Override + public ColumnCapabilities getColumnCapabilities() + { + return capabilities; + } + @Override public DimensionSelector makeDimensionSelector( DimensionSpec spec, diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java index 066b8dc41ea6..19ce74574be7 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java @@ -88,6 +88,63 @@ public class IndexMergerV9 implements IndexMerger { private static final Logger log = new Logger(IndexMergerV9.class); + // merge logic for the state capabilities will be in after incremental index is persisted + public static final ColumnCapabilities.CoercionLogic DIMENSION_CAPABILITY_MERGE_LOGIC = + new ColumnCapabilities.CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return true; + } + + @Override + public boolean dictionaryValuesSorted() + { + return true; + } + + @Override + public boolean dictionaryValuesUnique() + { + return true; + } + + @Override + public boolean multipleValues() + { + return false; + } + }; + + public static final ColumnCapabilities.CoercionLogic METRIC_CAPABILITY_MERGE_LOGIC = + new ColumnCapabilities.CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return false; + } + + @Override + public boolean dictionaryValuesSorted() + { + return false; + } + + @Override + public boolean dictionaryValuesUnique() + { + return false; + } + + @Override + public boolean multipleValues() + { + return false; + } + }; + private final ObjectMapper mapper; private final IndexIO indexIO; private final SegmentWriteOutMediumFactory defaultSegmentWriteOutMediumFactory; @@ -724,14 +781,14 @@ private void mergeCapabilities( for (String dimension : adapter.getDimensionNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(dimension); capabilitiesMap.compute(dimension, (d, existingCapabilities) -> - ColumnCapabilitiesImpl.snapshot(capabilities) - .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities))); + ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, DIMENSION_CAPABILITY_MERGE_LOGIC) + ); } for (String metric : adapter.getMetricNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(metric); capabilitiesMap.compute(metric, (m, existingCapabilities) -> - ColumnCapabilitiesImpl.snapshot(capabilities) - .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities))); + ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, METRIC_CAPABILITY_MERGE_LOGIC) + ); metricsValueTypes.put(metric, capabilities.getType()); metricTypeNames.put(metric, adapter.getMetricType(metric)); } @@ -1011,7 +1068,10 @@ private Map makeDimensionHandlers( { Map handlers = new LinkedHashMap<>(); for (int i = 0; i < mergedDimensions.size(); i++) { - ColumnCapabilities capabilities = dimCapabilities.get(i); + ColumnCapabilities capabilities = ColumnCapabilitiesImpl.snapshot( + dimCapabilities.get(i), + DIMENSION_CAPABILITY_MERGE_LOGIC + ); String dimName = mergedDimensions.get(i); DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null); handlers.put(dimName, handler); diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java index f2a91278f6bb..266405504b4f 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java @@ -25,6 +25,9 @@ import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -38,6 +41,10 @@ public class LongDimensionIndexer implements DimensionIndexer { public static final Comparator LONG_COMPARATOR = Comparators.naturalNullsFirst(); + private final ColumnCapabilitiesImpl capabilities = + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); + + @Override public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { @@ -90,6 +97,12 @@ public int getCardinality() return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } + @Override + public ColumnCapabilities getColumnCapabilities() + { + return capabilities; + } + @Override public DimensionSelector makeDimensionSelector( DimensionSpec spec, diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java index 65ff7f356d70..b7fdad15c190 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java @@ -98,12 +98,14 @@ private static IndexedInts getRow(ColumnValueSelector s) private final String dimensionName; private final MultiValueHandling multiValueHandling; private final boolean hasBitmapIndexes; + private final boolean hasSpatialIndexes; - public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes) + public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes) { this.dimensionName = dimensionName; this.multiValueHandling = multiValueHandling; this.hasBitmapIndexes = hasBitmapIndexes; + this.hasSpatialIndexes = hasSpatialIndexes; } @Override @@ -139,7 +141,7 @@ public SettableColumnValueSelector makeNewSettableEncodedValueSelector() @Override public DimensionIndexer makeIndexer() { - return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes); + return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index c0200e1e2eff..bca0a5c72a9c 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -40,6 +40,9 @@ import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.ArrayBasedIndexedInts; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.IndexedInts; @@ -74,7 +77,7 @@ private static class DimensionDictionary private String minValue = null; @Nullable private String maxValue = null; - private int idForNull = ABSENT_VALUE_ID; + private volatile int idForNull = ABSENT_VALUE_ID; private final Object2IntMap valueToId = new Object2IntOpenHashMap<>(); @@ -233,17 +236,19 @@ public String getValueFromSortedId(int index) private final DimensionDictionary dimLookup; private final MultiValueHandling multiValueHandling; private final boolean hasBitmapIndexes; + private final boolean hasSpatialIndexes; private volatile boolean hasMultipleValues = false; private volatile boolean isSparse = false; @Nullable private SortedDimensionDictionary sortedLookup; - public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes) + public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes) { this.dimLookup = new DimensionDictionary(); this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; this.hasBitmapIndexes = hasBitmapIndexes; + this.hasSpatialIndexes = hasSpatialIndexes; } @Override @@ -400,6 +405,17 @@ public int getCardinality() return dimLookup.size(); } + /** + * returns true if all values are encoded in {@link #dimLookup} + */ + private boolean dictionaryEncodesAllValues() + { + // name lookup is possible in advance if we explicitly process a value for every row, or if we've encountered an + // actual null value and it is present in our dictionary. otherwise the dictionary will be missing ids for implicit + // null values + return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID; + } + @Override public int compareUnsortedEncodedKeyComponents(int[] lhs, int[] rhs) { @@ -456,6 +472,37 @@ public int getUnsortedEncodedKeyComponentHashCode(int[] key) return Arrays.hashCode(key); } + @Override + public ColumnCapabilities getColumnCapabilities() + { + ColumnCapabilitiesImpl capabilites = new ColumnCapabilitiesImpl().setType(ValueType.STRING) + .setHasBitmapIndexes(hasBitmapIndexes) + .setHasSpatialIndexes(hasSpatialIndexes) + .setDictionaryValuesUnique(true) + .setDictionaryValuesSorted(false); + + // Strings are opportunistically multi-valued, but the capabilities are initialized as 'unknown', since a + // multi-valued row might be processed at any point during ingestion. + // We only explicitly set multiple values if we are certain that there are multiple values, otherwise, a race + // condition might occur where this indexer might process a multi-valued row in the period between obtaining the + // capabilities, and actually processing the rows with a selector. Leaving as unknown allows the caller to decide + // how to handle this. + if (hasMultipleValues) { + capabilites.setHasMultipleValues(true); + } + // Likewise, only set dictionaryEncoded if explicitly if true for a similar reason as multi-valued handling. The + // dictionary is populated as rows are processed, but there might be implicit default values not accounted for in + // the dictionary yet. We can be certain that the dictionary has an entry for every value if either of + // a) we have already processed an explitic default (null) valued row for this column + // b) the processing was not 'sparse', meaning that this indexer has processed an explict value for every row + // is true. + final boolean allValuesEncoded = dictionaryEncodesAllValues(); + if (allValuesEncoded) { + capabilites.setDictionaryEncoded(true); + } + return capabilites; + } + @Override public DimensionSelector makeDimensionSelector( final DimensionSpec spec, @@ -630,9 +677,7 @@ public String lookupName(int id) @Override public boolean nameLookupPossibleInAdvance() { - // name lookup is possible in advance if we got a value for every row (setSparseIndexed was not called on this - // column) or we've encountered an actual null value and it is present in our dictionary - return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID; + return dictionaryEncodesAllValues(); } @Nullable @@ -696,6 +741,7 @@ public ColumnValueSelector makeColumnValueSelector( return makeDimensionSelector(DefaultDimensionSpec.of(desc.getName()), currEntry, desc); } + @Nullable @Override public Object convertUnsortedEncodedKeyComponentToActualList(int[] key) diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index a9af25b4602c..ee252aac93cc 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -26,17 +26,59 @@ import javax.annotation.Nullable; /** + * This interface is used to expose information about columns that is interesting to know for all matters dealing with + * reading from columns, including query planning and optimization, creating readers to merge segments at ingestion + * time, and probably nearly anything else you can imagine. */ public interface ColumnCapabilities { + /** + * Column type, good to know so caller can know what to expect and which optimal selector to use + */ ValueType getType(); - boolean isDictionaryEncoded(); + + /** + * Is the column dictionary encoded? If so, a DimensionDictionarySelector may be used instead of using a value + * selector, allowing algorithms to operate on primitive integer dictionary ids rather than the looked up dictionary + * values + */ + Capable isDictionaryEncoded(); + + /** + * If the column is dictionary encoded, are those values sorted? Useful to know for optimizations that can defer + * looking up values and allowing sorting with the dictionary ids directly + */ Capable areDictionaryValuesSorted(); + + /** + * If the column is dictionary encoded, is there a 1:1 mapping of dictionary ids to values? If this is true, it + * unlocks optimizations such as allowing for things like grouping directly on dictionary ids and deferred value + * lookup + */ Capable areDictionaryValuesUnique(); - boolean isRunLengthEncoded(); + + /** + * String columns are sneaky, and might have multiple values, this is to allow callers to know and appropriately + * prepare themselves + */ + Capable hasMultipleValues(); + + /** + * Does the column have an inverted index bitmap for each value? If so, these may be employed to 'pre-filter' the + * column by examining if the values match the filter and intersecting the bitmaps, to avoid having to scan and + * evaluate if every row matches the filter + */ boolean hasBitmapIndexes(); + + /** + * Does the column have spatial indexes available to allow use with spatial filtering? + */ boolean hasSpatialIndexes(); - Capable hasMultipleValues(); + + /** + * All Druid primitive columns support filtering, maybe with or without indexes, but by default complex columns + * do not support direct filtering, unless provided by through a custom implementation. + */ boolean isFilterable(); enum Capable @@ -55,6 +97,11 @@ public boolean isMaybeTrue() return isTrue() || isUnknown(); } + public boolean isFalse() + { + return this == FALSE; + } + public boolean isUnknown() { return this == UNKNOWN; @@ -105,4 +152,36 @@ public String toString() return StringUtils.toLowerCase(super.toString()); } } + + /** + * This interface define the shape of a mechnism to allow for bespoke coercion of {@link Capable#UNKNOWN} into + * {@link Capable#TRUE} or {@link Capable#FALSE} for each {@link Capable} of a {@link ColumnCapabilities}, as is + * appropriate for the situation of the caller. + */ + interface CoercionLogic + { + /** + * If {@link ColumnCapabilities#isDictionaryEncoded()} is {@link Capable#UNKNOWN}, define if it should be treated + * as true or false. + */ + boolean dictionaryEncoded(); + + /** + * If {@link ColumnCapabilities#areDictionaryValuesSorted()} ()} is {@link Capable#UNKNOWN}, define if it should be treated + * as true or false. + */ + boolean dictionaryValuesSorted(); + + /** + * If {@link ColumnCapabilities#areDictionaryValuesUnique()} ()} is {@link Capable#UNKNOWN}, define if it should be treated + * as true or false. + */ + boolean dictionaryValuesUnique(); + + /** + * If {@link ColumnCapabilities#hasMultipleValues()} is {@link Capable#UNKNOWN}, define if it should be treated + * as true or false. + */ + boolean multipleValues(); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 9ddbd04a372f..9473efde0e70 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSetter; import com.google.common.base.Preconditions; import org.apache.druid.java.util.common.ISE; @@ -37,7 +38,6 @@ public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities o if (other != null) { capabilities.type = other.getType(); capabilities.dictionaryEncoded = other.isDictionaryEncoded(); - capabilities.runLengthEncoded = other.isRunLengthEncoded(); capabilities.hasInvertedIndexes = other.hasBitmapIndexes(); capabilities.hasSpatialIndexes = other.hasSpatialIndexes(); capabilities.hasMultipleValues = other.hasMultipleValues(); @@ -49,30 +49,59 @@ public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities o } /** - * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to - * {@link Capable#FALSE}, in order to present a snapshot of the state of the this column + * Copy a {@link ColumnCapabilities} and coerce all {@link ColumnCapabilities.Capable#UNKNOWN} to + * {@link ColumnCapabilities.Capable#TRUE} or {@link ColumnCapabilities.Capable#FALSE} as specified by + * {@link ColumnCapabilities.CoercionLogic} */ @Nullable - public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities) + public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, CoercionLogic coerce) { - return snapshot(capabilities, false); + if (capabilities == null) { + return null; + } + ColumnCapabilitiesImpl copy = copyOf(capabilities); + copy.dictionaryEncoded = copy.dictionaryEncoded.coerceUnknownToBoolean(coerce.dictionaryEncoded()); + copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(coerce.dictionaryValuesSorted()); + copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(coerce.dictionaryValuesUnique()); + copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(coerce.multipleValues()); + return copy; } /** - * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to - * {@link Capable#FALSE} or {@link Capable#TRUE}, in order to present a snapshot of the state of the this column + * Snapshots a pair of capabilities and then merges them */ @Nullable - public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, boolean unknownIsTrue) - { - if (capabilities == null) { - return null; + public static ColumnCapabilitiesImpl merge( + @Nullable final ColumnCapabilities capabilities, + @Nullable final ColumnCapabilities other, + CoercionLogic coercionLogic + ) + { + ColumnCapabilitiesImpl merged = snapshot(capabilities, coercionLogic); + ColumnCapabilitiesImpl otherSnapshot = snapshot(other, coercionLogic); + if (merged == null) { + return otherSnapshot; + } else if (otherSnapshot == null) { + return merged; } - ColumnCapabilitiesImpl copy = copyOf(capabilities); - copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(unknownIsTrue); - copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(unknownIsTrue); - copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(unknownIsTrue); - return copy; + + if (merged.type == null) { + merged.type = other.getType(); + } + + if (!merged.type.equals(otherSnapshot.getType())) { + throw new ISE("Cannot merge columns of type[%s] and [%s]", merged.type, otherSnapshot.getType()); + } + + merged.dictionaryEncoded = merged.dictionaryEncoded.or(otherSnapshot.isDictionaryEncoded()); + merged.hasMultipleValues = merged.hasMultipleValues.or(otherSnapshot.hasMultipleValues()); + merged.dictionaryValuesSorted = merged.dictionaryValuesSorted.and(otherSnapshot.areDictionaryValuesSorted()); + merged.dictionaryValuesUnique = merged.dictionaryValuesUnique.and(otherSnapshot.areDictionaryValuesUnique()); + merged.hasInvertedIndexes |= otherSnapshot.hasBitmapIndexes(); + merged.hasSpatialIndexes |= otherSnapshot.hasSpatialIndexes(); + merged.filterable &= otherSnapshot.isFilterable(); + + return merged; } @@ -93,10 +122,9 @@ public static ColumnCapabilitiesImpl createSimpleNumericColumnCapabilities(Value @Nullable private ValueType type = null; - private boolean dictionaryEncoded = false; - private boolean runLengthEncoded = false; private boolean hasInvertedIndexes = false; private boolean hasSpatialIndexes = false; + private Capable dictionaryEncoded = Capable.UNKNOWN; private Capable hasMultipleValues = Capable.UNKNOWN; // These capabilities are computed at query time and not persisted in the segment files. @@ -121,15 +149,16 @@ public ColumnCapabilitiesImpl setType(ValueType type) } @Override - @JsonProperty - public boolean isDictionaryEncoded() + @JsonProperty("dictionaryEncoded") + public Capable isDictionaryEncoded() { return dictionaryEncoded; } + @JsonSetter("dictionaryEncoded") public ColumnCapabilitiesImpl setDictionaryEncoded(boolean dictionaryEncoded) { - this.dictionaryEncoded = dictionaryEncoded; + this.dictionaryEncoded = Capable.of(dictionaryEncoded); return this; } @@ -157,13 +186,6 @@ public ColumnCapabilitiesImpl setDictionaryValuesUnique(boolean dictionaryValues return this; } - @Override - @JsonProperty - public boolean isRunLengthEncoded() - { - return runLengthEncoded; - } - @Override @JsonProperty("hasBitmapIndexes") public boolean hasBitmapIndexes() @@ -218,30 +240,4 @@ public ColumnCapabilitiesImpl setFilterable(boolean filterable) this.filterable = filterable; return this; } - - public ColumnCapabilities merge(@Nullable ColumnCapabilities other) - { - if (other == null) { - return this; - } - - if (type == null) { - type = other.getType(); - } - - if (!type.equals(other.getType())) { - throw new ISE("Cannot merge columns of type[%s] and [%s]", type, other.getType()); - } - - this.dictionaryEncoded |= other.isDictionaryEncoded(); - this.runLengthEncoded |= other.isRunLengthEncoded(); - this.hasInvertedIndexes |= other.hasBitmapIndexes(); - this.hasSpatialIndexes |= other.hasSpatialIndexes(); - this.filterable &= other.isFilterable(); - this.hasMultipleValues = this.hasMultipleValues.or(other.hasMultipleValues()); - this.dictionaryValuesSorted = this.dictionaryValuesSorted.and(other.areDictionaryValuesSorted()); - this.dictionaryValuesUnique = this.dictionaryValuesUnique.and(other.areDictionaryValuesUnique()); - - return this; - } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index 0baa59490712..f11e7a776d4a 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -115,7 +115,7 @@ public boolean supportsBitmapIndex(final BitmapIndexSelector selector) // multiple values. The lack of multiple values is important because expression filters treat multi-value // arrays as nulls, which doesn't permit index based filtering. final String column = Iterables.getOnlyElement(requiredBindings.get()); - return selector.getBitmapIndex(column) != null && !selector.hasMultipleValues(column).isMaybeTrue(); + return selector.getBitmapIndex(column) != null && selector.hasMultipleValues(column).isFalse(); } else { // Multi-column expression. return false; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 399adb9eaaf3..116f59b47648 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -414,7 +414,7 @@ static boolean supportsSelectivityEstimation( if (filter.supportsBitmapIndex(indexSelector)) { final ColumnHolder columnHolder = columnSelector.getColumnHolder(dimension); if (columnHolder != null) { - return !columnHolder.getCapabilities().hasMultipleValues().isMaybeTrue(); + return columnHolder.getCapabilities().hasMultipleValues().isFalse(); } } return false; diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index 649aea9b6997..bb4377f4e396 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -59,6 +59,7 @@ import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.DoubleColumnSelector; import org.apache.druid.segment.FloatColumnSelector; +import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.LongColumnSelector; import org.apache.druid.segment.Metadata; import org.apache.druid.segment.NilColumnValueSelector; @@ -249,7 +250,8 @@ public ColumnCapabilities getColumnCapabilities(String columnName) private final Map dimensionDescs; private final List dimensionDescsList; - private final Map columnCapabilities; + // dimension capabilities are provided by the indexers + private final Map timeAndMetricsColumnCapabilities; private final AtomicInteger numEntries = new AtomicInteger(); private final AtomicLong bytesInMemory = new AtomicLong(); @@ -287,7 +289,7 @@ protected IncrementalIndex( this.deserializeComplexMetrics = deserializeComplexMetrics; this.reportParseExceptions = reportParseExceptions; - this.columnCapabilities = new HashMap<>(); + this.timeAndMetricsColumnCapabilities = new HashMap<>(); this.metadata = new Metadata( null, getCombiningAggregators(metrics), @@ -302,7 +304,7 @@ protected IncrementalIndex( for (AggregatorFactory metric : metrics) { MetricDesc metricDesc = new MetricDesc(metricDescs.size(), metric); metricDescs.put(metricDesc.getName(), metricDesc); - columnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities()); + timeAndMetricsColumnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities()); } DimensionsSpec dimensionsSpec = incrementalIndexSchema.getDimensionsSpec(); @@ -312,24 +314,22 @@ protected IncrementalIndex( for (DimensionSchema dimSchema : dimensionsSpec.getDimensions()) { ValueType type = TYPE_MAP.get(dimSchema.getValueType()); String dimName = dimSchema.getName(); - ColumnCapabilitiesImpl capabilities = makeCapabilitiesFromValueType(type); + ColumnCapabilitiesImpl capabilities = makeDefaultCapabilitiesFromValueType(type); capabilities.setHasBitmapIndexes(dimSchema.hasBitmapIndex()); if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) { capabilities.setHasSpatialIndexes(true); - } else { - DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities( - dimName, - capabilities, - dimSchema.getMultiValueHandling() - ); - addNewDimension(dimName, capabilities, handler); } - columnCapabilities.put(dimName, capabilities); + DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities( + dimName, + capabilities, + dimSchema.getMultiValueHandling() + ); + addNewDimension(dimName, handler); } //__time capabilities - columnCapabilities.put( + timeAndMetricsColumnCapabilities.put( ColumnHolder.TIME_COLUMN_NAME, ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG) ); @@ -589,9 +589,13 @@ public InputRow formatRow(InputRow row) return row; } - public Map getColumnCapabilities() + public Map getColumnCapabilities() { - return columnCapabilities; + ImmutableMap.Builder builder = + ImmutableMap.builder().putAll(timeAndMetricsColumnCapabilities); + + dimensionDescs.forEach((dimension, desc) -> builder.put(dimension, desc.getCapabilities())); + return builder.build(); } /** @@ -658,23 +662,22 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) continue; } boolean wasNewDim = false; - ColumnCapabilitiesImpl capabilities; DimensionDesc desc = dimensionDescs.get(dimension); if (desc != null) { - capabilities = desc.getCapabilities(); absentDimensions.remove(dimension); } else { wasNewDim = true; - capabilities = columnCapabilities.get(dimension); - if (capabilities == null) { - // For schemaless type discovery, assume everything is a String for now, can change later. - capabilities = makeCapabilitiesFromValueType(ValueType.STRING); - columnCapabilities.put(dimension, capabilities); - } - DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null); - desc = addNewDimension(dimension, capabilities, handler); + desc = addNewDimension( + dimension, + DimensionHandlerUtils.getHandlerFromCapabilities( + dimension, + // for schemaless type discovery, everything is a String. this should probably try to autodetect + // based on the value to use a better handler + makeDefaultCapabilitiesFromValueType(ValueType.STRING), + null + ) + ); } - DimensionHandler handler = desc.getHandler(); DimensionIndexer indexer = desc.getIndexer(); Object dimsKey = null; try { @@ -684,13 +687,6 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) parseExceptionMessages.add(pe.getMessage()); } dimsKeySize += indexer.estimateEncodedKeyComponentSize(dimsKey); - // Set column capabilities as data is coming in - if (!capabilities.hasMultipleValues().isTrue() && - dimsKey != null && - handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) { - capabilities.setHasMultipleValues(true); - } - if (wasNewDim) { // unless this is the first row we are processing, all newly discovered columns will be sparse if (maxIngestedEventTime != null) { @@ -928,7 +924,7 @@ public List getDimensionOrder() } } - private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type) + private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType type) { if (type == ValueType.STRING) { // we start out as not having multiple values, but this might change as we encounter them @@ -949,7 +945,7 @@ private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type) */ public void loadDimensionIterable( Iterable oldDimensionOrder, - Map oldColumnCapabilities + Map oldColumnCapabilities ) { synchronized (dimensionDescs) { @@ -958,19 +954,21 @@ public void loadDimensionIterable( } for (String dim : oldDimensionOrder) { if (dimensionDescs.get(dim) == null) { - ColumnCapabilitiesImpl capabilities = oldColumnCapabilities.get(dim); - columnCapabilities.put(dim, capabilities); + ColumnCapabilitiesImpl capabilities = ColumnCapabilitiesImpl.snapshot( + oldColumnCapabilities.get(dim), + IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC + ); DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null); - addNewDimension(dim, capabilities, handler); + addNewDimension(dim, handler); } } } } @GuardedBy("dimensionDescs") - private DimensionDesc addNewDimension(String dim, ColumnCapabilitiesImpl capabilities, DimensionHandler handler) + private DimensionDesc addNewDimension(String dim, DimensionHandler handler) { - DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, capabilities, handler); + DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, handler); dimensionDescs.put(dim, desc); dimensionDescsList.add(desc); return desc; @@ -998,7 +996,10 @@ public StorageAdapter toStorageAdapter() @Nullable public ColumnCapabilities getCapabilities(String column) { - return columnCapabilities.get(column); + if (dimensionDescs.containsKey(column)) { + return dimensionDescs.get(column).getCapabilities(); + } + return timeAndMetricsColumnCapabilities.get(column); } public Metadata getMetadata() @@ -1080,15 +1081,13 @@ public static final class DimensionDesc { private final int index; private final String name; - private final ColumnCapabilitiesImpl capabilities; private final DimensionHandler handler; private final DimensionIndexer indexer; - public DimensionDesc(int index, String name, ColumnCapabilitiesImpl capabilities, DimensionHandler handler) + public DimensionDesc(int index, String name, DimensionHandler handler) { this.index = index; this.name = name; - this.capabilities = capabilities; this.handler = handler; this.indexer = handler.makeIndexer(); } @@ -1103,9 +1102,9 @@ public String getName() return name; } - public ColumnCapabilitiesImpl getCapabilities() + public ColumnCapabilities getCapabilities() { - return capabilities; + return indexer.getColumnCapabilities(); } public DimensionHandler getHandler() @@ -1124,7 +1123,7 @@ public static final class MetricDesc private final int index; private final String name; private final String type; - private final ColumnCapabilitiesImpl capabilities; + private final ColumnCapabilities capabilities; public MetricDesc(int index, AggregatorFactory factory) { @@ -1163,7 +1162,7 @@ public String getType() return type; } - public ColumnCapabilitiesImpl getCapabilities() + public ColumnCapabilities getCapabilities() { return capabilities; } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 8e8520d458b7..9beb16820abc 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -52,6 +52,62 @@ */ public class IncrementalIndexStorageAdapter implements StorageAdapter { + private static final ColumnCapabilities.CoercionLogic STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC = + new ColumnCapabilities.CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return false; + } + + @Override + public boolean dictionaryValuesSorted() + { + return false; + } + + @Override + public boolean dictionaryValuesUnique() + { + return true; + } + + @Override + public boolean multipleValues() + { + return true; + } + }; + + private static final ColumnCapabilities.CoercionLogic SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC = + new ColumnCapabilities.CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return true; + } + + @Override + public boolean dictionaryValuesSorted() + { + return true; + } + + @Override + public boolean dictionaryValuesUnique() + { + return true; + } + + @Override + public boolean multipleValues() + { + return false; + } + }; + final IncrementalIndex index; public IncrementalIndexStorageAdapter(IncrementalIndex index) @@ -154,7 +210,7 @@ public ColumnCapabilities getColumnCapabilities(String column) // to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of // multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could // be removed. - return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true); + return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC); } /** @@ -165,7 +221,10 @@ public ColumnCapabilities getColumnCapabilities(String column) */ public ColumnCapabilities getSnapshotColumnCapabilities(String column) { - return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column)); + return ColumnCapabilitiesImpl.snapshot( + index.getCapabilities(column), + SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC + ); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java index 269ac38429bb..48c56c9ef6df 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -83,9 +83,9 @@ public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final D spec -> { final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); if (holder == null - || !holder.getCapabilities().isDictionaryEncoded() + || holder.getCapabilities().isDictionaryEncoded().isFalse() || holder.getCapabilities().getType() != ValueType.STRING - || !holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { + || holder.getCapabilities().hasMultipleValues().isFalse()) { throw new ISE( "Column[%s] is not a multi-value string column, do not ask for a multi-value selector", spec.getDimension() @@ -119,7 +119,7 @@ public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(final spec -> { final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); if (holder == null - || !holder.getCapabilities().isDictionaryEncoded() + || !holder.getCapabilities().isDictionaryEncoded().isTrue() || holder.getCapabilities().getType() != ValueType.STRING) { // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls. return NilVectorSelector.create(offset); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 5ab4e4694a2b..17c6a3f39898 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -153,8 +153,8 @@ public static ColumnValueSelector makeExprEvalSelector( ); } else if (capabilities != null && capabilities.getType() == ValueType.STRING - && capabilities.isDictionaryEncoded() - && !capabilities.hasMultipleValues().isMaybeTrue() + && capabilities.isDictionaryEncoded().isTrue() + && capabilities.hasMultipleValues().isFalse() && exprDetails.getArrayBindings().isEmpty()) { // Optimization for expressions that hit one scalar string column and nothing else. return new SingleStringInputCachingExpressionColumnValueSelector( @@ -225,7 +225,7 @@ public static DimensionSelector makeDimensionSelector( // not treating it as an array and not wanting to output an array if (capabilities != null && capabilities.getType() == ValueType.STRING - && capabilities.isDictionaryEncoded() + && capabilities.isDictionaryEncoded().isTrue() && !capabilities.hasMultipleValues().isUnknown() && !exprDetails.hasInputArrays() && !exprDetails.isOutputArray() diff --git a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java index 3ca72aa5a698..96167fd38bdc 100644 --- a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java @@ -138,7 +138,7 @@ public void test_asStorageAdapter_getColumnCapabilitiesK() // reporting complete single-valued capabilities. It would be good to change this in the future, so query engines // running on top of lookups can take advantage of singly-valued optimizations. Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); - Assert.assertFalse(capabilities.isDictionaryEncoded()); + Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue()); } @Test @@ -151,7 +151,7 @@ public void test_asStorageAdapter_getColumnCapabilitiesV() // running on top of lookups can take advantage of singly-valued optimizations. Assert.assertEquals(ValueType.STRING, capabilities.getType()); Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); - Assert.assertFalse(capabilities.isDictionaryEncoded()); + Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java index b253614cc3d7..86e8503f997f 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java @@ -45,7 +45,6 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; -import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.StringDictionaryEncodedColumn; @@ -2012,32 +2011,6 @@ public void testDictIdSeeker() Assert.assertEquals(-1, dictIdSeeker.seek(5)); } - @Test(expected = IllegalArgumentException.class) - public void testCloser() throws Exception - { - final long timestamp = System.currentTimeMillis(); - IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null); - IncrementalIndexTest.populateIndex(timestamp, toPersist); - ColumnCapabilitiesImpl capabilities = (ColumnCapabilitiesImpl) toPersist.getCapabilities("dim1"); - capabilities.setHasSpatialIndexes(true); - - final File tempDir = temporaryFolder.newFolder(); - final File v8TmpDir = new File(tempDir, "v8-tmp"); - final File v9TmpDir = new File(tempDir, "v9-tmp"); - - try { - indexMerger.persist(toPersist, tempDir, indexSpec, null); - } - finally { - if (v8TmpDir.exists()) { - Assert.fail("v8-tmp dir not clean."); - } - if (v9TmpDir.exists()) { - Assert.fail("v9-tmp dir not clean."); - } - } - } - @Test public void testMultiValueHandling() throws Exception { diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index c7783e992197..57689a46cb0a 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -150,20 +150,25 @@ public void testStringColumn() ColumnCapabilities caps = INC_INDEX.getCapabilities("d1"); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isMaybeTrue()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); // multi-value is unknown unless explicitly set to 'true' Assert.assertTrue(caps.hasMultipleValues().isUnknown()); // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, // coercing any 'UNKNOWN' values to false - Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue()); + Assert.assertFalse( + ColumnCapabilitiesImpl.snapshot(caps, IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC) + .hasMultipleValues() + .isMaybeTrue() + ); Assert.assertFalse(caps.hasSpatialIndexes()); caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -176,7 +181,7 @@ public void testMultiStringColumn() ColumnCapabilities caps = INC_INDEX.getCapabilities("d2"); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); @@ -185,7 +190,7 @@ public void testMultiStringColumn() caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); @@ -199,12 +204,11 @@ public void testComplexColumn() assertNonStringColumnCapabilities(MMAP_INDEX.getColumnHolder("m4").getCapabilities(), ValueType.COMPLEX); } - private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueType valueType) { Assert.assertEquals(valueType, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); diff --git a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java index e12dac4743cd..a802b819b570 100644 --- a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java @@ -51,7 +51,7 @@ public void testCapabilitiesTime() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, ColumnHolder.TIME_COLUMN_NAME); Assert.assertEquals(ValueType.LONG, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -65,7 +65,7 @@ public void testCapabilitiesString() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, STRING_COLUMN_NAME); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); @@ -79,7 +79,7 @@ public void testCapabilitiesLong() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, LONG_COLUMN_NAME); Assert.assertEquals(ValueType.LONG, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -93,7 +93,7 @@ public void testCapabilitiesFloat() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, FLOAT_COLUMN_NAME); Assert.assertEquals(ValueType.FLOAT, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -107,7 +107,7 @@ public void testCapabilitiesDouble() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, DOUBLE_COLUMN_NAME); Assert.assertEquals(ValueType.DOUBLE, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -121,7 +121,7 @@ public void testCapabilitiesComplex() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, COMPLEX_COLUMN_NAME); Assert.assertEquals(ValueType.COMPLEX, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); diff --git a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java index e221edd9c73f..ce98506dc7be 100644 --- a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java +++ b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java @@ -44,8 +44,7 @@ public void testSerde() throws Exception ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class); Assert.assertEquals(ValueType.COMPLEX, cc.getType()); - Assert.assertTrue(cc.isDictionaryEncoded()); - Assert.assertFalse(cc.isRunLengthEncoded()); + Assert.assertTrue(cc.isDictionaryEncoded().isTrue()); Assert.assertTrue(cc.hasSpatialIndexes()); Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); @@ -69,8 +68,7 @@ public void testDeserialization() throws Exception ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class); Assert.assertEquals(ValueType.COMPLEX, cc.getType()); - Assert.assertTrue(cc.isDictionaryEncoded()); - Assert.assertTrue(cc.isRunLengthEncoded()); + Assert.assertTrue(cc.isDictionaryEncoded().isTrue()); Assert.assertTrue(cc.hasSpatialIndexes()); Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java index 7b80bd2094c5..6406d7afe098 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java @@ -200,7 +200,7 @@ public void test_getColumnCapabilities_factToCountryFactColumn() Assert.assertEquals(ValueType.STRING, capabilities.getType()); Assert.assertTrue(capabilities.hasBitmapIndexes()); - Assert.assertTrue(capabilities.isDictionaryEncoded()); + Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue()); Assert.assertTrue(capabilities.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(capabilities.areDictionaryValuesUnique().isTrue()); } @@ -216,7 +216,7 @@ public void test_getColumnCapabilities_factToCountryJoinColumn() Assert.assertFalse(capabilities.hasBitmapIndexes()); Assert.assertFalse(capabilities.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(capabilities.areDictionaryValuesSorted().isTrue()); - Assert.assertTrue(capabilities.isDictionaryEncoded()); + Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java index c75232c9be9b..61b56377b5d9 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java @@ -137,7 +137,7 @@ public void getColumnCapabilitiesForStringColumn() { final ColumnCapabilities capabilities = target.getColumnCapabilities("str"); Assert.assertEquals(ValueType.STRING, capabilities.getType()); - Assert.assertTrue(capabilities.isDictionaryEncoded()); + Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue()); Assert.assertFalse(capabilities.hasBitmapIndexes()); Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(capabilities.hasSpatialIndexes()); @@ -148,7 +148,7 @@ public void getColumnCapabilitiesForLongColumn() { final ColumnCapabilities capabilities = target.getColumnCapabilities("long"); Assert.assertEquals(ValueType.LONG, capabilities.getType()); - Assert.assertFalse(capabilities.isDictionaryEncoded()); + Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue()); Assert.assertFalse(capabilities.hasBitmapIndexes()); Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(capabilities.hasSpatialIndexes()); diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java similarity index 67% rename from processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java rename to processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java index 0b7a66db20fe..08b18c44ff79 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java @@ -21,20 +21,41 @@ import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.guava.SettableSupplier; +import org.apache.druid.data.input.MapBasedInputRow; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.math.expr.Parser; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseSingleValueDimensionSelector; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.Cursor; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.TestObjectColumnSelector; +import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.incremental.IncrementalIndex; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; +import org.apache.druid.segment.incremental.IndexSizeExceededException; +import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Test; import java.util.ArrayList; +import java.util.Collections; import java.util.List; -public class ExpressionColumnValueSelectorTest +public class ExpressionSelectorsTest extends InitializedNullHandlingTest { @Test public void testSupplierFromDimensionSelector() @@ -231,6 +252,86 @@ public void testCoerceExprToValue() ); } + @Test + public void testIncrementIndexStringSelector() throws IndexSizeExceededException + { + // This test covers a regression caused by ColumnCapabilites.isDictionaryEncoded not matching the value of + // DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an + // exception trying to make an optimized string expression selector that was not appropriate to use for the + // underlying dimension selector. + // This occurred during schemaless ingestion with spare dimension values and no explicit null rows, so the + // conditions are replicated by this test. See https://github.com/apache/druid/pull/10248 for details + IncrementalIndexSchema schema = new IncrementalIndexSchema( + 0, + new TimestampSpec("time", "millis", DateTimes.nowUtc()), + Granularities.NONE, + VirtualColumns.EMPTY, + DimensionsSpec.EMPTY, + new AggregatorFactory[]{new CountAggregatorFactory("count")}, + true + ); + + IncrementalIndex index = new IncrementalIndex.Builder().setMaxRowCount(100).setIndexSchema(schema).buildOnheap(); + index.add( + new MapBasedInputRow( + DateTimes.nowUtc().getMillis(), + ImmutableList.of("x"), + ImmutableMap.of("x", "foo") + ) + ); + index.add( + new MapBasedInputRow( + DateTimes.nowUtc().plusMillis(1000).getMillis(), + ImmutableList.of("y"), + ImmutableMap.of("y", "foo") + ) + ); + + IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index); + + Sequence cursors = adapter.makeCursors( + null, + Intervals.ETERNITY, + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + int rowsProcessed = cursors.map(cursor -> { + DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector( + cursor.getColumnSelectorFactory(), + Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()), + null + ); + DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector( + cursor.getColumnSelectorFactory(), + Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()), + null + ); + int rowCount = 0; + while (!cursor.isDone()) { + Object x = xExprSelector.getObject(); + Object y = yExprSelector.getObject(); + List expectedFoo = Collections.singletonList("foofoo"); + List expectedNull = NullHandling.replaceWithDefault() + ? Collections.singletonList("foo") + : Collections.singletonList(null); + if (rowCount == 0) { + Assert.assertEquals(expectedFoo, x); + Assert.assertEquals(expectedNull, y); + } else { + Assert.assertEquals(expectedNull, x); + Assert.assertEquals(expectedFoo, y); + } + rowCount++; + cursor.advance(); + } + return rowCount; + }).accumulate(0, (in, acc) -> in + acc); + + Assert.assertEquals(2, rowsProcessed); + } + private static DimensionSelector dimensionSelectorFromSupplier( final Supplier supplier ) diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java index 16e090dc3d53..84bf5fd7e741 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java @@ -810,7 +810,7 @@ public void testCapabilities() ColumnCapabilities caps = X_PLUS_Y.capabilities("expr"); Assert.assertEquals(ValueType.FLOAT, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); @@ -820,7 +820,7 @@ public void testCapabilities() caps = Z_CONCAT_X.capabilities("expr"); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java index 7002ac22fa66..2324a2aca6a9 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java @@ -29,7 +29,7 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.ReferenceCountingSegment; -import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexAddResult; import org.apache.druid.segment.incremental.IncrementalIndexSchema; @@ -377,7 +377,7 @@ private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) FireHydrant lastHydrant = hydrants.get(numHydrants - 1); newCount = lastHydrant.getCount() + 1; if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) { - Map oldCapabilities; + Map oldCapabilities; if (lastHydrant.hasSwapped()) { oldCapabilities = new HashMap<>(); ReferenceCountingSegment segment = lastHydrant.getIncrementedSegment(); @@ -385,7 +385,7 @@ private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) QueryableIndex oldIndex = segment.asQueryableIndex(); for (String dim : oldIndex.getAvailableDimensions()) { dimOrder.add(dim); - oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumnHolder(dim).getCapabilities()); + oldCapabilities.put(dim, oldIndex.getColumnHolder(dim).getCapabilities()); } } finally {