From 0a93dada22a8d3eaf1d0fe42fa98de5af0921ec0 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Aug 2020 03:05:09 -0700 Subject: [PATCH 01/16] fix bug with realtime expressions on sparse string columns --- .../druid/segment/StringDimensionIndexer.java | 14 ++- .../IncrementalIndexStorageAdapter.java | 11 ++- ...Test.java => ExpressionSelectorsTest.java} | 94 ++++++++++++++++++- 3 files changed, 113 insertions(+), 6 deletions(-) rename processing/src/test/java/org/apache/druid/segment/virtual/{ExpressionColumnValueSelectorTest.java => ExpressionSelectorsTest.java} (71%) diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index c0200e1e2eff..fbb504a59ca9 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -74,7 +74,7 @@ private static class DimensionDictionary private String minValue = null; @Nullable private String maxValue = null; - private int idForNull = ABSENT_VALUE_ID; + private volatile int idForNull = ABSENT_VALUE_ID; private final Object2IntMap valueToId = new Object2IntOpenHashMap<>(); @@ -400,6 +400,14 @@ public int getCardinality() return dimLookup.size(); } + /** + * returns true if all values are encoded in {@link #dimLookup} + */ + public boolean dictionaryEncodesAllValues() + { + return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID; + } + @Override public int compareUnsortedEncodedKeyComponents(int[] lhs, int[] rhs) { @@ -630,9 +638,7 @@ public String lookupName(int id) @Override public boolean nameLookupPossibleInAdvance() { - // name lookup is possible in advance if we got a value for every row (setSparseIndexed was not called on this - // column) or we've encountered an actual null value and it is present in our dictionary - return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID; + return dictionaryEncodesAllValues(); } @Nullable diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 8e8520d458b7..e4d7b18ec7b7 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -35,10 +35,12 @@ import org.apache.druid.segment.DimensionIndexer; import org.apache.druid.segment.Metadata; import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.StringDimensionIndexer; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.segment.filter.BooleanValueMatcher; @@ -154,7 +156,14 @@ public ColumnCapabilities getColumnCapabilities(String column) // to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of // multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could // be removed. - return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true); + ColumnCapabilitiesImpl snapshot = ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true); + if (snapshot != null && snapshot.isDictionaryEncoded() && snapshot.getType().equals(ValueType.STRING)) { + // only consider the column dictionary encoded if all dictionary entries have a value + snapshot.setDictionaryEncoded( + ((StringDimensionIndexer) index.getDimension(column).getIndexer()).dictionaryEncodesAllValues() + ); + } + return snapshot; } /** diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java similarity index 71% rename from processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java rename to processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java index 0b7a66db20fe..02676c56bfdf 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java @@ -21,20 +21,40 @@ import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.guava.SettableSupplier; +import org.apache.druid.data.input.MapBasedInputRow; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.math.expr.Parser; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseSingleValueDimensionSelector; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.Cursor; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.TestObjectColumnSelector; +import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.incremental.IncrementalIndex; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; +import org.apache.druid.segment.incremental.IndexSizeExceededException; +import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Test; import java.util.ArrayList; import java.util.List; -public class ExpressionColumnValueSelectorTest +public class ExpressionSelectorsTest extends InitializedNullHandlingTest { @Test public void testSupplierFromDimensionSelector() @@ -231,6 +251,78 @@ public void testCoerceExprToValue() ); } + @Test + public void testIncrementIndexStringSelector() throws IndexSizeExceededException + { + IncrementalIndexSchema schema = new IncrementalIndexSchema( + 0, + new TimestampSpec("time", "millis", DateTimes.nowUtc()), + Granularities.NONE, + VirtualColumns.EMPTY, + DimensionsSpec.EMPTY, + new AggregatorFactory[]{ new CountAggregatorFactory("count")}, + true + ); + + IncrementalIndex index = new IncrementalIndex.Builder().setMaxRowCount(100).setIndexSchema(schema).buildOnheap(); + index.add( + new MapBasedInputRow( + DateTimes.nowUtc().getMillis(), + ImmutableList.of("x"), + ImmutableMap.of("x", "foo") + ) + ); + index.add( + new MapBasedInputRow( + DateTimes.nowUtc().plusMillis(1000).getMillis(), + ImmutableList.of("y"), + ImmutableMap.of("y", "foo") + ) + ); + + IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index); + + Sequence cursors = adapter.makeCursors( + null, + Intervals.ETERNITY, + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + int rowsProcessed = cursors.map(cursor -> { + DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector( + cursor.getColumnSelectorFactory(), + Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()), + null + ); + DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector( + cursor.getColumnSelectorFactory(), + Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()), + null + ); + int rowCount = 0; + while(!cursor.isDone()) { + Object x = xExprSelector.getObject(); + Object y = yExprSelector.getObject(); + List expectedFoo = ImmutableList.of("foofoo"); + List expectedNull = NullHandling.replaceWithDefault() ? ImmutableList.of("foo") : null; + if (rowCount == 0) { + Assert.assertEquals(expectedFoo, x); + Assert.assertEquals(expectedNull, y); + } else { + Assert.assertEquals(expectedNull, x); + Assert.assertEquals(expectedFoo, y); + } + rowCount++; + cursor.advance(); + } + return rowCount; + }).accumulate(0, (in, acc) -> in + acc); + + Assert.assertEquals(2, rowsProcessed); + } + private static DimensionSelector dimensionSelectorFromSupplier( final Supplier supplier ) From bcedf207ebaef2d9066859b19dd2c181fb09ca6a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Aug 2020 03:18:34 -0700 Subject: [PATCH 02/16] fix test --- .../druid/segment/virtual/ExpressionSelectorsTest.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java index 02676c56bfdf..18b75a8f51dc 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java @@ -52,6 +52,7 @@ import org.junit.Test; import java.util.ArrayList; +import java.util.Collections; import java.util.List; public class ExpressionSelectorsTest extends InitializedNullHandlingTest @@ -305,8 +306,10 @@ public void testIncrementIndexStringSelector() throws IndexSizeExceededException while(!cursor.isDone()) { Object x = xExprSelector.getObject(); Object y = yExprSelector.getObject(); - List expectedFoo = ImmutableList.of("foofoo"); - List expectedNull = NullHandling.replaceWithDefault() ? ImmutableList.of("foo") : null; + List expectedFoo = Collections.singletonList("foofoo"); + List expectedNull = NullHandling.replaceWithDefault() + ? Collections.singletonList("foo") + : Collections.singletonList(null); if (rowCount == 0) { Assert.assertEquals(expectedFoo, x); Assert.assertEquals(expectedNull, y); From e3c6e73284d2eb60fbcfe691260c3b32756da679 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Aug 2020 03:25:14 -0700 Subject: [PATCH 03/16] add comment back --- .../java/org/apache/druid/segment/StringDimensionIndexer.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index fbb504a59ca9..7c56449016e4 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -405,6 +405,9 @@ public int getCardinality() */ public boolean dictionaryEncodesAllValues() { + // name lookup is possible in advance if we process a value for every row (setSparseIndexed was not called on this + // column) or we've encountered an actual null value and it is present in our dictionary. otherwise the dictionary + // will be missing null values return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID; } From 3951eb3b9946f19afa541530c9ed4989ce15a59f Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Aug 2020 05:13:23 -0700 Subject: [PATCH 04/16] push capabilities for dimensions to dimension indexers since they know things --- .../druid/segment/DimensionIndexer.java | 2 + .../druid/segment/DoubleDimensionIndexer.java | 11 +++ .../druid/segment/FloatDimensionIndexer.java | 11 +++ .../druid/segment/LongDimensionIndexer.java | 12 ++++ .../druid/segment/StringDimensionIndexer.java | 28 +++++++- .../segment/incremental/IncrementalIndex.java | 71 +++++++++---------- .../IncrementalIndexStorageAdapter.java | 11 +-- 7 files changed, 97 insertions(+), 49 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java index cf7631db08bb..9b80fb50c6e6 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java @@ -22,6 +22,7 @@ import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.MutableBitmap; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -236,6 +237,7 @@ ColumnValueSelector makeColumnValueSelector( IncrementalIndex.DimensionDesc desc ); + ColumnCapabilitiesImpl getColumnCapabilities(); /** * Compares the row values for this DimensionIndexer's dimension from a Row key. * diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index b802f7555135..6c9beb029df0 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -25,6 +25,8 @@ import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -38,6 +40,9 @@ public class DoubleDimensionIndexer implements DimensionIndexer DOUBLE_COMPARATOR = Comparators.naturalNullsFirst(); + private final ColumnCapabilitiesImpl capabilities = + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE); + @Override public Double processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { @@ -89,6 +94,12 @@ public int getCardinality() return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } + @Override + public ColumnCapabilitiesImpl getColumnCapabilities() + { + return capabilities; + } + @Override public DimensionSelector makeDimensionSelector( DimensionSpec spec, diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java index dce58a23b23f..3a9823021a86 100644 --- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java @@ -25,6 +25,8 @@ import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -38,6 +40,9 @@ public class FloatDimensionIndexer implements DimensionIndexer FLOAT_COMPARATOR = Comparators.naturalNullsFirst(); + private final ColumnCapabilitiesImpl capabilities = + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT); + @Override public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { @@ -90,6 +95,12 @@ public int getCardinality() return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } + @Override + public ColumnCapabilitiesImpl getColumnCapabilities() + { + return capabilities; + } + @Override public DimensionSelector makeDimensionSelector( DimensionSpec spec, diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java index f2a91278f6bb..6e8c4a95d4de 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java @@ -25,6 +25,8 @@ import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -38,6 +40,10 @@ public class LongDimensionIndexer implements DimensionIndexer { public static final Comparator LONG_COMPARATOR = Comparators.naturalNullsFirst(); + private final ColumnCapabilitiesImpl capabilities = + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); + + @Override public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { @@ -90,6 +96,12 @@ public int getCardinality() return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } + @Override + public ColumnCapabilitiesImpl getColumnCapabilities() + { + return capabilities; + } + @Override public DimensionSelector makeDimensionSelector( DimensionSpec spec, diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index 7c56449016e4..644e0bca4fb1 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -40,6 +40,8 @@ import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.ArrayBasedIndexedInts; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.IndexedInts; @@ -405,9 +407,9 @@ public int getCardinality() */ public boolean dictionaryEncodesAllValues() { - // name lookup is possible in advance if we process a value for every row (setSparseIndexed was not called on this - // column) or we've encountered an actual null value and it is present in our dictionary. otherwise the dictionary - // will be missing null values + // name lookup is possible in advance if we explicitly process a value for every row, or if we've encountered an + // actual null value and it is present in our dictionary. otherwise the dictionary will be missing ids for implicit + // null values return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID; } @@ -467,6 +469,25 @@ public int getUnsortedEncodedKeyComponentHashCode(int[] key) return Arrays.hashCode(key); } + @Override + public ColumnCapabilitiesImpl getColumnCapabilities() + { + ColumnCapabilitiesImpl capabilites = new ColumnCapabilitiesImpl().setType(ValueType.STRING) + .setHasBitmapIndexes(hasBitmapIndexes) + .setHasSpatialIndexes(false) + .setDictionaryEncoded(dictionaryEncodesAllValues()) + .setDictionaryValuesUnique(true) + .setDictionaryValuesSorted(false); + // strings are only single valued, until they are not... + // only explicitly set multiple values if they are certain, otherwise this indexer might process a multi-valued + // row in the future in the period between obtaining capabilities and actually processing the rows with a selector + // leaving as unknown allows the caller to decide + if (hasMultipleValues) { + capabilites.setHasMultipleValues(true); + } + return capabilites; + } + @Override public DimensionSelector makeDimensionSelector( final DimensionSpec spec, @@ -705,6 +726,7 @@ public ColumnValueSelector makeColumnValueSelector( return makeDimensionSelector(DefaultDimensionSpec.of(desc.getName()), currEntry, desc); } + @Nullable @Override public Object convertUnsortedEncodedKeyComponentToActualList(int[] key) diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index 649aea9b6997..991ee0c4b956 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -249,7 +249,8 @@ public ColumnCapabilities getColumnCapabilities(String columnName) private final Map dimensionDescs; private final List dimensionDescsList; - private final Map columnCapabilities; + // dimension capabilities are provided by the indexers + private final Map timeAndSpaceAndMetricsColumnCapabilities; private final AtomicInteger numEntries = new AtomicInteger(); private final AtomicLong bytesInMemory = new AtomicLong(); @@ -287,7 +288,7 @@ protected IncrementalIndex( this.deserializeComplexMetrics = deserializeComplexMetrics; this.reportParseExceptions = reportParseExceptions; - this.columnCapabilities = new HashMap<>(); + this.timeAndSpaceAndMetricsColumnCapabilities = new HashMap<>(); this.metadata = new Metadata( null, getCombiningAggregators(metrics), @@ -302,7 +303,7 @@ protected IncrementalIndex( for (AggregatorFactory metric : metrics) { MetricDesc metricDesc = new MetricDesc(metricDescs.size(), metric); metricDescs.put(metricDesc.getName(), metricDesc); - columnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities()); + timeAndSpaceAndMetricsColumnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities()); } DimensionsSpec dimensionsSpec = incrementalIndexSchema.getDimensionsSpec(); @@ -312,24 +313,26 @@ protected IncrementalIndex( for (DimensionSchema dimSchema : dimensionsSpec.getDimensions()) { ValueType type = TYPE_MAP.get(dimSchema.getValueType()); String dimName = dimSchema.getName(); - ColumnCapabilitiesImpl capabilities = makeCapabilitiesFromValueType(type); + ColumnCapabilitiesImpl capabilities = makeDefaultCapabilitiesFromValueType(type); capabilities.setHasBitmapIndexes(dimSchema.hasBitmapIndex()); if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) { + // spatial indexed dimensions do not directly have a dimension indexer to provide column capabilities, so add + // capabilites to static map capabilities.setHasSpatialIndexes(true); + timeAndSpaceAndMetricsColumnCapabilities.put(dimName, capabilities); } else { DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities( dimName, capabilities, dimSchema.getMultiValueHandling() ); - addNewDimension(dimName, capabilities, handler); + addNewDimension(dimName, handler); } - columnCapabilities.put(dimName, capabilities); } //__time capabilities - columnCapabilities.put( + timeAndSpaceAndMetricsColumnCapabilities.put( ColumnHolder.TIME_COLUMN_NAME, ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG) ); @@ -591,7 +594,11 @@ public InputRow formatRow(InputRow row) public Map getColumnCapabilities() { - return columnCapabilities; + ImmutableMap.Builder builder = + ImmutableMap.builder().putAll(timeAndSpaceAndMetricsColumnCapabilities); + + dimensionDescs.forEach((dimension, desc) -> builder.put(dimension, desc.getCapabilities())); + return builder.build(); } /** @@ -658,23 +665,22 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) continue; } boolean wasNewDim = false; - ColumnCapabilitiesImpl capabilities; DimensionDesc desc = dimensionDescs.get(dimension); if (desc != null) { - capabilities = desc.getCapabilities(); absentDimensions.remove(dimension); } else { wasNewDim = true; - capabilities = columnCapabilities.get(dimension); - if (capabilities == null) { - // For schemaless type discovery, assume everything is a String for now, can change later. - capabilities = makeCapabilitiesFromValueType(ValueType.STRING); - columnCapabilities.put(dimension, capabilities); - } - DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null); - desc = addNewDimension(dimension, capabilities, handler); + desc = addNewDimension( + dimension, + DimensionHandlerUtils.getHandlerFromCapabilities( + dimension, + // for schemaless type discovery, everything is a String. this should probably try to autodetect + // based on the value to use a better handler + makeDefaultCapabilitiesFromValueType(ValueType.STRING), + null + ) + ); } - DimensionHandler handler = desc.getHandler(); DimensionIndexer indexer = desc.getIndexer(); Object dimsKey = null; try { @@ -684,13 +690,6 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) parseExceptionMessages.add(pe.getMessage()); } dimsKeySize += indexer.estimateEncodedKeyComponentSize(dimsKey); - // Set column capabilities as data is coming in - if (!capabilities.hasMultipleValues().isTrue() && - dimsKey != null && - handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) { - capabilities.setHasMultipleValues(true); - } - if (wasNewDim) { // unless this is the first row we are processing, all newly discovered columns will be sparse if (maxIngestedEventTime != null) { @@ -928,7 +927,7 @@ public List getDimensionOrder() } } - private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type) + private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType type) { if (type == ValueType.STRING) { // we start out as not having multiple values, but this might change as we encounter them @@ -959,18 +958,17 @@ public void loadDimensionIterable( for (String dim : oldDimensionOrder) { if (dimensionDescs.get(dim) == null) { ColumnCapabilitiesImpl capabilities = oldColumnCapabilities.get(dim); - columnCapabilities.put(dim, capabilities); DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null); - addNewDimension(dim, capabilities, handler); + addNewDimension(dim, handler); } } } } @GuardedBy("dimensionDescs") - private DimensionDesc addNewDimension(String dim, ColumnCapabilitiesImpl capabilities, DimensionHandler handler) + private DimensionDesc addNewDimension(String dim, DimensionHandler handler) { - DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, capabilities, handler); + DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, handler); dimensionDescs.put(dim, desc); dimensionDescsList.add(desc); return desc; @@ -998,7 +996,10 @@ public StorageAdapter toStorageAdapter() @Nullable public ColumnCapabilities getCapabilities(String column) { - return columnCapabilities.get(column); + if (dimensionDescs.containsKey(column)) { + return dimensionDescs.get(column).getCapabilities(); + } + return timeAndSpaceAndMetricsColumnCapabilities.get(column); } public Metadata getMetadata() @@ -1080,15 +1081,13 @@ public static final class DimensionDesc { private final int index; private final String name; - private final ColumnCapabilitiesImpl capabilities; private final DimensionHandler handler; private final DimensionIndexer indexer; - public DimensionDesc(int index, String name, ColumnCapabilitiesImpl capabilities, DimensionHandler handler) + public DimensionDesc(int index, String name, DimensionHandler handler) { this.index = index; this.name = name; - this.capabilities = capabilities; this.handler = handler; this.indexer = handler.makeIndexer(); } @@ -1105,7 +1104,7 @@ public String getName() public ColumnCapabilitiesImpl getCapabilities() { - return capabilities; + return indexer.getColumnCapabilities(); } public DimensionHandler getHandler() diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index e4d7b18ec7b7..8e8520d458b7 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -35,12 +35,10 @@ import org.apache.druid.segment.DimensionIndexer; import org.apache.druid.segment.Metadata; import org.apache.druid.segment.StorageAdapter; -import org.apache.druid.segment.StringDimensionIndexer; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; -import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.segment.filter.BooleanValueMatcher; @@ -156,14 +154,7 @@ public ColumnCapabilities getColumnCapabilities(String column) // to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of // multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could // be removed. - ColumnCapabilitiesImpl snapshot = ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true); - if (snapshot != null && snapshot.isDictionaryEncoded() && snapshot.getType().equals(ValueType.STRING)) { - // only consider the column dictionary encoded if all dictionary entries have a value - snapshot.setDictionaryEncoded( - ((StringDimensionIndexer) index.getDimension(column).getIndexer()).dictionaryEncodesAllValues() - ); - } - return snapshot; + return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true); } /** From 96f19350716fa9de7c9cb7f402cbbf112f53daef Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Aug 2020 05:15:35 -0700 Subject: [PATCH 05/16] style --- .../apache/druid/segment/virtual/ExpressionSelectorsTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java index 18b75a8f51dc..bff21d83c8ea 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java @@ -261,7 +261,7 @@ public void testIncrementIndexStringSelector() throws IndexSizeExceededException Granularities.NONE, VirtualColumns.EMPTY, DimensionsSpec.EMPTY, - new AggregatorFactory[]{ new CountAggregatorFactory("count")}, + new AggregatorFactory[]{new CountAggregatorFactory("count")}, true ); @@ -303,7 +303,7 @@ public void testIncrementIndexStringSelector() throws IndexSizeExceededException null ); int rowCount = 0; - while(!cursor.isDone()) { + while (!cursor.isDone()) { Object x = xExprSelector.getObject(); Object y = yExprSelector.getObject(); List expectedFoo = Collections.singletonList("foofoo"); From cf8e34efc700910165a8a3a6e1f773f804a03cc0 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Aug 2020 05:31:10 -0700 Subject: [PATCH 06/16] style --- .../apache/druid/segment/StringDimensionIndexer.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index 644e0bca4fb1..480045bb9ef1 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -472,12 +472,12 @@ public int getUnsortedEncodedKeyComponentHashCode(int[] key) @Override public ColumnCapabilitiesImpl getColumnCapabilities() { - ColumnCapabilitiesImpl capabilites = new ColumnCapabilitiesImpl().setType(ValueType.STRING) - .setHasBitmapIndexes(hasBitmapIndexes) - .setHasSpatialIndexes(false) - .setDictionaryEncoded(dictionaryEncodesAllValues()) - .setDictionaryValuesUnique(true) - .setDictionaryValuesSorted(false); + ColumnCapabilitiesImpl capabilites = new ColumnCapabilitiesImpl().setType(ValueType.STRING) + .setHasBitmapIndexes(hasBitmapIndexes) + .setHasSpatialIndexes(false) + .setDictionaryEncoded(dictionaryEncodesAllValues()) + .setDictionaryValuesUnique(true) + .setDictionaryValuesSorted(false); // strings are only single valued, until they are not... // only explicitly set multiple values if they are certain, otherwise this indexer might process a multi-valued // row in the future in the period between obtaining capabilities and actually processing the rows with a selector From 8be87df0b22371bb1241d4d2705c4243b5519a40 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Aug 2020 14:43:59 -0700 Subject: [PATCH 07/16] fixes --- .../druid/segment/DimensionHandlerUtils.java | 6 ++-- .../druid/segment/StringDimensionHandler.java | 6 ++-- .../druid/segment/StringDimensionIndexer.java | 6 ++-- .../segment/incremental/IncrementalIndex.java | 28 ++++++++----------- .../druid/segment/IndexMergerTestBase.java | 27 ------------------ 5 files changed, 23 insertions(+), 50 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 9e5a12921c7c..6ce8f53289b7 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -73,7 +73,7 @@ private DimensionHandlerUtils() ) { if (capabilities == null) { - return new StringDimensionHandler(dimensionName, multiValueHandling, true); + return new StringDimensionHandler(dimensionName, multiValueHandling, true, false); } multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; @@ -82,7 +82,7 @@ private DimensionHandlerUtils() if (!capabilities.isDictionaryEncoded()) { throw new IAE("String column must have dictionary encoding."); } - return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes()); + return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes(), capabilities.hasSpatialIndexes()); } if (capabilities.getType() == ValueType.LONG) { @@ -98,7 +98,7 @@ private DimensionHandlerUtils() } // Return a StringDimensionHandler by default (null columns will be treated as String typed) - return new StringDimensionHandler(dimensionName, multiValueHandling, true); + return new StringDimensionHandler(dimensionName, multiValueHandling, true, false); } public static List getValueTypesFromDimensionSpecs(List dimSpecs) diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java index 65ff7f356d70..b7fdad15c190 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java @@ -98,12 +98,14 @@ private static IndexedInts getRow(ColumnValueSelector s) private final String dimensionName; private final MultiValueHandling multiValueHandling; private final boolean hasBitmapIndexes; + private final boolean hasSpatialIndexes; - public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes) + public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes) { this.dimensionName = dimensionName; this.multiValueHandling = multiValueHandling; this.hasBitmapIndexes = hasBitmapIndexes; + this.hasSpatialIndexes = hasSpatialIndexes; } @Override @@ -139,7 +141,7 @@ public SettableColumnValueSelector makeNewSettableEncodedValueSelector() @Override public DimensionIndexer makeIndexer() { - return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes); + return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index 480045bb9ef1..bdec6f62c6a6 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -235,17 +235,19 @@ public String getValueFromSortedId(int index) private final DimensionDictionary dimLookup; private final MultiValueHandling multiValueHandling; private final boolean hasBitmapIndexes; + private final boolean hasSpatialIndexes; private volatile boolean hasMultipleValues = false; private volatile boolean isSparse = false; @Nullable private SortedDimensionDictionary sortedLookup; - public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes) + public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes) { this.dimLookup = new DimensionDictionary(); this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; this.hasBitmapIndexes = hasBitmapIndexes; + this.hasSpatialIndexes = hasSpatialIndexes; } @Override @@ -474,7 +476,7 @@ public ColumnCapabilitiesImpl getColumnCapabilities() { ColumnCapabilitiesImpl capabilites = new ColumnCapabilitiesImpl().setType(ValueType.STRING) .setHasBitmapIndexes(hasBitmapIndexes) - .setHasSpatialIndexes(false) + .setHasSpatialIndexes(hasSpatialIndexes) .setDictionaryEncoded(dictionaryEncodesAllValues()) .setDictionaryValuesUnique(true) .setDictionaryValuesSorted(false); diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index 991ee0c4b956..f5beb9de0d91 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -250,7 +250,7 @@ public ColumnCapabilities getColumnCapabilities(String columnName) private final Map dimensionDescs; private final List dimensionDescsList; // dimension capabilities are provided by the indexers - private final Map timeAndSpaceAndMetricsColumnCapabilities; + private final Map timeAndMetricsColumnCapabilities; private final AtomicInteger numEntries = new AtomicInteger(); private final AtomicLong bytesInMemory = new AtomicLong(); @@ -288,7 +288,7 @@ protected IncrementalIndex( this.deserializeComplexMetrics = deserializeComplexMetrics; this.reportParseExceptions = reportParseExceptions; - this.timeAndSpaceAndMetricsColumnCapabilities = new HashMap<>(); + this.timeAndMetricsColumnCapabilities = new HashMap<>(); this.metadata = new Metadata( null, getCombiningAggregators(metrics), @@ -303,7 +303,7 @@ protected IncrementalIndex( for (AggregatorFactory metric : metrics) { MetricDesc metricDesc = new MetricDesc(metricDescs.size(), metric); metricDescs.put(metricDesc.getName(), metricDesc); - timeAndSpaceAndMetricsColumnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities()); + timeAndMetricsColumnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities()); } DimensionsSpec dimensionsSpec = incrementalIndexSchema.getDimensionsSpec(); @@ -317,22 +317,18 @@ protected IncrementalIndex( capabilities.setHasBitmapIndexes(dimSchema.hasBitmapIndex()); if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) { - // spatial indexed dimensions do not directly have a dimension indexer to provide column capabilities, so add - // capabilites to static map capabilities.setHasSpatialIndexes(true); - timeAndSpaceAndMetricsColumnCapabilities.put(dimName, capabilities); - } else { - DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities( - dimName, - capabilities, - dimSchema.getMultiValueHandling() - ); - addNewDimension(dimName, handler); } + DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities( + dimName, + capabilities, + dimSchema.getMultiValueHandling() + ); + addNewDimension(dimName, handler); } //__time capabilities - timeAndSpaceAndMetricsColumnCapabilities.put( + timeAndMetricsColumnCapabilities.put( ColumnHolder.TIME_COLUMN_NAME, ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG) ); @@ -595,7 +591,7 @@ public InputRow formatRow(InputRow row) public Map getColumnCapabilities() { ImmutableMap.Builder builder = - ImmutableMap.builder().putAll(timeAndSpaceAndMetricsColumnCapabilities); + ImmutableMap.builder().putAll(timeAndMetricsColumnCapabilities); dimensionDescs.forEach((dimension, desc) -> builder.put(dimension, desc.getCapabilities())); return builder.build(); @@ -999,7 +995,7 @@ public ColumnCapabilities getCapabilities(String column) if (dimensionDescs.containsKey(column)) { return dimensionDescs.get(column).getCapabilities(); } - return timeAndSpaceAndMetricsColumnCapabilities.get(column); + return timeAndMetricsColumnCapabilities.get(column); } public Metadata getMetadata() diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java index b253614cc3d7..86e8503f997f 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java @@ -45,7 +45,6 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; -import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.StringDictionaryEncodedColumn; @@ -2012,32 +2011,6 @@ public void testDictIdSeeker() Assert.assertEquals(-1, dictIdSeeker.seek(5)); } - @Test(expected = IllegalArgumentException.class) - public void testCloser() throws Exception - { - final long timestamp = System.currentTimeMillis(); - IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null); - IncrementalIndexTest.populateIndex(timestamp, toPersist); - ColumnCapabilitiesImpl capabilities = (ColumnCapabilitiesImpl) toPersist.getCapabilities("dim1"); - capabilities.setHasSpatialIndexes(true); - - final File tempDir = temporaryFolder.newFolder(); - final File v8TmpDir = new File(tempDir, "v8-tmp"); - final File v9TmpDir = new File(tempDir, "v9-tmp"); - - try { - indexMerger.persist(toPersist, tempDir, indexSpec, null); - } - finally { - if (v8TmpDir.exists()) { - Assert.fail("v8-tmp dir not clean."); - } - if (v9TmpDir.exists()) { - Assert.fail("v9-tmp dir not clean."); - } - } - } - @Test public void testMultiValueHandling() throws Exception { From 366a1c74953efb07a2b9ede134c274e2b79384a2 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Aug 2020 19:18:10 -0700 Subject: [PATCH 08/16] getting a bit carried away --- .../druid/query/metadata/SegmentAnalyzer.java | 2 +- .../druid/query/topn/TopNQueryEngine.java | 2 +- .../druid/segment/DimensionHandlerUtils.java | 6 +- .../apache/druid/segment/IndexMergerV9.java | 37 +++++- .../segment/column/ColumnCapabilities.java | 47 +++++++- .../column/ColumnCapabilitiesImpl.java | 107 +++++++++--------- .../segment/incremental/IncrementalIndex.java | 1 + .../IncrementalIndexStorageAdapter.java | 4 +- ...yableIndexVectorColumnSelectorFactory.java | 4 +- .../segment/virtual/ExpressionSelectors.java | 4 +- .../druid/query/lookup/LookupSegmentTest.java | 4 +- .../QueryableIndexColumnCapabilitiesTest.java | 15 ++- .../RowBasedColumnSelectorFactoryTest.java | 12 +- .../column/ColumnCapabilitiesImplTest.java | 6 +- .../HashJoinSegmentStorageAdapterTest.java | 4 +- .../join/table/IndexedTableJoinableTest.java | 4 +- .../virtual/ExpressionVirtualColumnTest.java | 4 +- 17 files changed, 167 insertions(+), 96 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 659b55b1680d..15a081d8fcad 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -227,7 +227,7 @@ private ColumnAnalysis analyzeStringColumn( min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0)); max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1)); } - } else if (capabilities.isDictionaryEncoded()) { + } else if (capabilities.isDictionaryEncoded().isTrue()) { // fallback if no bitmap index DictionaryEncodedColumn theColumn = (DictionaryEncodedColumn) columnHolder.getColumn(); cardinality = theColumn.getCardinality(); diff --git a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java index eb22dc9b4ae6..daad69fc7a6c 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java @@ -194,7 +194,7 @@ private static boolean canUsePooledAlgorithm( } if (capabilities != null && capabilities.getType() == ValueType.STRING) { // string columns must use the on heap algorithm unless they have the following capabilites - return capabilities.isDictionaryEncoded() && capabilities.areDictionaryValuesUnique().isTrue(); + return capabilities.isDictionaryEncoded().isTrue() && capabilities.areDictionaryValuesUnique().isTrue(); } else { // non-strings are not eligible to use the pooled algorithm, and should use a heap algorithm return false; diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 6ce8f53289b7..6bec81e763ea 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -79,7 +79,7 @@ private DimensionHandlerUtils() multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; if (capabilities.getType() == ValueType.STRING) { - if (!capabilities.isDictionaryEncoded()) { + if (!capabilities.isDictionaryEncoded().isMaybeTrue()) { throw new IAE("String column must have dictionary encoding."); } return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes(), capabilities.hasSpatialIndexes()); @@ -226,11 +226,11 @@ private static ColumnCapabilities getEffectiveCapabilities( capabilities = ColumnCapabilitiesImpl.copyOf(capabilities) .setType(ValueType.STRING) .setDictionaryValuesUnique( - capabilities.isDictionaryEncoded() && + capabilities.isDictionaryEncoded().isTrue() && fn.getExtractionType() == ExtractionFn.ExtractionType.ONE_TO_ONE ) .setDictionaryValuesSorted( - capabilities.isDictionaryEncoded() && fn.preservesOrdering() + capabilities.isDictionaryEncoded().isTrue() && fn.preservesOrdering() ); } diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java index 066b8dc41ea6..2e687d5c85c6 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java @@ -88,6 +88,35 @@ public class IndexMergerV9 implements IndexMerger { private static final Logger log = new Logger(IndexMergerV9.class); + // merge logic for the state capabilities will be in after incremental index is persisted + private static final ColumnCapabilities.CoercionLogic DIMENSION_CAPABILITY_MERGE_LOGIC = + new ColumnCapabilities.CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return true; + } + + @Override + public boolean dictionaryValuesSorted() + { + return true; + } + + @Override + public boolean dictionaryValuesUnique() + { + return true; + } + + @Override + public boolean multipleValues() + { + return false; + } + }; + private final ObjectMapper mapper; private final IndexIO indexIO; private final SegmentWriteOutMediumFactory defaultSegmentWriteOutMediumFactory; @@ -724,14 +753,14 @@ private void mergeCapabilities( for (String dimension : adapter.getDimensionNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(dimension); capabilitiesMap.compute(dimension, (d, existingCapabilities) -> - ColumnCapabilitiesImpl.snapshot(capabilities) - .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities))); + ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, DIMENSION_CAPABILITY_MERGE_LOGIC) + ); } for (String metric : adapter.getMetricNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(metric); capabilitiesMap.compute(metric, (m, existingCapabilities) -> - ColumnCapabilitiesImpl.snapshot(capabilities) - .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities))); + ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, ColumnCapabilitiesImpl.ALL_FALSE) + ); metricsValueTypes.put(metric, capabilities.getType()); metricTypeNames.put(metric, adapter.getMetricType(metric)); } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index a9af25b4602c..329324d7b90b 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -30,13 +30,12 @@ public interface ColumnCapabilities { ValueType getType(); - boolean isDictionaryEncoded(); + Capable isDictionaryEncoded(); Capable areDictionaryValuesSorted(); Capable areDictionaryValuesUnique(); - boolean isRunLengthEncoded(); + Capable hasMultipleValues(); boolean hasBitmapIndexes(); boolean hasSpatialIndexes(); - Capable hasMultipleValues(); boolean isFilterable(); enum Capable @@ -105,4 +104,46 @@ public String toString() return StringUtils.toLowerCase(super.toString()); } } + + interface CoercionLogic + { + boolean dictionaryEncoded(); + boolean dictionaryValuesSorted(); + boolean dictionaryValuesUnique(); + boolean multipleValues(); + } + + class AllCoercionLogic implements CoercionLogic + { + private final boolean coerceTo; + + public AllCoercionLogic(boolean coerceTo) + { + this.coerceTo = coerceTo; + } + + @Override + public boolean dictionaryEncoded() + { + return coerceTo; + } + + @Override + public boolean dictionaryValuesSorted() + { + return coerceTo; + } + + @Override + public boolean dictionaryValuesUnique() + { + return coerceTo; + } + + @Override + public boolean multipleValues() + { + return coerceTo; + } + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 9ddbd04a372f..3beb30c6feeb 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSetter; import com.google.common.base.Preconditions; import org.apache.druid.java.util.common.ISE; @@ -31,13 +32,15 @@ */ public class ColumnCapabilitiesImpl implements ColumnCapabilities { + public static final CoercionLogic ALL_TRUE = new AllCoercionLogic(true); + public static final CoercionLogic ALL_FALSE = new AllCoercionLogic(false); + public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities other) { final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); if (other != null) { capabilities.type = other.getType(); capabilities.dictionaryEncoded = other.isDictionaryEncoded(); - capabilities.runLengthEncoded = other.isRunLengthEncoded(); capabilities.hasInvertedIndexes = other.hasBitmapIndexes(); capabilities.hasSpatialIndexes = other.hasSpatialIndexes(); capabilities.hasMultipleValues = other.hasMultipleValues(); @@ -49,30 +52,59 @@ public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities o } /** - * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to - * {@link Capable#FALSE}, in order to present a snapshot of the state of the this column + * Copy a {@link ColumnCapabilities} and coerce all {@link ColumnCapabilities.Capable#UNKNOWN} to + * {@link ColumnCapabilities.Capable#TRUE} or {@link ColumnCapabilities.Capable#FALSE} as specified by + * {@link ColumnCapabilities.CoercionLogic} */ @Nullable - public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities) + public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, CoercionLogic coerce) { - return snapshot(capabilities, false); + if (capabilities == null) { + return null; + } + ColumnCapabilitiesImpl copy = copyOf(capabilities); + copy.dictionaryEncoded = copy.dictionaryEncoded.coerceUnknownToBoolean(coerce.dictionaryEncoded()); + copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(coerce.dictionaryValuesSorted()); + copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(coerce.dictionaryValuesUnique()); + copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(coerce.multipleValues()); + return copy; } /** - * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to - * {@link Capable#FALSE} or {@link Capable#TRUE}, in order to present a snapshot of the state of the this column + * Snapshots a pair of capabilities and then merges them */ @Nullable - public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, boolean unknownIsTrue) - { - if (capabilities == null) { - return null; + public static ColumnCapabilitiesImpl merge( + @Nullable final ColumnCapabilities capabilities, + @Nullable final ColumnCapabilities other, + CoercionLogic coercionLogic + ) + { + ColumnCapabilitiesImpl merged = snapshot(capabilities, coercionLogic); + ColumnCapabilitiesImpl otherSnapshot = snapshot(other, coercionLogic); + if (merged == null) { + return otherSnapshot; + } else if (otherSnapshot == null) { + return merged; } - ColumnCapabilitiesImpl copy = copyOf(capabilities); - copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(unknownIsTrue); - copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(unknownIsTrue); - copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(unknownIsTrue); - return copy; + + if (merged.type == null) { + merged.type = other.getType(); + } + + if (!merged.type.equals(otherSnapshot.getType())) { + throw new ISE("Cannot merge columns of type[%s] and [%s]", merged.type, otherSnapshot.getType()); + } + + merged.dictionaryEncoded = merged.dictionaryEncoded.or(otherSnapshot.isDictionaryEncoded()); + merged.hasMultipleValues = merged.hasMultipleValues.or(otherSnapshot.hasMultipleValues()); + merged.dictionaryValuesSorted = merged.dictionaryValuesSorted.and(otherSnapshot.areDictionaryValuesSorted()); + merged.dictionaryValuesUnique = merged.dictionaryValuesUnique.and(otherSnapshot.areDictionaryValuesUnique()); + merged.hasInvertedIndexes |= otherSnapshot.hasBitmapIndexes(); + merged.hasSpatialIndexes |= otherSnapshot.hasSpatialIndexes(); + merged.filterable &= otherSnapshot.isFilterable(); + + return merged; } @@ -93,10 +125,9 @@ public static ColumnCapabilitiesImpl createSimpleNumericColumnCapabilities(Value @Nullable private ValueType type = null; - private boolean dictionaryEncoded = false; - private boolean runLengthEncoded = false; private boolean hasInvertedIndexes = false; private boolean hasSpatialIndexes = false; + private Capable dictionaryEncoded = Capable.UNKNOWN; private Capable hasMultipleValues = Capable.UNKNOWN; // These capabilities are computed at query time and not persisted in the segment files. @@ -121,15 +152,16 @@ public ColumnCapabilitiesImpl setType(ValueType type) } @Override - @JsonProperty - public boolean isDictionaryEncoded() + @JsonProperty("dictionaryEncoded") + public Capable isDictionaryEncoded() { return dictionaryEncoded; } + @JsonSetter("dictionaryEncoded") public ColumnCapabilitiesImpl setDictionaryEncoded(boolean dictionaryEncoded) { - this.dictionaryEncoded = dictionaryEncoded; + this.dictionaryEncoded = Capable.of(dictionaryEncoded); return this; } @@ -157,13 +189,6 @@ public ColumnCapabilitiesImpl setDictionaryValuesUnique(boolean dictionaryValues return this; } - @Override - @JsonProperty - public boolean isRunLengthEncoded() - { - return runLengthEncoded; - } - @Override @JsonProperty("hasBitmapIndexes") public boolean hasBitmapIndexes() @@ -218,30 +243,4 @@ public ColumnCapabilitiesImpl setFilterable(boolean filterable) this.filterable = filterable; return this; } - - public ColumnCapabilities merge(@Nullable ColumnCapabilities other) - { - if (other == null) { - return this; - } - - if (type == null) { - type = other.getType(); - } - - if (!type.equals(other.getType())) { - throw new ISE("Cannot merge columns of type[%s] and [%s]", type, other.getType()); - } - - this.dictionaryEncoded |= other.isDictionaryEncoded(); - this.runLengthEncoded |= other.isRunLengthEncoded(); - this.hasInvertedIndexes |= other.hasBitmapIndexes(); - this.hasSpatialIndexes |= other.hasSpatialIndexes(); - this.filterable &= other.isFilterable(); - this.hasMultipleValues = this.hasMultipleValues.or(other.hasMultipleValues()); - this.dictionaryValuesSorted = this.dictionaryValuesSorted.and(other.areDictionaryValuesSorted()); - this.dictionaryValuesUnique = this.dictionaryValuesUnique.and(other.areDictionaryValuesUnique()); - - return this; - } } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index f5beb9de0d91..053883805a90 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -954,6 +954,7 @@ public void loadDimensionIterable( for (String dim : oldDimensionOrder) { if (dimensionDescs.get(dim) == null) { ColumnCapabilitiesImpl capabilities = oldColumnCapabilities.get(dim); + capabilities.setDictionaryEncoded(capabilities.getType().equals(ValueType.STRING)); DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null); addNewDimension(dim, handler); } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 8e8520d458b7..862aa7d28bd0 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -154,7 +154,7 @@ public ColumnCapabilities getColumnCapabilities(String column) // to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of // multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could // be removed. - return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true); + return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), ColumnCapabilitiesImpl.ALL_TRUE); } /** @@ -165,7 +165,7 @@ public ColumnCapabilities getColumnCapabilities(String column) */ public ColumnCapabilities getSnapshotColumnCapabilities(String column) { - return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column)); + return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), ColumnCapabilitiesImpl.ALL_FALSE); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java index 269ac38429bb..b417920f26bb 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -83,7 +83,7 @@ public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final D spec -> { final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); if (holder == null - || !holder.getCapabilities().isDictionaryEncoded() + || !holder.getCapabilities().isDictionaryEncoded().isTrue() || holder.getCapabilities().getType() != ValueType.STRING || !holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { throw new ISE( @@ -119,7 +119,7 @@ public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(final spec -> { final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); if (holder == null - || !holder.getCapabilities().isDictionaryEncoded() + || !holder.getCapabilities().isDictionaryEncoded().isTrue() || holder.getCapabilities().getType() != ValueType.STRING) { // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls. return NilVectorSelector.create(offset); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 5ab4e4694a2b..e0ad6d12a516 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -153,7 +153,7 @@ public static ColumnValueSelector makeExprEvalSelector( ); } else if (capabilities != null && capabilities.getType() == ValueType.STRING - && capabilities.isDictionaryEncoded() + && capabilities.isDictionaryEncoded().isTrue() && !capabilities.hasMultipleValues().isMaybeTrue() && exprDetails.getArrayBindings().isEmpty()) { // Optimization for expressions that hit one scalar string column and nothing else. @@ -225,7 +225,7 @@ public static DimensionSelector makeDimensionSelector( // not treating it as an array and not wanting to output an array if (capabilities != null && capabilities.getType() == ValueType.STRING - && capabilities.isDictionaryEncoded() + && capabilities.isDictionaryEncoded().isTrue() && !capabilities.hasMultipleValues().isUnknown() && !exprDetails.hasInputArrays() && !exprDetails.isOutputArray() diff --git a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java index 3ca72aa5a698..96167fd38bdc 100644 --- a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java @@ -138,7 +138,7 @@ public void test_asStorageAdapter_getColumnCapabilitiesK() // reporting complete single-valued capabilities. It would be good to change this in the future, so query engines // running on top of lookups can take advantage of singly-valued optimizations. Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); - Assert.assertFalse(capabilities.isDictionaryEncoded()); + Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue()); } @Test @@ -151,7 +151,7 @@ public void test_asStorageAdapter_getColumnCapabilitiesV() // running on top of lookups can take advantage of singly-valued optimizations. Assert.assertEquals(ValueType.STRING, capabilities.getType()); Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); - Assert.assertFalse(capabilities.isDictionaryEncoded()); + Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index c7783e992197..1ecb17c9a7c4 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -150,20 +150,23 @@ public void testStringColumn() ColumnCapabilities caps = INC_INDEX.getCapabilities("d1"); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isMaybeTrue()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); // multi-value is unknown unless explicitly set to 'true' Assert.assertTrue(caps.hasMultipleValues().isUnknown()); // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, // coercing any 'UNKNOWN' values to false - Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue()); + Assert.assertFalse( + ColumnCapabilitiesImpl.snapshot(caps, ColumnCapabilitiesImpl.ALL_FALSE).hasMultipleValues().isMaybeTrue() + ); Assert.assertFalse(caps.hasSpatialIndexes()); caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -176,7 +179,7 @@ public void testMultiStringColumn() ColumnCapabilities caps = INC_INDEX.getCapabilities("d2"); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); @@ -185,7 +188,7 @@ public void testMultiStringColumn() caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); @@ -204,7 +207,7 @@ private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueTyp { Assert.assertEquals(valueType, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); diff --git a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java index e12dac4743cd..a802b819b570 100644 --- a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java @@ -51,7 +51,7 @@ public void testCapabilitiesTime() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, ColumnHolder.TIME_COLUMN_NAME); Assert.assertEquals(ValueType.LONG, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -65,7 +65,7 @@ public void testCapabilitiesString() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, STRING_COLUMN_NAME); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); @@ -79,7 +79,7 @@ public void testCapabilitiesLong() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, LONG_COLUMN_NAME); Assert.assertEquals(ValueType.LONG, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -93,7 +93,7 @@ public void testCapabilitiesFloat() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, FLOAT_COLUMN_NAME); Assert.assertEquals(ValueType.FLOAT, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -107,7 +107,7 @@ public void testCapabilitiesDouble() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, DOUBLE_COLUMN_NAME); Assert.assertEquals(ValueType.DOUBLE, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -121,7 +121,7 @@ public void testCapabilitiesComplex() RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, COMPLEX_COLUMN_NAME); Assert.assertEquals(ValueType.COMPLEX, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); diff --git a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java index e221edd9c73f..ce98506dc7be 100644 --- a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java +++ b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java @@ -44,8 +44,7 @@ public void testSerde() throws Exception ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class); Assert.assertEquals(ValueType.COMPLEX, cc.getType()); - Assert.assertTrue(cc.isDictionaryEncoded()); - Assert.assertFalse(cc.isRunLengthEncoded()); + Assert.assertTrue(cc.isDictionaryEncoded().isTrue()); Assert.assertTrue(cc.hasSpatialIndexes()); Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); @@ -69,8 +68,7 @@ public void testDeserialization() throws Exception ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class); Assert.assertEquals(ValueType.COMPLEX, cc.getType()); - Assert.assertTrue(cc.isDictionaryEncoded()); - Assert.assertTrue(cc.isRunLengthEncoded()); + Assert.assertTrue(cc.isDictionaryEncoded().isTrue()); Assert.assertTrue(cc.hasSpatialIndexes()); Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java index 7b80bd2094c5..6406d7afe098 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java @@ -200,7 +200,7 @@ public void test_getColumnCapabilities_factToCountryFactColumn() Assert.assertEquals(ValueType.STRING, capabilities.getType()); Assert.assertTrue(capabilities.hasBitmapIndexes()); - Assert.assertTrue(capabilities.isDictionaryEncoded()); + Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue()); Assert.assertTrue(capabilities.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(capabilities.areDictionaryValuesUnique().isTrue()); } @@ -216,7 +216,7 @@ public void test_getColumnCapabilities_factToCountryJoinColumn() Assert.assertFalse(capabilities.hasBitmapIndexes()); Assert.assertFalse(capabilities.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(capabilities.areDictionaryValuesSorted().isTrue()); - Assert.assertTrue(capabilities.isDictionaryEncoded()); + Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java index c75232c9be9b..61b56377b5d9 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java @@ -137,7 +137,7 @@ public void getColumnCapabilitiesForStringColumn() { final ColumnCapabilities capabilities = target.getColumnCapabilities("str"); Assert.assertEquals(ValueType.STRING, capabilities.getType()); - Assert.assertTrue(capabilities.isDictionaryEncoded()); + Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue()); Assert.assertFalse(capabilities.hasBitmapIndexes()); Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(capabilities.hasSpatialIndexes()); @@ -148,7 +148,7 @@ public void getColumnCapabilitiesForLongColumn() { final ColumnCapabilities capabilities = target.getColumnCapabilities("long"); Assert.assertEquals(ValueType.LONG, capabilities.getType()); - Assert.assertFalse(capabilities.isDictionaryEncoded()); + Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue()); Assert.assertFalse(capabilities.hasBitmapIndexes()); Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(capabilities.hasSpatialIndexes()); diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java index 16e090dc3d53..84bf5fd7e741 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java @@ -810,7 +810,7 @@ public void testCapabilities() ColumnCapabilities caps = X_PLUS_Y.capabilities("expr"); Assert.assertEquals(ValueType.FLOAT, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); @@ -820,7 +820,7 @@ public void testCapabilities() caps = Z_CONCAT_X.capabilities("expr"); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); From 5c0c2ad3207e782e101dd942bbdfbab3f3398cf4 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Aug 2020 19:26:13 -0700 Subject: [PATCH 09/16] missed one --- .../org/apache/druid/segment/StringDimensionIndexer.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index bdec6f62c6a6..e8d3c96e5007 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -477,9 +477,9 @@ public ColumnCapabilitiesImpl getColumnCapabilities() ColumnCapabilitiesImpl capabilites = new ColumnCapabilitiesImpl().setType(ValueType.STRING) .setHasBitmapIndexes(hasBitmapIndexes) .setHasSpatialIndexes(hasSpatialIndexes) - .setDictionaryEncoded(dictionaryEncodesAllValues()) .setDictionaryValuesUnique(true) .setDictionaryValuesSorted(false); + // strings are only single valued, until they are not... // only explicitly set multiple values if they are certain, otherwise this indexer might process a multi-valued // row in the future in the period between obtaining capabilities and actually processing the rows with a selector @@ -487,6 +487,11 @@ public ColumnCapabilitiesImpl getColumnCapabilities() if (hasMultipleValues) { capabilites.setHasMultipleValues(true); } + // likewise only set dictionaryEncoded if explicitly if true + final boolean allValuesEncoded = dictionaryEncodesAllValues(); + if (allValuesEncoded) { + capabilites.setDictionaryEncoded(true); + } return capabilites; } From dee4293c31a6b8488c455c4a27f8f7fdb4aced98 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Aug 2020 19:53:46 -0700 Subject: [PATCH 10/16] fix it --- .../column/ColumnCapabilitiesImpl.java | 1 - .../IncrementalIndexStorageAdapter.java | 63 ++++++++++++++++++- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 3beb30c6feeb..3c821c7cf1b2 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -32,7 +32,6 @@ */ public class ColumnCapabilitiesImpl implements ColumnCapabilities { - public static final CoercionLogic ALL_TRUE = new AllCoercionLogic(true); public static final CoercionLogic ALL_FALSE = new AllCoercionLogic(false); public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities other) diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 862aa7d28bd0..9beb16820abc 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -52,6 +52,62 @@ */ public class IncrementalIndexStorageAdapter implements StorageAdapter { + private static final ColumnCapabilities.CoercionLogic STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC = + new ColumnCapabilities.CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return false; + } + + @Override + public boolean dictionaryValuesSorted() + { + return false; + } + + @Override + public boolean dictionaryValuesUnique() + { + return true; + } + + @Override + public boolean multipleValues() + { + return true; + } + }; + + private static final ColumnCapabilities.CoercionLogic SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC = + new ColumnCapabilities.CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return true; + } + + @Override + public boolean dictionaryValuesSorted() + { + return true; + } + + @Override + public boolean dictionaryValuesUnique() + { + return true; + } + + @Override + public boolean multipleValues() + { + return false; + } + }; + final IncrementalIndex index; public IncrementalIndexStorageAdapter(IncrementalIndex index) @@ -154,7 +210,7 @@ public ColumnCapabilities getColumnCapabilities(String column) // to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of // multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could // be removed. - return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), ColumnCapabilitiesImpl.ALL_TRUE); + return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC); } /** @@ -165,7 +221,10 @@ public ColumnCapabilities getColumnCapabilities(String column) */ public ColumnCapabilities getSnapshotColumnCapabilities(String column) { - return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), ColumnCapabilitiesImpl.ALL_FALSE); + return ColumnCapabilitiesImpl.snapshot( + index.getCapabilities(column), + SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC + ); } @Override From 9fffcd58695a516072b9274bd05fc90a9109c5a3 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Aug 2020 21:55:15 -0700 Subject: [PATCH 11/16] benchmark build fix --- .../benchmark/indexing/StringDimensionIndexerBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java index 52fc85194e7c..2e4490bd26c3 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java @@ -59,7 +59,7 @@ public class StringDimensionIndexerBenchmark @Setup public void setup() { - indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true); + indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true, false); for (int i = 0; i < cardinality; i++) { indexer.processRowValsToUnsortedEncodedKeyComponent("abcd-" + i, true); From 49f7cea16a1a9c03cb7b7f89d114c084043e5fe0 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 7 Aug 2020 04:48:46 -0700 Subject: [PATCH 12/16] review stuffs --- .../druid/indexer/IndexGeneratorJob.java | 4 +- .../epinephelinae/GroupByQueryEngineV2.java | 2 +- .../druid/segment/DimensionHandlerUtils.java | 2 +- .../druid/segment/DimensionIndexer.java | 4 +- .../druid/segment/DoubleDimensionIndexer.java | 3 +- .../druid/segment/FloatDimensionIndexer.java | 3 +- .../apache/druid/segment/IndexMergerV9.java | 32 ++++++++++++++- .../druid/segment/LongDimensionIndexer.java | 3 +- .../druid/segment/StringDimensionIndexer.java | 14 ++++--- .../segment/column/ColumnCapabilities.java | 39 +++---------------- .../column/ColumnCapabilitiesImpl.java | 2 - .../segment/filter/ExpressionFilter.java | 2 +- .../apache/druid/segment/filter/Filters.java | 2 +- .../segment/incremental/IncrementalIndex.java | 14 +++---- ...yableIndexVectorColumnSelectorFactory.java | 4 +- .../segment/virtual/ExpressionSelectors.java | 2 +- .../QueryableIndexColumnCapabilitiesTest.java | 5 ++- .../druid/segment/realtime/plumber/Sink.java | 6 +-- 18 files changed, 73 insertions(+), 70 deletions(-) diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java index 5dfdcac9c521..72bbaee335ad 100644 --- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java +++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java @@ -47,7 +47,7 @@ import org.apache.druid.segment.BaseProgressIndicator; import org.apache.druid.segment.ProgressIndicator; import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.indexing.TuningConfigs; @@ -289,7 +289,7 @@ private static IncrementalIndex makeIncrementalIndex( AggregatorFactory[] aggs, HadoopDruidIndexerConfig config, Iterable oldDimOrder, - Map oldCapabilities + Map oldCapabilities ) { final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig(); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index d3aaa4fd3ee1..b4cda9b54d93 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -340,7 +340,7 @@ public static boolean isAllSingleValueDims( // Now check column capabilities. final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); - return (columnCapabilities != null && !columnCapabilities.hasMultipleValues().isMaybeTrue()) || + return (columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse()) || (missingMeansNonExistent && columnCapabilities == null); }); } diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 6bec81e763ea..c83aa0d7cb26 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -79,7 +79,7 @@ private DimensionHandlerUtils() multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; if (capabilities.getType() == ValueType.STRING) { - if (!capabilities.isDictionaryEncoded().isMaybeTrue()) { + if (capabilities.isDictionaryEncoded().isFalse()) { throw new IAE("String column must have dictionary encoding."); } return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes(), capabilities.hasSpatialIndexes()); diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java index 9b80fb50c6e6..277deb94e9bb 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java @@ -22,7 +22,7 @@ import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.MutableBitmap; import org.apache.druid.query.dimension.DimensionSpec; -import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -237,7 +237,7 @@ ColumnValueSelector makeColumnValueSelector( IncrementalIndex.DimensionDesc desc ); - ColumnCapabilitiesImpl getColumnCapabilities(); + ColumnCapabilities getColumnCapabilities(); /** * Compares the row values for this DimensionIndexer's dimension from a Row key. * diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index 6c9beb029df0..677ed41ba239 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CloseableIndexed; @@ -95,7 +96,7 @@ public int getCardinality() } @Override - public ColumnCapabilitiesImpl getColumnCapabilities() + public ColumnCapabilities getColumnCapabilities() { return capabilities; } diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java index 3a9823021a86..2db5b59a668d 100644 --- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java @@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CloseableIndexed; @@ -96,7 +97,7 @@ public int getCardinality() } @Override - public ColumnCapabilitiesImpl getColumnCapabilities() + public ColumnCapabilities getColumnCapabilities() { return capabilities; } diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java index 2e687d5c85c6..55db9faa0a00 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java @@ -89,7 +89,7 @@ public class IndexMergerV9 implements IndexMerger private static final Logger log = new Logger(IndexMergerV9.class); // merge logic for the state capabilities will be in after incremental index is persisted - private static final ColumnCapabilities.CoercionLogic DIMENSION_CAPABILITY_MERGE_LOGIC = + public static final ColumnCapabilities.CoercionLogic DIMENSION_CAPABILITY_MERGE_LOGIC = new ColumnCapabilities.CoercionLogic() { @Override @@ -117,6 +117,34 @@ public boolean multipleValues() } }; + public static final ColumnCapabilities.CoercionLogic METRIC_CAPABILITY_MERGE_LOGIC = + new ColumnCapabilities.CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return false; + } + + @Override + public boolean dictionaryValuesSorted() + { + return false; + } + + @Override + public boolean dictionaryValuesUnique() + { + return false; + } + + @Override + public boolean multipleValues() + { + return false; + } + }; + private final ObjectMapper mapper; private final IndexIO indexIO; private final SegmentWriteOutMediumFactory defaultSegmentWriteOutMediumFactory; @@ -759,7 +787,7 @@ private void mergeCapabilities( for (String metric : adapter.getMetricNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(metric); capabilitiesMap.compute(metric, (m, existingCapabilities) -> - ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, ColumnCapabilitiesImpl.ALL_FALSE) + ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, METRIC_CAPABILITY_MERGE_LOGIC) ); metricsValueTypes.put(metric, capabilities.getType()); metricTypeNames.put(metric, adapter.getMetricType(metric)); diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java index 6e8c4a95d4de..266405504b4f 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java @@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CloseableIndexed; @@ -97,7 +98,7 @@ public int getCardinality() } @Override - public ColumnCapabilitiesImpl getColumnCapabilities() + public ColumnCapabilities getColumnCapabilities() { return capabilities; } diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index e8d3c96e5007..49609940c5ab 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -40,6 +40,7 @@ import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.ArrayBasedIndexedInts; @@ -407,7 +408,7 @@ public int getCardinality() /** * returns true if all values are encoded in {@link #dimLookup} */ - public boolean dictionaryEncodesAllValues() + private boolean dictionaryEncodesAllValues() { // name lookup is possible in advance if we explicitly process a value for every row, or if we've encountered an // actual null value and it is present in our dictionary. otherwise the dictionary will be missing ids for implicit @@ -472,7 +473,7 @@ public int getUnsortedEncodedKeyComponentHashCode(int[] key) } @Override - public ColumnCapabilitiesImpl getColumnCapabilities() + public ColumnCapabilities getColumnCapabilities() { ColumnCapabilitiesImpl capabilites = new ColumnCapabilitiesImpl().setType(ValueType.STRING) .setHasBitmapIndexes(hasBitmapIndexes) @@ -481,13 +482,14 @@ public ColumnCapabilitiesImpl getColumnCapabilities() .setDictionaryValuesSorted(false); // strings are only single valued, until they are not... - // only explicitly set multiple values if they are certain, otherwise this indexer might process a multi-valued - // row in the future in the period between obtaining capabilities and actually processing the rows with a selector - // leaving as unknown allows the caller to decide + // We only explicitly set multiple values if we are certain that there are multiple values. + // Otherwise, this indexer might process a multi-valued row in the period between obtaining the + // capabilities, and actually processing the rows with a selector. + // Leaving as unknown allows the caller to decide how to handle this if (hasMultipleValues) { capabilites.setHasMultipleValues(true); } - // likewise only set dictionaryEncoded if explicitly if true + // likewise only set dictionaryEncoded if explicitly if true for the same reason final boolean allValuesEncoded = dictionaryEncodesAllValues(); if (allValuesEncoded) { capabilites.setDictionaryEncoded(true); diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index 329324d7b90b..037510208ab5 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -54,6 +54,11 @@ public boolean isMaybeTrue() return isTrue() || isUnknown(); } + public boolean isFalse() + { + return this == FALSE; + } + public boolean isUnknown() { return this == UNKNOWN; @@ -112,38 +117,4 @@ interface CoercionLogic boolean dictionaryValuesUnique(); boolean multipleValues(); } - - class AllCoercionLogic implements CoercionLogic - { - private final boolean coerceTo; - - public AllCoercionLogic(boolean coerceTo) - { - this.coerceTo = coerceTo; - } - - @Override - public boolean dictionaryEncoded() - { - return coerceTo; - } - - @Override - public boolean dictionaryValuesSorted() - { - return coerceTo; - } - - @Override - public boolean dictionaryValuesUnique() - { - return coerceTo; - } - - @Override - public boolean multipleValues() - { - return coerceTo; - } - } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 3c821c7cf1b2..9473efde0e70 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -32,8 +32,6 @@ */ public class ColumnCapabilitiesImpl implements ColumnCapabilities { - public static final CoercionLogic ALL_FALSE = new AllCoercionLogic(false); - public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities other) { final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index 0baa59490712..f11e7a776d4a 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -115,7 +115,7 @@ public boolean supportsBitmapIndex(final BitmapIndexSelector selector) // multiple values. The lack of multiple values is important because expression filters treat multi-value // arrays as nulls, which doesn't permit index based filtering. final String column = Iterables.getOnlyElement(requiredBindings.get()); - return selector.getBitmapIndex(column) != null && !selector.hasMultipleValues(column).isMaybeTrue(); + return selector.getBitmapIndex(column) != null && selector.hasMultipleValues(column).isFalse(); } else { // Multi-column expression. return false; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 399adb9eaaf3..116f59b47648 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -414,7 +414,7 @@ static boolean supportsSelectivityEstimation( if (filter.supportsBitmapIndex(indexSelector)) { final ColumnHolder columnHolder = columnSelector.getColumnHolder(dimension); if (columnHolder != null) { - return !columnHolder.getCapabilities().hasMultipleValues().isMaybeTrue(); + return columnHolder.getCapabilities().hasMultipleValues().isFalse(); } } return false; diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index 053883805a90..d2f4fd5326c6 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -250,7 +250,7 @@ public ColumnCapabilities getColumnCapabilities(String columnName) private final Map dimensionDescs; private final List dimensionDescsList; // dimension capabilities are provided by the indexers - private final Map timeAndMetricsColumnCapabilities; + private final Map timeAndMetricsColumnCapabilities; private final AtomicInteger numEntries = new AtomicInteger(); private final AtomicLong bytesInMemory = new AtomicLong(); @@ -588,10 +588,10 @@ public InputRow formatRow(InputRow row) return row; } - public Map getColumnCapabilities() + public Map getColumnCapabilities() { - ImmutableMap.Builder builder = - ImmutableMap.builder().putAll(timeAndMetricsColumnCapabilities); + ImmutableMap.Builder builder = + ImmutableMap.builder().putAll(timeAndMetricsColumnCapabilities); dimensionDescs.forEach((dimension, desc) -> builder.put(dimension, desc.getCapabilities())); return builder.build(); @@ -1099,7 +1099,7 @@ public String getName() return name; } - public ColumnCapabilitiesImpl getCapabilities() + public ColumnCapabilities getCapabilities() { return indexer.getColumnCapabilities(); } @@ -1120,7 +1120,7 @@ public static final class MetricDesc private final int index; private final String name; private final String type; - private final ColumnCapabilitiesImpl capabilities; + private final ColumnCapabilities capabilities; public MetricDesc(int index, AggregatorFactory factory) { @@ -1159,7 +1159,7 @@ public String getType() return type; } - public ColumnCapabilitiesImpl getCapabilities() + public ColumnCapabilities getCapabilities() { return capabilities; } diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java index b417920f26bb..48c56c9ef6df 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -83,9 +83,9 @@ public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final D spec -> { final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); if (holder == null - || !holder.getCapabilities().isDictionaryEncoded().isTrue() + || holder.getCapabilities().isDictionaryEncoded().isFalse() || holder.getCapabilities().getType() != ValueType.STRING - || !holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { + || holder.getCapabilities().hasMultipleValues().isFalse()) { throw new ISE( "Column[%s] is not a multi-value string column, do not ask for a multi-value selector", spec.getDimension() diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index e0ad6d12a516..17c6a3f39898 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -154,7 +154,7 @@ public static ColumnValueSelector makeExprEvalSelector( } else if (capabilities != null && capabilities.getType() == ValueType.STRING && capabilities.isDictionaryEncoded().isTrue() - && !capabilities.hasMultipleValues().isMaybeTrue() + && capabilities.hasMultipleValues().isFalse() && exprDetails.getArrayBindings().isEmpty()) { // Optimization for expressions that hit one scalar string column and nothing else. return new SingleStringInputCachingExpressionColumnValueSelector( diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index 1ecb17c9a7c4..57689a46cb0a 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -159,7 +159,9 @@ public void testStringColumn() // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, // coercing any 'UNKNOWN' values to false Assert.assertFalse( - ColumnCapabilitiesImpl.snapshot(caps, ColumnCapabilitiesImpl.ALL_FALSE).hasMultipleValues().isMaybeTrue() + ColumnCapabilitiesImpl.snapshot(caps, IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC) + .hasMultipleValues() + .isMaybeTrue() ); Assert.assertFalse(caps.hasSpatialIndexes()); @@ -202,7 +204,6 @@ public void testComplexColumn() assertNonStringColumnCapabilities(MMAP_INDEX.getColumnHolder("m4").getCapabilities(), ValueType.COMPLEX); } - private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueType valueType) { Assert.assertEquals(valueType, caps.getType()); diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java index 7002ac22fa66..2324a2aca6a9 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java @@ -29,7 +29,7 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.ReferenceCountingSegment; -import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexAddResult; import org.apache.druid.segment.incremental.IncrementalIndexSchema; @@ -377,7 +377,7 @@ private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) FireHydrant lastHydrant = hydrants.get(numHydrants - 1); newCount = lastHydrant.getCount() + 1; if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) { - Map oldCapabilities; + Map oldCapabilities; if (lastHydrant.hasSwapped()) { oldCapabilities = new HashMap<>(); ReferenceCountingSegment segment = lastHydrant.getIncrementedSegment(); @@ -385,7 +385,7 @@ private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) QueryableIndex oldIndex = segment.asQueryableIndex(); for (String dim : oldIndex.getAvailableDimensions()) { dimOrder.add(dim); - oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumnHolder(dim).getCapabilities()); + oldCapabilities.put(dim, oldIndex.getColumnHolder(dim).getCapabilities()); } } finally { From 626fbb43b12ce1656f37483fbdb8c73cbb9fcb09 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 7 Aug 2020 19:22:22 -0700 Subject: [PATCH 13/16] javadoc and comments --- .../druid/segment/StringDimensionIndexer.java | 18 +++-- .../segment/column/ColumnCapabilities.java | 67 +++++++++++++++++++ .../virtual/ExpressionSelectorsTest.java | 6 ++ 3 files changed, 85 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index 49609940c5ab..bca0a5c72a9c 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -481,15 +481,21 @@ public ColumnCapabilities getColumnCapabilities() .setDictionaryValuesUnique(true) .setDictionaryValuesSorted(false); - // strings are only single valued, until they are not... - // We only explicitly set multiple values if we are certain that there are multiple values. - // Otherwise, this indexer might process a multi-valued row in the period between obtaining the - // capabilities, and actually processing the rows with a selector. - // Leaving as unknown allows the caller to decide how to handle this + // Strings are opportunistically multi-valued, but the capabilities are initialized as 'unknown', since a + // multi-valued row might be processed at any point during ingestion. + // We only explicitly set multiple values if we are certain that there are multiple values, otherwise, a race + // condition might occur where this indexer might process a multi-valued row in the period between obtaining the + // capabilities, and actually processing the rows with a selector. Leaving as unknown allows the caller to decide + // how to handle this. if (hasMultipleValues) { capabilites.setHasMultipleValues(true); } - // likewise only set dictionaryEncoded if explicitly if true for the same reason + // Likewise, only set dictionaryEncoded if explicitly if true for a similar reason as multi-valued handling. The + // dictionary is populated as rows are processed, but there might be implicit default values not accounted for in + // the dictionary yet. We can be certain that the dictionary has an entry for every value if either of + // a) we have already processed an explitic default (null) valued row for this column + // b) the processing was not 'sparse', meaning that this indexer has processed an explict value for every row + // is true. final boolean allValuesEncoded = dictionaryEncodesAllValues(); if (allValuesEncoded) { capabilites.setDictionaryEncoded(true); diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index 037510208ab5..ee252aac93cc 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -26,16 +26,59 @@ import javax.annotation.Nullable; /** + * This interface is used to expose information about columns that is interesting to know for all matters dealing with + * reading from columns, including query planning and optimization, creating readers to merge segments at ingestion + * time, and probably nearly anything else you can imagine. */ public interface ColumnCapabilities { + /** + * Column type, good to know so caller can know what to expect and which optimal selector to use + */ ValueType getType(); + + /** + * Is the column dictionary encoded? If so, a DimensionDictionarySelector may be used instead of using a value + * selector, allowing algorithms to operate on primitive integer dictionary ids rather than the looked up dictionary + * values + */ Capable isDictionaryEncoded(); + + /** + * If the column is dictionary encoded, are those values sorted? Useful to know for optimizations that can defer + * looking up values and allowing sorting with the dictionary ids directly + */ Capable areDictionaryValuesSorted(); + + /** + * If the column is dictionary encoded, is there a 1:1 mapping of dictionary ids to values? If this is true, it + * unlocks optimizations such as allowing for things like grouping directly on dictionary ids and deferred value + * lookup + */ Capable areDictionaryValuesUnique(); + + /** + * String columns are sneaky, and might have multiple values, this is to allow callers to know and appropriately + * prepare themselves + */ Capable hasMultipleValues(); + + /** + * Does the column have an inverted index bitmap for each value? If so, these may be employed to 'pre-filter' the + * column by examining if the values match the filter and intersecting the bitmaps, to avoid having to scan and + * evaluate if every row matches the filter + */ boolean hasBitmapIndexes(); + + /** + * Does the column have spatial indexes available to allow use with spatial filtering? + */ boolean hasSpatialIndexes(); + + /** + * All Druid primitive columns support filtering, maybe with or without indexes, but by default complex columns + * do not support direct filtering, unless provided by through a custom implementation. + */ boolean isFilterable(); enum Capable @@ -110,11 +153,35 @@ public String toString() } } + /** + * This interface define the shape of a mechnism to allow for bespoke coercion of {@link Capable#UNKNOWN} into + * {@link Capable#TRUE} or {@link Capable#FALSE} for each {@link Capable} of a {@link ColumnCapabilities}, as is + * appropriate for the situation of the caller. + */ interface CoercionLogic { + /** + * If {@link ColumnCapabilities#isDictionaryEncoded()} is {@link Capable#UNKNOWN}, define if it should be treated + * as true or false. + */ boolean dictionaryEncoded(); + + /** + * If {@link ColumnCapabilities#areDictionaryValuesSorted()} ()} is {@link Capable#UNKNOWN}, define if it should be treated + * as true or false. + */ boolean dictionaryValuesSorted(); + + /** + * If {@link ColumnCapabilities#areDictionaryValuesUnique()} ()} is {@link Capable#UNKNOWN}, define if it should be treated + * as true or false. + */ boolean dictionaryValuesUnique(); + + /** + * If {@link ColumnCapabilities#hasMultipleValues()} is {@link Capable#UNKNOWN}, define if it should be treated + * as true or false. + */ boolean multipleValues(); } } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java index bff21d83c8ea..52143b0ffda9 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java @@ -255,6 +255,12 @@ public void testCoerceExprToValue() @Test public void testIncrementIndexStringSelector() throws IndexSizeExceededException { + // This test covers a regression caused by ColumnCapabilites.isDictionaryEncoded not matching the value of + // DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an + // exception trying to make an optimized string expression selector that was not appropriate to use for the + // underlying dimension selector. + // This occurred during schemaless ingestion with spare dimension values and no explicit null rows, so the + // conditions are replicated by this test. IncrementalIndexSchema schema = new IncrementalIndexSchema( 0, new TimestampSpec("time", "millis", DateTimes.nowUtc()), From 0b46811abe0f7917274a4b8af645ddea10a7ef7f Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 10 Aug 2020 17:18:20 -0700 Subject: [PATCH 14/16] add comment --- .../apache/druid/segment/virtual/ExpressionSelectorsTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java index 52143b0ffda9..08b18c44ff79 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java @@ -260,7 +260,7 @@ public void testIncrementIndexStringSelector() throws IndexSizeExceededException // exception trying to make an optimized string expression selector that was not appropriate to use for the // underlying dimension selector. // This occurred during schemaless ingestion with spare dimension values and no explicit null rows, so the - // conditions are replicated by this test. + // conditions are replicated by this test. See https://github.com/apache/druid/pull/10248 for details IncrementalIndexSchema schema = new IncrementalIndexSchema( 0, new TimestampSpec("time", "millis", DateTimes.nowUtc()), From 09abd38e2d2e94e5f572fa213cea901c519665ac Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 10 Aug 2020 19:58:45 -0700 Subject: [PATCH 15/16] more strict check --- .../org/apache/druid/segment/DimensionHandlerUtils.java | 2 +- .../main/java/org/apache/druid/segment/IndexMergerV9.java | 5 ++++- .../apache/druid/segment/incremental/IncrementalIndex.java | 7 +++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index c83aa0d7cb26..f5b7e9feb346 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -79,7 +79,7 @@ private DimensionHandlerUtils() multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; if (capabilities.getType() == ValueType.STRING) { - if (capabilities.isDictionaryEncoded().isFalse()) { + if (!capabilities.isDictionaryEncoded().isTrue()) { throw new IAE("String column must have dictionary encoding."); } return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes(), capabilities.hasSpatialIndexes()); diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java index 55db9faa0a00..19ce74574be7 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java @@ -1068,7 +1068,10 @@ private Map makeDimensionHandlers( { Map handlers = new LinkedHashMap<>(); for (int i = 0; i < mergedDimensions.size(); i++) { - ColumnCapabilities capabilities = dimCapabilities.get(i); + ColumnCapabilities capabilities = ColumnCapabilitiesImpl.snapshot( + dimCapabilities.get(i), + DIMENSION_CAPABILITY_MERGE_LOGIC + ); String dimName = mergedDimensions.get(i); DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null); handlers.put(dimName, handler); diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index d2f4fd5326c6..fec4587909b0 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -59,6 +59,7 @@ import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.DoubleColumnSelector; import org.apache.druid.segment.FloatColumnSelector; +import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.LongColumnSelector; import org.apache.druid.segment.Metadata; import org.apache.druid.segment.NilColumnValueSelector; @@ -953,8 +954,10 @@ public void loadDimensionIterable( } for (String dim : oldDimensionOrder) { if (dimensionDescs.get(dim) == null) { - ColumnCapabilitiesImpl capabilities = oldColumnCapabilities.get(dim); - capabilities.setDictionaryEncoded(capabilities.getType().equals(ValueType.STRING)); + ColumnCapabilitiesImpl capabilities = ColumnCapabilitiesImpl.snapshot( + oldColumnCapabilities.get(dim), + IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC + ); DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null); addNewDimension(dim, handler); } From 9ecefc2309863f313a6c91531455d89e9eac797a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Aug 2020 04:21:32 -0700 Subject: [PATCH 16/16] fix missed usaged of impl instead of interface --- .../org/apache/druid/segment/incremental/IncrementalIndex.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index fec4587909b0..bb4377f4e396 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -945,7 +945,7 @@ private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType ty */ public void loadDimensionIterable( Iterable oldDimensionOrder, - Map oldColumnCapabilities + Map oldColumnCapabilities ) { synchronized (dimensionDescs) {