diff --git a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java index f7d31469c318..8e6f242e9a34 100644 --- a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java +++ b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java @@ -22,8 +22,10 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Strings; import com.google.inject.Inject; +import org.apache.druid.segment.data.Indexed; import javax.annotation.Nullable; +import java.nio.ByteBuffer; /** * Helper class for NullHandling. This class is used to switch between SQL compatible Null Handling behavior @@ -163,4 +165,45 @@ public static boolean isNullOrEquivalent(@Nullable String value) { return replaceWithDefault() ? Strings.isNullOrEmpty(value) : value == null; } + + public static boolean isNullOrEquivalent(@Nullable ByteBuffer buffer) + { + return buffer == null || (replaceWithDefault() && buffer.remaining() == 0); + } + + /** + * Given a UTF-8 dictionary, returns whether the first two entries must be coalesced into a single null entry. + * This happens if we are in default-value mode and the first two entries are null and empty string. + * + * This and {@link #mustReplaceFirstValueWithNullInDictionary(Indexed)} are never both true. + * + * Provided to enable compatibility for segments written under {@link #sqlCompatible()} mode but + * read under {@link #replaceWithDefault()} mode. + */ + public static boolean mustCombineNullAndEmptyInDictionary(final Indexed dictionaryUtf8) + { + return NullHandling.replaceWithDefault() + && dictionaryUtf8.size() >= 2 + && isNullOrEquivalent(dictionaryUtf8.get(0)) + && isNullOrEquivalent(dictionaryUtf8.get(1)); + } + + /** + * Given a UTF-8 dictionary, returns whether the first entry must be replaced with null. This happens if we + * are in default-value mode and the first entry is an empty string. (Default-value mode expects it to be null.) + * + * This and {@link #mustCombineNullAndEmptyInDictionary(Indexed)} are never both true. + * + * Provided to enable compatibility for segments written under {@link #sqlCompatible()} mode but + * read under {@link #replaceWithDefault()} mode. + */ + public static boolean mustReplaceFirstValueWithNullInDictionary(final Indexed dictionaryUtf8) + { + if (NullHandling.replaceWithDefault() && dictionaryUtf8.size() >= 1) { + final ByteBuffer firstValue = dictionaryUtf8.get(0); + return firstValue != null && firstValue.remaining() == 0; + } + + return false; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java index 66bbd71c5dd8..69b3af4c1404 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java @@ -27,7 +27,6 @@ import org.apache.druid.segment.AbstractDimensionSelector; import org.apache.druid.segment.DimensionSelectorUtils; import org.apache.druid.segment.IdLookup; -import org.apache.druid.segment.data.CachingIndexed; import org.apache.druid.segment.data.ColumnarInts; import org.apache.druid.segment.data.ColumnarMultiInts; import org.apache.druid.segment.data.Indexed; @@ -45,6 +44,7 @@ import org.apache.druid.utils.CloseableUtils; import javax.annotation.Nullable; +import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -60,19 +60,19 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn cachedDictionary; + private final Indexed dictionary; private final Indexed dictionaryUtf8; public StringDictionaryEncodedColumn( @Nullable ColumnarInts singleValueColumn, @Nullable ColumnarMultiInts multiValueColumn, - CachingIndexed dictionary, + Indexed dictionary, Indexed dictionaryUtf8 ) { this.column = singleValueColumn; this.multiValueColumn = multiValueColumn; - this.cachedDictionary = dictionary; + this.dictionary = dictionary; this.dictionaryUtf8 = dictionaryUtf8; } @@ -104,7 +104,7 @@ public IndexedInts getMultiValueRow(int rowNum) @Nullable public String lookupName(int id) { - return cachedDictionary.get(id); + return dictionary.get(id); } @@ -130,13 +130,13 @@ public ByteBuffer lookupNameUtf8(int id) @Override public int lookupId(String name) { - return cachedDictionary.indexOf(name); + return dictionary.indexOf(name); } @Override public int getCardinality() { - return cachedDictionary.size(); + return dictionary.size(); } @Override @@ -495,7 +495,11 @@ public String lookupName(int id) @Override public void close() throws IOException { - CloseableUtils.closeAll(cachedDictionary, column, multiValueColumn); + CloseableUtils.closeAll( + dictionary instanceof Closeable ? (Closeable) dictionary : null /* Dictionary may be CachingIndexed */, + column, + multiValueColumn + ); } /** diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java similarity index 90% rename from processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java rename to processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java index 1d8eea5e1b3e..baf7a4be9c75 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java @@ -30,7 +30,6 @@ import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.data.ColumnarInts; import org.apache.druid.segment.data.ColumnarMultiInts; -import org.apache.druid.segment.data.FrontCodedIndexed; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.ReadableOffset; @@ -51,29 +50,27 @@ import java.util.BitSet; /** - * {@link DictionaryEncodedColumn} for a column which uses a {@link FrontCodedIndexed} to store its value - * dictionary, which 'delta encodes' strings (instead of {@link org.apache.druid.segment.data.GenericIndexed} like - * {@link StringDictionaryEncodedColumn}). + * {@link DictionaryEncodedColumn} for a column which has only a UTF-8 dictionary, no String dictionary. *

- * This class is otherwise nearly identical to {@link StringDictionaryEncodedColumn} other than the dictionary - * difference. + * This class is otherwise nearly identical to {@link StringDictionaryEncodedColumn} other than lacking a + * String dictionary. *

* Implements {@link NestedCommonFormatColumn} so it can be used as a reader for single value string specializations * of {@link org.apache.druid.segment.AutoTypeColumnIndexer}. */ -public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncodedColumn, +public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColumn, NestedCommonFormatColumn { @Nullable private final ColumnarInts column; @Nullable private final ColumnarMultiInts multiValueColumn; - private final FrontCodedIndexed utf8Dictionary; + private final Indexed utf8Dictionary; - public StringFrontCodedDictionaryEncodedColumn( + public StringUtf8DictionaryEncodedColumn( @Nullable ColumnarInts singleValueColumn, @Nullable ColumnarMultiInts multiValueColumn, - FrontCodedIndexed utf8Dictionary + Indexed utf8Dictionary ) { this.column = singleValueColumn; @@ -102,6 +99,9 @@ public int getSingleValueRow(int rowNum) @Override public IndexedInts getMultiValueRow(int rowNum) { + if (!hasMultipleValues()) { + throw new UnsupportedOperationException("Column is not multi-valued"); + } return multiValueColumn.get(rowNum); } @@ -154,7 +154,7 @@ public int getValueCardinality() @Override public String lookupName(int id) { - final String value = StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); + final String value = StringUtf8DictionaryEncodedColumn.this.lookupName(id); return extractionFn == null ? value : extractionFn.apply(value); } @@ -190,7 +190,7 @@ public int lookupId(String name) if (extractionFn != null) { throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function"); } - return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name); + return StringUtf8DictionaryEncodedColumn.this.lookupId(name); } } @@ -291,7 +291,7 @@ public boolean matches() @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { - inspector.visit("column", StringFrontCodedDictionaryEncodedColumn.this); + inspector.visit("column", StringUtf8DictionaryEncodedColumn.this); } }; } else { @@ -332,7 +332,7 @@ public boolean matches() @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { - inspector.visit("column", StringFrontCodedDictionaryEncodedColumn.this); + inspector.visit("column", StringUtf8DictionaryEncodedColumn.this); } }; } @@ -381,7 +381,7 @@ public int getValueCardinality() @Override public String lookupName(final int id) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); + return StringUtf8DictionaryEncodedColumn.this.lookupName(id); } @Nullable @@ -394,7 +394,7 @@ public ByteBuffer lookupNameUtf8(int id) @Override public int lookupId(@Nullable String name) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name); + return StringUtf8DictionaryEncodedColumn.this.lookupId(name); } } @@ -421,7 +421,7 @@ public int getValueCardinality() @Override public String lookupName(final int id) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); + return StringUtf8DictionaryEncodedColumn.this.lookupName(id); } @Nullable @@ -435,7 +435,7 @@ public ByteBuffer lookupNameUtf8(int id) @Override public int lookupId(@Nullable String name) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name); + return StringUtf8DictionaryEncodedColumn.this.lookupId(name); } } @@ -457,7 +457,7 @@ public StringVectorSelector() @Override public String lookupName(int id) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); + return StringUtf8DictionaryEncodedColumn.this.lookupName(id); } } return new StringVectorSelector(); @@ -473,7 +473,7 @@ public MultiStringVectorSelector() @Override public String lookupName(int id) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); + return StringUtf8DictionaryEncodedColumn.this.lookupName(id); } } return new MultiStringVectorSelector(); diff --git a/processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java b/processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java index 4c4823550a71..6632eaa95e64 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java @@ -27,6 +27,7 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; +import java.util.function.ToIntFunction; public class CachingIndexed implements CloseableIndexed { @@ -34,7 +35,8 @@ public class CachingIndexed implements CloseableIndexed private static final Logger log = new Logger(CachingIndexed.class); - private final GenericIndexed.BufferIndexed delegate; + private final Indexed delegate; + private final ToIntFunction sizeFn; @Nullable private final SizedLRUMap cachedValues; @@ -44,12 +46,14 @@ public class CachingIndexed implements CloseableIndexed * CachingIndexed objects are not thread safe and should only be used by a single thread at a time. * CachingIndexed objects must be closed to release any underlying cache resources. * - * @param delegate the GenericIndexed to wrap with a lookup cache. + * @param delegate the Indexed to wrap with a lookup cache. + * @param sizeFn function that determines the size in bytes of an object * @param lookupCacheSize maximum size in bytes of the lookup cache if greater than zero */ - public CachingIndexed(GenericIndexed delegate, final int lookupCacheSize) + public CachingIndexed(Indexed delegate, final ToIntFunction sizeFn, final int lookupCacheSize) { - this.delegate = delegate.singleThreaded(); + this.delegate = delegate; + this.sizeFn = sizeFn; if (lookupCacheSize > 0) { log.debug("Allocating column cache of max size[%d]", lookupCacheSize); @@ -75,7 +79,7 @@ public T get(int index) } final T value = delegate.get(index); - cachedValues.put(index, value, delegate.getLastValueSize()); + cachedValues.put(index, value, sizeFn.applyAsInt(value)); return value; } else { return delegate.get(index); diff --git a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java index 6f549e9a0a89..11a8b61531bc 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java @@ -459,8 +459,6 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) */ public abstract class BufferIndexed implements Indexed { - int lastReadSize; - @Override public int size() { @@ -492,7 +490,6 @@ ByteBuffer bufferedIndexedGetByteBuffer(ByteBuffer copyValueBuffer, int startOff || copyValueBuffer.get(startOffset - Integer.BYTES) == NULL_VALUE_SIZE_MARKER)) { return null; } - lastReadSize = size; // ObjectStrategy.fromByteBuffer() is allowed to reset the limit of the buffer. So if the limit is changed, // position() call could throw an exception, if the position is set beyond the new limit. Calling limit() @@ -511,16 +508,6 @@ ByteBuffer bufferedIndexedGetByteBuffer(ByteBuffer copyValueBuffer, int startOff @Nullable protected abstract ByteBuffer getByteBuffer(int index); - /** - * This method makes no guarantees with respect to thread safety - * - * @return the size in bytes of the last value read - */ - int getLastValueSize() - { - return lastReadSize; - } - @Override public int indexOf(@Nullable T value) { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java index 53208409284d..59c0070d2430 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java @@ -30,6 +30,7 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnFormat; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; @@ -45,8 +46,7 @@ * * @see ScalarDoubleColumn * @see ScalarLongColumn - * @see ScalarStringDictionaryEncodedColumn - * @see org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn + * @see StringUtf8DictionaryEncodedColumn * @see VariantColumn * @see CompressedNestedDataComplexColumn */ diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnAndIndexSupplier.java index 8c1aa67c9987..51b5d2d335d6 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnAndIndexSupplier.java @@ -42,7 +42,7 @@ import org.apache.druid.segment.column.SimpleImmutableBitmapIndex; import org.apache.druid.segment.column.StringEncodingStrategies; import org.apache.druid.segment.column.StringEncodingStrategy; -import org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ColumnarInts; @@ -190,13 +190,13 @@ private ScalarStringColumnAndIndexSupplier( public NestedCommonFormatColumn get() { if (frontCodedStringDictionarySupplier != null) { - return new StringFrontCodedDictionaryEncodedColumn( + return new StringUtf8DictionaryEncodedColumn( encodedColumnSupplier.get(), null, frontCodedStringDictionarySupplier.get() ); } - return new ScalarStringDictionaryEncodedColumn<>(encodedColumnSupplier.get(), stringDictionary.singleThreaded()); + return new StringUtf8DictionaryEncodedColumn(encodedColumnSupplier.get(), null, stringDictionary.singleThreaded()); } @Nullable diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnSerializer.java index 72838cc3d7e3..909d0d50f4bb 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnSerializer.java @@ -33,6 +33,7 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.column.StringEncodingStrategies; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer; import org.apache.druid.segment.data.CompressionStrategy; import org.apache.druid.segment.data.DictionaryWriter; @@ -45,10 +46,8 @@ import java.nio.channels.WritableByteChannel; /** - * Serializer for a string {@link NestedCommonFormatColumn} that can be read with either - * {@link ScalarStringDictionaryEncodedColumn} or - * {@link org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn} (if written with a front-coded - * dictionary). + * Serializer for a string {@link NestedCommonFormatColumn} that can be read with + * {@link StringUtf8DictionaryEncodedColumn}. */ public class ScalarStringColumnSerializer extends NestedCommonFormatColumnSerializer { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringDictionaryEncodedColumn.java deleted file mode 100644 index 97d3a1b56d65..000000000000 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringDictionaryEncodedColumn.java +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.nested; - -import com.google.common.base.Predicate; -import com.google.common.base.Predicates; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.query.extraction.ExtractionFn; -import org.apache.druid.query.filter.ValueMatcher; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; -import org.apache.druid.segment.AbstractDimensionSelector; -import org.apache.druid.segment.IdLookup; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.column.StringDictionaryEncodedColumn; -import org.apache.druid.segment.column.StringEncodingStrategies; -import org.apache.druid.segment.data.ColumnarInts; -import org.apache.druid.segment.data.Indexed; -import org.apache.druid.segment.data.IndexedInts; -import org.apache.druid.segment.data.ReadableOffset; -import org.apache.druid.segment.data.SingleIndexedInt; -import org.apache.druid.segment.filter.BooleanValueMatcher; -import org.apache.druid.segment.historical.HistoricalDimensionSelector; -import org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector; -import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; -import org.apache.druid.segment.vector.ReadableVectorOffset; -import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; -import org.apache.druid.segment.vector.VectorObjectSelector; -import org.apache.druid.utils.CloseableUtils; - -import javax.annotation.Nullable; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.BitSet; - -/** - * {@link NestedCommonFormatColumn} specialization for {@link ColumnType#STRING} with a generic buffer based utf8 - * dictionary. This is used when not using the more specific - * {@link org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn}, and only supports single value - * strings. - */ -public class ScalarStringDictionaryEncodedColumn> - implements DictionaryEncodedColumn, NestedCommonFormatColumn -{ - private final ColumnarInts column; - private final TIndexed utf8Dictionary; - - public ScalarStringDictionaryEncodedColumn( - ColumnarInts singleValueColumn, - TIndexed utf8Dictionary - ) - { - this.column = singleValueColumn; - this.utf8Dictionary = utf8Dictionary; - } - - @Override - public int length() - { - return column.size(); - } - - @Override - public boolean hasMultipleValues() - { - return false; - } - - @Override - public int getSingleValueRow(int rowNum) - { - return column.get(rowNum); - } - - @Override - public IndexedInts getMultiValueRow(int rowNum) - { - throw new UnsupportedOperationException("Column is not multi-valued"); - } - - @Override - @Nullable - public String lookupName(int id) - { - final ByteBuffer buffer = utf8Dictionary.get(id); - if (buffer == null) { - return null; - } - return StringUtils.fromUtf8(buffer); - } - - @Override - public int lookupId(String name) - { - return utf8Dictionary.indexOf(StringUtils.toUtf8ByteBuffer(name)); - } - - @Override - public int getCardinality() - { - return utf8Dictionary.size(); - } - - @Override - public HistoricalDimensionSelector makeDimensionSelector( - final ReadableOffset offset, - @Nullable final ExtractionFn extractionFn - ) - { - class SingleValueQueryableDimensionSelector extends AbstractDimensionSelector - implements SingleValueHistoricalDimensionSelector, IdLookup, HistoricalDimensionSelector - { - private final SingleIndexedInt row = new SingleIndexedInt(); - - @Override - public int getValueCardinality() - { - /* - This is technically wrong if - extractionFn != null && (extractionFn.getExtractionType() != ExtractionFn.ExtractionType.ONE_TO_ONE || - !extractionFn.preservesOrdering()) - However current behavior allows some GroupBy-V1 queries to work that wouldn't work otherwise and doesn't - cause any problems due to special handling of extractionFn everywhere. - See https://github.com/apache/druid/pull/8433 - */ - return getCardinality(); - } - - @Override - public String lookupName(int id) - { - final String value = ScalarStringDictionaryEncodedColumn.this.lookupName(id); - return extractionFn == null ? value : extractionFn.apply(value); - } - - @Nullable - @Override - public ByteBuffer lookupNameUtf8(int id) - { - return utf8Dictionary.get(id); - } - - @Override - public boolean supportsLookupNameUtf8() - { - return true; - } - - @Override - public boolean nameLookupPossibleInAdvance() - { - return true; - } - - @Nullable - @Override - public IdLookup idLookup() - { - return extractionFn == null ? this : null; - } - - @Override - public int lookupId(String name) - { - if (extractionFn != null) { - throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function"); - } - return ScalarStringDictionaryEncodedColumn.this.lookupId(name); - } - - @Override - public IndexedInts getRow() - { - row.setValue(getRowValue()); - return row; - } - - public int getRowValue() - { - return column.get(offset.getOffset()); - } - - @Override - public IndexedInts getRow(int offset) - { - row.setValue(getRowValue(offset)); - return row; - } - - @Override - public int getRowValue(int offset) - { - return column.get(offset); - } - - @Override - public ValueMatcher makeValueMatcher(final @Nullable String value) - { - if (extractionFn == null) { - final int valueId = lookupId(value); - if (valueId >= 0) { - return new ValueMatcher() - { - @Override - public boolean matches() - { - return getRowValue() == valueId; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("column", ScalarStringDictionaryEncodedColumn.this); - } - }; - } else { - return BooleanValueMatcher.of(false); - } - } else { - // Employ caching BitSet optimization - return makeValueMatcher(Predicates.equalTo(value)); - } - } - - @Override - public ValueMatcher makeValueMatcher(final Predicate predicate) - { - final BitSet checkedIds = new BitSet(getCardinality()); - final BitSet matchingIds = new BitSet(getCardinality()); - - // Lazy matcher; only check an id if matches() is called. - return new ValueMatcher() - { - @Override - public boolean matches() - { - final int id = getRowValue(); - - if (checkedIds.get(id)) { - return matchingIds.get(id); - } else { - final boolean matches = predicate.apply(lookupName(id)); - checkedIds.set(id); - if (matches) { - matchingIds.set(id); - } - return matches; - } - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("column", ScalarStringDictionaryEncodedColumn.this); - } - }; - } - - @Override - public Object getObject() - { - return lookupName(getRowValue()); - } - - @Override - public Class classOfObject() - { - return String.class; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("column", column); - inspector.visit("offset", offset); - inspector.visit("extractionFn", extractionFn); - } - } - return new SingleValueQueryableDimensionSelector(); - } - - @Override - public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset) - { - final class StringVectorSelector extends StringDictionaryEncodedColumn.StringSingleValueDimensionVectorSelector - { - public StringVectorSelector() - { - super(column, offset); - } - - @Override - public int getValueCardinality() - { - return getCardinality(); - } - - @Nullable - @Override - public String lookupName(final int id) - { - return ScalarStringDictionaryEncodedColumn.this.lookupName(id); - } - - @Nullable - @Override - public ByteBuffer lookupNameUtf8(int id) - { - return utf8Dictionary.get(id); - } - - @Override - public int lookupId(@Nullable String name) - { - return ScalarStringDictionaryEncodedColumn.this.lookupId(name); - } - } - - return new StringVectorSelector(); - } - - @Override - public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(ReadableVectorOffset vectorOffset) - { - throw new UnsupportedOperationException("Column not multi-valued"); - } - - @Override - public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) - { - final class StringVectorSelector extends StringDictionaryEncodedColumn.StringVectorObjectSelector - { - public StringVectorSelector() - { - super(column, offset); - } - - @Nullable - @Override - public String lookupName(int id) - { - return ScalarStringDictionaryEncodedColumn.this.lookupName(id); - } - } - return new StringVectorSelector(); - } - - @Override - public void close() throws IOException - { - CloseableUtils.closeAll(column); - } - - @Override - public ColumnType getLogicalType() - { - return ColumnType.STRING; - } - - @Override - public Indexed getStringDictionary() - { - return new StringEncodingStrategies.Utf8ToStringIndexed(utf8Dictionary); - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java new file mode 100644 index 000000000000..3e109946d72e --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.collections.bitmap.BitmapFactory; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.data.Indexed; + +import javax.annotation.Nullable; +import java.util.Iterator; +import java.util.Objects; + +/** + * An {@link Indexed} that delegates to an underyling instance, but combines the first two entries. + * + * Unlike {@link CombineFirstTwoValuesIndexedInts}, this class combines the first two *entries*. + * So [0, 1, 2] becomes [(something), 2]. The first two entries, 0 and 1, were replaced with (something). That something + * is given by {@link #newFirstValue()}. + * + * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but + * read under {@link NullHandling#replaceWithDefault()} mode. + * + * Important note: {@link #isSorted()} returns the same value as the underlying delegate. In this case, this class + * assumes that {@link #newFirstValue()} is the lowest possible value in the universe: including anything in + * {@link #delegate} and anything that might be passed to {@link #indexOf(Object)}. Callers must ensure that this + * precondition is met. + * + * @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed) + */ +public abstract class CombineFirstTwoEntriesIndexed implements Indexed +{ + private static final int FIRST_ID = 0; + + protected final Indexed delegate; + + protected CombineFirstTwoEntriesIndexed(Indexed delegate) + { + this.delegate = delegate; + + if (delegate.size() < 2) { + throw new ISE("Size[%s] must be >= 2", delegate.size()); + } + } + + /** + * Combine the first two values into a literal null. + */ + public static CombineFirstTwoEntriesIndexed returnNull(final Indexed delegate) + { + return new CombineFirstTwoEntriesIndexed(delegate) + { + @Nullable + @Override + protected T newFirstValue() + { + return null; + } + }; + } + + /** + * Union the first two bitmaps. + */ + public static CombineFirstTwoEntriesIndexed unionBitmaps( + final BitmapFactory bitmapFactory, + final Indexed delegate + ) + { + return new CombineFirstTwoEntriesIndexed(delegate) + { + @Nullable + @Override + protected ImmutableBitmap newFirstValue() + { + return bitmapFactory.union(ImmutableList.of(delegate.get(FIRST_ID), delegate.get(FIRST_ID + 1))); + } + }; + } + + @Nullable + protected abstract T newFirstValue(); + + @Override + public int size() + { + return delegate.size() - 1; + } + + @Nullable + @Override + public T get(int index) + { + if (index == FIRST_ID) { + return newFirstValue(); + } else { + return delegate.get(index + 1); + } + } + + @Override + public int indexOf(@Nullable T value) + { + if (Objects.equals(newFirstValue(), value)) { + return FIRST_ID; + } else { + final int index = delegate.indexOf(value); + + if (index > FIRST_ID + 1) { + // Item found, index needs adjustment. + return index - 1; + } else if (index >= 0) { + // Item found, but shadowed, so really not found. + // Insertion point is after FIRST_ID. (See class-level javadoc: newFirstValue is required to be + // lower than all elements in the universe.) + return -2; + } else if (index >= -2) { + // Item not found, and insertion point is prior to, or within, the shadowed portion of delegate. Return + // insertion point immediately after newFirstValue, since that value is required to be lower than all elements + // in the universe. + return -2; + } else { + // Item not found, and insertion point is after the shadowed portion of delegate. Adjust and return. + return index + 1; + } + } + } + + @Override + public Iterator iterator() + { + final Iterator it = delegate.iterator(); + + // Skip first two values. + //CHECKSTYLE.OFF: Regexp + it.next(); + it.next(); + //CHECKSTYLE.ON: Regexp + + class CoalescingIndexedIterator implements Iterator + { + boolean returnedFirstValue; + + @Override + public boolean hasNext() + { + return !returnedFirstValue || it.hasNext(); + } + + @Override + public T next() + { + if (!returnedFirstValue) { + returnedFirstValue = true; + return newFirstValue(); + } else { + return it.next(); + } + } + } + + return new CoalescingIndexedIterator(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + delegate.inspectRuntimeShape(inspector); + } + + @Override + public boolean isSorted() + { + return delegate.isSorted(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java new file mode 100644 index 000000000000..07ccd303db63 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.segment.data.ColumnarInts; +import org.apache.druid.segment.data.Indexed; + +import java.io.IOException; + +/** + * A {@link ColumnarInts} facade over {@link CombineFirstTwoValuesIndexedInts}. + * + * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but + * read under {@link NullHandling#replaceWithDefault()} mode. + * + * @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed) + */ +public class CombineFirstTwoValuesColumnarInts extends CombineFirstTwoValuesIndexedInts implements ColumnarInts +{ + public CombineFirstTwoValuesColumnarInts(ColumnarInts delegate) + { + super(delegate); + } + + @Override + public void close() throws IOException + { + ((ColumnarInts) delegate).close(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java new file mode 100644 index 000000000000..4d8cc5f61f6f --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.collect.Iterators; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.data.ColumnarMultiInts; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.data.ZeroIndexedInts; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.util.Iterator; + +/** + * A {@link ColumnarMultiInts} that delegates to an underyling instance, but applies + * {@link CombineFirstTwoValuesIndexedInts} to each row's set of values. + * + * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but + * read under {@link NullHandling#replaceWithDefault()} mode. + * + * @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed) + */ +public class CombineFirstTwoValuesColumnarMultiInts implements ColumnarMultiInts +{ + private final ColumnarMultiInts delegate; + private final CombineFirstTwoValuesIndexedInts rowValues; + + public CombineFirstTwoValuesColumnarMultiInts(ColumnarMultiInts delegate) + { + this.delegate = delegate; + this.rowValues = new CombineFirstTwoValuesIndexedInts(ZeroIndexedInts.instance()); + } + + @Override + public IndexedInts get(int index) + { + rowValues.delegate = delegate.get(index); + return rowValues; + } + + @Override + public IndexedInts getUnshared(int index) + { + return new CombineFirstTwoValuesIndexedInts(delegate.getUnshared(index)); + } + + @Override + public int size() + { + return delegate.size(); + } + + @Override + public int indexOf(@Nullable IndexedInts value) + { + // No ColumnarMultiInts implement this method + throw new UnsupportedOperationException("Reverse lookup not allowed."); + } + + @Override + public boolean isSorted() + { + return delegate.isSorted(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + delegate.inspectRuntimeShape(inspector); + } + + @Override + public Iterator iterator() + { + return Iterators.transform( + delegate.iterator(), + CombineFirstTwoValuesIndexedInts::new + ); + } + + @Override + public void close() throws IOException + { + delegate.close(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java new file mode 100644 index 000000000000..e2e6bf6d6e99 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.data.IndexedInts; + +/** + * A {@link IndexedInts} that delegates to an underyling instance, but combines the values 0 and 1 into 0, and shifts + * all other values down by one. For example: + * + * - [2, 0, 1] => [1, 0, 0] + * - [3, 2, 1] => [2, 1, 0] + * - [0, 1, 0] => [0, 0, 0] + * + * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but + * read under {@link NullHandling#replaceWithDefault()} mode. + * + * @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed) + */ +public class CombineFirstTwoValuesIndexedInts implements IndexedInts +{ + private static final int ZERO_ID = 0; + + IndexedInts delegate; + + public CombineFirstTwoValuesIndexedInts(IndexedInts delegate) + { + this.delegate = delegate; + } + + @Override + public int size() + { + return delegate.size(); + } + + @Override + public int get(int index) + { + final int i = delegate.get(index); + if (i == ZERO_ID) { + return i; + } else { + return i - 1; + } + } + + @Override + public void get(int[] out, int start, int length) + { + delegate.get(out, start, length); + + for (int i = 0; i < length; i++) { + if (out[i] != ZERO_ID) { + out[i]--; + } + } + } + + @Override + public void get(int[] out, int[] indexes, int length) + { + delegate.get(out, indexes, length); + + for (int i = 0; i < length; i++) { + if (out[i] != ZERO_ID) { + out[i]--; + } + } + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + delegate.inspectRuntimeShape(inspector); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java index f48dcfc61b2d..a8148ca6a25a 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java @@ -20,17 +20,20 @@ package org.apache.druid.segment.serde; import com.google.common.base.Supplier; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.StringDictionaryEncodedColumn; import org.apache.druid.segment.data.CachingIndexed; import org.apache.druid.segment.data.ColumnarInts; import org.apache.druid.segment.data.ColumnarMultiInts; import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.Indexed; import javax.annotation.Nullable; import java.nio.ByteBuffer; /** + * */ public class DictionaryEncodedColumnSupplier implements Supplier> { @@ -58,11 +61,40 @@ public DictionaryEncodedColumnSupplier( @Override public DictionaryEncodedColumn get() { - return new StringDictionaryEncodedColumn( - singleValuedColumn != null ? singleValuedColumn.get() : null, - multiValuedColumn != null ? multiValuedColumn.get() : null, - new CachingIndexed<>(dictionary, lookupCacheSize), - dictionaryUtf8.singleThreaded() - ); + final Indexed cacheWrappedDictionary; + final Indexed singleThreadedDictionaryUtf8 = dictionaryUtf8.singleThreaded(); + + if (lookupCacheSize > 0) { + cacheWrappedDictionary = new CachingIndexed<>( + dictionary.singleThreaded(), + s -> s == null ? 0 : s.length() * Character.BYTES, + lookupCacheSize + ); + } else { + cacheWrappedDictionary = dictionary.singleThreaded(); + } + + if (NullHandling.mustCombineNullAndEmptyInDictionary(singleThreadedDictionaryUtf8)) { + return new StringDictionaryEncodedColumn( + singleValuedColumn != null ? new CombineFirstTwoValuesColumnarInts(singleValuedColumn.get()) : null, + multiValuedColumn != null ? new CombineFirstTwoValuesColumnarMultiInts(multiValuedColumn.get()) : null, + CombineFirstTwoEntriesIndexed.returnNull(cacheWrappedDictionary), + CombineFirstTwoEntriesIndexed.returnNull(singleThreadedDictionaryUtf8) + ); + } else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(singleThreadedDictionaryUtf8)) { + return new StringDictionaryEncodedColumn( + singleValuedColumn != null ? singleValuedColumn.get() : null, + multiValuedColumn != null ? multiValuedColumn.get() : null, + new ReplaceFirstValueWithNullIndexed<>(cacheWrappedDictionary), + new ReplaceFirstValueWithNullIndexed<>(singleThreadedDictionaryUtf8) + ); + } else { + return new StringDictionaryEncodedColumn( + singleValuedColumn != null ? singleValuedColumn.get() : null, + multiValuedColumn != null ? multiValuedColumn.get() : null, + cacheWrappedDictionary, + singleThreadedDictionaryUtf8 + ); + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java index ae2a1e148236..d7dc25b97964 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java @@ -75,13 +75,24 @@ public DictionaryEncodedStringIndexSupplier( public T as(Class clazz) { if (bitmaps != null) { - final Indexed singleThreadedStrings = dictionary.singleThreaded(); - final Indexed singleThreadedUtf8 = dictionaryUtf8.singleThreaded(); - final Indexed singleThreadedBitmaps = bitmaps.singleThreaded(); + Indexed singleThreadedStrings = dictionary.singleThreaded(); + Indexed singleThreadedUtf8 = dictionaryUtf8.singleThreaded(); + Indexed singleThreadedBitmaps = bitmaps.singleThreaded(); + + if (NullHandling.mustCombineNullAndEmptyInDictionary(singleThreadedUtf8)) { + singleThreadedStrings = CombineFirstTwoEntriesIndexed.returnNull(singleThreadedStrings); + singleThreadedUtf8 = CombineFirstTwoEntriesIndexed.returnNull(singleThreadedUtf8); + singleThreadedBitmaps = CombineFirstTwoEntriesIndexed.unionBitmaps(bitmapFactory, singleThreadedBitmaps); + } else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(singleThreadedUtf8)) { + singleThreadedStrings = new ReplaceFirstValueWithNullIndexed<>(singleThreadedStrings); + singleThreadedUtf8 = new ReplaceFirstValueWithNullIndexed<>(singleThreadedUtf8); + } + if (clazz.equals(NullValueIndex.class)) { final BitmapColumnIndex nullIndex; - if (NullHandling.isNullOrEquivalent(dictionary.get(0))) { - nullIndex = new SimpleImmutableBitmapIndex(bitmaps.get(0)); + final ByteBuffer firstValue = singleThreadedUtf8.get(0); + if (NullHandling.isNullOrEquivalent(firstValue)) { + nullIndex = new SimpleImmutableBitmapIndex(singleThreadedBitmaps.get(0)); } else { nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); } @@ -97,13 +108,14 @@ public T as(Class clazz) bitmapFactory, singleThreadedUtf8, singleThreadedBitmaps, - NullHandling.isNullOrEquivalent(dictionary.get(0)) + NullHandling.isNullOrEquivalent(singleThreadedStrings.get(0)) ); - } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class)) { + } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) + || clazz.equals(DictionaryEncodedValueIndex.class)) { return (T) new IndexedStringDictionaryEncodedStringValueIndex<>( bitmapFactory, singleThreadedStrings, - bitmaps + singleThreadedBitmaps ); } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java index 3e851484b2c9..5eda4f84ae90 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Supplier; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.BitmapSerde; @@ -154,7 +155,14 @@ public Deserializer getDeserializer() final ImmutableBitmap bitmap; final boolean hasNulls; if (buffer.hasRemaining()) { - bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + if (NullHandling.sqlCompatible()) { + bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + } else { + // Read from the buffer (to advance its position) but do not actually retain the bitmaps. + bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + } + hasNulls = !bitmap.isEmpty(); } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); diff --git a/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java index 29a1e2c46fe6..8e79bc7a3fa8 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.BitmapSerde; @@ -150,8 +151,15 @@ public Deserializer getDeserializer() buffer.position(initialPos + offset); final ImmutableBitmap bitmap; final boolean hasNulls; - if (buffer.hasRemaining()) { - bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + if (buffer.hasRemaining() && NullHandling.sqlCompatible()) { + if (NullHandling.sqlCompatible()) { + bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + } else { + // Read from the buffer (to advance its position) but do not actually retain the bitmaps. + bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + } + hasNulls = !bitmap.isEmpty(); } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); diff --git a/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java index c59bb99b1c7d..7f145b36a413 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.BitmapSerde; @@ -152,8 +153,15 @@ public Deserializer getDeserializer() buffer.position(initialPos + offset); final ImmutableBitmap bitmap; final boolean hasNulls; - if (buffer.hasRemaining()) { - bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + if (buffer.hasRemaining() && NullHandling.sqlCompatible()) { + if (NullHandling.sqlCompatible()) { + bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + } else { + // Read from the buffer (to advance its position) but do not actually retain the bitmaps. + bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + } + hasNulls = !bitmap.isEmpty(); } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); diff --git a/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java b/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java new file mode 100644 index 000000000000..be9e1385d944 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.data.Indexed; + +import javax.annotation.Nullable; +import java.util.Iterator; + +/** + * An Indexed that replaces the first value with a literal null. + * + * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but + * read under {@link NullHandling#replaceWithDefault()} mode. + * + * Important note: {@link #isSorted()} returns the same value as the underlying delegate. In this case, this class + * assumes that {@code null} is the lowest possible value in the universe: including anything in {@link #delegate} and + * anything that might be passed to {@link #indexOf(Object)}. Callers must ensure that this precondition is met. + * + * @see NullHandling#mustReplaceFirstValueWithNullInDictionary(Indexed) + */ +public class ReplaceFirstValueWithNullIndexed implements Indexed +{ + private final Indexed delegate; + + public ReplaceFirstValueWithNullIndexed(Indexed delegate) + { + this.delegate = delegate; + + if (delegate.size() < 1) { + throw new ISE("Size[%s] must be >= 1", delegate.size()); + } + } + + @Override + public int size() + { + return delegate.size(); + } + + @Nullable + @Override + public T get(int index) + { + if (index == 0) { + return null; + } else { + return delegate.get(index); + } + } + + @Override + public int indexOf(@Nullable T value) + { + if (value == null) { + return 0; + } else { + final int result = delegate.indexOf(value); + if (result == 0 || result == -1) { + return -2; + } else { + return result; + } + } + } + + @Override + public boolean isSorted() + { + return delegate.isSorted(); + } + + @Override + public Iterator iterator() + { + final Iterator it = delegate.iterator(); + + // Skip first value. + it.next(); + + class ReplaceFirstValueWithNullIndexedIterator implements Iterator + { + boolean returnedNull; + + @Override + public boolean hasNext() + { + return !returnedNull || it.hasNext(); + } + + @Override + public T next() + { + if (!returnedNull) { + returnedNull = true; + return null; + } else { + return it.next(); + } + } + } + + return new ReplaceFirstValueWithNullIndexedIterator(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + delegate.inspectRuntimeShape(inspector); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java index 894ddb55ffc6..a617e3b409f5 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java @@ -44,11 +44,11 @@ import org.apache.druid.segment.data.Indexed; import javax.annotation.Nullable; +import java.nio.ByteBuffer; public class StringFrontCodedColumnIndexSupplier implements ColumnIndexSupplier { private final BitmapFactory bitmapFactory; - private final Supplier dictionary; private final Supplier utf8Dictionary; @Nullable @@ -67,21 +67,30 @@ public StringFrontCodedColumnIndexSupplier( this.bitmapFactory = bitmapFactory; this.bitmaps = bitmaps; this.utf8Dictionary = utf8Dictionary; - this.dictionary = () -> new StringEncodingStrategies.Utf8ToStringIndexed(this.utf8Dictionary.get()); this.indexedTree = indexedTree; } @Nullable @Override + @SuppressWarnings("unchecked") public T as(Class clazz) { if (bitmaps != null) { - final Indexed singleThreadedBitmaps = bitmaps.singleThreaded(); + Indexed dict = utf8Dictionary.get(); + Indexed singleThreadedBitmaps = bitmaps.singleThreaded(); + + if (NullHandling.mustCombineNullAndEmptyInDictionary(dict)) { + dict = CombineFirstTwoEntriesIndexed.returnNull(dict); + singleThreadedBitmaps = CombineFirstTwoEntriesIndexed.unionBitmaps(bitmapFactory, singleThreadedBitmaps); + } else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(dict)) { + dict = new ReplaceFirstValueWithNullIndexed<>(dict); + } + if (clazz.equals(NullValueIndex.class)) { final BitmapColumnIndex nullIndex; - final StringEncodingStrategies.Utf8ToStringIndexed stringDictionary = dictionary.get(); - if (NullHandling.isNullOrEquivalent(stringDictionary.get(0))) { - nullIndex = new SimpleImmutableBitmapIndex(bitmaps.get(0)); + final ByteBuffer firstValue = dict.get(0); + if (NullHandling.isNullOrEquivalent(firstValue)) { + nullIndex = new SimpleImmutableBitmapIndex(singleThreadedBitmaps.get(0)); } else { nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); } @@ -89,17 +98,16 @@ public T as(Class clazz) } else if (clazz.equals(StringValueSetIndex.class)) { return (T) new IndexedUtf8ValueSetIndex<>( bitmapFactory, - utf8Dictionary.get(), + dict, singleThreadedBitmaps ); } else if (clazz.equals(DruidPredicateIndex.class)) { return (T) new IndexedStringDruidPredicateIndex<>( bitmapFactory, - dictionary.get(), + new StringEncodingStrategies.Utf8ToStringIndexed(dict), singleThreadedBitmaps ); } else if (clazz.equals(LexicographicalRangeIndex.class)) { - final FrontCodedIndexed dict = utf8Dictionary.get(); return (T) new IndexedUtf8LexicographicalRangeIndex<>( bitmapFactory, dict, @@ -108,10 +116,11 @@ public T as(Class clazz) ); } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class)) { + // Need string dictionary instead of UTF8 dictionary return (T) new IndexedStringDictionaryEncodedStringValueIndex<>( bitmapFactory, - dictionary.get(), - bitmaps + new StringEncodingStrategies.Utf8ToStringIndexed(dict), + singleThreadedBitmaps ); } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java index d67730de4363..23bc28acff1d 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java @@ -20,9 +20,10 @@ package org.apache.druid.segment.serde; import com.google.common.base.Supplier; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.StringDictionaryEncodedColumn; -import org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.data.ColumnarInts; import org.apache.druid.segment.data.ColumnarMultiInts; import org.apache.druid.segment.data.FrontCodedIndexed; @@ -30,7 +31,7 @@ import javax.annotation.Nullable; /** - * {@link DictionaryEncodedColumnSupplier} but for columns using a {@link StringFrontCodedDictionaryEncodedColumn} + * {@link DictionaryEncodedColumnSupplier} but for columns using a {@link StringUtf8DictionaryEncodedColumn} * instead of the traditional {@link StringDictionaryEncodedColumn} */ public class StringFrontCodedDictionaryEncodedColumnSupplier implements Supplier> @@ -53,10 +54,26 @@ public StringFrontCodedDictionaryEncodedColumnSupplier( @Override public DictionaryEncodedColumn get() { - return new StringFrontCodedDictionaryEncodedColumn( - singleValuedColumn != null ? singleValuedColumn.get() : null, - multiValuedColumn != null ? multiValuedColumn.get() : null, - utf8Dictionary.get() - ); + final FrontCodedIndexed suppliedUtf8Dictionary = utf8Dictionary.get(); + + if (NullHandling.mustCombineNullAndEmptyInDictionary(suppliedUtf8Dictionary)) { + return new StringUtf8DictionaryEncodedColumn( + singleValuedColumn != null ? new CombineFirstTwoValuesColumnarInts(singleValuedColumn.get()) : null, + multiValuedColumn != null ? new CombineFirstTwoValuesColumnarMultiInts(multiValuedColumn.get()) : null, + CombineFirstTwoEntriesIndexed.returnNull(suppliedUtf8Dictionary) + ); + } else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(suppliedUtf8Dictionary)) { + return new StringUtf8DictionaryEncodedColumn( + singleValuedColumn != null ? singleValuedColumn.get() : null, + multiValuedColumn != null ? multiValuedColumn.get() : null, + new ReplaceFirstValueWithNullIndexed<>(suppliedUtf8Dictionary) + ); + } else { + return new StringUtf8DictionaryEncodedColumn( + singleValuedColumn != null ? singleValuedColumn.get() : null, + multiValuedColumn != null ? multiValuedColumn.get() : null, + suppliedUtf8Dictionary + ); + } } } diff --git a/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java b/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java index 8bfb36d25d3c..db9ade70f425 100644 --- a/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java +++ b/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java @@ -19,10 +19,14 @@ package org.apache.druid.common.config; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Test; +import java.util.Collections; + import static org.apache.druid.common.config.NullHandling.replaceWithDefault; import static org.junit.Assert.assertEquals; @@ -99,4 +103,86 @@ public void test_ignoreNullsStrings() NullHandling.initializeForTests(); } } + + @Test + public void test_mustCombineNullAndEmptyInDictionary() + { + Assert.assertFalse( + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(Collections.singletonList(null)) + ) + ); + + Assert.assertFalse( + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer("foo")) + ) + ); + + Assert.assertFalse( + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer("")) + ) + ); + + Assert.assertFalse( + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo")) + ) + ); + + Assert.assertEquals( + NullHandling.replaceWithDefault(), + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer("")) + ) + ); + + Assert.assertEquals( + NullHandling.replaceWithDefault(), + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo"))) + ); + } + + @Test + public void test_mustReplaceFirstValueWithNullInDictionary() + { + Assert.assertFalse( + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(Collections.singletonList(null)) + ) + ); + + Assert.assertFalse( + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer("foo")) + ) + ); + + Assert.assertEquals( + NullHandling.replaceWithDefault(), + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer("")) + ) + ); + + Assert.assertEquals( + NullHandling.replaceWithDefault(), + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo")) + ) + ); + + Assert.assertFalse( + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer("")) + ) + ); + + Assert.assertFalse( + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo"))) + ); + } } diff --git a/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java b/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java index 842e351fa583..8632a6ee01fe 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java @@ -116,6 +116,11 @@ public static IndexBuilder create(ObjectMapper jsonMapper, ColumnConfig columnCo return new IndexBuilder(jsonMapper, columnConfig); } + public IndexIO getIndexIO() + { + return indexIO; + } + public IndexBuilder schema(IncrementalIndexSchema schema) { this.schema = schema; @@ -198,12 +203,6 @@ public IndexBuilder rows(Iterable rows) return this; } - public IndexBuilder maxRows(int maxRows) - { - this.maxRows = maxRows; - return this; - } - public IndexBuilder intermediaryPersistSize(int rows) { this.intermediatePersistSize = rows; @@ -231,7 +230,7 @@ public IncrementalIndex buildIncrementalIndex() return buildIncrementalIndexWithRows(schema, maxRows, rows); } - public QueryableIndex buildMMappedIndex() + public File buildMMappedIndexFile() { Preconditions.checkNotNull(indexMerger, "indexMerger"); Preconditions.checkNotNull(tmpDir, "tmpDir"); @@ -255,16 +254,14 @@ public QueryableIndex buildMMappedIndex() // queryable index instead of the incremental index, which also mimics the behavior of real ingestion tasks // which persist incremental indexes as intermediate segments and then merges all the intermediate segments to // publish - return indexIO.loadIndex( - indexMerger.merge( - adapters, - schema.isRollup(), - schema.getMetrics(), - tmpDir, - schema.getDimensionsSpec(), - indexSpec, - Integer.MAX_VALUE - ) + return indexMerger.merge( + adapters, + schema.isRollup(), + schema.getMetrics(), + tmpDir, + schema.getDimensionsSpec(), + indexSpec, + Integer.MAX_VALUE ); } catch (IOException e) { @@ -272,6 +269,15 @@ public QueryableIndex buildMMappedIndex() } } + public QueryableIndex buildMMappedIndex() + { + try { + return indexIO.loadIndex(buildMMappedIndexFile()); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } public QueryableIndex buildMMappedMergedIndex() { diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java index 6568da52c4fb..9f0ee07d0928 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java @@ -140,7 +140,7 @@ public void testStringColumnNullHandling() throws Exception // Compute all unique values, the same way that IndexMerger is expected to do it. final Set uniqueValues = new HashSet<>(); for (Map m : subsetList) { - final List dValues = normalize(m.get("d"), hasMultipleValues); + final List dValues = normalize(m.get("d")); uniqueValues.addAll(dValues); if (nullFlavors.contains(m)) { @@ -167,7 +167,7 @@ public void testStringColumnNullHandling() throws Exception subsetList.toString(), ImmutableMultiset.copyOf( subsetList.stream() - .map(m -> normalize(m.get("d"), hasMultipleValues)) + .map(m -> normalize(m.get("d"))) .distinct() // Distinct values only, because we expect rollup. .collect(Collectors.toList()) ), @@ -224,7 +224,7 @@ public void testStringColumnNullHandling() throws Exception /** * Normalize an input value the same way that IndexMerger is expected to do it. */ - private static List normalize(final Object value, final boolean hasMultipleValues) + private static List normalize(final Object value) { final List retVal = new ArrayList<>(); @@ -237,7 +237,7 @@ private static List normalize(final Object value, final boolean hasMulti if (list.isEmpty()) { // empty lists become nulls in single valued columns // they sometimes also become nulls in multi-valued columns (see comments in getRow()) - retVal.add(NullHandling.emptyToNullIfNeeded(null)); + retVal.add(null); } else { retVal.addAll(list.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toList())); } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index 65c39b359bcc..3495d1cb6d9c 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -104,6 +104,7 @@ import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; +import org.junit.Assume; import org.junit.Before; import org.junit.Rule; import org.junit.rules.TemporaryFolder; @@ -111,6 +112,8 @@ import javax.annotation.Nullable; import java.io.Closeable; +import java.io.File; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -146,11 +149,17 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest static final TimestampSpec DEFAULT_TIMESTAMP_SPEC = new TimestampSpec(TIMESTAMP_COLUMN, "iso", DateTimes.of("2000")); static final DimensionsSpec DEFAULT_DIM_SPEC = new DimensionsSpec( ImmutableList.builder() - .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim0", "dim1", "dim2", "dim3", "timeDim"))) - .add(new DoubleDimensionSchema("d0")) - .add(new FloatDimensionSchema("f0")) - .add(new LongDimensionSchema("l0")) - .build() + .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of( + "dim0", + "dim1", + "dim2", + "dim3", + "timeDim" + ))) + .add(new DoubleDimensionSchema("d0")) + .add(new FloatDimensionSchema("f0")) + .add(new LongDimensionSchema("l0")) + .build() ); static final InputRowParser> DEFAULT_PARSER = new MapInputRowParser( @@ -350,32 +359,35 @@ public static Collection makeConstructors() .put( "mmappedAutoTypesMerged", input -> { - input.mapSchema( - schema -> - new IncrementalIndexSchema( - schema.getMinTimestamp(), - schema.getTimestampSpec(), - schema.getGran(), - schema.getVirtualColumns(), - schema.getDimensionsSpec().withDimensions( - schema.getDimensionsSpec() - .getDimensions() - .stream() - .map( - dimensionSchema -> new AutoTypeColumnSchema(dimensionSchema.getName()) - ) - .collect(Collectors.toList()) - ), - schema.getMetrics(), - schema.isRollup() + final QueryableIndex index = + input + .mapSchema( + schema -> + new IncrementalIndexSchema( + schema.getMinTimestamp(), + schema.getTimestampSpec(), + schema.getGran(), + schema.getVirtualColumns(), + schema.getDimensionsSpec().withDimensions( + schema.getDimensionsSpec() + .getDimensions() + .stream() + .map( + dimensionSchema -> new AutoTypeColumnSchema(dimensionSchema.getName()) + ) + .collect(Collectors.toList()) + ), + schema.getMetrics(), + schema.isRollup() + ) ) - ); - // if 1 row per segment some of the columns have null values for the row which causes 'auto' - // typing default value coercion to be lost in default value mode, so make sure there is at - // least one number in each segment for these tests to pass correctly because the column - // is typeless and so doesn't write out zeros like regular numbers do - input.intermediaryPersistSize(3); - final QueryableIndex index = input.buildMMappedMergedIndex(); + // if 1 row per segment some of the columns have null values for the row which causes 'auto' + // typing default value coercion to be lost in default value mode, so make sure there is at + // least one number in each segment for these tests to pass correctly because the column + // is typeless and so doesn't write out zeros like regular numbers do + .intermediaryPersistSize(3) + .buildMMappedIndex(); + return Pair.of(new QueryableIndexStorageAdapter(index), index); } ) @@ -393,15 +405,38 @@ public static Collection makeConstructors() return Pair.of(new QueryableIndexStorageAdapter(index), index); } ) + .put( + "mmappedWithSqlCompatibleNulls", + input -> { + // Build mmapped index in SQL-compatible null handling mode; read it in default-value mode. + Assume.assumeTrue(NullHandling.replaceWithDefault()); + final File file; + try { + NullHandling.initializeForTestsWithValues(false, null); + Assert.assertTrue(NullHandling.sqlCompatible()); + file = input.buildMMappedIndexFile(); + } + finally { + NullHandling.initializeForTests(); + } + + Assert.assertTrue(NullHandling.replaceWithDefault()); + try { + final QueryableIndex index = input.getIndexIO().loadIndex(file); + return Pair.of(new QueryableIndexStorageAdapter(index), index); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + ) .put( "rowBasedWithoutTypeSignature", - input -> Pair.of(input.buildRowBasedSegmentWithoutTypeSignature().asStorageAdapter(), () -> { - }) + input -> Pair.of(input.buildRowBasedSegmentWithoutTypeSignature().asStorageAdapter(), () -> {}) ) .put( "rowBasedWithTypeSignature", - input -> Pair.of(input.buildRowBasedSegmentWithTypeSignature().asStorageAdapter(), () -> { - }) + input -> Pair.of(input.buildRowBasedSegmentWithTypeSignature().asStorageAdapter(), () -> {}) ) .put("frame (row-based)", input -> { final FrameSegment segment = input.buildFrameSegment(FrameType.ROW_BASED); @@ -811,7 +846,6 @@ private List selectColumnValuesMatchingFilterUsingVectorVirtualColumnCur } - return values; } } diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java index 954e4998535e..5b86747aa2ae 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java @@ -45,6 +45,7 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DruidPredicateIndex; import org.apache.druid.segment.column.NullValueIndex; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; @@ -192,7 +193,7 @@ public void testBasicFunctionality() throws IOException bob, NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES ); - try (ScalarStringDictionaryEncodedColumn column = (ScalarStringDictionaryEncodedColumn) supplier.get()) { + try (StringUtf8DictionaryEncodedColumn column = (StringUtf8DictionaryEncodedColumn) supplier.get()) { smokeTest(supplier, column); } } @@ -225,7 +226,7 @@ public void testConcurrency() throws ExecutionException, InterruptedException try { threadsStartLatch.await(); for (int iter = 0; iter < 5000; iter++) { - try (ScalarStringDictionaryEncodedColumn column = (ScalarStringDictionaryEncodedColumn) supplier.get()) { + try (StringUtf8DictionaryEncodedColumn column = (StringUtf8DictionaryEncodedColumn) supplier.get()) { smokeTest(supplier, column); } } @@ -241,7 +242,7 @@ public void testConcurrency() throws ExecutionException, InterruptedException Assert.assertEquals(expectedReason, failureReason.get()); } - private void smokeTest(ScalarStringColumnAndIndexSupplier supplier, ScalarStringDictionaryEncodedColumn column) + private void smokeTest(ScalarStringColumnAndIndexSupplier supplier, StringUtf8DictionaryEncodedColumn column) { SimpleAscendingOffset offset = new SimpleAscendingOffset(data.size()); ColumnValueSelector valueSelector = column.makeColumnValueSelector(offset); diff --git a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexedTest.java b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexedTest.java new file mode 100644 index 000000000000..e5dd68978a3d --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexedTest.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.data.ListIndexed; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.hamcrest.CoreMatchers; +import org.hamcrest.MatcherAssert; +import org.junit.Assert; +import org.junit.Test; +import org.junit.internal.matchers.ThrowableMessageMatcher; + +import javax.annotation.Nullable; +import java.util.Collections; + +/** + * Test for {@link CombineFirstTwoEntriesIndexed}. + */ +public class CombineFirstTwoEntriesIndexedTest extends InitializedNullHandlingTest +{ + @Test + public void testSizeZero() + { + final IllegalStateException e = Assert.assertThrows( + IllegalStateException.class, + () -> wrap(Indexed.empty(), "xyz") + ); + + MatcherAssert.assertThat( + e, + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Size[0] must be >= 2")) + ); + } + + @Test + public void testSizeOne() + { + final IllegalStateException e = Assert.assertThrows( + IllegalStateException.class, + () -> wrap(new ListIndexed<>("foo"), "xyz") + ); + + MatcherAssert.assertThat( + e, + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Size[1] must be >= 2")) + ); + } + + @Test + public void testSizeTwo() + { + final CombineFirstTwoEntriesIndexed indexed = wrap(new ListIndexed<>("bar", "foo"), "xyz"); + Assert.assertEquals(0, indexed.indexOf("xyz")); + Assert.assertEquals(-2, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf(null)); + Assert.assertEquals(1, indexed.size()); + Assert.assertEquals("xyz", indexed.get(0)); + Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed. + Assert.assertEquals(ImmutableList.of("xyz"), ImmutableList.copyOf(indexed)); + } + + @Test + public void testSizeThree() + { + final CombineFirstTwoEntriesIndexed indexed = wrap(new ListIndexed<>("bar", "baz", "foo"), "xyz"); + Assert.assertEquals(0, indexed.indexOf("xyz")); + Assert.assertEquals(1, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf(null)); + Assert.assertEquals("xyz", indexed.get(0)); + Assert.assertEquals("foo", indexed.get(1)); + Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed. + Assert.assertEquals(ImmutableList.of("xyz", "foo"), ImmutableList.copyOf(indexed)); + } + + @Test + public void testSizeTwoSorted() + { + final CombineFirstTwoEntriesIndexed indexed = wrap( + GenericIndexed.fromArray( + new String[]{"bar", "foo"}, + GenericIndexed.STRING_STRATEGY + ), + null + ); + + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(-2, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(1, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed. + Assert.assertEquals(Collections.singletonList(null), Lists.newArrayList(indexed)); + } + + @Test + public void testSizeThreeSorted() + { + final CombineFirstTwoEntriesIndexed indexed = wrap( + GenericIndexed.fromArray( + new String[]{"bar", "baz", "foo"}, + GenericIndexed.STRING_STRATEGY + ), + null + ); + + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(1, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-3, indexed.indexOf("qux")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(2, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertEquals("foo", indexed.get(1)); + Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed. + Assert.assertEquals(Lists.newArrayList(null, "foo"), Lists.newArrayList(indexed)); + } + + private CombineFirstTwoEntriesIndexed wrap(final Indexed indexed, @Nullable final T newFirstValue) + { + return new CombineFirstTwoEntriesIndexed(indexed) + { + @Override + protected T newFirstValue() + { + return newFirstValue; + } + }; + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarIntsTest.java b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarIntsTest.java new file mode 100644 index 000000000000..fee6a8395011 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarIntsTest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import org.apache.druid.segment.data.ArrayBasedIndexedInts; +import org.junit.Test; + +/** + * Test for {@link CombineFirstTwoValuesColumnarInts}. + */ +public class CombineFirstTwoValuesColumnarIntsTest +{ + @Test + public void testCombineFirstTwoValues() + { + // (expectedCombined, original) + assertCombine(new int[]{0, 1, 2}, new int[]{1, 2, 3}); + assertCombine(new int[]{0, 0, 1, 2}, new int[]{0, 1, 2, 3}); + assertCombine(new int[]{2, 0, 1, 0, 4, 0}, new int[]{3, 0, 2, 1, 5, 0}); + } + + private static void assertCombine(final int[] expectedCombined, final int[] original) + { + CombineFirstTwoValuesIndexedIntsTest.assertCombine( + expectedCombined, + original, + arr -> new CombineFirstTwoValuesIndexedInts(new ArrayBasedIndexedInts(arr)) + ); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiIntsTest.java b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiIntsTest.java new file mode 100644 index 000000000000..de8edf171bff --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiIntsTest.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.druid.segment.data.ColumnarMultiInts; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.data.VSizeColumnarInts; +import org.apache.druid.segment.data.VSizeColumnarMultiInts; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.List; + +/** + * Test for {@link CombineFirstTwoValuesColumnarMultiInts}. + */ +public class CombineFirstTwoValuesColumnarMultiIntsTest +{ + private ColumnarMultiInts original; + private CombineFirstTwoValuesColumnarMultiInts combined; + + @Before + public void setUp() + { + original = VSizeColumnarMultiInts.fromIterable( + ImmutableList.of( + VSizeColumnarInts.fromArray(new int[]{1, 2, 3}), + VSizeColumnarInts.fromArray(new int[]{0, 1, 2, 3}), + VSizeColumnarInts.fromArray(new int[]{3, 0, 2, 1, 5, 0}) + ) + ); + + combined = new CombineFirstTwoValuesColumnarMultiInts(original); + } + + @Test + public void testSize() + { + Assert.assertEquals(original.size(), combined.size()); + } + + @Test + public void testGet() + { + assertEquals(new int[]{0, 1, 2}, combined.get(0)); + assertEquals(new int[]{0, 0, 1, 2}, combined.get(1)); + assertEquals(new int[]{2, 0, 1, 0, 4, 0}, combined.get(2)); + + // "get" reuses a holder + Assert.assertSame(combined.get(1), combined.get(0)); + } + + @Test + public void testGetUnshared() + { + assertEquals(new int[]{0, 1, 2}, combined.getUnshared(0)); + assertEquals(new int[]{0, 0, 1, 2}, combined.getUnshared(1)); + assertEquals(new int[]{2, 0, 1, 0, 4, 0}, combined.getUnshared(2)); + + // Unlike "get", "getUnshared" does not reuse a holder + Assert.assertNotSame(combined.getUnshared(1), combined.getUnshared(0)); + } + + @Test + public void testIndexOf() + { + Assert.assertThrows( + UnsupportedOperationException.class, + () -> combined.indexOf(original.get(0)) + ); + } + + @Test + public void testIsSorted() + { + Assert.assertFalse(combined.isSorted()); + } + + @Test + public void testIterator() + { + final List fromIterator = Lists.newArrayList(combined.iterator()); + Assert.assertEquals(3, fromIterator.size()); + assertEquals(new int[]{0, 1, 2}, fromIterator.get(0)); + assertEquals(new int[]{0, 0, 1, 2}, fromIterator.get(1)); + assertEquals(new int[]{2, 0, 1, 0, 4, 0}, fromIterator.get(2)); + } + + public void assertEquals(final int[] expected, final IndexedInts actual) + { + final int sz = actual.size(); + final int[] actualArray = new int[sz]; + for (int i = 0; i < sz; i++) { + actualArray[i] = actual.get(i); + } + + Assert.assertArrayEquals(expected, actualArray); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedIntsTest.java b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedIntsTest.java new file mode 100644 index 000000000000..26a444735993 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedIntsTest.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import it.unimi.dsi.fastutil.ints.IntArrays; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.data.ArrayBasedIndexedInts; +import org.apache.druid.segment.data.IndexedInts; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.function.Function; + +/** + * Test for {@link CombineFirstTwoValuesIndexedInts}. + */ +public class CombineFirstTwoValuesIndexedIntsTest +{ + @Test + public void testCombineFirstTwoValues() + { + // (expectedCombined, original) + assertCombine(new int[]{0, 1, 2}, new int[]{1, 2, 3}); + assertCombine(new int[]{0, 0, 1, 2}, new int[]{0, 1, 2, 3}); + assertCombine(new int[]{2, 0, 1, 0, 4, 0}, new int[]{3, 0, 2, 1, 5, 0}); + } + + private static void assertCombine(final int[] expectedCombined, final int[] original) + { + assertCombine( + expectedCombined, + original, + arr -> new CombineFirstTwoValuesIndexedInts(new ArrayBasedIndexedInts(arr)) + ); + } + + static void assertCombine( + final int[] expectedCombined, + final int[] original, + final Function combineFn + ) + { + final IndexedInts combined = combineFn.apply(original); + + // Check size. + Assert.assertEquals( + StringUtils.format("%s (size)", Arrays.toString(original)), + expectedCombined.length, + combined.size() + ); + + // Check regular get. + final int[] arr = new int[expectedCombined.length]; + for (int i = 0; i < expectedCombined.length; i++) { + arr[i] = combined.get(i); + } + Assert.assertArrayEquals(StringUtils.format("%s (get)", Arrays.toString(original)), expectedCombined, arr); + + // Check contiguous vector get. + Arrays.fill(arr, Integer.MIN_VALUE); + combined.get(arr, 0, arr.length); + Assert.assertArrayEquals( + StringUtils.format("%s (contiguous vector get)", Arrays.toString(original)), + expectedCombined, + arr + ); + + // Check noncontiguous vector get. + final int[] indexes = new int[expectedCombined.length]; + for (int i = 0; i < expectedCombined.length; i++) { + indexes[indexes.length - 1 - i] = i; // Fetch backwards. + } + + Arrays.fill(arr, Integer.MIN_VALUE); + combined.get(arr, indexes, arr.length); + final int[] expectedCombinedReversed = IntArrays.reverse(IntArrays.copy(expectedCombined)); + Assert.assertArrayEquals( + StringUtils.format("%s (noncontiguous vector get, reversed)", Arrays.toString(original)), + expectedCombinedReversed, + arr + ); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexedTest.java b/processing/src/test/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexedTest.java new file mode 100644 index 000000000000..cc01ac6589dc --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexedTest.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.collect.Lists; +import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.data.ListIndexed; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.hamcrest.CoreMatchers; +import org.hamcrest.MatcherAssert; +import org.junit.Assert; +import org.junit.Test; +import org.junit.internal.matchers.ThrowableMessageMatcher; + +import java.util.Collections; + +/** + * Test for {@link ReplaceFirstValueWithNullIndexed}. + */ +public class ReplaceFirstValueWithNullIndexedTest extends InitializedNullHandlingTest +{ + @Test + public void testSizeZero() + { + final IllegalStateException e = Assert.assertThrows( + IllegalStateException.class, + () -> new ReplaceFirstValueWithNullIndexed<>(Indexed.empty()) + ); + + MatcherAssert.assertThat( + e, + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Size[0] must be >= 1")) + ); + } + + @Test + public void testSizeOne() + { + final ReplaceFirstValueWithNullIndexed indexed = + new ReplaceFirstValueWithNullIndexed<>(new ListIndexed<>("bar")); + + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(1, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed. + Assert.assertEquals(Collections.singletonList(null), Lists.newArrayList(indexed)); + } + + @Test + public void testSizeTwo() + { + final ReplaceFirstValueWithNullIndexed indexed = + new ReplaceFirstValueWithNullIndexed<>(new ListIndexed<>("bar", "foo")); + + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(1, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(2, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertEquals("foo", indexed.get(1)); + Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed. + Assert.assertEquals(Lists.newArrayList(null, "foo"), Lists.newArrayList(indexed)); + } + + @Test + public void testSizeOneSorted() + { + final ReplaceFirstValueWithNullIndexed indexed = + new ReplaceFirstValueWithNullIndexed<>( + GenericIndexed.fromArray( + new String[]{"bar"}, + GenericIndexed.STRING_STRATEGY + ) + ); + + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(1, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed. + Assert.assertEquals(Collections.singletonList(null), Lists.newArrayList(indexed)); + } + + @Test + public void testSizeTwoSorted() + { + final ReplaceFirstValueWithNullIndexed indexed = + new ReplaceFirstValueWithNullIndexed<>( + GenericIndexed.fromArray( + new String[]{"bar", "foo"}, + GenericIndexed.STRING_STRATEGY + ) + ); + + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(1, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-3, indexed.indexOf("qux")); + Assert.assertEquals(2, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertEquals("foo", indexed.get(1)); + Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed. + Assert.assertEquals(Lists.newArrayList(null, "foo"), Lists.newArrayList(indexed)); + } +}