From 247b06afb5b3d6b237b46e205c11d4693b4db18f Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 21 Apr 2023 13:33:52 -0700 Subject: [PATCH 1/9] Properly read SQL-compatible segments in default-value mode. Main changes: 1) Dictionary-encoded and front-coded string columns: in default-value mode, detect cases where a dictionary has the empty string in it, then either combine it with null (if null is present) or replace it with null (if null is not present). 2) Numeric nullable columns: in default-value mode, ignore the null value bitmap. This causes all null numbers to be read as zeroes. Testing strategy: 1) Add a mmappedWithSqlCompatibleNulls case to BaseFilterTest that writes segments under SQL-compatible mode, and reads them under default-value mode. 2) Unit tests for the new wrapper classes (CombineFirstTwoEntriesIndexed, CombineFirstTwoValuesColumnarInts, CombineFirstTwoValuesColumnarMultiInts, CombineFirstTwoValuesIndexedInts). --- .../druid/common/config/NullHandling.java | 53 ++++++ .../column/StringDictionaryEncodedColumn.java | 20 +- ...ringFrontCodedDictionaryEncodedColumn.java | 4 +- .../druid/segment/data/CachingIndexed.java | 14 +- .../druid/segment/data/GenericIndexed.java | 13 -- .../serde/CombineFirstTwoEntriesIndexed.java | 175 ++++++++++++++++++ .../CombineFirstTwoValuesColumnarInts.java | 50 +++++ ...ombineFirstTwoValuesColumnarMultiInts.java | 101 ++++++++++ .../CombineFirstTwoValuesIndexedInts.java | 94 ++++++++++ .../DictionaryEncodedColumnSupplier.java | 43 ++++- .../DictionaryEncodedStringIndexSupplier.java | 28 ++- .../serde/DoubleNumericColumnPartSerdeV2.java | 3 +- .../serde/FloatNumericColumnPartSerdeV2.java | 3 +- .../serde/LongNumericColumnPartSerdeV2.java | 3 +- .../ReplaceFirstValueWithNullIndexed.java | 122 ++++++++++++ .../StringFrontCodedColumnIndexSupplier.java | 31 ++-- ...tCodedDictionaryEncodedColumnSupplier.java | 27 ++- .../apache/druid/segment/IndexBuilder.java | 47 ++--- .../segment/IndexMergerNullHandlingTest.java | 8 +- .../druid/segment/filter/BaseFilterTest.java | 28 +++ 20 files changed, 779 insertions(+), 88 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java create mode 100644 processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java create mode 100644 processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java create mode 100644 processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java create mode 100644 processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java diff --git a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java index f7d31469c318..40dc3d5508b0 100644 --- a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java +++ b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java @@ -22,8 +22,10 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Strings; import com.google.inject.Inject; +import org.apache.druid.segment.data.Indexed; import javax.annotation.Nullable; +import java.nio.ByteBuffer; /** * Helper class for NullHandling. This class is used to switch between SQL compatible Null Handling behavior @@ -112,6 +114,16 @@ public static String emptyToNullIfNeeded(@Nullable String value) //CHECKSTYLE.ON: Regexp } + /** + * Returns null if passed an empty ByteBuffers ({@link ByteBuffer#remaining()} == 0) in default-value mode. + * Otherwise, returns the original ByteBuffer. + */ + @Nullable + public static ByteBuffer emptyToNullIfNeeded(@Nullable ByteBuffer buffer) + { + return replaceWithDefault() && buffer != null && buffer.remaining() == 0 ? null : buffer; + } + @Nullable public static String defaultStringValue() { @@ -163,4 +175,45 @@ public static boolean isNullOrEquivalent(@Nullable String value) { return replaceWithDefault() ? Strings.isNullOrEmpty(value) : value == null; } + + public static boolean isNullOrEquivalent(@Nullable ByteBuffer buffer) + { + return buffer == null || (replaceWithDefault() && buffer.remaining() == 0); + } + + /** + * Given a UTF-8 dictionary, returns whether the first two entries must be coalesced into a single null entry. + * This happens if we are in default-value mode and the first two entries are null and empty string. + * + * This and {@link #mustReplaceFirstValueWithNull(Indexed)} are never both true. + * + * Provided to enable compatibility for segments written under {@link #sqlCompatible()} mode but + * read under {@link #replaceWithDefault()} mode. + */ + public static boolean mustCombineNullAndEmpty(final Indexed dictionaryUtf8) + { + return NullHandling.replaceWithDefault() + && dictionaryUtf8.size() >= 2 + && isNullOrEquivalent(dictionaryUtf8.get(0)) + && isNullOrEquivalent(dictionaryUtf8.get(1)); + } + + /** + * Given a UTF-8 dictionary, returns whether the first entry must be replaced with null. This happens if we + * are in default-value mode and the first entry is an empty string. (Default-value mode expects it to be null.) + * + * This and {@link #mustCombineNullAndEmpty(Indexed)} are never both true. + * + * Provided to enable compatibility for segments written under {@link #sqlCompatible()} mode but + * read under {@link #replaceWithDefault()} mode. + */ + public static boolean mustReplaceFirstValueWithNull(final Indexed dictionaryUtf8) + { + if (NullHandling.replaceWithDefault() && dictionaryUtf8.size() >= 1) { + final ByteBuffer firstValue = dictionaryUtf8.get(0); + return firstValue != null && firstValue.remaining() == 0; + } + + return false; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java index 7af2273f509f..3518a39615b1 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java @@ -27,7 +27,6 @@ import org.apache.druid.segment.AbstractDimensionSelector; import org.apache.druid.segment.DimensionSelectorUtils; import org.apache.druid.segment.IdLookup; -import org.apache.druid.segment.data.CachingIndexed; import org.apache.druid.segment.data.ColumnarInts; import org.apache.druid.segment.data.ColumnarMultiInts; import org.apache.druid.segment.data.Indexed; @@ -45,6 +44,7 @@ import org.apache.druid.utils.CloseableUtils; import javax.annotation.Nullable; +import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -60,19 +60,19 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn cachedDictionary; + private final Indexed dictionary; private final Indexed dictionaryUtf8; public StringDictionaryEncodedColumn( @Nullable ColumnarInts singleValueColumn, @Nullable ColumnarMultiInts multiValueColumn, - CachingIndexed dictionary, + Indexed dictionary, Indexed dictionaryUtf8 ) { this.column = singleValueColumn; this.multiValueColumn = multiValueColumn; - this.cachedDictionary = dictionary; + this.dictionary = dictionary; this.dictionaryUtf8 = dictionaryUtf8; } @@ -104,7 +104,7 @@ public IndexedInts getMultiValueRow(int rowNum) @Nullable public String lookupName(int id) { - return cachedDictionary.get(id); + return dictionary.get(id); } @@ -130,13 +130,13 @@ public ByteBuffer lookupNameUtf8(int id) @Override public int lookupId(String name) { - return cachedDictionary.indexOf(name); + return dictionary.indexOf(name); } @Override public int getCardinality() { - return cachedDictionary.size(); + return dictionary.size(); } @Override @@ -495,7 +495,11 @@ public String lookupName(int id) @Override public void close() throws IOException { - CloseableUtils.closeAll(cachedDictionary, column, multiValueColumn); + CloseableUtils.closeAll( + dictionary instanceof Closeable ? (Closeable) dictionary : null /* Dictionary may be CachingIndexed */, + column, + multiValueColumn + ); } /** diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java index c447e9b63651..af1a5dd6888c 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java @@ -65,12 +65,12 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode private final ColumnarInts column; @Nullable private final ColumnarMultiInts multiValueColumn; - private final FrontCodedIndexed utf8Dictionary; + private final Indexed utf8Dictionary; public StringFrontCodedDictionaryEncodedColumn( @Nullable ColumnarInts singleValueColumn, @Nullable ColumnarMultiInts multiValueColumn, - FrontCodedIndexed utf8Dictionary + Indexed utf8Dictionary ) { this.column = singleValueColumn; diff --git a/processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java b/processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java index 4c4823550a71..6632eaa95e64 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java @@ -27,6 +27,7 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; +import java.util.function.ToIntFunction; public class CachingIndexed implements CloseableIndexed { @@ -34,7 +35,8 @@ public class CachingIndexed implements CloseableIndexed private static final Logger log = new Logger(CachingIndexed.class); - private final GenericIndexed.BufferIndexed delegate; + private final Indexed delegate; + private final ToIntFunction sizeFn; @Nullable private final SizedLRUMap cachedValues; @@ -44,12 +46,14 @@ public class CachingIndexed implements CloseableIndexed * CachingIndexed objects are not thread safe and should only be used by a single thread at a time. * CachingIndexed objects must be closed to release any underlying cache resources. * - * @param delegate the GenericIndexed to wrap with a lookup cache. + * @param delegate the Indexed to wrap with a lookup cache. + * @param sizeFn function that determines the size in bytes of an object * @param lookupCacheSize maximum size in bytes of the lookup cache if greater than zero */ - public CachingIndexed(GenericIndexed delegate, final int lookupCacheSize) + public CachingIndexed(Indexed delegate, final ToIntFunction sizeFn, final int lookupCacheSize) { - this.delegate = delegate.singleThreaded(); + this.delegate = delegate; + this.sizeFn = sizeFn; if (lookupCacheSize > 0) { log.debug("Allocating column cache of max size[%d]", lookupCacheSize); @@ -75,7 +79,7 @@ public T get(int index) } final T value = delegate.get(index); - cachedValues.put(index, value, delegate.getLastValueSize()); + cachedValues.put(index, value, sizeFn.applyAsInt(value)); return value; } else { return delegate.get(index); diff --git a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java index 62f50b0dc609..532907e75b52 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java @@ -474,8 +474,6 @@ public String toString() */ public abstract class BufferIndexed implements Indexed { - int lastReadSize; - @Override public int size() { @@ -507,7 +505,6 @@ ByteBuffer bufferedIndexedGetByteBuffer(ByteBuffer copyValueBuffer, int startOff || copyValueBuffer.get(startOffset - Integer.BYTES) == NULL_VALUE_SIZE_MARKER)) { return null; } - lastReadSize = size; // ObjectStrategy.fromByteBuffer() is allowed to reset the limit of the buffer. So if the limit is changed, // position() call could throw an exception, if the position is set beyond the new limit. Calling limit() @@ -526,16 +523,6 @@ ByteBuffer bufferedIndexedGetByteBuffer(ByteBuffer copyValueBuffer, int startOff @Nullable protected abstract ByteBuffer getByteBuffer(int index); - /** - * This method makes no guarantees with respect to thread safety - * - * @return the size in bytes of the last value read - */ - int getLastValueSize() - { - return lastReadSize; - } - @Override public int indexOf(@Nullable T value) { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java new file mode 100644 index 000000000000..7661f9f2094f --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.collections.bitmap.BitmapFactory; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.data.Indexed; + +import javax.annotation.Nullable; +import java.util.Iterator; +import java.util.Objects; + +/** + * An {@link Indexed} that delegates to an underyling instance, but combines the first two entries. + * + * Unlike {@link CombineFirstTwoValuesIndexedInts}, this class combines the first two *entries*. + * So [0, 1, 2] becomes [(something), 2]. The first two entries, 0 and 1, were replaced with (something). That something + * is given by {@link #newFirstValue()}. + * + * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but + * read under {@link NullHandling#replaceWithDefault()} mode. + * + * @see NullHandling#mustCombineNullAndEmpty(Indexed) + */ +public abstract class CombineFirstTwoEntriesIndexed implements Indexed +{ + private static final int FIRST_ID = 0; + + protected final Indexed delegate; + + protected CombineFirstTwoEntriesIndexed(Indexed delegate) + { + this.delegate = delegate; + } + + /** + * Combine the first two values into a literal null. + */ + public static CombineFirstTwoEntriesIndexed returnNull(final Indexed delegate) + { + return new CombineFirstTwoEntriesIndexed(delegate) + { + @Nullable + @Override + protected T newFirstValue() + { + return null; + } + }; + } + + /** + * Union the first two bitmaps. + */ + public static CombineFirstTwoEntriesIndexed unionBitmaps( + final BitmapFactory bitmapFactory, + final Indexed delegate + ) + { + return new CombineFirstTwoEntriesIndexed(delegate) + { + @Nullable + @Override + protected ImmutableBitmap newFirstValue() + { + return bitmapFactory.union(ImmutableList.of(delegate.get(FIRST_ID), delegate.get(FIRST_ID + 1))); + } + }; + } + + @Nullable + protected abstract T newFirstValue(); + + @Override + public int size() + { + return delegate.size() - 1; + } + + @Nullable + @Override + public T get(int index) + { + if (index == FIRST_ID) { + return newFirstValue(); + } else { + return delegate.get(index + 1); + } + } + + @Override + public int indexOf(@Nullable T value) + { + if (Objects.equals(newFirstValue(), value)) { + return FIRST_ID; + } else { + final int index = delegate.indexOf(value); + if (index > FIRST_ID) { + // Item found, index needs adjustment. + return index - 1; + } else if (index < -1) { + // Item not found, insertion point is > FIRST_ID. + return index + 1; + } else { + return index; + } + } + } + + @Override + public Iterator iterator() + { + final Iterator it = delegate.iterator(); + + // Skip first two values. + it.next(); + it.next(); + + class CoalescingIndexedIterator implements Iterator + { + boolean returnedFirstValue; + + @Override + public boolean hasNext() + { + return !returnedFirstValue || it.hasNext(); + } + + @Override + public T next() + { + if (!returnedFirstValue) { + returnedFirstValue = true; + return newFirstValue(); + } else { + return it.next(); + } + } + } + + return new CoalescingIndexedIterator(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + delegate.inspectRuntimeShape(inspector); + } + + @Override + public boolean isSorted() + { + return delegate.isSorted(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java new file mode 100644 index 000000000000..b80cd2cc33ba --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.segment.data.ColumnarInts; +import org.apache.druid.segment.data.Indexed; + +import java.io.IOException; + +/** + * A {@link ColumnarInts} that delegates to an underyling instance, but combines the values 0 and 1 into 0. + * + * This class combines *values*, not *indexes*. So [0, 1, 2] becomes [0, 0, 2]. (The 1 was replaced with a 0.) + * + * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but + * read under {@link NullHandling#replaceWithDefault()} mode. + * + * @see NullHandling#mustCombineNullAndEmpty(Indexed) + */ +public class CombineFirstTwoValuesColumnarInts extends CombineFirstTwoValuesIndexedInts implements ColumnarInts +{ + public CombineFirstTwoValuesColumnarInts(ColumnarInts delegate) + { + super(delegate); + } + + @Override + public void close() throws IOException + { + ((ColumnarInts) delegate).close(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java new file mode 100644 index 000000000000..a6813b22f001 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.data.ColumnarMultiInts; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.data.IndexedIterable; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.util.Iterator; + +/** + * A {@link ColumnarMultiInts} that delegates to an underyling instance, but combines the values 0 and 1 into 0. + * + * This class combines *values*, not *indexes*. So [[0, 1], [1, 2]] becomes [[0, 0], [0, 2]]. (The ones were replaced + * with zeroes.) + * + * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but + * read under {@link NullHandling#replaceWithDefault()} ()} mode. + * + * @see NullHandling#mustCombineNullAndEmpty(Indexed) + */ +public class CombineFirstTwoValuesColumnarMultiInts implements ColumnarMultiInts +{ + private final ColumnarMultiInts delegate; + + public CombineFirstTwoValuesColumnarMultiInts(ColumnarMultiInts delegate) + { + this.delegate = delegate; + } + + @Override + public IndexedInts get(int index) + { + return new CombineFirstTwoValuesIndexedInts(delegate.get(index)); + } + + @Override + public IndexedInts getUnshared(int index) + { + return new CombineFirstTwoValuesIndexedInts(delegate.getUnshared(index)); + } + + @Override + public int size() + { + return delegate.size(); + } + + @Override + public int indexOf(@Nullable IndexedInts value) + { + // No ColumnarMultiInts implement this method + throw new UnsupportedOperationException("Reverse lookup not allowed."); + } + + @Override + public boolean isSorted() + { + return delegate.isSorted(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + delegate.inspectRuntimeShape(inspector); + } + + @Override + public Iterator iterator() + { + return IndexedIterable.create(this).iterator(); + } + + @Override + public void close() throws IOException + { + delegate.close(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java new file mode 100644 index 000000000000..a63e91e6ada3 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.data.IndexedInts; + +/** + * A {@link IndexedInts} that delegates to an underyling instance, but combines the values 0 and 1 into 0. + * + * This class combines *values*, not *indexes*. So [0, 1, 2] becomes [0, 0, 2]. (The 1 was replaced with a 0.) + * + * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but + * read under {@link NullHandling#replaceWithDefault()} ()} mode. + * + * @see NullHandling#mustCombineNullAndEmpty(Indexed) + */ +public class CombineFirstTwoValuesIndexedInts implements IndexedInts +{ + private static final int NULL_ID = 0; + + protected final IndexedInts delegate; + + public CombineFirstTwoValuesIndexedInts(IndexedInts delegate) + { + this.delegate = delegate; + } + + @Override + public int size() + { + return delegate.size(); + } + + @Override + public int get(int index) + { + final int i = delegate.get(index); + if (i == NULL_ID) { + return i; + } else { + return i - 1; + } + } + + @Override + public void get(int[] out, int start, int length) + { + delegate.get(out, start, length); + + for (int i = 0 ; i < length ; i++) { + if (out[i] != NULL_ID) { + out[i] --; + } + } + } + + @Override + public void get(int[] out, int[] indexes, int length) + { + delegate.get(out, indexes, length); + + for (int i = 0 ; i < length ; i++) { + if (out[i] != NULL_ID) { + out[i] --; + } + } + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + delegate.inspectRuntimeShape(inspector); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java index f48dcfc61b2d..6564c0e25f9f 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java @@ -20,17 +20,20 @@ package org.apache.druid.segment.serde; import com.google.common.base.Supplier; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.StringDictionaryEncodedColumn; import org.apache.druid.segment.data.CachingIndexed; import org.apache.druid.segment.data.ColumnarInts; import org.apache.druid.segment.data.ColumnarMultiInts; import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.Indexed; import javax.annotation.Nullable; import java.nio.ByteBuffer; /** + * */ public class DictionaryEncodedColumnSupplier implements Supplier> { @@ -58,11 +61,39 @@ public DictionaryEncodedColumnSupplier( @Override public DictionaryEncodedColumn get() { - return new StringDictionaryEncodedColumn( - singleValuedColumn != null ? singleValuedColumn.get() : null, - multiValuedColumn != null ? multiValuedColumn.get() : null, - new CachingIndexed<>(dictionary, lookupCacheSize), - dictionaryUtf8.singleThreaded() - ); + final Indexed cacheWrappedDictionary; + + if (lookupCacheSize > 0) { + cacheWrappedDictionary = new CachingIndexed<>( + dictionary.singleThreaded(), + s -> s == null ? 0 : s.length() * Character.BYTES, + lookupCacheSize + ); + } else { + cacheWrappedDictionary = dictionary.singleThreaded(); + } + + if (NullHandling.mustCombineNullAndEmpty(dictionaryUtf8)) { + return new StringDictionaryEncodedColumn( + singleValuedColumn != null ? new CombineFirstTwoValuesColumnarInts(singleValuedColumn.get()) : null, + multiValuedColumn != null ? new CombineFirstTwoValuesColumnarMultiInts(multiValuedColumn.get()) : null, + CombineFirstTwoEntriesIndexed.returnNull(cacheWrappedDictionary), + CombineFirstTwoEntriesIndexed.returnNull(dictionaryUtf8.singleThreaded()) + ); + } else if (NullHandling.mustReplaceFirstValueWithNull(dictionaryUtf8)) { + return new StringDictionaryEncodedColumn( + singleValuedColumn != null ? singleValuedColumn.get() : null, + multiValuedColumn != null ? multiValuedColumn.get() : null, + new ReplaceFirstValueWithNullIndexed<>(cacheWrappedDictionary), + new ReplaceFirstValueWithNullIndexed<>(dictionaryUtf8.singleThreaded()) + ); + } else { + return new StringDictionaryEncodedColumn( + singleValuedColumn != null ? singleValuedColumn.get() : null, + multiValuedColumn != null ? multiValuedColumn.get() : null, + cacheWrappedDictionary, + dictionaryUtf8.singleThreaded() + ); + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java index ae2a1e148236..942373128d7d 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java @@ -75,13 +75,24 @@ public DictionaryEncodedStringIndexSupplier( public T as(Class clazz) { if (bitmaps != null) { - final Indexed singleThreadedStrings = dictionary.singleThreaded(); - final Indexed singleThreadedUtf8 = dictionaryUtf8.singleThreaded(); - final Indexed singleThreadedBitmaps = bitmaps.singleThreaded(); + Indexed singleThreadedStrings = dictionary.singleThreaded(); + Indexed singleThreadedUtf8 = dictionaryUtf8.singleThreaded(); + Indexed singleThreadedBitmaps = bitmaps.singleThreaded(); + + if (NullHandling.mustCombineNullAndEmpty(singleThreadedUtf8)) { + singleThreadedStrings = CombineFirstTwoEntriesIndexed.returnNull(singleThreadedStrings); + singleThreadedUtf8 = CombineFirstTwoEntriesIndexed.returnNull(singleThreadedUtf8); + singleThreadedBitmaps = CombineFirstTwoEntriesIndexed.unionBitmaps(bitmapFactory, singleThreadedBitmaps); + } else if (NullHandling.mustReplaceFirstValueWithNull(singleThreadedUtf8)) { + singleThreadedStrings = new ReplaceFirstValueWithNullIndexed<>(singleThreadedStrings); + singleThreadedUtf8 = new ReplaceFirstValueWithNullIndexed<>(singleThreadedUtf8); + } + if (clazz.equals(NullValueIndex.class)) { final BitmapColumnIndex nullIndex; - if (NullHandling.isNullOrEquivalent(dictionary.get(0))) { - nullIndex = new SimpleImmutableBitmapIndex(bitmaps.get(0)); + final ByteBuffer firstValue = singleThreadedUtf8.get(0); + if (firstValue == null || firstValue.remaining() == 0) { + nullIndex = new SimpleImmutableBitmapIndex(singleThreadedBitmaps.get(0)); } else { nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); } @@ -97,13 +108,14 @@ public T as(Class clazz) bitmapFactory, singleThreadedUtf8, singleThreadedBitmaps, - NullHandling.isNullOrEquivalent(dictionary.get(0)) + NullHandling.isNullOrEquivalent(singleThreadedStrings.get(0)) ); - } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class)) { + } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) + || clazz.equals(DictionaryEncodedValueIndex.class)) { return (T) new IndexedStringDictionaryEncodedStringValueIndex<>( bitmapFactory, singleThreadedStrings, - bitmaps + singleThreadedBitmaps ); } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java index 3e851484b2c9..99c63620cc7d 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Supplier; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.BitmapSerde; @@ -153,7 +154,7 @@ public Deserializer getDeserializer() buffer.position(initialPos + offset); final ImmutableBitmap bitmap; final boolean hasNulls; - if (buffer.hasRemaining()) { + if (buffer.hasRemaining() && NullHandling.sqlCompatible()) { bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); hasNulls = !bitmap.isEmpty(); } else { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java index 29a1e2c46fe6..07978bd81719 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.BitmapSerde; @@ -150,7 +151,7 @@ public Deserializer getDeserializer() buffer.position(initialPos + offset); final ImmutableBitmap bitmap; final boolean hasNulls; - if (buffer.hasRemaining()) { + if (buffer.hasRemaining() && NullHandling.sqlCompatible()) { bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); hasNulls = !bitmap.isEmpty(); } else { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java index c59bb99b1c7d..4e2fee0b729f 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.BitmapSerde; @@ -152,7 +153,7 @@ public Deserializer getDeserializer() buffer.position(initialPos + offset); final ImmutableBitmap bitmap; final boolean hasNulls; - if (buffer.hasRemaining()) { + if (buffer.hasRemaining() && NullHandling.sqlCompatible()) { bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); hasNulls = !bitmap.isEmpty(); } else { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java b/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java new file mode 100644 index 000000000000..551460af256e --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.data.Indexed; + +import javax.annotation.Nullable; +import java.util.Iterator; + +/** + * An Indexed that replaces the first value with a literal null. + * + * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but + * read under {@link NullHandling#replaceWithDefault()} mode. + * + * @see NullHandling#mustReplaceFirstValueWithNull(Indexed) + */ +public class ReplaceFirstValueWithNullIndexed implements Indexed +{ + private final Indexed delegate; + + public ReplaceFirstValueWithNullIndexed(Indexed delegate) + { + this.delegate = delegate; + } + + @Override + public int size() + { + return delegate.size(); + } + + @Nullable + @Override + public T get(int index) + { + if (index == 0) { + return null; + } else { + return delegate.get(index); + } + } + + @Override + public int indexOf(@Nullable T value) + { + if (value == null) { + return 0; + } else { + final int result = delegate.indexOf(value); + if (result == 0) { + return -2; + } else { + return result; + } + } + } + + @Override + public boolean isSorted() + { + return delegate.isSorted(); + } + + @Override + public Iterator iterator() + { + final Iterator it = delegate.iterator(); + + // Skip first value. + it.next(); + + class ReplaceFirstValueWithNullIndexedIterator implements Iterator + { + boolean returnedNull; + + @Override + public boolean hasNext() + { + return !returnedNull || it.hasNext(); + } + + @Override + public T next() + { + if (!returnedNull) { + returnedNull = true; + return null; + } else { + return it.next(); + } + } + } + + return new ReplaceFirstValueWithNullIndexedIterator(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + delegate.inspectRuntimeShape(inspector); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java index 894ddb55ffc6..fc1de494f2fc 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java @@ -44,11 +44,11 @@ import org.apache.druid.segment.data.Indexed; import javax.annotation.Nullable; +import java.nio.ByteBuffer; public class StringFrontCodedColumnIndexSupplier implements ColumnIndexSupplier { private final BitmapFactory bitmapFactory; - private final Supplier dictionary; private final Supplier utf8Dictionary; @Nullable @@ -67,21 +67,30 @@ public StringFrontCodedColumnIndexSupplier( this.bitmapFactory = bitmapFactory; this.bitmaps = bitmaps; this.utf8Dictionary = utf8Dictionary; - this.dictionary = () -> new StringEncodingStrategies.Utf8ToStringIndexed(this.utf8Dictionary.get()); this.indexedTree = indexedTree; } @Nullable @Override + @SuppressWarnings("unchecked") public T as(Class clazz) { if (bitmaps != null) { - final Indexed singleThreadedBitmaps = bitmaps.singleThreaded(); + Indexed dict = utf8Dictionary.get(); + Indexed singleThreadedBitmaps = bitmaps.singleThreaded(); + + if (NullHandling.mustCombineNullAndEmpty(dict)) { + dict = CombineFirstTwoEntriesIndexed.returnNull(dict); + singleThreadedBitmaps = CombineFirstTwoEntriesIndexed.unionBitmaps(bitmapFactory, singleThreadedBitmaps); + } else if (NullHandling.mustReplaceFirstValueWithNull(dict)) { + dict = new ReplaceFirstValueWithNullIndexed<>(dict); + } + if (clazz.equals(NullValueIndex.class)) { final BitmapColumnIndex nullIndex; - final StringEncodingStrategies.Utf8ToStringIndexed stringDictionary = dictionary.get(); - if (NullHandling.isNullOrEquivalent(stringDictionary.get(0))) { - nullIndex = new SimpleImmutableBitmapIndex(bitmaps.get(0)); + final ByteBuffer firstValue = dict.get(0); + if (NullHandling.isNullOrEquivalent(firstValue)) { + nullIndex = new SimpleImmutableBitmapIndex(singleThreadedBitmaps.get(0)); } else { nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); } @@ -89,17 +98,16 @@ public T as(Class clazz) } else if (clazz.equals(StringValueSetIndex.class)) { return (T) new IndexedUtf8ValueSetIndex<>( bitmapFactory, - utf8Dictionary.get(), + dict, singleThreadedBitmaps ); } else if (clazz.equals(DruidPredicateIndex.class)) { return (T) new IndexedStringDruidPredicateIndex<>( bitmapFactory, - dictionary.get(), + new StringEncodingStrategies.Utf8ToStringIndexed(dict), singleThreadedBitmaps ); } else if (clazz.equals(LexicographicalRangeIndex.class)) { - final FrontCodedIndexed dict = utf8Dictionary.get(); return (T) new IndexedUtf8LexicographicalRangeIndex<>( bitmapFactory, dict, @@ -108,10 +116,11 @@ public T as(Class clazz) ); } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class)) { + // Need string dictionary instead of UTF8 dictionary return (T) new IndexedStringDictionaryEncodedStringValueIndex<>( bitmapFactory, - dictionary.get(), - bitmaps + new StringEncodingStrategies.Utf8ToStringIndexed(dict), + singleThreadedBitmaps ); } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java index d67730de4363..37d82e8851da 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.serde; import com.google.common.base.Supplier; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.StringDictionaryEncodedColumn; import org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn; @@ -53,10 +54,26 @@ public StringFrontCodedDictionaryEncodedColumnSupplier( @Override public DictionaryEncodedColumn get() { - return new StringFrontCodedDictionaryEncodedColumn( - singleValuedColumn != null ? singleValuedColumn.get() : null, - multiValuedColumn != null ? multiValuedColumn.get() : null, - utf8Dictionary.get() - ); + final FrontCodedIndexed suppliedUtf8Dictionary = utf8Dictionary.get(); + + if (NullHandling.mustCombineNullAndEmpty(suppliedUtf8Dictionary)) { + return new StringFrontCodedDictionaryEncodedColumn( + singleValuedColumn != null ? new CombineFirstTwoValuesColumnarInts(singleValuedColumn.get()) : null, + multiValuedColumn != null ? new CombineFirstTwoValuesColumnarMultiInts(multiValuedColumn.get()) : null, + CombineFirstTwoEntriesIndexed.returnNull(suppliedUtf8Dictionary) + ); + } else if (NullHandling.mustReplaceFirstValueWithNull(suppliedUtf8Dictionary)) { + return new StringFrontCodedDictionaryEncodedColumn( + singleValuedColumn != null ? singleValuedColumn.get() : null, + multiValuedColumn != null ? multiValuedColumn.get() : null, + new ReplaceFirstValueWithNullIndexed<>(suppliedUtf8Dictionary) + ); + } else { + return new StringFrontCodedDictionaryEncodedColumn( + singleValuedColumn != null ? singleValuedColumn.get() : null, + multiValuedColumn != null ? multiValuedColumn.get() : null, + suppliedUtf8Dictionary + ); + } } } diff --git a/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java b/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java index 95a863dbb017..a9b3182ba4c1 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java @@ -115,6 +115,11 @@ public static IndexBuilder create(ObjectMapper jsonMapper, ColumnConfig columnCo return new IndexBuilder(jsonMapper, columnConfig); } + public IndexIO getIndexIO() + { + return indexIO; + } + public IndexBuilder schema(IncrementalIndexSchema schema) { this.schema = schema; @@ -191,18 +196,6 @@ public IndexBuilder rows(Iterable rows) return this; } - public IndexBuilder maxRows(int maxRows) - { - this.maxRows = maxRows; - return this; - } - - public IndexBuilder intermediaryPersistSize(int rows) - { - this.intermediatePersistSize = rows; - return this; - } - public IncrementalIndex buildIncrementalIndex() { if (inputSource != null) { @@ -218,7 +211,7 @@ public IncrementalIndex buildIncrementalIndex() return buildIncrementalIndexWithRows(schema, maxRows, rows); } - public QueryableIndex buildMMappedIndex() + public File buildMMappedIndexFile() { Preconditions.checkNotNull(indexMerger, "indexMerger"); Preconditions.checkNotNull(tmpDir, "tmpDir"); @@ -242,16 +235,14 @@ public QueryableIndex buildMMappedIndex() // queryable index instead of the incremental index, which also mimics the behavior of real ingestion tasks // which persist incremental indexes as intermediate segments and then merges all the intermediate segments to // publish - return indexIO.loadIndex( - indexMerger.merge( - adapters, - schema.isRollup(), - schema.getMetrics(), - tmpDir, - schema.getDimensionsSpec(), - indexSpec, - Integer.MAX_VALUE - ) + return indexMerger.merge( + adapters, + schema.isRollup(), + schema.getMetrics(), + tmpDir, + schema.getDimensionsSpec(), + indexSpec, + Integer.MAX_VALUE ); } catch (IOException e) { @@ -259,6 +250,16 @@ public QueryableIndex buildMMappedIndex() } } + public QueryableIndex buildMMappedIndex() + { + try { + return indexIO.loadIndex(buildMMappedIndexFile()); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + public QueryableIndex buildMMappedMergedIndex() { Preconditions.checkNotNull(tmpDir, "tmpDir"); diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java index 44948bbbd89c..2fc2e31fddc6 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java @@ -140,7 +140,7 @@ public void testStringColumnNullHandling() throws Exception // Compute all unique values, the same way that IndexMerger is expected to do it. final Set uniqueValues = new HashSet<>(); for (Map m : subsetList) { - final List dValues = normalize(m.get("d"), hasMultipleValues); + final List dValues = normalize(m.get("d")); uniqueValues.addAll(dValues); if (nullFlavors.contains(m)) { @@ -167,7 +167,7 @@ public void testStringColumnNullHandling() throws Exception subsetList.toString(), ImmutableMultiset.copyOf( subsetList.stream() - .map(m -> normalize(m.get("d"), hasMultipleValues)) + .map(m -> normalize(m.get("d"))) .distinct() // Distinct values only, because we expect rollup. .collect(Collectors.toList()) ), @@ -224,7 +224,7 @@ public void testStringColumnNullHandling() throws Exception /** * Normalize an input value the same way that IndexMerger is expected to do it. */ - private static List normalize(final Object value, final boolean hasMultipleValues) + private static List normalize(final Object value) { final List retVal = new ArrayList<>(); @@ -237,7 +237,7 @@ private static List normalize(final Object value, final boolean hasMulti if (list.isEmpty()) { // empty lists become nulls in single valued columns // they sometimes also become nulls in multi-valued columns (see comments in getRow()) - retVal.add(NullHandling.emptyToNullIfNeeded(null)); + retVal.add(null); } else { retVal.addAll(list.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toList())); } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index 99ab89103125..fe69533da027 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -103,6 +103,7 @@ import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; +import org.junit.Assume; import org.junit.Before; import org.junit.Rule; import org.junit.rules.TemporaryFolder; @@ -110,6 +111,8 @@ import javax.annotation.Nullable; import java.io.Closeable; +import java.io.File; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -324,6 +327,31 @@ public static Collection makeConstructors() return Pair.of(new QueryableIndexStorageAdapter(index), index); } ) + .put( + "mmappedWithSqlCompatibleNulls", + input -> { + // Build mmapped index in SQL-compatible null handling mode; read it in default-value mode. + Assume.assumeTrue(NullHandling.replaceWithDefault()); + final File file; + try { + NullHandling.initializeForTestsWithValues(false, null); + Assert.assertTrue(NullHandling.sqlCompatible()); + file = input.buildMMappedIndexFile(); + } + finally { + NullHandling.initializeForTests(); + } + + Assert.assertTrue(NullHandling.replaceWithDefault()); + try { + final QueryableIndex index = input.getIndexIO().loadIndex(file); + return Pair.of(new QueryableIndexStorageAdapter(index), index); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + ) .put( "mmappedMerged", input -> { From 782ecf1d472439bc5e6dae94821da3f3f93add38 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 21 Apr 2023 16:46:06 -0700 Subject: [PATCH 2/9] Fix a mistake, use more singlethreadedness. --- .../serde/DictionaryEncodedColumnSupplier.java | 11 ++++++----- .../serde/DictionaryEncodedStringIndexSupplier.java | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java index 6564c0e25f9f..d825a75fcddc 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java @@ -62,6 +62,7 @@ public DictionaryEncodedColumnSupplier( public DictionaryEncodedColumn get() { final Indexed cacheWrappedDictionary; + final Indexed singleThreadedDictionaryUtf8 = dictionaryUtf8.singleThreaded(); if (lookupCacheSize > 0) { cacheWrappedDictionary = new CachingIndexed<>( @@ -73,26 +74,26 @@ public DictionaryEncodedColumn get() cacheWrappedDictionary = dictionary.singleThreaded(); } - if (NullHandling.mustCombineNullAndEmpty(dictionaryUtf8)) { + if (NullHandling.mustCombineNullAndEmpty(singleThreadedDictionaryUtf8)) { return new StringDictionaryEncodedColumn( singleValuedColumn != null ? new CombineFirstTwoValuesColumnarInts(singleValuedColumn.get()) : null, multiValuedColumn != null ? new CombineFirstTwoValuesColumnarMultiInts(multiValuedColumn.get()) : null, CombineFirstTwoEntriesIndexed.returnNull(cacheWrappedDictionary), - CombineFirstTwoEntriesIndexed.returnNull(dictionaryUtf8.singleThreaded()) + CombineFirstTwoEntriesIndexed.returnNull(singleThreadedDictionaryUtf8) ); - } else if (NullHandling.mustReplaceFirstValueWithNull(dictionaryUtf8)) { + } else if (NullHandling.mustReplaceFirstValueWithNull(singleThreadedDictionaryUtf8)) { return new StringDictionaryEncodedColumn( singleValuedColumn != null ? singleValuedColumn.get() : null, multiValuedColumn != null ? multiValuedColumn.get() : null, new ReplaceFirstValueWithNullIndexed<>(cacheWrappedDictionary), - new ReplaceFirstValueWithNullIndexed<>(dictionaryUtf8.singleThreaded()) + new ReplaceFirstValueWithNullIndexed<>(singleThreadedDictionaryUtf8) ); } else { return new StringDictionaryEncodedColumn( singleValuedColumn != null ? singleValuedColumn.get() : null, multiValuedColumn != null ? multiValuedColumn.get() : null, cacheWrappedDictionary, - dictionaryUtf8.singleThreaded() + singleThreadedDictionaryUtf8 ); } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java index 942373128d7d..c6f47a1096f5 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java @@ -91,7 +91,7 @@ public T as(Class clazz) if (clazz.equals(NullValueIndex.class)) { final BitmapColumnIndex nullIndex; final ByteBuffer firstValue = singleThreadedUtf8.get(0); - if (firstValue == null || firstValue.remaining() == 0) { + if (NullHandling.isNullOrEquivalent(firstValue)) { nullIndex = new SimpleImmutableBitmapIndex(singleThreadedBitmaps.get(0)); } else { nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); From 56086b00426708b140f06b07552d68cdc607aadf Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 4 May 2023 15:08:03 -0700 Subject: [PATCH 3/9] WIP --- .../druid/common/config/NullHandling.java | 8 ++--- .../serde/CombineFirstTwoEntriesIndexed.java | 2 +- .../CombineFirstTwoValuesColumnarInts.java | 2 +- ...ombineFirstTwoValuesColumnarMultiInts.java | 4 +-- .../CombineFirstTwoValuesIndexedInts.java | 4 +-- .../DictionaryEncodedColumnSupplier.java | 4 +-- .../DictionaryEncodedStringIndexSupplier.java | 4 +-- .../ReplaceFirstValueWithNullIndexed.java | 2 +- .../StringFrontCodedColumnIndexSupplier.java | 4 +-- ...tCodedDictionaryEncodedColumnSupplier.java | 4 +-- .../druid/common/config/NullHandlingTest.java | 12 +++++++ .../CombineFirstTwoEntriesIndexedTest.java | 24 ++++++++++++++ .../CombineFirstTwoValuesIndexedIntsTest.java | 24 ++++++++++++++ .../ReplaceFirstValueWithNullIndexedTest.java | 33 +++++++++++++++++++ 14 files changed, 112 insertions(+), 19 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexedTest.java create mode 100644 processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedIntsTest.java create mode 100644 processing/src/test/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexedTest.java diff --git a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java index 40dc3d5508b0..7b1acc344b72 100644 --- a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java +++ b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java @@ -185,12 +185,12 @@ public static boolean isNullOrEquivalent(@Nullable ByteBuffer buffer) * Given a UTF-8 dictionary, returns whether the first two entries must be coalesced into a single null entry. * This happens if we are in default-value mode and the first two entries are null and empty string. * - * This and {@link #mustReplaceFirstValueWithNull(Indexed)} are never both true. + * This and {@link #mustReplaceFirstValueWithNullInDictionary(Indexed)} are never both true. * * Provided to enable compatibility for segments written under {@link #sqlCompatible()} mode but * read under {@link #replaceWithDefault()} mode. */ - public static boolean mustCombineNullAndEmpty(final Indexed dictionaryUtf8) + public static boolean mustCombineNullAndEmptyInDictionary(final Indexed dictionaryUtf8) { return NullHandling.replaceWithDefault() && dictionaryUtf8.size() >= 2 @@ -202,12 +202,12 @@ && isNullOrEquivalent(dictionaryUtf8.get(0)) * Given a UTF-8 dictionary, returns whether the first entry must be replaced with null. This happens if we * are in default-value mode and the first entry is an empty string. (Default-value mode expects it to be null.) * - * This and {@link #mustCombineNullAndEmpty(Indexed)} are never both true. + * This and {@link #mustCombineNullAndEmptyInDictionary(Indexed)} are never both true. * * Provided to enable compatibility for segments written under {@link #sqlCompatible()} mode but * read under {@link #replaceWithDefault()} mode. */ - public static boolean mustReplaceFirstValueWithNull(final Indexed dictionaryUtf8) + public static boolean mustReplaceFirstValueWithNullInDictionary(final Indexed dictionaryUtf8) { if (NullHandling.replaceWithDefault() && dictionaryUtf8.size() >= 1) { final ByteBuffer firstValue = dictionaryUtf8.get(0); diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java index 7661f9f2094f..bbe14c68d5f0 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java @@ -40,7 +40,7 @@ * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but * read under {@link NullHandling#replaceWithDefault()} mode. * - * @see NullHandling#mustCombineNullAndEmpty(Indexed) + * @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed) */ public abstract class CombineFirstTwoEntriesIndexed implements Indexed { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java index b80cd2cc33ba..1a5dda6686c8 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java @@ -33,7 +33,7 @@ * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but * read under {@link NullHandling#replaceWithDefault()} mode. * - * @see NullHandling#mustCombineNullAndEmpty(Indexed) + * @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed) */ public class CombineFirstTwoValuesColumnarInts extends CombineFirstTwoValuesIndexedInts implements ColumnarInts { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java index a6813b22f001..ccd01d3dac02 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java @@ -37,9 +37,9 @@ * with zeroes.) * * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but - * read under {@link NullHandling#replaceWithDefault()} ()} mode. + * read under {@link NullHandling#replaceWithDefault()} mode. * - * @see NullHandling#mustCombineNullAndEmpty(Indexed) + * @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed) */ public class CombineFirstTwoValuesColumnarMultiInts implements ColumnarMultiInts { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java index a63e91e6ada3..c21a6aada85f 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java @@ -30,9 +30,9 @@ * This class combines *values*, not *indexes*. So [0, 1, 2] becomes [0, 0, 2]. (The 1 was replaced with a 0.) * * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but - * read under {@link NullHandling#replaceWithDefault()} ()} mode. + * read under {@link NullHandling#replaceWithDefault()} mode. * - * @see NullHandling#mustCombineNullAndEmpty(Indexed) + * @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed) */ public class CombineFirstTwoValuesIndexedInts implements IndexedInts { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java index d825a75fcddc..a8148ca6a25a 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java @@ -74,14 +74,14 @@ public DictionaryEncodedColumn get() cacheWrappedDictionary = dictionary.singleThreaded(); } - if (NullHandling.mustCombineNullAndEmpty(singleThreadedDictionaryUtf8)) { + if (NullHandling.mustCombineNullAndEmptyInDictionary(singleThreadedDictionaryUtf8)) { return new StringDictionaryEncodedColumn( singleValuedColumn != null ? new CombineFirstTwoValuesColumnarInts(singleValuedColumn.get()) : null, multiValuedColumn != null ? new CombineFirstTwoValuesColumnarMultiInts(multiValuedColumn.get()) : null, CombineFirstTwoEntriesIndexed.returnNull(cacheWrappedDictionary), CombineFirstTwoEntriesIndexed.returnNull(singleThreadedDictionaryUtf8) ); - } else if (NullHandling.mustReplaceFirstValueWithNull(singleThreadedDictionaryUtf8)) { + } else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(singleThreadedDictionaryUtf8)) { return new StringDictionaryEncodedColumn( singleValuedColumn != null ? singleValuedColumn.get() : null, multiValuedColumn != null ? multiValuedColumn.get() : null, diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java index c6f47a1096f5..d7dc25b97964 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java @@ -79,11 +79,11 @@ public T as(Class clazz) Indexed singleThreadedUtf8 = dictionaryUtf8.singleThreaded(); Indexed singleThreadedBitmaps = bitmaps.singleThreaded(); - if (NullHandling.mustCombineNullAndEmpty(singleThreadedUtf8)) { + if (NullHandling.mustCombineNullAndEmptyInDictionary(singleThreadedUtf8)) { singleThreadedStrings = CombineFirstTwoEntriesIndexed.returnNull(singleThreadedStrings); singleThreadedUtf8 = CombineFirstTwoEntriesIndexed.returnNull(singleThreadedUtf8); singleThreadedBitmaps = CombineFirstTwoEntriesIndexed.unionBitmaps(bitmapFactory, singleThreadedBitmaps); - } else if (NullHandling.mustReplaceFirstValueWithNull(singleThreadedUtf8)) { + } else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(singleThreadedUtf8)) { singleThreadedStrings = new ReplaceFirstValueWithNullIndexed<>(singleThreadedStrings); singleThreadedUtf8 = new ReplaceFirstValueWithNullIndexed<>(singleThreadedUtf8); } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java b/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java index 551460af256e..25ae785ddab0 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java @@ -32,7 +32,7 @@ * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but * read under {@link NullHandling#replaceWithDefault()} mode. * - * @see NullHandling#mustReplaceFirstValueWithNull(Indexed) + * @see NullHandling#mustReplaceFirstValueWithNullInDictionary(Indexed) */ public class ReplaceFirstValueWithNullIndexed implements Indexed { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java index fc1de494f2fc..a617e3b409f5 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java @@ -79,10 +79,10 @@ public T as(Class clazz) Indexed dict = utf8Dictionary.get(); Indexed singleThreadedBitmaps = bitmaps.singleThreaded(); - if (NullHandling.mustCombineNullAndEmpty(dict)) { + if (NullHandling.mustCombineNullAndEmptyInDictionary(dict)) { dict = CombineFirstTwoEntriesIndexed.returnNull(dict); singleThreadedBitmaps = CombineFirstTwoEntriesIndexed.unionBitmaps(bitmapFactory, singleThreadedBitmaps); - } else if (NullHandling.mustReplaceFirstValueWithNull(dict)) { + } else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(dict)) { dict = new ReplaceFirstValueWithNullIndexed<>(dict); } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java index 37d82e8851da..82ba770c1f74 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java @@ -56,13 +56,13 @@ public DictionaryEncodedColumn get() { final FrontCodedIndexed suppliedUtf8Dictionary = utf8Dictionary.get(); - if (NullHandling.mustCombineNullAndEmpty(suppliedUtf8Dictionary)) { + if (NullHandling.mustCombineNullAndEmptyInDictionary(suppliedUtf8Dictionary)) { return new StringFrontCodedDictionaryEncodedColumn( singleValuedColumn != null ? new CombineFirstTwoValuesColumnarInts(singleValuedColumn.get()) : null, multiValuedColumn != null ? new CombineFirstTwoValuesColumnarMultiInts(multiValuedColumn.get()) : null, CombineFirstTwoEntriesIndexed.returnNull(suppliedUtf8Dictionary) ); - } else if (NullHandling.mustReplaceFirstValueWithNull(suppliedUtf8Dictionary)) { + } else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(suppliedUtf8Dictionary)) { return new StringFrontCodedDictionaryEncodedColumn( singleValuedColumn != null ? singleValuedColumn.get() : null, multiValuedColumn != null ? multiValuedColumn.get() : null, diff --git a/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java b/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java index 3e1a1a87c3d4..c5dc6acce376 100644 --- a/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java +++ b/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java @@ -105,4 +105,16 @@ public void test_ignoreNullsStrings() NullHandling.initializeForTests(); } } + + @Test + public void test_mustCombineNullAndEmptyInDictionary() + { + // TODO(gianm) + } + + @Test + public void test_mustReplaceFirstValueWithNullInDictionary() + { + // TODO(gianm) + } } diff --git a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexedTest.java b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexedTest.java new file mode 100644 index 000000000000..71c514f48d64 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexedTest.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +public class CombineFirstTwoEntriesIndexedTest +{ +} diff --git a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedIntsTest.java b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedIntsTest.java new file mode 100644 index 000000000000..7d4f2d61644d --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedIntsTest.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +public class CombineFirstTwoValuesIndexedIntsTest +{ +} diff --git a/processing/src/test/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexedTest.java b/processing/src/test/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexedTest.java new file mode 100644 index 000000000000..3d580e551916 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexedTest.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import org.junit.Test; + +public class ReplaceFirstValueWithNullIndexedTest +{ + @Test + public void testX() + { + + } + + +} From f8fcd312596c167eeb2c291e4d23db1b146d0e7e Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Sun, 25 Jun 2023 20:21:33 -0700 Subject: [PATCH 4/9] Tests, improvements. --- .../serde/CombineFirstTwoEntriesIndexed.java | 29 +++- .../CombineFirstTwoValuesColumnarInts.java | 4 +- ...ombineFirstTwoValuesColumnarMultiInts.java | 18 ++- .../CombineFirstTwoValuesIndexedInts.java | 17 ++- .../ReplaceFirstValueWithNullIndexed.java | 11 +- .../CombineFirstTwoEntriesIndexedTest.java | 140 +++++++++++++++++- ...CombineFirstTwoValuesColumnarIntsTest.java | 47 ++++++ ...neFirstTwoValuesColumnarMultiIntsTest.java | 119 +++++++++++++++ .../CombineFirstTwoValuesIndexedIntsTest.java | 77 ++++++++++ .../ReplaceFirstValueWithNullIndexedTest.java | 108 +++++++++++++- 10 files changed, 544 insertions(+), 26 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarIntsTest.java create mode 100644 processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiIntsTest.java diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java index bbe14c68d5f0..b42833b78270 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java @@ -23,6 +23,7 @@ import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.data.Indexed; @@ -40,6 +41,11 @@ * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but * read under {@link NullHandling#replaceWithDefault()} mode. * + * Important note: {@link #isSorted()} returns the same value as the underlying delegate. In this case, this class + * assumes that {@link #newFirstValue()} is the lowest possible value in the universe: including anything in + * {@link #delegate} and anything that might be passed to {@link #indexOf(Object)}. Callers must ensure that this + * precondition is met. + * * @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed) */ public abstract class CombineFirstTwoEntriesIndexed implements Indexed @@ -51,6 +57,10 @@ public abstract class CombineFirstTwoEntriesIndexed implements Indexed protected CombineFirstTwoEntriesIndexed(Indexed delegate) { this.delegate = delegate; + + if (delegate.size() < 2) { + throw new ISE("Size[%s] must be >= 2", delegate.size()); + } } /** @@ -115,14 +125,23 @@ public int indexOf(@Nullable T value) return FIRST_ID; } else { final int index = delegate.indexOf(value); - if (index > FIRST_ID) { + + if (index > FIRST_ID + 1) { // Item found, index needs adjustment. return index - 1; - } else if (index < -1) { - // Item not found, insertion point is > FIRST_ID. - return index + 1; + } else if (index >= 0) { + // Item found, but shadowed, so really not found. + // Insertion point is after FIRST_ID. (See class-level javadoc: newFirstValue is required to be + // lower than all elements in the universe.) + return -2; + } else if (index >= -2) { + // Item not found, and insertion point is prior to, or within, the shadowed portion of delegate. Return + // insertion point immediately after newFirstValue, since that value is required to be lower than all elements + // in the universe. + return -2; } else { - return index; + // Item not found, and insertion point is after the shadowed portion of delegate. Adjust and return. + return index + 1; } } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java index 1a5dda6686c8..07ccd303db63 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarInts.java @@ -26,9 +26,7 @@ import java.io.IOException; /** - * A {@link ColumnarInts} that delegates to an underyling instance, but combines the values 0 and 1 into 0. - * - * This class combines *values*, not *indexes*. So [0, 1, 2] becomes [0, 0, 2]. (The 1 was replaced with a 0.) + * A {@link ColumnarInts} facade over {@link CombineFirstTwoValuesIndexedInts}. * * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but * read under {@link NullHandling#replaceWithDefault()} mode. diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java index ccd01d3dac02..3ce540f60e7a 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java @@ -19,22 +19,20 @@ package org.apache.druid.segment.serde; +import com.google.common.collect.Iterators; import org.apache.druid.common.config.NullHandling; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.data.ColumnarMultiInts; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.IndexedInts; -import org.apache.druid.segment.data.IndexedIterable; import javax.annotation.Nullable; import java.io.IOException; import java.util.Iterator; /** - * A {@link ColumnarMultiInts} that delegates to an underyling instance, but combines the values 0 and 1 into 0. - * - * This class combines *values*, not *indexes*. So [[0, 1], [1, 2]] becomes [[0, 0], [0, 2]]. (The ones were replaced - * with zeroes.) + * A {@link ColumnarMultiInts} that delegates to an underyling instance, but applies + * {@link CombineFirstTwoValuesIndexedInts} to each row's set of values. * * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but * read under {@link NullHandling#replaceWithDefault()} mode. @@ -44,16 +42,19 @@ public class CombineFirstTwoValuesColumnarMultiInts implements ColumnarMultiInts { private final ColumnarMultiInts delegate; + private final CombineFirstTwoValuesIndexedInts rowValues; public CombineFirstTwoValuesColumnarMultiInts(ColumnarMultiInts delegate) { this.delegate = delegate; + this.rowValues = new CombineFirstTwoValuesIndexedInts(null); } @Override public IndexedInts get(int index) { - return new CombineFirstTwoValuesIndexedInts(delegate.get(index)); + rowValues.delegate = delegate.get(index); + return rowValues; } @Override @@ -90,7 +91,10 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public Iterator iterator() { - return IndexedIterable.create(this).iterator(); + return Iterators.transform( + delegate.iterator(), + CombineFirstTwoValuesIndexedInts::new + ); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java index c21a6aada85f..360408f22de0 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java @@ -25,9 +25,12 @@ import org.apache.druid.segment.data.IndexedInts; /** - * A {@link IndexedInts} that delegates to an underyling instance, but combines the values 0 and 1 into 0. + * A {@link IndexedInts} that delegates to an underyling instance, but combines the values 0 and 1 into 0, and shifts + * all other values down by one. For example: * - * This class combines *values*, not *indexes*. So [0, 1, 2] becomes [0, 0, 2]. (The 1 was replaced with a 0.) + * - [2, 0, 1] => [1, 0, 0] + * - [3, 2, 1] => [2, 1, 0] + * - [0, 1, 0] => [0, 0, 0] * * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but * read under {@link NullHandling#replaceWithDefault()} mode. @@ -36,9 +39,9 @@ */ public class CombineFirstTwoValuesIndexedInts implements IndexedInts { - private static final int NULL_ID = 0; + private static final int ZERO_ID = 0; - protected final IndexedInts delegate; + IndexedInts delegate; public CombineFirstTwoValuesIndexedInts(IndexedInts delegate) { @@ -55,7 +58,7 @@ public int size() public int get(int index) { final int i = delegate.get(index); - if (i == NULL_ID) { + if (i == ZERO_ID) { return i; } else { return i - 1; @@ -68,7 +71,7 @@ public void get(int[] out, int start, int length) delegate.get(out, start, length); for (int i = 0 ; i < length ; i++) { - if (out[i] != NULL_ID) { + if (out[i] != ZERO_ID) { out[i] --; } } @@ -80,7 +83,7 @@ public void get(int[] out, int[] indexes, int length) delegate.get(out, indexes, length); for (int i = 0 ; i < length ; i++) { - if (out[i] != NULL_ID) { + if (out[i] != ZERO_ID) { out[i] --; } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java b/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java index 25ae785ddab0..be9e1385d944 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexed.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.serde; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.data.Indexed; @@ -32,6 +33,10 @@ * Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but * read under {@link NullHandling#replaceWithDefault()} mode. * + * Important note: {@link #isSorted()} returns the same value as the underlying delegate. In this case, this class + * assumes that {@code null} is the lowest possible value in the universe: including anything in {@link #delegate} and + * anything that might be passed to {@link #indexOf(Object)}. Callers must ensure that this precondition is met. + * * @see NullHandling#mustReplaceFirstValueWithNullInDictionary(Indexed) */ public class ReplaceFirstValueWithNullIndexed implements Indexed @@ -41,6 +46,10 @@ public class ReplaceFirstValueWithNullIndexed implements Indexed public ReplaceFirstValueWithNullIndexed(Indexed delegate) { this.delegate = delegate; + + if (delegate.size() < 1) { + throw new ISE("Size[%s] must be >= 1", delegate.size()); + } } @Override @@ -67,7 +76,7 @@ public int indexOf(@Nullable T value) return 0; } else { final int result = delegate.indexOf(value); - if (result == 0) { + if (result == 0 || result == -1) { return -2; } else { return result; diff --git a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexedTest.java b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexedTest.java index 71c514f48d64..e5dd68978a3d 100644 --- a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexedTest.java +++ b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexedTest.java @@ -19,6 +19,144 @@ package org.apache.druid.segment.serde; -public class CombineFirstTwoEntriesIndexedTest +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.data.ListIndexed; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.hamcrest.CoreMatchers; +import org.hamcrest.MatcherAssert; +import org.junit.Assert; +import org.junit.Test; +import org.junit.internal.matchers.ThrowableMessageMatcher; + +import javax.annotation.Nullable; +import java.util.Collections; + +/** + * Test for {@link CombineFirstTwoEntriesIndexed}. + */ +public class CombineFirstTwoEntriesIndexedTest extends InitializedNullHandlingTest { + @Test + public void testSizeZero() + { + final IllegalStateException e = Assert.assertThrows( + IllegalStateException.class, + () -> wrap(Indexed.empty(), "xyz") + ); + + MatcherAssert.assertThat( + e, + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Size[0] must be >= 2")) + ); + } + + @Test + public void testSizeOne() + { + final IllegalStateException e = Assert.assertThrows( + IllegalStateException.class, + () -> wrap(new ListIndexed<>("foo"), "xyz") + ); + + MatcherAssert.assertThat( + e, + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Size[1] must be >= 2")) + ); + } + + @Test + public void testSizeTwo() + { + final CombineFirstTwoEntriesIndexed indexed = wrap(new ListIndexed<>("bar", "foo"), "xyz"); + Assert.assertEquals(0, indexed.indexOf("xyz")); + Assert.assertEquals(-2, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf(null)); + Assert.assertEquals(1, indexed.size()); + Assert.assertEquals("xyz", indexed.get(0)); + Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed. + Assert.assertEquals(ImmutableList.of("xyz"), ImmutableList.copyOf(indexed)); + } + + @Test + public void testSizeThree() + { + final CombineFirstTwoEntriesIndexed indexed = wrap(new ListIndexed<>("bar", "baz", "foo"), "xyz"); + Assert.assertEquals(0, indexed.indexOf("xyz")); + Assert.assertEquals(1, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf(null)); + Assert.assertEquals("xyz", indexed.get(0)); + Assert.assertEquals("foo", indexed.get(1)); + Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed. + Assert.assertEquals(ImmutableList.of("xyz", "foo"), ImmutableList.copyOf(indexed)); + } + + @Test + public void testSizeTwoSorted() + { + final CombineFirstTwoEntriesIndexed indexed = wrap( + GenericIndexed.fromArray( + new String[]{"bar", "foo"}, + GenericIndexed.STRING_STRATEGY + ), + null + ); + + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(-2, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(1, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed. + Assert.assertEquals(Collections.singletonList(null), Lists.newArrayList(indexed)); + } + + @Test + public void testSizeThreeSorted() + { + final CombineFirstTwoEntriesIndexed indexed = wrap( + GenericIndexed.fromArray( + new String[]{"bar", "baz", "foo"}, + GenericIndexed.STRING_STRATEGY + ), + null + ); + + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(1, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-3, indexed.indexOf("qux")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(2, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertEquals("foo", indexed.get(1)); + Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed. + Assert.assertEquals(Lists.newArrayList(null, "foo"), Lists.newArrayList(indexed)); + } + + private CombineFirstTwoEntriesIndexed wrap(final Indexed indexed, @Nullable final T newFirstValue) + { + return new CombineFirstTwoEntriesIndexed(indexed) + { + @Override + protected T newFirstValue() + { + return newFirstValue; + } + }; + } } diff --git a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarIntsTest.java b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarIntsTest.java new file mode 100644 index 000000000000..fee6a8395011 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarIntsTest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import org.apache.druid.segment.data.ArrayBasedIndexedInts; +import org.junit.Test; + +/** + * Test for {@link CombineFirstTwoValuesColumnarInts}. + */ +public class CombineFirstTwoValuesColumnarIntsTest +{ + @Test + public void testCombineFirstTwoValues() + { + // (expectedCombined, original) + assertCombine(new int[]{0, 1, 2}, new int[]{1, 2, 3}); + assertCombine(new int[]{0, 0, 1, 2}, new int[]{0, 1, 2, 3}); + assertCombine(new int[]{2, 0, 1, 0, 4, 0}, new int[]{3, 0, 2, 1, 5, 0}); + } + + private static void assertCombine(final int[] expectedCombined, final int[] original) + { + CombineFirstTwoValuesIndexedIntsTest.assertCombine( + expectedCombined, + original, + arr -> new CombineFirstTwoValuesIndexedInts(new ArrayBasedIndexedInts(arr)) + ); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiIntsTest.java b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiIntsTest.java new file mode 100644 index 000000000000..de8edf171bff --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiIntsTest.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.druid.segment.data.ColumnarMultiInts; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.data.VSizeColumnarInts; +import org.apache.druid.segment.data.VSizeColumnarMultiInts; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.List; + +/** + * Test for {@link CombineFirstTwoValuesColumnarMultiInts}. + */ +public class CombineFirstTwoValuesColumnarMultiIntsTest +{ + private ColumnarMultiInts original; + private CombineFirstTwoValuesColumnarMultiInts combined; + + @Before + public void setUp() + { + original = VSizeColumnarMultiInts.fromIterable( + ImmutableList.of( + VSizeColumnarInts.fromArray(new int[]{1, 2, 3}), + VSizeColumnarInts.fromArray(new int[]{0, 1, 2, 3}), + VSizeColumnarInts.fromArray(new int[]{3, 0, 2, 1, 5, 0}) + ) + ); + + combined = new CombineFirstTwoValuesColumnarMultiInts(original); + } + + @Test + public void testSize() + { + Assert.assertEquals(original.size(), combined.size()); + } + + @Test + public void testGet() + { + assertEquals(new int[]{0, 1, 2}, combined.get(0)); + assertEquals(new int[]{0, 0, 1, 2}, combined.get(1)); + assertEquals(new int[]{2, 0, 1, 0, 4, 0}, combined.get(2)); + + // "get" reuses a holder + Assert.assertSame(combined.get(1), combined.get(0)); + } + + @Test + public void testGetUnshared() + { + assertEquals(new int[]{0, 1, 2}, combined.getUnshared(0)); + assertEquals(new int[]{0, 0, 1, 2}, combined.getUnshared(1)); + assertEquals(new int[]{2, 0, 1, 0, 4, 0}, combined.getUnshared(2)); + + // Unlike "get", "getUnshared" does not reuse a holder + Assert.assertNotSame(combined.getUnshared(1), combined.getUnshared(0)); + } + + @Test + public void testIndexOf() + { + Assert.assertThrows( + UnsupportedOperationException.class, + () -> combined.indexOf(original.get(0)) + ); + } + + @Test + public void testIsSorted() + { + Assert.assertFalse(combined.isSorted()); + } + + @Test + public void testIterator() + { + final List fromIterator = Lists.newArrayList(combined.iterator()); + Assert.assertEquals(3, fromIterator.size()); + assertEquals(new int[]{0, 1, 2}, fromIterator.get(0)); + assertEquals(new int[]{0, 0, 1, 2}, fromIterator.get(1)); + assertEquals(new int[]{2, 0, 1, 0, 4, 0}, fromIterator.get(2)); + } + + public void assertEquals(final int[] expected, final IndexedInts actual) + { + final int sz = actual.size(); + final int[] actualArray = new int[sz]; + for (int i = 0; i < sz; i++) { + actualArray[i] = actual.get(i); + } + + Assert.assertArrayEquals(expected, actualArray); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedIntsTest.java b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedIntsTest.java index 7d4f2d61644d..26a444735993 100644 --- a/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedIntsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedIntsTest.java @@ -19,6 +19,83 @@ package org.apache.druid.segment.serde; +import it.unimi.dsi.fastutil.ints.IntArrays; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.data.ArrayBasedIndexedInts; +import org.apache.druid.segment.data.IndexedInts; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.function.Function; + +/** + * Test for {@link CombineFirstTwoValuesIndexedInts}. + */ public class CombineFirstTwoValuesIndexedIntsTest { + @Test + public void testCombineFirstTwoValues() + { + // (expectedCombined, original) + assertCombine(new int[]{0, 1, 2}, new int[]{1, 2, 3}); + assertCombine(new int[]{0, 0, 1, 2}, new int[]{0, 1, 2, 3}); + assertCombine(new int[]{2, 0, 1, 0, 4, 0}, new int[]{3, 0, 2, 1, 5, 0}); + } + + private static void assertCombine(final int[] expectedCombined, final int[] original) + { + assertCombine( + expectedCombined, + original, + arr -> new CombineFirstTwoValuesIndexedInts(new ArrayBasedIndexedInts(arr)) + ); + } + + static void assertCombine( + final int[] expectedCombined, + final int[] original, + final Function combineFn + ) + { + final IndexedInts combined = combineFn.apply(original); + + // Check size. + Assert.assertEquals( + StringUtils.format("%s (size)", Arrays.toString(original)), + expectedCombined.length, + combined.size() + ); + + // Check regular get. + final int[] arr = new int[expectedCombined.length]; + for (int i = 0; i < expectedCombined.length; i++) { + arr[i] = combined.get(i); + } + Assert.assertArrayEquals(StringUtils.format("%s (get)", Arrays.toString(original)), expectedCombined, arr); + + // Check contiguous vector get. + Arrays.fill(arr, Integer.MIN_VALUE); + combined.get(arr, 0, arr.length); + Assert.assertArrayEquals( + StringUtils.format("%s (contiguous vector get)", Arrays.toString(original)), + expectedCombined, + arr + ); + + // Check noncontiguous vector get. + final int[] indexes = new int[expectedCombined.length]; + for (int i = 0; i < expectedCombined.length; i++) { + indexes[indexes.length - 1 - i] = i; // Fetch backwards. + } + + Arrays.fill(arr, Integer.MIN_VALUE); + combined.get(arr, indexes, arr.length); + final int[] expectedCombinedReversed = IntArrays.reverse(IntArrays.copy(expectedCombined)); + Assert.assertArrayEquals( + StringUtils.format("%s (noncontiguous vector get, reversed)", Arrays.toString(original)), + expectedCombinedReversed, + arr + ); + } } diff --git a/processing/src/test/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexedTest.java b/processing/src/test/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexedTest.java index 3d580e551916..cc01ac6589dc 100644 --- a/processing/src/test/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexedTest.java +++ b/processing/src/test/java/org/apache/druid/segment/serde/ReplaceFirstValueWithNullIndexedTest.java @@ -19,15 +19,119 @@ package org.apache.druid.segment.serde; +import com.google.common.collect.Lists; +import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.data.ListIndexed; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.hamcrest.CoreMatchers; +import org.hamcrest.MatcherAssert; +import org.junit.Assert; import org.junit.Test; +import org.junit.internal.matchers.ThrowableMessageMatcher; -public class ReplaceFirstValueWithNullIndexedTest +import java.util.Collections; + +/** + * Test for {@link ReplaceFirstValueWithNullIndexed}. + */ +public class ReplaceFirstValueWithNullIndexedTest extends InitializedNullHandlingTest { @Test - public void testX() + public void testSizeZero() + { + final IllegalStateException e = Assert.assertThrows( + IllegalStateException.class, + () -> new ReplaceFirstValueWithNullIndexed<>(Indexed.empty()) + ); + + MatcherAssert.assertThat( + e, + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Size[0] must be >= 1")) + ); + } + + @Test + public void testSizeOne() + { + final ReplaceFirstValueWithNullIndexed indexed = + new ReplaceFirstValueWithNullIndexed<>(new ListIndexed<>("bar")); + + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(1, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed. + Assert.assertEquals(Collections.singletonList(null), Lists.newArrayList(indexed)); + } + + @Test + public void testSizeTwo() + { + final ReplaceFirstValueWithNullIndexed indexed = + new ReplaceFirstValueWithNullIndexed<>(new ListIndexed<>("bar", "foo")); + + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(1, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(2, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertEquals("foo", indexed.get(1)); + Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed. + Assert.assertEquals(Lists.newArrayList(null, "foo"), Lists.newArrayList(indexed)); + } + + @Test + public void testSizeOneSorted() { + final ReplaceFirstValueWithNullIndexed indexed = + new ReplaceFirstValueWithNullIndexed<>( + GenericIndexed.fromArray( + new String[]{"bar"}, + GenericIndexed.STRING_STRATEGY + ) + ); + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-2, indexed.indexOf("qux")); + Assert.assertEquals(1, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed. + Assert.assertEquals(Collections.singletonList(null), Lists.newArrayList(indexed)); } + @Test + public void testSizeTwoSorted() + { + final ReplaceFirstValueWithNullIndexed indexed = + new ReplaceFirstValueWithNullIndexed<>( + GenericIndexed.fromArray( + new String[]{"bar", "foo"}, + GenericIndexed.STRING_STRATEGY + ) + ); + Assert.assertEquals(0, indexed.indexOf(null)); + Assert.assertEquals(1, indexed.indexOf("foo")); + Assert.assertEquals(-2, indexed.indexOf("")); + Assert.assertEquals(-2, indexed.indexOf("bar")); + Assert.assertEquals(-2, indexed.indexOf("baz")); + Assert.assertEquals(-3, indexed.indexOf("qux")); + Assert.assertEquals(2, indexed.size()); + Assert.assertNull(indexed.get(0)); + Assert.assertEquals("foo", indexed.get(1)); + Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed. + Assert.assertEquals(Lists.newArrayList(null, "foo"), Lists.newArrayList(indexed)); + } } From eef70cb02c8c2fd95182d79a81df6c67a3e8e4b5 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Sun, 25 Jun 2023 22:34:31 -0700 Subject: [PATCH 5/9] Style. --- .../segment/serde/CombineFirstTwoEntriesIndexed.java | 2 ++ .../segment/serde/CombineFirstTwoValuesIndexedInts.java | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java index b42833b78270..3e109946d72e 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoEntriesIndexed.java @@ -152,8 +152,10 @@ public Iterator iterator() final Iterator it = delegate.iterator(); // Skip first two values. + //CHECKSTYLE.OFF: Regexp it.next(); it.next(); + //CHECKSTYLE.ON: Regexp class CoalescingIndexedIterator implements Iterator { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java index 360408f22de0..e2e6bf6d6e99 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesIndexedInts.java @@ -70,9 +70,9 @@ public void get(int[] out, int start, int length) { delegate.get(out, start, length); - for (int i = 0 ; i < length ; i++) { + for (int i = 0; i < length; i++) { if (out[i] != ZERO_ID) { - out[i] --; + out[i]--; } } } @@ -82,9 +82,9 @@ public void get(int[] out, int[] indexes, int length) { delegate.get(out, indexes, length); - for (int i = 0 ; i < length ; i++) { + for (int i = 0; i < length; i++) { if (out[i] != ZERO_ID) { - out[i] --; + out[i]--; } } } From 6f131016aee50523afc2af095c53ca9d9d5d5211 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 26 Jun 2023 08:11:01 -0700 Subject: [PATCH 6/9] See Spot bug. --- .../segment/serde/CombineFirstTwoValuesColumnarMultiInts.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java index 3ce540f60e7a..4d8cc5f61f6f 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CombineFirstTwoValuesColumnarMultiInts.java @@ -25,6 +25,7 @@ import org.apache.druid.segment.data.ColumnarMultiInts; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.data.ZeroIndexedInts; import javax.annotation.Nullable; import java.io.IOException; @@ -47,7 +48,7 @@ public class CombineFirstTwoValuesColumnarMultiInts implements ColumnarMultiInts public CombineFirstTwoValuesColumnarMultiInts(ColumnarMultiInts delegate) { this.delegate = delegate; - this.rowValues = new CombineFirstTwoValuesIndexedInts(null); + this.rowValues = new CombineFirstTwoValuesIndexedInts(ZeroIndexedInts.instance()); } @Override From e17e0beee903c86af5b533cb0fa16d6a4cc63216 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 26 Jun 2023 09:30:41 -0700 Subject: [PATCH 7/9] Remove unused method. --- .../org/apache/druid/common/config/NullHandling.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java index 7b1acc344b72..8e6f242e9a34 100644 --- a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java +++ b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java @@ -114,16 +114,6 @@ public static String emptyToNullIfNeeded(@Nullable String value) //CHECKSTYLE.ON: Regexp } - /** - * Returns null if passed an empty ByteBuffers ({@link ByteBuffer#remaining()} == 0) in default-value mode. - * Otherwise, returns the original ByteBuffer. - */ - @Nullable - public static ByteBuffer emptyToNullIfNeeded(@Nullable ByteBuffer buffer) - { - return replaceWithDefault() && buffer != null && buffer.remaining() == 0 ? null : buffer; - } - @Nullable public static String defaultStringValue() { From 897c81ab84a6834631591c2907ea2312371c9a3e Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 27 Jun 2023 09:38:13 -0700 Subject: [PATCH 8/9] Address review comments. 1) Read bitmaps even if we don't retain them. 2) Combine StringFrontCodedDictionaryEncodedColumn and ScalarStringDictionaryEncodedColumn. --- ...=> StringUtf8DictionaryEncodedColumn.java} | 36 +- .../nested/NestedCommonFormatColumn.java | 4 +- .../ScalarStringColumnAndIndexSupplier.java | 6 +- .../nested/ScalarStringColumnSerializer.java | 7 +- .../ScalarStringDictionaryEncodedColumn.java | 382 ------------------ .../serde/DoubleNumericColumnPartSerdeV2.java | 11 +- .../serde/FloatNumericColumnPartSerdeV2.java | 9 +- .../serde/LongNumericColumnPartSerdeV2.java | 9 +- ...tCodedDictionaryEncodedColumnSupplier.java | 10 +- .../ScalarStringColumnSupplierTest.java | 7 +- 10 files changed, 60 insertions(+), 421 deletions(-) rename processing/src/main/java/org/apache/druid/segment/column/{StringFrontCodedDictionaryEncodedColumn.java => StringUtf8DictionaryEncodedColumn.java} (90%) delete mode 100644 processing/src/main/java/org/apache/druid/segment/nested/ScalarStringDictionaryEncodedColumn.java diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java similarity index 90% rename from processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java rename to processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java index 80e90c8f45de..baf7a4be9c75 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java @@ -30,7 +30,6 @@ import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.data.ColumnarInts; import org.apache.druid.segment.data.ColumnarMultiInts; -import org.apache.druid.segment.data.FrontCodedIndexed; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.ReadableOffset; @@ -51,17 +50,15 @@ import java.util.BitSet; /** - * {@link DictionaryEncodedColumn} for a column which uses a {@link FrontCodedIndexed} to store its value - * dictionary, which 'delta encodes' strings (instead of {@link org.apache.druid.segment.data.GenericIndexed} like - * {@link StringDictionaryEncodedColumn}). + * {@link DictionaryEncodedColumn} for a column which has only a UTF-8 dictionary, no String dictionary. *

- * This class is otherwise nearly identical to {@link StringDictionaryEncodedColumn} other than the dictionary - * difference. + * This class is otherwise nearly identical to {@link StringDictionaryEncodedColumn} other than lacking a + * String dictionary. *

* Implements {@link NestedCommonFormatColumn} so it can be used as a reader for single value string specializations * of {@link org.apache.druid.segment.AutoTypeColumnIndexer}. */ -public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncodedColumn, +public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColumn, NestedCommonFormatColumn { @Nullable @@ -70,7 +67,7 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode private final ColumnarMultiInts multiValueColumn; private final Indexed utf8Dictionary; - public StringFrontCodedDictionaryEncodedColumn( + public StringUtf8DictionaryEncodedColumn( @Nullable ColumnarInts singleValueColumn, @Nullable ColumnarMultiInts multiValueColumn, Indexed utf8Dictionary @@ -102,6 +99,9 @@ public int getSingleValueRow(int rowNum) @Override public IndexedInts getMultiValueRow(int rowNum) { + if (!hasMultipleValues()) { + throw new UnsupportedOperationException("Column is not multi-valued"); + } return multiValueColumn.get(rowNum); } @@ -154,7 +154,7 @@ public int getValueCardinality() @Override public String lookupName(int id) { - final String value = StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); + final String value = StringUtf8DictionaryEncodedColumn.this.lookupName(id); return extractionFn == null ? value : extractionFn.apply(value); } @@ -190,7 +190,7 @@ public int lookupId(String name) if (extractionFn != null) { throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function"); } - return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name); + return StringUtf8DictionaryEncodedColumn.this.lookupId(name); } } @@ -291,7 +291,7 @@ public boolean matches() @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { - inspector.visit("column", StringFrontCodedDictionaryEncodedColumn.this); + inspector.visit("column", StringUtf8DictionaryEncodedColumn.this); } }; } else { @@ -332,7 +332,7 @@ public boolean matches() @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { - inspector.visit("column", StringFrontCodedDictionaryEncodedColumn.this); + inspector.visit("column", StringUtf8DictionaryEncodedColumn.this); } }; } @@ -381,7 +381,7 @@ public int getValueCardinality() @Override public String lookupName(final int id) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); + return StringUtf8DictionaryEncodedColumn.this.lookupName(id); } @Nullable @@ -394,7 +394,7 @@ public ByteBuffer lookupNameUtf8(int id) @Override public int lookupId(@Nullable String name) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name); + return StringUtf8DictionaryEncodedColumn.this.lookupId(name); } } @@ -421,7 +421,7 @@ public int getValueCardinality() @Override public String lookupName(final int id) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); + return StringUtf8DictionaryEncodedColumn.this.lookupName(id); } @Nullable @@ -435,7 +435,7 @@ public ByteBuffer lookupNameUtf8(int id) @Override public int lookupId(@Nullable String name) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name); + return StringUtf8DictionaryEncodedColumn.this.lookupId(name); } } @@ -457,7 +457,7 @@ public StringVectorSelector() @Override public String lookupName(int id) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); + return StringUtf8DictionaryEncodedColumn.this.lookupName(id); } } return new StringVectorSelector(); @@ -473,7 +473,7 @@ public MultiStringVectorSelector() @Override public String lookupName(int id) { - return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); + return StringUtf8DictionaryEncodedColumn.this.lookupName(id); } } return new MultiStringVectorSelector(); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java index 53208409284d..59c0070d2430 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java @@ -30,6 +30,7 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnFormat; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; @@ -45,8 +46,7 @@ * * @see ScalarDoubleColumn * @see ScalarLongColumn - * @see ScalarStringDictionaryEncodedColumn - * @see org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn + * @see StringUtf8DictionaryEncodedColumn * @see VariantColumn * @see CompressedNestedDataComplexColumn */ diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnAndIndexSupplier.java index 8c1aa67c9987..51b5d2d335d6 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnAndIndexSupplier.java @@ -42,7 +42,7 @@ import org.apache.druid.segment.column.SimpleImmutableBitmapIndex; import org.apache.druid.segment.column.StringEncodingStrategies; import org.apache.druid.segment.column.StringEncodingStrategy; -import org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ColumnarInts; @@ -190,13 +190,13 @@ private ScalarStringColumnAndIndexSupplier( public NestedCommonFormatColumn get() { if (frontCodedStringDictionarySupplier != null) { - return new StringFrontCodedDictionaryEncodedColumn( + return new StringUtf8DictionaryEncodedColumn( encodedColumnSupplier.get(), null, frontCodedStringDictionarySupplier.get() ); } - return new ScalarStringDictionaryEncodedColumn<>(encodedColumnSupplier.get(), stringDictionary.singleThreaded()); + return new StringUtf8DictionaryEncodedColumn(encodedColumnSupplier.get(), null, stringDictionary.singleThreaded()); } @Nullable diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnSerializer.java index 72838cc3d7e3..909d0d50f4bb 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnSerializer.java @@ -33,6 +33,7 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.column.StringEncodingStrategies; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer; import org.apache.druid.segment.data.CompressionStrategy; import org.apache.druid.segment.data.DictionaryWriter; @@ -45,10 +46,8 @@ import java.nio.channels.WritableByteChannel; /** - * Serializer for a string {@link NestedCommonFormatColumn} that can be read with either - * {@link ScalarStringDictionaryEncodedColumn} or - * {@link org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn} (if written with a front-coded - * dictionary). + * Serializer for a string {@link NestedCommonFormatColumn} that can be read with + * {@link StringUtf8DictionaryEncodedColumn}. */ public class ScalarStringColumnSerializer extends NestedCommonFormatColumnSerializer { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringDictionaryEncodedColumn.java deleted file mode 100644 index 97d3a1b56d65..000000000000 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringDictionaryEncodedColumn.java +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.nested; - -import com.google.common.base.Predicate; -import com.google.common.base.Predicates; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.query.extraction.ExtractionFn; -import org.apache.druid.query.filter.ValueMatcher; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; -import org.apache.druid.segment.AbstractDimensionSelector; -import org.apache.druid.segment.IdLookup; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.column.StringDictionaryEncodedColumn; -import org.apache.druid.segment.column.StringEncodingStrategies; -import org.apache.druid.segment.data.ColumnarInts; -import org.apache.druid.segment.data.Indexed; -import org.apache.druid.segment.data.IndexedInts; -import org.apache.druid.segment.data.ReadableOffset; -import org.apache.druid.segment.data.SingleIndexedInt; -import org.apache.druid.segment.filter.BooleanValueMatcher; -import org.apache.druid.segment.historical.HistoricalDimensionSelector; -import org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector; -import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; -import org.apache.druid.segment.vector.ReadableVectorOffset; -import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; -import org.apache.druid.segment.vector.VectorObjectSelector; -import org.apache.druid.utils.CloseableUtils; - -import javax.annotation.Nullable; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.BitSet; - -/** - * {@link NestedCommonFormatColumn} specialization for {@link ColumnType#STRING} with a generic buffer based utf8 - * dictionary. This is used when not using the more specific - * {@link org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn}, and only supports single value - * strings. - */ -public class ScalarStringDictionaryEncodedColumn> - implements DictionaryEncodedColumn, NestedCommonFormatColumn -{ - private final ColumnarInts column; - private final TIndexed utf8Dictionary; - - public ScalarStringDictionaryEncodedColumn( - ColumnarInts singleValueColumn, - TIndexed utf8Dictionary - ) - { - this.column = singleValueColumn; - this.utf8Dictionary = utf8Dictionary; - } - - @Override - public int length() - { - return column.size(); - } - - @Override - public boolean hasMultipleValues() - { - return false; - } - - @Override - public int getSingleValueRow(int rowNum) - { - return column.get(rowNum); - } - - @Override - public IndexedInts getMultiValueRow(int rowNum) - { - throw new UnsupportedOperationException("Column is not multi-valued"); - } - - @Override - @Nullable - public String lookupName(int id) - { - final ByteBuffer buffer = utf8Dictionary.get(id); - if (buffer == null) { - return null; - } - return StringUtils.fromUtf8(buffer); - } - - @Override - public int lookupId(String name) - { - return utf8Dictionary.indexOf(StringUtils.toUtf8ByteBuffer(name)); - } - - @Override - public int getCardinality() - { - return utf8Dictionary.size(); - } - - @Override - public HistoricalDimensionSelector makeDimensionSelector( - final ReadableOffset offset, - @Nullable final ExtractionFn extractionFn - ) - { - class SingleValueQueryableDimensionSelector extends AbstractDimensionSelector - implements SingleValueHistoricalDimensionSelector, IdLookup, HistoricalDimensionSelector - { - private final SingleIndexedInt row = new SingleIndexedInt(); - - @Override - public int getValueCardinality() - { - /* - This is technically wrong if - extractionFn != null && (extractionFn.getExtractionType() != ExtractionFn.ExtractionType.ONE_TO_ONE || - !extractionFn.preservesOrdering()) - However current behavior allows some GroupBy-V1 queries to work that wouldn't work otherwise and doesn't - cause any problems due to special handling of extractionFn everywhere. - See https://github.com/apache/druid/pull/8433 - */ - return getCardinality(); - } - - @Override - public String lookupName(int id) - { - final String value = ScalarStringDictionaryEncodedColumn.this.lookupName(id); - return extractionFn == null ? value : extractionFn.apply(value); - } - - @Nullable - @Override - public ByteBuffer lookupNameUtf8(int id) - { - return utf8Dictionary.get(id); - } - - @Override - public boolean supportsLookupNameUtf8() - { - return true; - } - - @Override - public boolean nameLookupPossibleInAdvance() - { - return true; - } - - @Nullable - @Override - public IdLookup idLookup() - { - return extractionFn == null ? this : null; - } - - @Override - public int lookupId(String name) - { - if (extractionFn != null) { - throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function"); - } - return ScalarStringDictionaryEncodedColumn.this.lookupId(name); - } - - @Override - public IndexedInts getRow() - { - row.setValue(getRowValue()); - return row; - } - - public int getRowValue() - { - return column.get(offset.getOffset()); - } - - @Override - public IndexedInts getRow(int offset) - { - row.setValue(getRowValue(offset)); - return row; - } - - @Override - public int getRowValue(int offset) - { - return column.get(offset); - } - - @Override - public ValueMatcher makeValueMatcher(final @Nullable String value) - { - if (extractionFn == null) { - final int valueId = lookupId(value); - if (valueId >= 0) { - return new ValueMatcher() - { - @Override - public boolean matches() - { - return getRowValue() == valueId; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("column", ScalarStringDictionaryEncodedColumn.this); - } - }; - } else { - return BooleanValueMatcher.of(false); - } - } else { - // Employ caching BitSet optimization - return makeValueMatcher(Predicates.equalTo(value)); - } - } - - @Override - public ValueMatcher makeValueMatcher(final Predicate predicate) - { - final BitSet checkedIds = new BitSet(getCardinality()); - final BitSet matchingIds = new BitSet(getCardinality()); - - // Lazy matcher; only check an id if matches() is called. - return new ValueMatcher() - { - @Override - public boolean matches() - { - final int id = getRowValue(); - - if (checkedIds.get(id)) { - return matchingIds.get(id); - } else { - final boolean matches = predicate.apply(lookupName(id)); - checkedIds.set(id); - if (matches) { - matchingIds.set(id); - } - return matches; - } - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("column", ScalarStringDictionaryEncodedColumn.this); - } - }; - } - - @Override - public Object getObject() - { - return lookupName(getRowValue()); - } - - @Override - public Class classOfObject() - { - return String.class; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("column", column); - inspector.visit("offset", offset); - inspector.visit("extractionFn", extractionFn); - } - } - return new SingleValueQueryableDimensionSelector(); - } - - @Override - public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset) - { - final class StringVectorSelector extends StringDictionaryEncodedColumn.StringSingleValueDimensionVectorSelector - { - public StringVectorSelector() - { - super(column, offset); - } - - @Override - public int getValueCardinality() - { - return getCardinality(); - } - - @Nullable - @Override - public String lookupName(final int id) - { - return ScalarStringDictionaryEncodedColumn.this.lookupName(id); - } - - @Nullable - @Override - public ByteBuffer lookupNameUtf8(int id) - { - return utf8Dictionary.get(id); - } - - @Override - public int lookupId(@Nullable String name) - { - return ScalarStringDictionaryEncodedColumn.this.lookupId(name); - } - } - - return new StringVectorSelector(); - } - - @Override - public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(ReadableVectorOffset vectorOffset) - { - throw new UnsupportedOperationException("Column not multi-valued"); - } - - @Override - public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) - { - final class StringVectorSelector extends StringDictionaryEncodedColumn.StringVectorObjectSelector - { - public StringVectorSelector() - { - super(column, offset); - } - - @Nullable - @Override - public String lookupName(int id) - { - return ScalarStringDictionaryEncodedColumn.this.lookupName(id); - } - } - return new StringVectorSelector(); - } - - @Override - public void close() throws IOException - { - CloseableUtils.closeAll(column); - } - - @Override - public ColumnType getLogicalType() - { - return ColumnType.STRING; - } - - @Override - public Indexed getStringDictionary() - { - return new StringEncodingStrategies.Utf8ToStringIndexed(utf8Dictionary); - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java index 99c63620cc7d..5eda4f84ae90 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DoubleNumericColumnPartSerdeV2.java @@ -154,8 +154,15 @@ public Deserializer getDeserializer() buffer.position(initialPos + offset); final ImmutableBitmap bitmap; final boolean hasNulls; - if (buffer.hasRemaining() && NullHandling.sqlCompatible()) { - bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + if (buffer.hasRemaining()) { + if (NullHandling.sqlCompatible()) { + bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + } else { + // Read from the buffer (to advance its position) but do not actually retain the bitmaps. + bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + } + hasNulls = !bitmap.isEmpty(); } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); diff --git a/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java index 07978bd81719..8e79bc7a3fa8 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/FloatNumericColumnPartSerdeV2.java @@ -152,7 +152,14 @@ public Deserializer getDeserializer() final ImmutableBitmap bitmap; final boolean hasNulls; if (buffer.hasRemaining() && NullHandling.sqlCompatible()) { - bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + if (NullHandling.sqlCompatible()) { + bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + } else { + // Read from the buffer (to advance its position) but do not actually retain the bitmaps. + bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + } + hasNulls = !bitmap.isEmpty(); } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); diff --git a/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java b/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java index 4e2fee0b729f..7f145b36a413 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/LongNumericColumnPartSerdeV2.java @@ -154,7 +154,14 @@ public Deserializer getDeserializer() final ImmutableBitmap bitmap; final boolean hasNulls; if (buffer.hasRemaining() && NullHandling.sqlCompatible()) { - bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + if (NullHandling.sqlCompatible()) { + bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + } else { + // Read from the buffer (to advance its position) but do not actually retain the bitmaps. + bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer); + bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + } + hasNulls = !bitmap.isEmpty(); } else { bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java index 82ba770c1f74..23bc28acff1d 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java @@ -23,7 +23,7 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.StringDictionaryEncodedColumn; -import org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.data.ColumnarInts; import org.apache.druid.segment.data.ColumnarMultiInts; import org.apache.druid.segment.data.FrontCodedIndexed; @@ -31,7 +31,7 @@ import javax.annotation.Nullable; /** - * {@link DictionaryEncodedColumnSupplier} but for columns using a {@link StringFrontCodedDictionaryEncodedColumn} + * {@link DictionaryEncodedColumnSupplier} but for columns using a {@link StringUtf8DictionaryEncodedColumn} * instead of the traditional {@link StringDictionaryEncodedColumn} */ public class StringFrontCodedDictionaryEncodedColumnSupplier implements Supplier> @@ -57,19 +57,19 @@ public DictionaryEncodedColumn get() final FrontCodedIndexed suppliedUtf8Dictionary = utf8Dictionary.get(); if (NullHandling.mustCombineNullAndEmptyInDictionary(suppliedUtf8Dictionary)) { - return new StringFrontCodedDictionaryEncodedColumn( + return new StringUtf8DictionaryEncodedColumn( singleValuedColumn != null ? new CombineFirstTwoValuesColumnarInts(singleValuedColumn.get()) : null, multiValuedColumn != null ? new CombineFirstTwoValuesColumnarMultiInts(multiValuedColumn.get()) : null, CombineFirstTwoEntriesIndexed.returnNull(suppliedUtf8Dictionary) ); } else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(suppliedUtf8Dictionary)) { - return new StringFrontCodedDictionaryEncodedColumn( + return new StringUtf8DictionaryEncodedColumn( singleValuedColumn != null ? singleValuedColumn.get() : null, multiValuedColumn != null ? multiValuedColumn.get() : null, new ReplaceFirstValueWithNullIndexed<>(suppliedUtf8Dictionary) ); } else { - return new StringFrontCodedDictionaryEncodedColumn( + return new StringUtf8DictionaryEncodedColumn( singleValuedColumn != null ? singleValuedColumn.get() : null, multiValuedColumn != null ? multiValuedColumn.get() : null, suppliedUtf8Dictionary diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java index 954e4998535e..5b86747aa2ae 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java @@ -45,6 +45,7 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DruidPredicateIndex; import org.apache.druid.segment.column.NullValueIndex; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; @@ -192,7 +193,7 @@ public void testBasicFunctionality() throws IOException bob, NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES ); - try (ScalarStringDictionaryEncodedColumn column = (ScalarStringDictionaryEncodedColumn) supplier.get()) { + try (StringUtf8DictionaryEncodedColumn column = (StringUtf8DictionaryEncodedColumn) supplier.get()) { smokeTest(supplier, column); } } @@ -225,7 +226,7 @@ public void testConcurrency() throws ExecutionException, InterruptedException try { threadsStartLatch.await(); for (int iter = 0; iter < 5000; iter++) { - try (ScalarStringDictionaryEncodedColumn column = (ScalarStringDictionaryEncodedColumn) supplier.get()) { + try (StringUtf8DictionaryEncodedColumn column = (StringUtf8DictionaryEncodedColumn) supplier.get()) { smokeTest(supplier, column); } } @@ -241,7 +242,7 @@ public void testConcurrency() throws ExecutionException, InterruptedException Assert.assertEquals(expectedReason, failureReason.get()); } - private void smokeTest(ScalarStringColumnAndIndexSupplier supplier, ScalarStringDictionaryEncodedColumn column) + private void smokeTest(ScalarStringColumnAndIndexSupplier supplier, StringUtf8DictionaryEncodedColumn column) { SimpleAscendingOffset offset = new SimpleAscendingOffset(data.size()); ColumnValueSelector valueSelector = column.makeColumnValueSelector(offset); From 8690646ba79ed14dbaa851b5ad6fbd5dd0cbbd66 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 27 Jun 2023 12:55:51 -0700 Subject: [PATCH 9/9] Add missing tests. --- .../druid/common/config/NullHandlingTest.java | 78 ++++++++++++++++++- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java b/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java index d32eee28b42d..db9ade70f425 100644 --- a/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java +++ b/processing/src/test/java/org/apache/druid/common/config/NullHandlingTest.java @@ -19,10 +19,14 @@ package org.apache.druid.common.config; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Test; +import java.util.Collections; + import static org.apache.druid.common.config.NullHandling.replaceWithDefault; import static org.junit.Assert.assertEquals; @@ -103,12 +107,82 @@ public void test_ignoreNullsStrings() @Test public void test_mustCombineNullAndEmptyInDictionary() { - // TODO(gianm) + Assert.assertFalse( + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(Collections.singletonList(null)) + ) + ); + + Assert.assertFalse( + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer("foo")) + ) + ); + + Assert.assertFalse( + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer("")) + ) + ); + + Assert.assertFalse( + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo")) + ) + ); + + Assert.assertEquals( + NullHandling.replaceWithDefault(), + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer("")) + ) + ); + + Assert.assertEquals( + NullHandling.replaceWithDefault(), + NullHandling.mustCombineNullAndEmptyInDictionary( + new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo"))) + ); } @Test public void test_mustReplaceFirstValueWithNullInDictionary() { - // TODO(gianm) + Assert.assertFalse( + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(Collections.singletonList(null)) + ) + ); + + Assert.assertFalse( + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer("foo")) + ) + ); + + Assert.assertEquals( + NullHandling.replaceWithDefault(), + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer("")) + ) + ); + + Assert.assertEquals( + NullHandling.replaceWithDefault(), + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo")) + ) + ); + + Assert.assertFalse( + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer("")) + ) + ); + + Assert.assertFalse( + NullHandling.mustReplaceFirstValueWithNullInDictionary( + new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo"))) + ); } }