From 4a683c0ba8b76746f95e02786951eb6f5360c01b Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 26 Jan 2023 21:53:32 -0800 Subject: [PATCH 1/7] fix nested column handling of null and "null" --- ...balDictionaryEncodedFieldColumnWriter.java | 4 +- .../nested/GlobalDictionaryIdLookup.java | 3 + .../nested/NestedDataColumnSerializer.java | 5 +- .../nested/StringFieldColumnWriter.java | 3 + .../druid/query/SchemaEvolutionTest.java | 2 +- .../query/scan/NestedDataScanQueryTest.java | 313 --------------- .../timeseries/TimeseriesQueryRunnerTest.java | 2 +- .../segment/NestedDataColumnIndexerTest.java | 17 +- .../druid/segment/SchemalessTestFullTest.java | 4 +- .../org/apache/druid/segment/TestHelper.java | 11 +- .../nested/NestedDataColumnSupplierTest.java | 44 +- ...NestedFieldLiteralColumnSelectorsTest.java | 380 ++++++++++++++++++ .../dimension/LookupDimensionSpecTest.java | 4 +- 13 files changed, 440 insertions(+), 352 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnSelectorsTest.java diff --git a/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryEncodedFieldColumnWriter.java b/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryEncodedFieldColumnWriter.java index 8284cfee3356..8c8693d3ec45 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryEncodedFieldColumnWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryEncodedFieldColumnWriter.java @@ -19,6 +19,7 @@ package org.apache.druid.segment.nested; +import com.google.common.base.Preconditions; import com.google.common.primitives.Ints; import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap; import it.unimi.dsi.fastutil.ints.IntArrays; @@ -52,7 +53,7 @@ * for all literal writers, which for this type of writer entails building a local dictionary to map into to the global * dictionary ({@link #localDictionary}) and writes this unsorted localId to an intermediate integer column, * {@link #intermediateValueWriter}. - * + *

* When processing the 'raw' value column is complete, the {@link #writeTo(int, FileSmoosher)} method will sort the * local ids and write them out to a local sorted dictionary, iterate over {@link #intermediateValueWriter} swapping * the unsorted local ids with the sorted ids and writing to the compressed id column writer @@ -134,6 +135,7 @@ public void addValue(int row, Object val) throws IOException } final T value = processValue(val); final int globalId = lookupGlobalId(value); + Preconditions.checkArgument(globalId >= 0, "Value [%s] is not present in global dictionary", value); final int localId = localDictionary.add(globalId); intermediateValueWriter.write(localId); writeValue(value); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryIdLookup.java b/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryIdLookup.java index 48895042cd96..821f3ecc26f0 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryIdLookup.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryIdLookup.java @@ -46,8 +46,11 @@ public class GlobalDictionaryIdLookup public GlobalDictionaryIdLookup() { this.stringLookup = new Object2IntLinkedOpenHashMap<>(); + stringLookup.defaultReturnValue(-1); this.longLookup = new Long2IntLinkedOpenHashMap(); + longLookup.defaultReturnValue(-1); this.doubleLookup = new Double2IntLinkedOpenHashMap(); + doubleLookup.defaultReturnValue(-1); } public void addString(@Nullable String value) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java index 35142bbc816c..44d228e67c5a 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java @@ -238,11 +238,12 @@ public void serializeStringDictionary(Iterable dictionaryValues) throws dictionaryWriter.write(null); globalDictionaryIdLookup.addString(null); for (String value : dictionaryValues) { - if (NullHandling.emptyToNullIfNeeded(value) == null) { + value = NullHandling.emptyToNullIfNeeded(value); + if (value == null) { continue; } + dictionaryWriter.write(value); - value = NullHandling.emptyToNullIfNeeded(value); globalDictionaryIdLookup.addString(value); } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/StringFieldColumnWriter.java b/processing/src/main/java/org/apache/druid/segment/nested/StringFieldColumnWriter.java index 637aa1fb69d4..87078333e442 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/StringFieldColumnWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/StringFieldColumnWriter.java @@ -45,6 +45,9 @@ public StringFieldColumnWriter( @Override String processValue(Object value) { + if (value == null) { + return null; + } return String.valueOf(value); } diff --git a/processing/src/test/java/org/apache/druid/query/SchemaEvolutionTest.java b/processing/src/test/java/org/apache/druid/query/SchemaEvolutionTest.java index ce7a57eaaac6..70ede4c6513a 100644 --- a/processing/src/test/java/org/apache/druid/query/SchemaEvolutionTest.java +++ b/processing/src/test/java/org/apache/druid/query/SchemaEvolutionTest.java @@ -375,7 +375,7 @@ public void testNumericEvolutionFiltering(boolean doVectorize) // Only nonexistent(4) Assert.assertEquals( - timeseriesResult(TestHelper.createExpectedMap( + timeseriesResult(TestHelper.makeMap( "a", NullHandling.defaultLongValue(), "b", diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index defca9cb17d9..7f6d0e04b8bc 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -24,41 +24,21 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.guice.NestedDataModule; import org.apache.druid.java.util.common.Intervals; -import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; -import org.apache.druid.java.util.common.guava.Yielder; -import org.apache.druid.java.util.common.guava.Yielders; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.Druids; import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.Query; import org.apache.druid.query.aggregation.AggregationTestHelper; -import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; -import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.Cursor; -import org.apache.druid.segment.DoubleColumnSelector; -import org.apache.druid.segment.LongColumnSelector; import org.apache.druid.segment.Segment; -import org.apache.druid.segment.StorageAdapter; -import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; -import org.apache.druid.segment.nested.NestedPathFinder; -import org.apache.druid.segment.nested.NestedPathPart; -import org.apache.druid.segment.vector.BaseDoubleVectorValueSelector; -import org.apache.druid.segment.vector.BaseLongVectorValueSelector; -import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; -import org.apache.druid.segment.vector.VectorColumnSelectorFactory; -import org.apache.druid.segment.vector.VectorCursor; -import org.apache.druid.segment.vector.VectorObjectSelector; -import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.After; @@ -74,14 +54,6 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest { private static final Logger LOG = new Logger(NestedDataScanQueryTest.class); - private static final String NESTED_LONG_FIELD = "long"; - private static final String NESTED_DOUBLE_FIELD = "double"; - private static final String NESTED_MIXED_NUMERIC_FIELD = "mixed_numeric"; - private static final String NESTED_MIXED_FIELD = "mixed"; - private static final String NESTED_SPARSE_LONG_FIELD = "sparse_long"; - private static final String NESTED_SPARSE_DOUBLE_FIELD = "sparse_double"; - private static final String NESTED_SPARSE_MIXED_NUMERIC_FIELD = "sparse_mixed_numeric"; - private static final String NESTED_SPARSE_MIXED_FIELD = "sparse_mixed"; private final AggregationTestHelper helper; private final Closer closer; @@ -411,291 +383,6 @@ public void testIngestAndScanSegmentsAndRangeFilter() throws Exception Assert.assertEquals(4, ((List) results.get(0).getEvents()).size()); } - @Test - public void testExpectedTypes() throws Exception - { - // "Line matches the illegal pattern 'ObjectColumnSelector, LongColumnSelector, FloatColumnSelector - // and DoubleColumnSelector must not be used in an instanceof statement, see Javadoc of those interfaces." - //CHECKSTYLE.OFF: Regexp - ColumnSelectorFactory columnSelectorFactory = getNumericColumnSelectorFactory( - makeNestedNumericVirtualColumns() - ); - - ColumnValueSelector longValueSelector = columnSelectorFactory.makeColumnValueSelector( - NESTED_LONG_FIELD - ); - Assert.assertNotNull(longValueSelector); - Assert.assertTrue(longValueSelector instanceof LongColumnSelector); - - ColumnValueSelector doubleValueSelector = columnSelectorFactory.makeColumnValueSelector( - NESTED_DOUBLE_FIELD - ); - Assert.assertNotNull(doubleValueSelector); - Assert.assertTrue(doubleValueSelector instanceof DoubleColumnSelector); - - ColumnValueSelector mixedNumericValueSelector = columnSelectorFactory.makeColumnValueSelector( - NESTED_MIXED_NUMERIC_FIELD - ); - Assert.assertNotNull(mixedNumericValueSelector); - Assert.assertTrue(mixedNumericValueSelector instanceof ColumnValueSelector); - - ColumnValueSelector mixedValueSelector = columnSelectorFactory.makeColumnValueSelector( - NESTED_MIXED_FIELD - ); - Assert.assertNotNull(mixedValueSelector); - Assert.assertTrue(mixedValueSelector instanceof ColumnValueSelector); - - - ColumnValueSelector sparseLongValueSelector = columnSelectorFactory.makeColumnValueSelector( - NESTED_SPARSE_LONG_FIELD - ); - Assert.assertNotNull(sparseLongValueSelector); - Assert.assertTrue(sparseLongValueSelector instanceof LongColumnSelector); - - ColumnValueSelector sparseDoubleValueSelector = columnSelectorFactory.makeColumnValueSelector( - NESTED_SPARSE_DOUBLE_FIELD - ); - Assert.assertNotNull(sparseDoubleValueSelector); - Assert.assertTrue(sparseDoubleValueSelector instanceof DoubleColumnSelector); - - ColumnValueSelector sparseMixedNumericValueSelector = columnSelectorFactory.makeColumnValueSelector( - NESTED_SPARSE_MIXED_NUMERIC_FIELD - ); - Assert.assertNotNull(sparseMixedNumericValueSelector); - Assert.assertTrue(sparseMixedNumericValueSelector instanceof ColumnValueSelector); - - ColumnValueSelector sparseMixedValueSelector = columnSelectorFactory.makeColumnValueSelector( - NESTED_SPARSE_MIXED_FIELD - ); - Assert.assertNotNull(sparseMixedValueSelector); - Assert.assertTrue(sparseMixedValueSelector instanceof ColumnValueSelector); - //CHECKSTYLE.ON: Regexp - } - - @Test - public void testExpectedTypesVectorSelectors() throws Exception - { - // "Line matches the illegal pattern 'ObjectColumnSelector, LongColumnSelector, FloatColumnSelector - // and DoubleColumnSelector must not be used in an instanceof statement, see Javadoc of those interfaces." - //CHECKSTYLE.OFF: Regexp - VectorColumnSelectorFactory factory = getVectorColumnSelectorFactory( - makeNestedNumericVirtualColumns() - ); - - // can make numeric value selectors for single typed numeric types - VectorValueSelector longValueSelector = factory.makeValueSelector( - NESTED_LONG_FIELD - ); - Assert.assertNotNull(longValueSelector); - Assert.assertTrue(longValueSelector instanceof BaseLongVectorValueSelector); - - VectorValueSelector doubleValueSelector = factory.makeValueSelector( - NESTED_DOUBLE_FIELD - ); - Assert.assertNotNull(doubleValueSelector); - Assert.assertTrue(doubleValueSelector instanceof BaseDoubleVectorValueSelector); - - Assert.assertThrows(UOE.class, () -> factory.makeValueSelector(NESTED_MIXED_NUMERIC_FIELD)); - Assert.assertThrows(UOE.class, () -> factory.makeValueSelector(NESTED_MIXED_FIELD)); - - // can also make single value dimension selectors for all nested column types - SingleValueDimensionVectorSelector longDimensionSelector = factory.makeSingleValueDimensionSelector( - DefaultDimensionSpec.of(NESTED_LONG_FIELD) - ); - Assert.assertNotNull(longDimensionSelector); - - SingleValueDimensionVectorSelector doubleDimensionSelector = factory.makeSingleValueDimensionSelector( - DefaultDimensionSpec.of(NESTED_DOUBLE_FIELD) - ); - Assert.assertNotNull(doubleDimensionSelector); - - SingleValueDimensionVectorSelector mixedNumericValueSelector = factory.makeSingleValueDimensionSelector( - DefaultDimensionSpec.of(NESTED_MIXED_NUMERIC_FIELD) - ); - Assert.assertNotNull(mixedNumericValueSelector); - - SingleValueDimensionVectorSelector mixedValueSelector = factory.makeSingleValueDimensionSelector( - DefaultDimensionSpec.of(NESTED_MIXED_FIELD) - ); - Assert.assertNotNull(mixedValueSelector); - - // and object selectors - VectorObjectSelector longObjectSelector = factory.makeObjectSelector( - NESTED_LONG_FIELD - ); - Assert.assertNotNull(longObjectSelector); - - VectorObjectSelector doubleObjectSelector = factory.makeObjectSelector( - NESTED_DOUBLE_FIELD - ); - Assert.assertNotNull(doubleObjectSelector); - - VectorObjectSelector mixedNumericObjectSelector = factory.makeObjectSelector( - NESTED_MIXED_NUMERIC_FIELD - ); - Assert.assertNotNull(mixedNumericObjectSelector); - - VectorObjectSelector mixedObjectSelector = factory.makeObjectSelector( - NESTED_MIXED_FIELD - ); - Assert.assertNotNull(mixedObjectSelector); - //CHECKSTYLE.ON: Regexp - } - - private VirtualColumns makeNestedNumericVirtualColumns() - { - List longParts = NestedPathFinder.parseJqPath(".long"); - List doubleParts = NestedPathFinder.parseJqPath(".double"); - List mixedNumericParts = NestedPathFinder.parseJqPath(".mixed_numeric"); - List mixedParts = NestedPathFinder.parseJqPath(".mixed"); - List sparseLongParts = NestedPathFinder.parseJqPath(".sparse_long"); - List sparseDoubleParts = NestedPathFinder.parseJqPath(".sparse_double"); - List sparseMixedNumericParts = NestedPathFinder.parseJqPath(".sparse_mixed_numeric"); - List sparseMixedParts = NestedPathFinder.parseJqPath(".sparse_mixed"); - - NestedFieldVirtualColumn longVirtualColumn = new NestedFieldVirtualColumn( - "nest", - NESTED_LONG_FIELD, - ColumnType.LONG, - longParts, - false, - null, - null - ); - NestedFieldVirtualColumn doubleVirtualColumn = new NestedFieldVirtualColumn( - "nest", - NESTED_DOUBLE_FIELD, - ColumnType.DOUBLE, - doubleParts, - false, - null, - null - ); - NestedFieldVirtualColumn mixedNumericVirtualColumn = new NestedFieldVirtualColumn( - "nest", - NESTED_MIXED_NUMERIC_FIELD, - null, - mixedNumericParts, - false, - null, - null - ); - NestedFieldVirtualColumn mixedVirtualColumn = new NestedFieldVirtualColumn( - "nest", - NESTED_MIXED_FIELD, - null, - mixedParts, - false, - null, - null - ); - - NestedFieldVirtualColumn sparseLongVirtualColumn = new NestedFieldVirtualColumn( - "nest", - NESTED_SPARSE_LONG_FIELD, - ColumnType.LONG, - sparseLongParts, - false, - null, - null - ); - NestedFieldVirtualColumn sparseDoubleVirtualColumn = new NestedFieldVirtualColumn( - "nest", - NESTED_SPARSE_DOUBLE_FIELD, - ColumnType.DOUBLE, - sparseDoubleParts, - false, - null, - null - ); - NestedFieldVirtualColumn sparseMixedNumericVirtualColumn = new NestedFieldVirtualColumn( - "nest", - NESTED_SPARSE_MIXED_NUMERIC_FIELD, - null, - sparseMixedNumericParts, - false, - null, - null - ); - NestedFieldVirtualColumn sparseMixedVirtualColumn = new NestedFieldVirtualColumn( - "nest", - NESTED_SPARSE_MIXED_FIELD, - null, - sparseMixedParts, - false, - null, - null - ); - - return VirtualColumns.create( - ImmutableList.of( - longVirtualColumn, - doubleVirtualColumn, - mixedNumericVirtualColumn, - mixedVirtualColumn, - sparseLongVirtualColumn, - sparseDoubleVirtualColumn, - sparseMixedNumericVirtualColumn, - sparseMixedVirtualColumn - ) - ); - } - - private ColumnSelectorFactory getNumericColumnSelectorFactory(VirtualColumns virtualColumns) throws Exception - { - List segments = NestedDataTestUtils.createSegments( - helper, - tempFolder, - closer, - NestedDataTestUtils.NUMERIC_DATA_FILE, - NestedDataTestUtils.NUMERIC_PARSER_FILE, - NestedDataTestUtils.SIMPLE_AGG_FILE, - Granularities.DAY, - true, - 1000 - ); - Assert.assertEquals(1, segments.size()); - StorageAdapter storageAdapter = segments.get(0).asStorageAdapter(); - Sequence cursorSequence = storageAdapter.makeCursors( - null, - Intervals.ETERNITY, - virtualColumns, - Granularities.DAY, - false, - null - ); - final Yielder yielder = Yielders.each(cursorSequence); - closer.register(yielder); - final Cursor cursor = yielder.get(); - return cursor.getColumnSelectorFactory(); - } - - private VectorColumnSelectorFactory getVectorColumnSelectorFactory(VirtualColumns virtualColumns) throws Exception - { - List segments = NestedDataTestUtils.createSegments( - helper, - tempFolder, - closer, - NestedDataTestUtils.NUMERIC_DATA_FILE, - NestedDataTestUtils.NUMERIC_PARSER_FILE, - NestedDataTestUtils.SIMPLE_AGG_FILE, - Granularities.DAY, - true, - 1000 - ); - Assert.assertEquals(1, segments.size()); - StorageAdapter storageAdapter = segments.get(0).asStorageAdapter(); - VectorCursor cursor = storageAdapter.makeVectorCursor( - null, - Intervals.ETERNITY, - virtualColumns, - false, - 512, - null - ); - return cursor.getColumnSelectorFactory(); - } - private static void logResults(List results) { StringBuilder bob = new StringBuilder(); diff --git a/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java index d9d73e67effb..d2755b6579ce 100644 --- a/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java @@ -651,7 +651,7 @@ public void testTimeseriesIntervalOutOfRanges() new Result<>( QueryRunnerTestHelper.EMPTY_INTERVAL.getIntervals().get(0).getStart(), new TimeseriesResultValue( - TestHelper.createExpectedMap( + TestHelper.makeMap( "rows", 0L, "index", diff --git a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java index e5a1c41bc422..e77cffbc30e3 100644 --- a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java @@ -19,7 +19,6 @@ package org.apache.druid.segment; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.druid.common.config.NullHandling; @@ -46,8 +45,8 @@ import org.junit.Test; import javax.annotation.Nonnull; -import java.util.HashMap; import java.util.List; +import java.util.Map; public class NestedDataColumnIndexerTest extends InitializedNullHandlingTest { @@ -563,20 +562,8 @@ private MapBasedInputRow makeInputRow( Object... kv ) { - HashMap event = new HashMap<>(); + final Map event = TestHelper.makeMap(explicitNull, kv); event.put("time", timestamp); - Preconditions.checkArgument(kv.length % 2 == 0); - String currentKey = null; - for (int i = 0; i < kv.length; i++) { - if (i % 2 == 0) { - currentKey = (String) kv[i]; - } else { - if (explicitNull || kv[i] != null) { - event.put(currentKey, kv[i]); - } - } - } - return new MapBasedInputRow(timestamp, ImmutableList.copyOf(event.keySet()), event); } } diff --git a/processing/src/test/java/org/apache/druid/segment/SchemalessTestFullTest.java b/processing/src/test/java/org/apache/druid/segment/SchemalessTestFullTest.java index dcd4eee5d277..fdc6f8c2cd44 100644 --- a/processing/src/test/java/org/apache/druid/segment/SchemalessTestFullTest.java +++ b/processing/src/test/java/org/apache/druid/segment/SchemalessTestFullTest.java @@ -866,7 +866,7 @@ public void testEmptySchemas() new Result<>( DateTimes.of("2011-01-12T00:00:00.000Z"), new TimeseriesResultValue( - TestHelper.createExpectedMap( + TestHelper.makeMap( "rows", 1L, "index", NullHandling.replaceWithDefault() ? 0.0D : null, "addRowsIndexConstant", NullHandling.replaceWithDefault() ? 2.0D : null, @@ -881,7 +881,7 @@ public void testEmptySchemas() new Result<>( DateTimes.of("2011-01-12T00:00:00.000Z"), new TimeseriesResultValue( - TestHelper.createExpectedMap( + TestHelper.makeMap( "rows", 0L, "index", NullHandling.replaceWithDefault() ? 0.0D : null, "addRowsIndexConstant", NullHandling.replaceWithDefault() ? 1.0D : null, diff --git a/processing/src/test/java/org/apache/druid/segment/TestHelper.java b/processing/src/test/java/org/apache/druid/segment/TestHelper.java index 525fb61b7c30..4bbf9454c28c 100644 --- a/processing/src/test/java/org/apache/druid/segment/TestHelper.java +++ b/processing/src/test/java/org/apache/druid/segment/TestHelper.java @@ -449,13 +449,20 @@ public static void assertRow(String msg, ResultRow expected, ResultRow actual) } } - public static Map createExpectedMap(Object... vals) + public static Map makeMap(Object... vals) + { + return makeMap(true, vals); + } + + public static Map makeMap(boolean explicitNulls, Object... vals) { Preconditions.checkArgument(vals.length % 2 == 0); Map theVals = new HashMap<>(); for (int i = 0; i < vals.length; i += 2) { - theVals.put(vals[i].toString(), vals[i + 1]); + if (explicitNulls || vals[i + 1] != null) { + theVals.put(vals[i].toString(), vals[i + 1]); + } } return theVals; } diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index a0ad1c6ae281..ce3e40e6556e 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -21,13 +21,13 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import org.apache.druid.collections.bitmap.RoaringBitmapFactory; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.guice.NestedDataModule; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.io.Closer; @@ -94,12 +94,12 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest DefaultBitmapResultFactory resultFactory = new DefaultBitmapResultFactory(new RoaringBitmapFactory()); List> data = ImmutableList.of( - ImmutableMap.of("x", 1L, "y", 1.0, "z", "a", "v", "100"), - ImmutableMap.of("y", 3.0, "z", "d", "v", 1000L), - ImmutableMap.of("x", 5L, "y", 5.0, "z", "b"), - ImmutableMap.of("x", 3L, "y", 4.0, "z", "c", "v", 3000.333), - ImmutableMap.of("x", 2L, "v", "40000"), - ImmutableMap.of("x", 4L, "y", 2.0, "z", "e", "v", 11111L) + TestHelper.makeMap("x", 1L, "y", 1.0, "z", "a", "v", "100", "nullish", "notnull"), + TestHelper.makeMap("y", 3.0, "z", "d", "v", 1000L, "nullish", null), + TestHelper.makeMap("x", 5L, "y", 5.0, "z", "b", "nullish", ""), + TestHelper.makeMap("x", 3L, "y", 4.0, "z", "c", "v", 3000.333, "nullish", "null"), + TestHelper.makeMap("x", 2L, "v", "40000"), + TestHelper.makeMap("x", 4L, "y", 2.0, "z", "e", "v", 11111L, "nullish", null) ); Closer closer = Closer.create(); @@ -320,7 +320,18 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException DruidPredicateIndex vPredicateIndex = vIndexSupplier.as(DruidPredicateIndex.class); NullValueIndex vNulls = vIndexSupplier.as(NullValueIndex.class); - Assert.assertEquals(ImmutableList.of(vPath, xPath, yPath, zPath), column.getNestedFields()); + final List nullishPath = NestedPathFinder.parseJsonPath("$.nullish"); + Assert.assertEquals(ImmutableSet.of(ColumnType.STRING), column.getColumnTypes(nullishPath)); + Assert.assertEquals(ColumnType.STRING, column.getColumnHolder(nullishPath).getCapabilities().toColumnType()); + ColumnValueSelector nullishSelector = column.makeColumnValueSelector(nullishPath, offset); + DimensionSelector nullishDimSelector = column.makeDimensionSelector(nullishPath, offset, null); + ColumnIndexSupplier nullishIndexSupplier = column.getColumnIndexSupplier(nullishPath); + Assert.assertNotNull(nullishIndexSupplier); + StringValueSetIndex nullishValueIndex = nullishIndexSupplier.as(StringValueSetIndex.class); + DruidPredicateIndex nullishPredicateIndex = nullishIndexSupplier.as(DruidPredicateIndex.class); + NullValueIndex nullishNulls = nullishIndexSupplier.as(NullValueIndex.class); + + Assert.assertEquals(ImmutableList.of(nullishPath, vPath, xPath, yPath, zPath), column.getNestedFields()); for (int i = 0; i < data.size(); i++) { Map row = data.get(i); @@ -333,6 +344,7 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException testPath(row, i, "x", xSelector, xDimSelector, xValueIndex, xPredicateIndex, xNulls, ColumnType.LONG); testPath(row, i, "y", ySelector, yDimSelector, yValueIndex, yPredicateIndex, yNulls, ColumnType.DOUBLE); testPath(row, i, "z", zSelector, zDimSelector, zValueIndex, zPredicateIndex, zNulls, ColumnType.STRING); + testPath(row, i, "nullish", nullishSelector, nullishDimSelector, nullishValueIndex, nullishPredicateIndex, nullishNulls, ColumnType.STRING); offset.increment(); } @@ -350,16 +362,22 @@ private void testPath( @Nullable ColumnType singleType ) { - if (row.containsKey(path) && row.get(path) != null) { - Assert.assertEquals(row.get(path), valueSelector.getObject()); + final Object inputValue = row.get(path); + // in default value mode, even though the input row had an empty string, the selector spits out null, so we want + // to take the null checking path + final boolean isStringAndNullEquivalent = + inputValue instanceof String && NullHandling.isNullOrEquivalent((String) inputValue); + + if (row.containsKey(path) && inputValue != null && !isStringAndNullEquivalent) { + Assert.assertEquals(inputValue, valueSelector.getObject()); if (ColumnType.LONG.equals(singleType)) { - Assert.assertEquals(row.get(path), valueSelector.getLong()); + Assert.assertEquals(inputValue, valueSelector.getLong()); } else if (ColumnType.DOUBLE.equals(singleType)) { - Assert.assertEquals((double) row.get(path), valueSelector.getDouble(), 0.0); + Assert.assertEquals((double) inputValue, valueSelector.getDouble(), 0.0); } Assert.assertFalse(valueSelector.isNull()); - final String theString = String.valueOf(row.get(path)); + final String theString = String.valueOf(inputValue); Assert.assertEquals(theString, dimSelector.getObject()); String dimSelectorLookupVal = dimSelector.lookupName(dimSelector.getRow().get(0)); Assert.assertEquals(theString, dimSelectorLookupVal); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnSelectorsTest.java new file mode 100644 index 000000000000..cf65e483ba63 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnSelectorsTest.java @@ -0,0 +1,380 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.nested; + +import com.fasterxml.jackson.databind.Module; +import com.google.common.collect.ImmutableList; +import org.apache.druid.guice.NestedDataModule; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.UOE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.guava.Yielder; +import org.apache.druid.java.util.common.guava.Yielders; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.query.NestedDataTestUtils; +import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.DoubleColumnSelector; +import org.apache.druid.segment.LongColumnSelector; +import org.apache.druid.segment.Segment; +import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.BaseDoubleVectorValueSelector; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorCursor; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; +import org.junit.After; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.util.List; + +public class NestedFieldLiteralColumnSelectorsTest +{ + private static final String NESTED_LONG_FIELD = "long"; + private static final String NESTED_DOUBLE_FIELD = "double"; + private static final String NESTED_MIXED_NUMERIC_FIELD = "mixed_numeric"; + private static final String NESTED_MIXED_FIELD = "mixed"; + private static final String NESTED_SPARSE_LONG_FIELD = "sparse_long"; + private static final String NESTED_SPARSE_DOUBLE_FIELD = "sparse_double"; + private static final String NESTED_SPARSE_MIXED_NUMERIC_FIELD = "sparse_mixed_numeric"; + private static final String NESTED_SPARSE_MIXED_FIELD = "sparse_mixed"; + + + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + + private final AggregationTestHelper helper; + private final Closer closer; + + public NestedFieldLiteralColumnSelectorsTest() + { + NestedDataModule.registerHandlersAndSerde(); + List mods = NestedDataModule.getJacksonModulesList(); + this.helper = AggregationTestHelper.createScanQueryAggregationTestHelper( + mods, + tempFolder + ); + this.closer = Closer.create(); + } + + @After + public void teardown() throws IOException + { + closer.close(); + } + + @Test + public void testExpectedTypes() throws Exception + { + // "Line matches the illegal pattern 'ObjectColumnSelector, LongColumnSelector, FloatColumnSelector + // and DoubleColumnSelector must not be used in an instanceof statement, see Javadoc of those interfaces." + //CHECKSTYLE.OFF: Regexp + ColumnSelectorFactory columnSelectorFactory = getNumericColumnSelectorFactory( + makeNestedNumericVirtualColumns() + ); + + ColumnValueSelector longValueSelector = columnSelectorFactory.makeColumnValueSelector( + NESTED_LONG_FIELD + ); + Assert.assertNotNull(longValueSelector); + Assert.assertTrue(longValueSelector instanceof LongColumnSelector); + + ColumnValueSelector doubleValueSelector = columnSelectorFactory.makeColumnValueSelector( + NESTED_DOUBLE_FIELD + ); + Assert.assertNotNull(doubleValueSelector); + Assert.assertTrue(doubleValueSelector instanceof DoubleColumnSelector); + + ColumnValueSelector mixedNumericValueSelector = columnSelectorFactory.makeColumnValueSelector( + NESTED_MIXED_NUMERIC_FIELD + ); + Assert.assertNotNull(mixedNumericValueSelector); + Assert.assertTrue(mixedNumericValueSelector instanceof ColumnValueSelector); + + ColumnValueSelector mixedValueSelector = columnSelectorFactory.makeColumnValueSelector( + NESTED_MIXED_FIELD + ); + Assert.assertNotNull(mixedValueSelector); + Assert.assertTrue(mixedValueSelector instanceof ColumnValueSelector); + + + ColumnValueSelector sparseLongValueSelector = columnSelectorFactory.makeColumnValueSelector( + NESTED_SPARSE_LONG_FIELD + ); + Assert.assertNotNull(sparseLongValueSelector); + Assert.assertTrue(sparseLongValueSelector instanceof LongColumnSelector); + + ColumnValueSelector sparseDoubleValueSelector = columnSelectorFactory.makeColumnValueSelector( + NESTED_SPARSE_DOUBLE_FIELD + ); + Assert.assertNotNull(sparseDoubleValueSelector); + Assert.assertTrue(sparseDoubleValueSelector instanceof DoubleColumnSelector); + + ColumnValueSelector sparseMixedNumericValueSelector = columnSelectorFactory.makeColumnValueSelector( + NESTED_SPARSE_MIXED_NUMERIC_FIELD + ); + Assert.assertNotNull(sparseMixedNumericValueSelector); + Assert.assertTrue(sparseMixedNumericValueSelector instanceof ColumnValueSelector); + + ColumnValueSelector sparseMixedValueSelector = columnSelectorFactory.makeColumnValueSelector( + NESTED_SPARSE_MIXED_FIELD + ); + Assert.assertNotNull(sparseMixedValueSelector); + Assert.assertTrue(sparseMixedValueSelector instanceof ColumnValueSelector); + //CHECKSTYLE.ON: Regexp + } + + @Test + public void testExpectedTypesVectorSelectors() throws Exception + { + // "Line matches the illegal pattern 'ObjectColumnSelector, LongColumnSelector, FloatColumnSelector + // and DoubleColumnSelector must not be used in an instanceof statement, see Javadoc of those interfaces." + //CHECKSTYLE.OFF: Regexp + VectorColumnSelectorFactory factory = getVectorColumnSelectorFactory( + makeNestedNumericVirtualColumns() + ); + + // can make numeric value selectors for single typed numeric types + VectorValueSelector longValueSelector = factory.makeValueSelector( + NESTED_LONG_FIELD + ); + Assert.assertNotNull(longValueSelector); + Assert.assertTrue(longValueSelector instanceof BaseLongVectorValueSelector); + + VectorValueSelector doubleValueSelector = factory.makeValueSelector( + NESTED_DOUBLE_FIELD + ); + Assert.assertNotNull(doubleValueSelector); + Assert.assertTrue(doubleValueSelector instanceof BaseDoubleVectorValueSelector); + + Assert.assertThrows(UOE.class, () -> factory.makeValueSelector(NESTED_MIXED_NUMERIC_FIELD)); + Assert.assertThrows(UOE.class, () -> factory.makeValueSelector(NESTED_MIXED_FIELD)); + + // can also make single value dimension selectors for all nested column types + SingleValueDimensionVectorSelector longDimensionSelector = factory.makeSingleValueDimensionSelector( + DefaultDimensionSpec.of(NESTED_LONG_FIELD) + ); + Assert.assertNotNull(longDimensionSelector); + + SingleValueDimensionVectorSelector doubleDimensionSelector = factory.makeSingleValueDimensionSelector( + DefaultDimensionSpec.of(NESTED_DOUBLE_FIELD) + ); + Assert.assertNotNull(doubleDimensionSelector); + + SingleValueDimensionVectorSelector mixedNumericValueSelector = factory.makeSingleValueDimensionSelector( + DefaultDimensionSpec.of(NESTED_MIXED_NUMERIC_FIELD) + ); + Assert.assertNotNull(mixedNumericValueSelector); + + SingleValueDimensionVectorSelector mixedValueSelector = factory.makeSingleValueDimensionSelector( + DefaultDimensionSpec.of(NESTED_MIXED_FIELD) + ); + Assert.assertNotNull(mixedValueSelector); + + // and object selectors + VectorObjectSelector longObjectSelector = factory.makeObjectSelector( + NESTED_LONG_FIELD + ); + Assert.assertNotNull(longObjectSelector); + + VectorObjectSelector doubleObjectSelector = factory.makeObjectSelector( + NESTED_DOUBLE_FIELD + ); + Assert.assertNotNull(doubleObjectSelector); + + VectorObjectSelector mixedNumericObjectSelector = factory.makeObjectSelector( + NESTED_MIXED_NUMERIC_FIELD + ); + Assert.assertNotNull(mixedNumericObjectSelector); + + VectorObjectSelector mixedObjectSelector = factory.makeObjectSelector( + NESTED_MIXED_FIELD + ); + Assert.assertNotNull(mixedObjectSelector); + //CHECKSTYLE.ON: Regexp + } + + private VirtualColumns makeNestedNumericVirtualColumns() + { + List longParts = NestedPathFinder.parseJqPath(".long"); + List doubleParts = NestedPathFinder.parseJqPath(".double"); + List mixedNumericParts = NestedPathFinder.parseJqPath(".mixed_numeric"); + List mixedParts = NestedPathFinder.parseJqPath(".mixed"); + List sparseLongParts = NestedPathFinder.parseJqPath(".sparse_long"); + List sparseDoubleParts = NestedPathFinder.parseJqPath(".sparse_double"); + List sparseMixedNumericParts = NestedPathFinder.parseJqPath(".sparse_mixed_numeric"); + List sparseMixedParts = NestedPathFinder.parseJqPath(".sparse_mixed"); + + NestedFieldVirtualColumn longVirtualColumn = new NestedFieldVirtualColumn( + "nest", + NESTED_LONG_FIELD, + ColumnType.LONG, + longParts, + false, + null, + null + ); + NestedFieldVirtualColumn doubleVirtualColumn = new NestedFieldVirtualColumn( + "nest", + NESTED_DOUBLE_FIELD, + ColumnType.DOUBLE, + doubleParts, + false, + null, + null + ); + NestedFieldVirtualColumn mixedNumericVirtualColumn = new NestedFieldVirtualColumn( + "nest", + NESTED_MIXED_NUMERIC_FIELD, + null, + mixedNumericParts, + false, + null, + null + ); + NestedFieldVirtualColumn mixedVirtualColumn = new NestedFieldVirtualColumn( + "nest", + NESTED_MIXED_FIELD, + null, + mixedParts, + false, + null, + null + ); + + NestedFieldVirtualColumn sparseLongVirtualColumn = new NestedFieldVirtualColumn( + "nest", + NESTED_SPARSE_LONG_FIELD, + ColumnType.LONG, + sparseLongParts, + false, + null, + null + ); + NestedFieldVirtualColumn sparseDoubleVirtualColumn = new NestedFieldVirtualColumn( + "nest", + NESTED_SPARSE_DOUBLE_FIELD, + ColumnType.DOUBLE, + sparseDoubleParts, + false, + null, + null + ); + NestedFieldVirtualColumn sparseMixedNumericVirtualColumn = new NestedFieldVirtualColumn( + "nest", + NESTED_SPARSE_MIXED_NUMERIC_FIELD, + null, + sparseMixedNumericParts, + false, + null, + null + ); + NestedFieldVirtualColumn sparseMixedVirtualColumn = new NestedFieldVirtualColumn( + "nest", + NESTED_SPARSE_MIXED_FIELD, + null, + sparseMixedParts, + false, + null, + null + ); + + return VirtualColumns.create( + ImmutableList.of( + longVirtualColumn, + doubleVirtualColumn, + mixedNumericVirtualColumn, + mixedVirtualColumn, + sparseLongVirtualColumn, + sparseDoubleVirtualColumn, + sparseMixedNumericVirtualColumn, + sparseMixedVirtualColumn + ) + ); + } + + private ColumnSelectorFactory getNumericColumnSelectorFactory(VirtualColumns virtualColumns) throws Exception + { + List segments = NestedDataTestUtils.createSegments( + helper, + tempFolder, + closer, + NestedDataTestUtils.NUMERIC_DATA_FILE, + NestedDataTestUtils.NUMERIC_PARSER_FILE, + NestedDataTestUtils.SIMPLE_AGG_FILE, + Granularities.DAY, + true, + 1000 + ); + Assert.assertEquals(1, segments.size()); + StorageAdapter storageAdapter = segments.get(0).asStorageAdapter(); + Sequence cursorSequence = storageAdapter.makeCursors( + null, + Intervals.ETERNITY, + virtualColumns, + Granularities.DAY, + false, + null + ); + final Yielder yielder = Yielders.each(cursorSequence); + closer.register(yielder); + final Cursor cursor = yielder.get(); + return cursor.getColumnSelectorFactory(); + } + + private VectorColumnSelectorFactory getVectorColumnSelectorFactory(VirtualColumns virtualColumns) throws Exception + { + List segments = NestedDataTestUtils.createSegments( + helper, + tempFolder, + closer, + NestedDataTestUtils.NUMERIC_DATA_FILE, + NestedDataTestUtils.NUMERIC_PARSER_FILE, + NestedDataTestUtils.SIMPLE_AGG_FILE, + Granularities.DAY, + true, + 1000 + ); + Assert.assertEquals(1, segments.size()); + StorageAdapter storageAdapter = segments.get(0).asStorageAdapter(); + VectorCursor cursor = storageAdapter.makeVectorCursor( + null, + Intervals.ETERNITY, + virtualColumns, + false, + 512, + null + ); + return cursor.getColumnSelectorFactory(); + } +} diff --git a/server/src/test/java/org/apache/druid/query/dimension/LookupDimensionSpecTest.java b/server/src/test/java/org/apache/druid/query/dimension/LookupDimensionSpecTest.java index 59b4ae974d91..aef1336779f7 100644 --- a/server/src/test/java/org/apache/druid/query/dimension/LookupDimensionSpecTest.java +++ b/server/src/test/java/org/apache/druid/query/dimension/LookupDimensionSpecTest.java @@ -140,11 +140,11 @@ public Object[] parametersForTestApply() }, new Object[]{ new LookupDimensionSpec("dimName", "outputName", MAP_LOOKUP_EXTRACTOR, false, null, null, true, null), - TestHelper.createExpectedMap("not there", null) + TestHelper.makeMap("not there", null) }, new Object[]{ new LookupDimensionSpec("dimName", "outputName", null, false, null, "lookupName", true, LOOKUP_REF_MANAGER), - TestHelper.createExpectedMap("not there", null) + TestHelper.makeMap("not there", null) }, new Object[]{ new LookupDimensionSpec("dimName", "outputName", MAP_LOOKUP_EXTRACTOR, false, "Missing_value", null, From 6d1e2dc5606571c4c152c0af8934d28a526614dd Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 26 Jan 2023 23:29:33 -0800 Subject: [PATCH 2/7] oh wow, that one is worse --- .../druid/segment/SimpleDictionaryMergingIterator.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/segment/SimpleDictionaryMergingIterator.java b/processing/src/main/java/org/apache/druid/segment/SimpleDictionaryMergingIterator.java index 99c37f065857..d38506fe7241 100644 --- a/processing/src/main/java/org/apache/druid/segment/SimpleDictionaryMergingIterator.java +++ b/processing/src/main/java/org/apache/druid/segment/SimpleDictionaryMergingIterator.java @@ -90,7 +90,11 @@ public T next() } while (!pQueue.isEmpty() && Objects.equals(value, pQueue.peek().peek())) { - pQueue.remove(); + PeekingIterator same = pQueue.remove(); + same.next(); + if (same.hasNext()) { + pQueue.add(same); + } } counter++; From 4b13f58f9244b9d68d6c9115b8e6ed5eae790348 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 27 Jan 2023 03:04:08 -0800 Subject: [PATCH 3/7] more test --- .../druid/query/NestedDataTestUtils.java | 89 +++++++++++++++++++ .../query/scan/NestedDataScanQueryTest.java | 72 ++++++++++++++- .../SimpleDictionaryMergingIteratorTest.java | 57 ++++++++++++ 3 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 processing/src/test/java/org/apache/druid/segment/SimpleDictionaryMergingIteratorTest.java diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index daa4f14b6428..2703e1a90265 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -45,8 +45,11 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.SequenceInputStream; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Collections; import java.util.List; public class NestedDataTestUtils @@ -223,6 +226,92 @@ public static List createSegments( return segments; } + public static List createSegmentsWithConcatenatedInput( + AggregationTestHelper helper, + TemporaryFolder tempFolder, + Closer closer, + Granularity granularity, + boolean rollup, + int maxRowCount, + int numCopies, + int numSegments + ) throws Exception + { + return createSegmentsWithConcatenatedInput( + helper, + tempFolder, + closer, + SIMPLE_DATA_FILE, + SIMPLE_PARSER_FILE, + null, + SIMPLE_AGG_FILE, + granularity, + rollup, + maxRowCount, + numCopies, + numSegments + ); + } + + /** + * turn small test data into bigger test data by duplicating itself into a bigger stream + */ + public static List createSegmentsWithConcatenatedInput( + AggregationTestHelper helper, + TemporaryFolder tempFolder, + Closer closer, + String inputFileName, + String parserJsonFileName, + String transformSpecJsonFileName, + String aggJsonFileName, + Granularity granularity, + boolean rollup, + int maxRowCount, + int numCopies, + int numSegments + ) throws Exception + { + String parserJson = readFileFromClasspathAsString(parserJsonFileName); + String transformSpecJson = transformSpecJsonFileName != null ? readFileFromClasspathAsString(transformSpecJsonFileName) : null; + String aggJson = readFileFromClasspathAsString(aggJsonFileName); + + List segmentDirs = Lists.newArrayListWithCapacity(numSegments); + for (int i = 0; i < numSegments; i++) { + List inputStreams = Lists.newArrayListWithCapacity(numCopies); + for (int j = 0; j < numCopies; j++) { + inputStreams.add(new FileInputStream(readFileFromClasspath(inputFileName))); + } + SequenceInputStream inputDataStream = new SequenceInputStream(Collections.enumeration(inputStreams)); + File segmentDir = tempFolder.newFolder(); + helper.createIndex( + inputDataStream, + parserJson, + transformSpecJson, + aggJson, + segmentDir, + 0, + granularity, + maxRowCount, + rollup + ); + segmentDirs.add(segmentDir); + } + + final List segments = Lists.transform( + segmentDirs, + dir -> { + try { + return closer.register(new QueryableIndexSegment(helper.getIndexIO().loadIndex(dir), SegmentId.dummy(""))); + } + catch (IOException ex) { + throw new RuntimeException(ex); + } + } + ); + + return segments; + } + public static Segment createIncrementalIndex( String inputFileName, String parserJsonFileName, diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index 7f6d0e04b8bc..52b466422ccb 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -314,6 +314,75 @@ public void testIngestWithMergesAndScanSegments() throws Exception logResults(results); } + @Test + public void testIngestWithMoreMergesAndScanSegments() throws Exception + { + Query scanQuery = Druids.newScanQueryBuilder() + .dataSource("test_datasource") + .intervals( + new MultipleIntervalSegmentSpec( + Collections.singletonList(Intervals.ETERNITY) + ) + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(100) + .context(ImmutableMap.of()) + .build(); + + + List segs = NestedDataTestUtils.createSegmentsWithConcatenatedInput( + helper, + tempFolder, + closer, + Granularities.HOUR, + false, + 100, + 100, + 1 + ); + final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); + + List results = seq.toList(); + logResults(results); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(100, ((List) results.get(0).getEvents()).size()); + } + + @Test + public void testIngestWithMoreMergesAndScanSegmentsRollup() throws Exception + { + Query scanQuery = Druids.newScanQueryBuilder() + .dataSource("test_datasource") + .intervals( + new MultipleIntervalSegmentSpec( + Collections.singletonList(Intervals.ETERNITY) + ) + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(100) + .context(ImmutableMap.of()) + .build(); + + + // same rows over and over so expect same 8 rows after rollup + List segs = NestedDataTestUtils.createSegmentsWithConcatenatedInput( + helper, + tempFolder, + closer, + Granularities.HOUR, + true, + 5, + 100, + 1 + ); + final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(8, ((List) results.get(0).getEvents()).size()); + logResults(results); + } + @Test public void testIngestAndScanSegmentsAndFilter() throws Exception { @@ -386,8 +455,9 @@ public void testIngestAndScanSegmentsAndRangeFilter() throws Exception private static void logResults(List results) { StringBuilder bob = new StringBuilder(); + int ctr = 0; for (Object event : (List) results.get(0).getEvents()) { - bob.append("[").append(event).append("]").append("\n"); + bob.append("row:").append(++ctr).append(" - ").append(event).append("\n"); } LOG.info("results:\n%s", bob); } diff --git a/processing/src/test/java/org/apache/druid/segment/SimpleDictionaryMergingIteratorTest.java b/processing/src/test/java/org/apache/druid/segment/SimpleDictionaryMergingIteratorTest.java new file mode 100644 index 000000000000..a1fede7db4d2 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/SimpleDictionaryMergingIteratorTest.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.data.ListIndexed; +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; + +public class SimpleDictionaryMergingIteratorTest +{ + @Test + public void testMergingIterator() + { + final Indexed[] sortedLookups = new Indexed[]{ + new ListIndexed(null, "", "null", "z"), + new ListIndexed("", "a", "b", "c", "d", "e", "f", "g", "h"), + new ListIndexed(null, "b", "c", "null", "z"), + new ListIndexed(null, "hello") + }; + SimpleDictionaryMergingIterator dictionaryMergeIterator = new SimpleDictionaryMergingIterator<>( + sortedLookups, + NestedDataColumnMerger.STRING_MERGING_COMPARATOR + ); + + List expectedSequence = Lists.newArrayListWithExpectedSize(13); + expectedSequence.add(null); + expectedSequence.addAll(ImmutableList.of("", "a", "b", "c", "d", "e", "f", "g", "h", "hello", "null", "z")); + + List actualSequence = Lists.newArrayListWithExpectedSize(13); + while (dictionaryMergeIterator.hasNext()) { + actualSequence.add(dictionaryMergeIterator.next()); + } + Assert.assertEquals(expectedSequence, actualSequence); + } +} From eabc58c6c72aab5eb6419456d04e439858626495 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 27 Jan 2023 03:49:28 -0800 Subject: [PATCH 4/7] fix test gizmo --- .../java/org/apache/druid/query/NestedDataTestUtils.java | 5 +++++ .../apache/druid/query/scan/NestedDataScanQueryTest.java | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index 2703e1a90265..c04d44c29546 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -28,6 +28,7 @@ import org.apache.commons.io.LineIterator; import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.guice.NestedDataModule; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.nary.TrinaryFn; @@ -42,6 +43,7 @@ import org.apache.druid.timeline.SegmentId; import org.junit.rules.TemporaryFolder; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -280,6 +282,9 @@ public static List createSegmentsWithConcatenatedInput( List inputStreams = Lists.newArrayListWithCapacity(numCopies); for (int j = 0; j < numCopies; j++) { inputStreams.add(new FileInputStream(readFileFromClasspath(inputFileName))); + if (j + 1 < numCopies) { + inputStreams.add(new ByteArrayInputStream(StringUtils.toUtf8("\n"))); + } } SequenceInputStream inputDataStream = new SequenceInputStream(Collections.enumeration(inputStreams)); File segmentDir = tempFolder.newFolder(); diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index 52b466422ccb..461d2b843631 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -336,8 +336,8 @@ public void testIngestWithMoreMergesAndScanSegments() throws Exception closer, Granularities.HOUR, false, - 100, - 100, + 5, + 10, 1 ); final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); @@ -345,7 +345,7 @@ public void testIngestWithMoreMergesAndScanSegments() throws Exception List results = seq.toList(); logResults(results); Assert.assertEquals(1, results.size()); - Assert.assertEquals(100, ((List) results.get(0).getEvents()).size()); + Assert.assertEquals(80, ((List) results.get(0).getEvents()).size()); } @Test From 35cb25b9475a12d61b79397ead78eaa46a1a4276 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 27 Jan 2023 04:24:35 -0800 Subject: [PATCH 5/7] more fixes and tests --- ...balDictionaryEncodedFieldColumnWriter.java | 12 +++-- ...edFieldLiteralDictionaryEncodedColumn.java | 3 +- .../nested/StringFieldColumnWriter.java | 3 +- .../druid/query/NestedDataTestUtils.java | 3 ++ .../query/scan/NestedDataScanQueryTest.java | 50 +++++++++++++++++++ .../resources/types-test-data-parser.json | 16 ++++++ .../src/test/resources/types-test-data.json | 8 +++ 7 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 processing/src/test/resources/types-test-data-parser.json create mode 100644 processing/src/test/resources/types-test-data.json diff --git a/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryEncodedFieldColumnWriter.java b/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryEncodedFieldColumnWriter.java index 8c8693d3ec45..381589271b76 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryEncodedFieldColumnWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/GlobalDictionaryEncodedFieldColumnWriter.java @@ -134,9 +134,15 @@ public void addValue(int row, Object val) throws IOException fillNull(row); } final T value = processValue(val); - final int globalId = lookupGlobalId(value); - Preconditions.checkArgument(globalId >= 0, "Value [%s] is not present in global dictionary", value); - final int localId = localDictionary.add(globalId); + final int localId; + // null is always 0 + if (value == null) { + localId = localDictionary.add(0); + } else { + final int globalId = lookupGlobalId(value); + Preconditions.checkArgument(globalId >= 0, "Value [%s] is not present in global dictionary", value); + localId = localDictionary.add(globalId); + } intermediateValueWriter.write(localId); writeValue(value); cursorPosition++; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralDictionaryEncodedColumn.java index 6f794b61dbf6..be942a9dfd08 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralDictionaryEncodedColumn.java @@ -147,9 +147,10 @@ public String lookupName(int id) return StringUtils.fromUtf8Nullable(globalDictionary.get(globalId)); } else if (globalId < globalDictionary.size() + globalLongDictionary.size()) { return String.valueOf(globalLongDictionary.get(globalId - adjustLongId)); - } else { + } else if (globalId < globalDictionary.size() + globalLongDictionary.size() + globalDoubleDictionary.size()) { return String.valueOf(globalDoubleDictionary.get(globalId - adjustDoubleId)); } + return null; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/nested/StringFieldColumnWriter.java b/processing/src/main/java/org/apache/druid/segment/nested/StringFieldColumnWriter.java index 87078333e442..c9f10a269c01 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/StringFieldColumnWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/StringFieldColumnWriter.java @@ -19,6 +19,7 @@ package org.apache.druid.segment.nested; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.writeout.SegmentWriteOutMedium; @@ -48,7 +49,7 @@ String processValue(Object value) if (value == null) { return null; } - return String.valueOf(value); + return NullHandling.emptyToNullIfNeeded(String.valueOf(value)); } @Override diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index c04d44c29546..71aa324d5b4d 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -63,6 +63,9 @@ public class NestedDataTestUtils public static final String SIMPLE_PARSER_TSV_TRANSFORM_FILE = "simple-nested-test-data-tsv-transform.json"; public static final String SIMPLE_AGG_FILE = "simple-nested-test-data-aggs.json"; + public static final String TYPES_DATA_FILE = "types-test-data.json"; + public static final String TYPES_PARSER_FILE = "types-test-data-parser.json"; + public static final String NUMERIC_DATA_FILE = "numeric-nested-test-data.json"; public static final String NUMERIC_PARSER_FILE = "numeric-nested-test-data-parser.json"; diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index 461d2b843631..9ac5a70b72f3 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -452,6 +452,56 @@ public void testIngestAndScanSegmentsAndRangeFilter() throws Exception Assert.assertEquals(4, ((List) results.get(0).getEvents()).size()); } + @Test + public void testIngestAndScanSegmentsRealtimeSchemaDiscovery() throws Exception + { + Query scanQuery = Druids.newScanQueryBuilder() + .dataSource("test_datasource") + .intervals( + new MultipleIntervalSegmentSpec( + Collections.singletonList(Intervals.ETERNITY) + ) + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(100) + .context(ImmutableMap.of()) + .build(); + List realtimeSegs = ImmutableList.of( + NestedDataTestUtils.createIncrementalIndex( + NestedDataTestUtils.TYPES_DATA_FILE, + NestedDataTestUtils.TYPES_PARSER_FILE, + NestedDataTestUtils.SIMPLE_AGG_FILE, + Granularities.DAY, + true, + false, + 1000 + ) + ); + List segs = NestedDataTestUtils.createSegments( + helper, + tempFolder, + closer, + NestedDataTestUtils.TYPES_DATA_FILE, + NestedDataTestUtils.TYPES_PARSER_FILE, + NestedDataTestUtils.SIMPLE_AGG_FILE, + Granularities.DAY, + true, + 100 + ); + + + final Sequence seq = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery); + final Sequence seq2 = helper.runQueryOnSegmentsObjs(segs, scanQuery); + + List resultsRealtime = seq.toList(); + List resultsSegments = seq2.toList(); + logResults(resultsSegments); + logResults(resultsRealtime); + Assert.assertEquals(1, resultsRealtime.size()); + Assert.assertEquals(resultsRealtime.size(), resultsSegments.size()); + Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString()); + } + private static void logResults(List results) { StringBuilder bob = new StringBuilder(); diff --git a/processing/src/test/resources/types-test-data-parser.json b/processing/src/test/resources/types-test-data-parser.json new file mode 100644 index 000000000000..c148d6ef2d92 --- /dev/null +++ b/processing/src/test/resources/types-test-data-parser.json @@ -0,0 +1,16 @@ +{ + "type": "string", + "parseSpec": { + "format": "json", + "timestampSpec": { + "column": "timestamp", + "format": "auto" + }, + "dimensionsSpec": { + "dimensions": [], + "dimensionExclusions": [], + "spatialDimensions": [], + "useNestedColumnIndexerForSchemaDiscovery": true + } + } +} diff --git a/processing/src/test/resources/types-test-data.json b/processing/src/test/resources/types-test-data.json new file mode 100644 index 000000000000..b4f5aa07fc6e --- /dev/null +++ b/processing/src/test/resources/types-test-data.json @@ -0,0 +1,8 @@ +{"timestamp": "2021-01-01", "str":"a", "long":1, "double":1.0, "variant": 1} +{"timestamp": "2021-01-01", "str":"", "long":2, "variant": "b"} +{"timestamp": "2021-01-01", "str":"null", "long":3, "double":2.0, "variant": 3.0} +{"timestamp": "2021-01-01", "str":"b", "long":4, "double":3.3, "variant": "4"} +{"timestamp": "2021-01-01", "str":"c", "long": null, "double":4.4, "variant": "hello"} +{"timestamp": "2021-01-01", "str":"d", "long":5, "double":5.9} +{"timestamp": "2021-01-01", "str":null, "double":null, "variant": 51} +{"timestamp": "2021-01-01", "long":6, "double":1.0, "variant": null} \ No newline at end of file From 7ffe7c0f46514da810c8b41f4e2ee864fa1891f1 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 27 Jan 2023 15:57:31 -0800 Subject: [PATCH 6/7] close the streams --- .../java/org/apache/druid/query/NestedDataTestUtils.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index 71aa324d5b4d..22da08d17911 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -154,7 +154,7 @@ public static List createSegments( { File segmentDir = tempFolder.newFolder(); File inputFile = readFileFromClasspath(inputFileName); - FileInputStream inputDataStream = new FileInputStream(inputFile); + FileInputStream inputDataStream = closer.register(new FileInputStream(inputFile)); String parserJson = readFileFromClasspathAsString(parserJsonFileName); String aggJson = readFileFromClasspathAsString(aggJsonFileName); @@ -199,7 +199,7 @@ public static List createSegments( { File segmentDir = tempFolder.newFolder(); File inputFile = readFileFromClasspath(inputFileName); - FileInputStream inputDataStream = new FileInputStream(inputFile); + FileInputStream inputDataStream = closer.register(new FileInputStream(inputFile)); String parserJson = readFileFromClasspathAsString(parserJsonFileName); String transformSpecJson = readFileFromClasspathAsString(transformSpecJsonFileName); String aggJson = readFileFromClasspathAsString(aggJsonFileName); @@ -284,7 +284,7 @@ public static List createSegmentsWithConcatenatedInput( for (int i = 0; i < numSegments; i++) { List inputStreams = Lists.newArrayListWithCapacity(numCopies); for (int j = 0; j < numCopies; j++) { - inputStreams.add(new FileInputStream(readFileFromClasspath(inputFileName))); + inputStreams.add(closer.register(new FileInputStream(readFileFromClasspath(inputFileName)))); if (j + 1 < numCopies) { inputStreams.add(new ByteArrayInputStream(StringUtils.toUtf8("\n"))); } @@ -355,6 +355,7 @@ public static Segment createIncrementalIndex( maxRowCount, rollup ); + inputDataStream.close(); return new IncrementalIndexSegment(index, SegmentId.dummy("test_datasource")); } From f80089018d1b26efdc932b8220492bce357b2883 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 27 Jan 2023 18:29:22 -0800 Subject: [PATCH 7/7] how about this instead --- .../java/org/apache/druid/query/NestedDataTestUtils.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index 22da08d17911..d75f92e7f1a6 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -154,7 +154,7 @@ public static List createSegments( { File segmentDir = tempFolder.newFolder(); File inputFile = readFileFromClasspath(inputFileName); - FileInputStream inputDataStream = closer.register(new FileInputStream(inputFile)); + FileInputStream inputDataStream = new FileInputStream(inputFile); String parserJson = readFileFromClasspathAsString(parserJsonFileName); String aggJson = readFileFromClasspathAsString(aggJsonFileName); @@ -168,6 +168,7 @@ public static List createSegments( maxRowCount, rollup ); + inputDataStream.close(); final List segments = Lists.transform( ImmutableList.of(segmentDir), @@ -199,7 +200,7 @@ public static List createSegments( { File segmentDir = tempFolder.newFolder(); File inputFile = readFileFromClasspath(inputFileName); - FileInputStream inputDataStream = closer.register(new FileInputStream(inputFile)); + FileInputStream inputDataStream = new FileInputStream(inputFile); String parserJson = readFileFromClasspathAsString(parserJsonFileName); String transformSpecJson = readFileFromClasspathAsString(transformSpecJsonFileName); String aggJson = readFileFromClasspathAsString(aggJsonFileName); @@ -215,6 +216,7 @@ public static List createSegments( maxRowCount, rollup ); + inputDataStream.close(); final List segments = Lists.transform( ImmutableList.of(segmentDir), @@ -284,7 +286,7 @@ public static List createSegmentsWithConcatenatedInput( for (int i = 0; i < numSegments; i++) { List inputStreams = Lists.newArrayListWithCapacity(numCopies); for (int j = 0; j < numCopies; j++) { - inputStreams.add(closer.register(new FileInputStream(readFileFromClasspath(inputFileName)))); + inputStreams.add(new FileInputStream(readFileFromClasspath(inputFileName))); if (j + 1 < numCopies) { inputStreams.add(new ByteArrayInputStream(StringUtils.toUtf8("\n"))); } @@ -302,6 +304,7 @@ public static List createSegmentsWithConcatenatedInput( maxRowCount, rollup ); + inputDataStream.close(); segmentDirs.add(segmentDir); }