From f7293c7a1daf196940791506765ba0d0af4fd371 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Apr 2023 13:58:59 -0700 Subject: [PATCH 1/2] fix NPE that can happen when merging all null nested v4 format columns --- .../segment/nested/CompressedNestedDataComplexColumn.java | 5 ++++- .../java/org/apache/druid/query/NestedDataTestUtils.java | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index d71f3fa9fc30..fb051f74b715 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -134,7 +134,7 @@ public CompressedNestedDataComplexColumn( Supplier stringDictionary, Supplier> longDictionarySupplier, Supplier> doubleDictionarySupplier, - Supplier arrayDictionarySupplier, + @Nullable Supplier arrayDictionarySupplier, SmooshedFileMapper fileMapper, BitmapSerdeFactory bitmapSerdeFactory, ByteOrder byteOrder, @@ -220,6 +220,9 @@ public Indexed getDoubleDictionary() @Override public Indexed getArrayDictionary() { + if (arrayDictionarySupplier == null) { + return Indexed.empty(); + } Iterable arrays = () -> { final TStringDictionary stringDictionary = stringDictionarySupplier.get(); final FixedIndexed longDictionary = longDictionarySupplier.get(); diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index 9a43c77b9c21..f46567c827e2 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -100,7 +100,8 @@ public class NestedDataTestUtils new NestedDataDimensionSchema("nest_json"), new NestedDataDimensionSchema("nester_json"), new NestedDataDimensionSchema("variant_json"), - new NestedDataDimensionSchema("list_json") + new NestedDataDimensionSchema("list_json"), + new NestedDataDimensionSchema("nonexistent") ) ) .build(); From 0ce8280f4ba2001cbfe5d9da2dc5819d89f6c055 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Apr 2023 14:04:50 -0700 Subject: [PATCH 2/2] more test --- .../druid/query/NestedDataTestUtils.java | 42 +++++++++++++++---- .../query/scan/NestedDataScanQueryTest.java | 29 +++++++++++++ 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index f46567c827e2..a4801be561bb 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -40,6 +40,7 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.IncrementalIndexSegment; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.IndexSpec; @@ -92,7 +93,7 @@ public class NestedDataTestUtils .useSchemaDiscovery(true) .build(); - public static final DimensionsSpec TSV_SCHEMA = + public static final DimensionsSpec TSV_V4_SCHEMA = DimensionsSpec.builder() .setDimensions( Arrays.asList( @@ -105,18 +106,26 @@ public class NestedDataTestUtils ) ) .build(); + + public static final DimensionsSpec TSV_SCHEMA = + DimensionsSpec.builder() + .setDimensions( + Arrays.asList( + new AutoTypeColumnSchema("dim"), + new AutoTypeColumnSchema("nest_json"), + new AutoTypeColumnSchema("nester_json"), + new AutoTypeColumnSchema("variant_json"), + new AutoTypeColumnSchema("list_json"), + new AutoTypeColumnSchema("nonexistent") + ) + ) + .build(); public static final InputRowSchema AUTO_SCHEMA = new InputRowSchema( TIMESTAMP_SPEC, AUTO_DISCOVERY, null ); - public static final InputRowSchema SIMPLE_DATA_TSV_SCHEMA = new InputRowSchema( - TIMESTAMP_SPEC, - TSV_SCHEMA, - null - ); - public static DelimitedInputFormat SIMPLE_DATA_TSV_INPUT_FORMAT = new DelimitedInputFormat( Arrays.asList( "timestamp", @@ -162,6 +171,22 @@ public static List createSimpleSegmentsTsv( tempFolder, closer, Granularities.NONE, + TSV_SCHEMA, + true + ); + } + + public static List createSimpleSegmentsTsvV4( + TemporaryFolder tempFolder, + Closer closer + ) + throws Exception + { + return createSimpleNestedTestDataTsvSegments( + tempFolder, + closer, + Granularities.NONE, + TSV_V4_SCHEMA, true ); } @@ -170,6 +195,7 @@ public static List createSimpleNestedTestDataTsvSegments( TemporaryFolder tempFolder, Closer closer, Granularity granularity, + DimensionsSpec dimensionsSpec, boolean rollup ) throws Exception { @@ -179,7 +205,7 @@ public static List createSimpleNestedTestDataTsvSegments( SIMPLE_DATA_TSV_FILE, SIMPLE_DATA_TSV_INPUT_FORMAT, TIMESTAMP_SPEC, - SIMPLE_DATA_TSV_SCHEMA.getDimensionsSpec(), + dimensionsSpec, SIMPLE_DATA_TSV_TRANSFORM, COUNT, granularity, diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index adc379b38b12..ff469de7bcb2 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -262,6 +262,35 @@ public void testIngestAndScanSegmentsRealtimeWithFallback() throws Exception Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString()); } + @Test + public void testIngestAndScanSegmentsTsvV4() throws Exception + { + Query scanQuery = Druids.newScanQueryBuilder() + .dataSource("test_datasource") + .intervals( + new MultipleIntervalSegmentSpec( + Collections.singletonList(Intervals.ETERNITY) + ) + ) + .virtualColumns( + new NestedFieldVirtualColumn("nest", "$.x", "x"), + new NestedFieldVirtualColumn("nester", "$.x[0]", "x_0"), + new NestedFieldVirtualColumn("nester", "$.y.c[1]", "y_c_1") + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(100) + .context(ImmutableMap.of()) + .build(); + List segs = NestedDataTestUtils.createSimpleSegmentsTsvV4(tempFolder, closer); + + final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(8, ((List) results.get(0).getEvents()).size()); + logResults(results); + } + @Test public void testIngestAndScanSegmentsTsv() throws Exception {