From 5202e2c7dfdf9d040696c9292f4cf7e5c138fc4f Mon Sep 17 00:00:00 2001 From: Junjie Gu Date: Thu, 14 Nov 2019 14:34:24 -0800 Subject: [PATCH 1/6] sampler returns nulls in CSV --- .../overlord/sampler/FirehoseSampler.java | 4 +- .../overlord/sampler/FirehoseSamplerTest.java | 99 +++++++++++++++---- 2 files changed, 82 insertions(+), 21 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/FirehoseSampler.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/FirehoseSampler.java index 133b92b2b15e..074f87167725 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/FirehoseSampler.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/FirehoseSampler.java @@ -256,9 +256,7 @@ public SamplerResponse sample(FirehoseFactory firehoseFactory, DataSchema dataSc Map parsed = new HashMap<>(); columnNames.forEach(k -> { - if (row.getRaw(k) != null) { - parsed.put(k, row.getRaw(k)); - } + parsed.put(k, row.getRaw(k) == null ? "" : row.getRaw(k)); }); parsed.put(ColumnHolder.TIME_COLUMN_NAME, row.getTimestampFromEpoch()); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java index 06ab5dc1b58e..42d3a4fe0394 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java @@ -210,6 +210,69 @@ public void testNoDataSchemaNumRowsCacheReplay() Assert.assertEquals(data, response.getData()); } + @Test + public void testCSVColumnAllNull() + { + final List str_csv_rows = ImmutableList.of( + "FirstName,LastName,Number,Gender", + "J,G,,Male", + "Kobe,Bryant,,Male", + "Lisa, Krystal,,Female", + "Michael,Jackson,,Male" + ); + FirehoseFactory firehoseFactory = getFirehoseFactory(str_csv_rows); + + ParseSpec parseSpec = new DelimitedParseSpec( + new TimestampSpec(null, null, DateTimes.of("1970")), + new DimensionsSpec(null), + ",", + null, + null, + true, + 0 + ); + + DataSchema dataSchema = new DataSchema("sampler", OBJECT_MAPPER.convertValue( + new StringInputRowParser(parseSpec, StandardCharsets.UTF_8.name()), + new TypeReference>() + { + } + ), null, null, null, OBJECT_MAPPER); + + SamplerResponse response = firehoseSampler.sample(firehoseFactory, dataSchema, null); + + Assert.assertEquals(4, (int) response.getNumRowsRead()); + Assert.assertEquals(4, (int) response.getNumRowsIndexed()); + Assert.assertEquals(4, response.getData().size()); + + List data = removeEmptyColumns(response.getData()); + + Assert.assertEquals(new SamplerResponseRow( + str_csv_rows.get(1).toString(), + ImmutableMap.of("__time", 0L, "Number", "", "FirstName", "J", "LastName", "G", "Gender", "Male"), + null, + null + ), data.get(0)); + Assert.assertEquals(new SamplerResponseRow( + str_csv_rows.get(2).toString(), + ImmutableMap.of("__time", 0L, "Number", "", "FirstName", "Kobe", "LastName", "Bryant", "Gender", "Male"), + null, + null + ), data.get(1)); + Assert.assertEquals(new SamplerResponseRow( + str_csv_rows.get(3).toString(), + ImmutableMap.of("__time", 0L, "Number", "", "FirstName", "Lisa", "LastName", " Krystal", "Gender", "Female"), + null, + null + ), data.get(2)); + Assert.assertEquals(new SamplerResponseRow( + str_csv_rows.get(4).toString(), + ImmutableMap.of("__time", 0L, "Number", "", "FirstName", "Michael", "LastName", "Jackson", "Gender", "Male"), + null, + null + ), data.get(3)); + } + @Test public void testMissingValueTimestampSpec() { @@ -228,25 +291,25 @@ public void testMissingValueTimestampSpec() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:00", "dim1", "foo", "met1", "1"), + ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:00", "dim2", "", "dim1", "foo", "met1", "1"), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(1).toString(), - ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:00", "dim1", "foo", "met1", "2"), + ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:00", "dim2", "", "dim1", "foo", "met1", "2"), null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(2).toString(), - ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:01", "dim1", "foo", "met1", "3"), + ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:01", "dim2", "", "dim1", "foo", "met1", "3"), null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:00", "dim1", "foo2", "met1", "4"), + ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:00", "dim2", "", "dim1", "foo2", "met1", "4"), null, null ), data.get(3)); @@ -258,7 +321,7 @@ public void testMissingValueTimestampSpec() ), data.get(4)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(5).toString(), - ImmutableMap.of("__time", 0L, "t", "bad_timestamp", "dim1", "foo", "met1", "6"), + ImmutableMap.of("__time", 0L, "t", "bad_timestamp", "dim2", "", "dim1", "foo", "met1", "6"), null, null ), data.get(5)); @@ -282,25 +345,25 @@ public void testWithTimestampSpec() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", "1"), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", "1"), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(1).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", "2"), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", "2"), null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(2).toString(), - ImmutableMap.of("__time", 1555934460000L, "dim1", "foo", "met1", "3"), + ImmutableMap.of("__time", 1555934460000L, "dim2", "", "dim1", "foo", "met1", "3"), null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo2", "met1", "4"), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo2", "met1", "4"), null, null ), data.get(3)); @@ -405,25 +468,25 @@ public void testWithNoRollup() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", 1L), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 1L), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(1).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", 2L), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 2L), null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(2).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", 3L), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 3L), null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo2", "met1", 4L), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo2", "met1", 4L), null, null ), data.get(3)); @@ -468,13 +531,13 @@ public void testWithRollup() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", 6L), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 6L), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo2", "met1", 4L), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo2", "met1", 4L), null, null ), data.get(1)); @@ -628,13 +691,13 @@ public void testWithTransformsAutoDimensions() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", 6L), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 6L), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo2", "met1", 4L), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo2", "met1", 4L), null, null ), data.get(1)); @@ -742,7 +805,7 @@ public void testWithFilter() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", 6L), + ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 6L), null, null ), data.get(0)); From 541e3c92b5937289661fc1f4b37a5bfae8b72d8d Mon Sep 17 00:00:00 2001 From: Junjie Gu Date: Thu, 14 Nov 2019 16:50:09 -0800 Subject: [PATCH 2/6] fixed kafka sampler test --- .../org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java index 5ede763b8d62..1dc2f2a0427b 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java @@ -186,6 +186,7 @@ public void testSample() ImmutableMap.builder() .put("__time", 1199145600000L) .put("dim1", "a") + .put("dim1t", "") .put("dim2", "y") .put("dimLong", 10L) .put("dimFloat", 20.0F) @@ -201,6 +202,7 @@ public void testSample() .put("__time", 1230768000000L) .put("dim1", "b") .put("dim2", "y") + .put("dim1t", "") .put("dimLong", 10L) .put("dimFloat", 20.0F) .put("rows", 1L) @@ -215,6 +217,7 @@ public void testSample() .put("__time", 1262304000000L) .put("dim1", "c") .put("dim2", "y") + .put("dim1t", "") .put("dimLong", 10L) .put("dimFloat", 20.0F) .put("rows", 1L) From ed88b3bee83bfe7b0ab0bd36ea3dd26c3391e691 Mon Sep 17 00:00:00 2001 From: Junjie Gu Date: Thu, 14 Nov 2019 17:32:20 -0800 Subject: [PATCH 3/6] fix Kinesis test --- .../apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java index 3cfcd97208ea..27f8225eb361 100644 --- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java +++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java @@ -205,6 +205,7 @@ public void testSample() throws Exception .put("__time", 1199145600000L) .put("dim1", "a") .put("dim2", "y") + .put("dim1t", "") .put("dimLong", 10L) .put("dimFloat", 20.0F) .put("rows", 1L) @@ -219,6 +220,7 @@ public void testSample() throws Exception .put("__time", 1230768000000L) .put("dim1", "b") .put("dim2", "y") + .put("dim1t", "") .put("dimLong", 10L) .put("dimFloat", 20.0F) .put("rows", 1L) @@ -233,6 +235,7 @@ public void testSample() throws Exception .put("__time", 1262304000000L) .put("dim1", "c") .put("dim2", "y") + .put("dim1t", "") .put("dimLong", 10L) .put("dimFloat", 20.0F) .put("rows", 1L) From aa9374089be0ddb427e5bc811aecc7b96c947224 Mon Sep 17 00:00:00 2001 From: Junjie Gu Date: Thu, 14 Nov 2019 19:58:19 -0800 Subject: [PATCH 4/6] sql compatibility fix --- .../druid/indexing/overlord/sampler/FirehoseSamplerTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java index 42d3a4fe0394..fc17247b56fd 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java @@ -885,9 +885,7 @@ private String getUnparseableTimestampString() private List removeEmptyColumns(List rows) { - return USE_DEFAULT_VALUE_FOR_NULL - ? rows - : rows.stream().map(x -> x.withParsed(removeEmptyValues(x.getParsed()))).collect(Collectors.toList()); + return rows; } @Nullable From e55d8040453b10f2f09201c7777ddbeeff9b6a66 Mon Sep 17 00:00:00 2001 From: Junjie Gu Date: Fri, 15 Nov 2019 17:53:07 -0800 Subject: [PATCH 5/6] remove null to empty string conversion, use null --- .../indexing/kafka/KafkaSamplerSpecTest.java | 70 ++-- .../kinesis/KinesisSamplerSpecTest.java | 70 ++-- .../overlord/sampler/FirehoseSampler.java | 2 +- .../overlord/sampler/FirehoseSamplerTest.java | 390 ++++++++++++++++-- 4 files changed, 432 insertions(+), 100 deletions(-) diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java index 1dc2f2a0427b..9d7b5f4245b4 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaSamplerSpecTest.java @@ -56,6 +56,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -183,46 +184,55 @@ public void testSample() Assert.assertEquals(new SamplerResponse.SamplerResponseRow( "{\"timestamp\":\"2008\",\"dim1\":\"a\",\"dim2\":\"y\",\"dimLong\":\"10\",\"dimFloat\":\"20.0\",\"met1\":\"1.0\"}", - ImmutableMap.builder() - .put("__time", 1199145600000L) - .put("dim1", "a") - .put("dim1t", "") - .put("dim2", "y") - .put("dimLong", 10L) - .put("dimFloat", 20.0F) - .put("rows", 1L) - .put("met1sum", 1.0) - .build(), + new HashMap() + { + { + put("__time", 1199145600000L); + put("dim1", "a"); + put("dim1t", null); + put("dim2", "y"); + put("dimLong", 10L); + put("dimFloat", 20.0F); + put("rows", 1L); + put("met1sum", 1.0); + } + }, null, null ), it.next()); Assert.assertEquals(new SamplerResponse.SamplerResponseRow( "{\"timestamp\":\"2009\",\"dim1\":\"b\",\"dim2\":\"y\",\"dimLong\":\"10\",\"dimFloat\":\"20.0\",\"met1\":\"1.0\"}", - ImmutableMap.builder() - .put("__time", 1230768000000L) - .put("dim1", "b") - .put("dim2", "y") - .put("dim1t", "") - .put("dimLong", 10L) - .put("dimFloat", 20.0F) - .put("rows", 1L) - .put("met1sum", 1.0) - .build(), + new HashMap() + { + { + put("__time", 1230768000000L); + put("dim1", "b"); + put("dim1t", null); + put("dim2", "y"); + put("dimLong", 10L); + put("dimFloat", 20.0F); + put("rows", 1L); + put("met1sum", 1.0); + } + }, null, null ), it.next()); Assert.assertEquals(new SamplerResponse.SamplerResponseRow( "{\"timestamp\":\"2010\",\"dim1\":\"c\",\"dim2\":\"y\",\"dimLong\":\"10\",\"dimFloat\":\"20.0\",\"met1\":\"1.0\"}", - ImmutableMap.builder() - .put("__time", 1262304000000L) - .put("dim1", "c") - .put("dim2", "y") - .put("dim1t", "") - .put("dimLong", 10L) - .put("dimFloat", 20.0F) - .put("rows", 1L) - .put("met1sum", 1.0) - .build(), + new HashMap() + { + { + put("__time", 1262304000000L); + put("dim1", "c"); + put("dim1t", null); + put("dim2", "y"); + put("dimLong", 10L); + put("dimFloat", 20.0F); + put("rows", 1L); + put("met1sum", 1.0); + } + }, null, null ), it.next()); diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java index 27f8225eb361..9d39b2f8df33 100644 --- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java +++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisSamplerSpecTest.java @@ -60,6 +60,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -201,46 +202,55 @@ public void testSample() throws Exception Assert.assertEquals(new SamplerResponse.SamplerResponseRow( "{\"timestamp\":\"2008\",\"dim1\":\"a\",\"dim2\":\"y\",\"dimLong\":\"10\",\"dimFloat\":\"20.0\",\"met1\":\"1.0\"}", - ImmutableMap.builder() - .put("__time", 1199145600000L) - .put("dim1", "a") - .put("dim2", "y") - .put("dim1t", "") - .put("dimLong", 10L) - .put("dimFloat", 20.0F) - .put("rows", 1L) - .put("met1sum", 1.0) - .build(), + new HashMap() + { + { + put("__time", 1199145600000L); + put("dim1", "a"); + put("dim1t", null); + put("dim2", "y"); + put("dimLong", 10L); + put("dimFloat", 20.0F); + put("rows", 1L); + put("met1sum", 1.0); + } + }, null, null ), it.next()); Assert.assertEquals(new SamplerResponse.SamplerResponseRow( "{\"timestamp\":\"2009\",\"dim1\":\"b\",\"dim2\":\"y\",\"dimLong\":\"10\",\"dimFloat\":\"20.0\",\"met1\":\"1.0\"}", - ImmutableMap.builder() - .put("__time", 1230768000000L) - .put("dim1", "b") - .put("dim2", "y") - .put("dim1t", "") - .put("dimLong", 10L) - .put("dimFloat", 20.0F) - .put("rows", 1L) - .put("met1sum", 1.0) - .build(), + new HashMap() + { + { + put("__time", 1230768000000L); + put("dim1", "b"); + put("dim1t", null); + put("dim2", "y"); + put("dimLong", 10L); + put("dimFloat", 20.0F); + put("rows", 1L); + put("met1sum", 1.0); + } + }, null, null ), it.next()); Assert.assertEquals(new SamplerResponse.SamplerResponseRow( "{\"timestamp\":\"2010\",\"dim1\":\"c\",\"dim2\":\"y\",\"dimLong\":\"10\",\"dimFloat\":\"20.0\",\"met1\":\"1.0\"}", - ImmutableMap.builder() - .put("__time", 1262304000000L) - .put("dim1", "c") - .put("dim2", "y") - .put("dim1t", "") - .put("dimLong", 10L) - .put("dimFloat", 20.0F) - .put("rows", 1L) - .put("met1sum", 1.0) - .build(), + new HashMap() + { + { + put("__time", 1262304000000L); + put("dim1", "c"); + put("dim1t", null); + put("dim2", "y"); + put("dimLong", 10L); + put("dimFloat", 20.0F); + put("rows", 1L); + put("met1sum", 1.0); + } + }, null, null ), it.next()); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/FirehoseSampler.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/FirehoseSampler.java index 074f87167725..4deb74e3c34f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/FirehoseSampler.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/FirehoseSampler.java @@ -256,7 +256,7 @@ public SamplerResponse sample(FirehoseFactory firehoseFactory, DataSchema dataSc Map parsed = new HashMap<>(); columnNames.forEach(k -> { - parsed.put(k, row.getRaw(k) == null ? "" : row.getRaw(k)); + parsed.put(k, row.getRaw(k)); }); parsed.put(ColumnHolder.TIME_COLUMN_NAME, row.getTimestampFromEpoch()); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java index fc17247b56fd..b995427de125 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java @@ -59,6 +59,7 @@ import javax.annotation.Nullable; import java.nio.charset.StandardCharsets; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -249,25 +250,61 @@ public void testCSVColumnAllNull() Assert.assertEquals(new SamplerResponseRow( str_csv_rows.get(1).toString(), - ImmutableMap.of("__time", 0L, "Number", "", "FirstName", "J", "LastName", "G", "Gender", "Male"), + new HashMap() + { + { + put("__time", 0L); + put("Number", null); + put("FirstName", "J"); + put("LastName", "G"); + put("Gender", "Male"); + } + }, null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( str_csv_rows.get(2).toString(), - ImmutableMap.of("__time", 0L, "Number", "", "FirstName", "Kobe", "LastName", "Bryant", "Gender", "Male"), + new HashMap() + { + { + put("__time", 0L); + put("Number", null); + put("FirstName", "Kobe"); + put("LastName", "Bryant"); + put("Gender", "Male"); + } + }, null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( str_csv_rows.get(3).toString(), - ImmutableMap.of("__time", 0L, "Number", "", "FirstName", "Lisa", "LastName", " Krystal", "Gender", "Female"), + new HashMap() + { + { + put("__time", 0L); + put("Number", null); + put("FirstName", "Lisa"); + put("LastName", " Krystal"); + put("Gender", "Female"); + } + }, null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( str_csv_rows.get(4).toString(), - ImmutableMap.of("__time", 0L, "Number", "", "FirstName", "Michael", "LastName", "Jackson", "Gender", "Male"), + new HashMap() + { + { + put("__time", 0L); + put("Number", null); + put("FirstName", "Michael"); + put("LastName", "Jackson"); + put("Gender", "Male"); + } + }, null, null ), data.get(3)); @@ -291,37 +328,91 @@ public void testMissingValueTimestampSpec() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:00", "dim2", "", "dim1", "foo", "met1", "1"), + new HashMap() + { + { + put("__time", 0L); + put("t", "2019-04-22T12:00"); + put("dim2", null); + put("dim1", "foo"); + put("met1", "1"); + } + }, null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(1).toString(), - ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:00", "dim2", "", "dim1", "foo", "met1", "2"), + new HashMap() + { + { + put("__time", 0L); + put("t", "2019-04-22T12:00"); + put("dim2", null); + put("dim1", "foo"); + put("met1", "2"); + } + }, null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(2).toString(), - ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:01", "dim2", "", "dim1", "foo", "met1", "3"), + new HashMap() + { + { + put("__time", 0L); + put("t", "2019-04-22T12:01"); + put("dim2", null); + put("dim1", "foo"); + put("met1", "3"); + } + }, null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:00", "dim2", "", "dim1", "foo2", "met1", "4"), + new HashMap() + { + { + put("__time", 0L); + put("t", "2019-04-22T12:00"); + put("dim2", null); + put("dim1", "foo2"); + put("met1", "4"); + } + }, null, null ), data.get(3)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - ImmutableMap.of("__time", 0L, "t", "2019-04-22T12:00", "dim1", "foo", "dim2", "bar", "met1", "5"), + new HashMap() + { + { + put("__time", 0L); + put("t", "2019-04-22T12:00"); + put("dim2", "bar"); + put("dim1", "foo"); + put("met1", "5"); + } + }, null, null ), data.get(4)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(5).toString(), - ImmutableMap.of("__time", 0L, "t", "bad_timestamp", "dim2", "", "dim1", "foo", "met1", "6"), + new HashMap() + { + { + put("__time", 0L); + put("t", "bad_timestamp"); + put("dim2", null); + put("dim1", "foo"); + put("met1", "6"); + } + }, null, null ), data.get(5)); @@ -345,31 +436,71 @@ public void testWithTimestampSpec() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", "1"), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo"); + put("met1", "1"); + } + }, null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(1).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", "2"), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo"); + put("met1", "2"); + } + }, null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(2).toString(), - ImmutableMap.of("__time", 1555934460000L, "dim2", "", "dim1", "foo", "met1", "3"), + new HashMap() + { + { + put("__time", 1555934460000L); + put("dim2", null); + put("dim1", "foo"); + put("met1", "3"); + } + }, null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo2", "met1", "4"), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo2"); + put("met1", "4"); + } + }, null, null ), data.get(3)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "dim2", "bar", "met1", "5"), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", "bar"); + put("dim1", "foo"); + put("met1", "5"); + } + }, null, null ), data.get(4)); @@ -405,31 +536,66 @@ public void testWithDimensionSpec() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", "1"), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim1", "foo"); + put("met1", "1"); + } + }, null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(1).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", "2"), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim1", "foo"); + put("met1", "2"); + } + }, null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(2).toString(), - ImmutableMap.of("__time", 1555934460000L, "dim1", "foo", "met1", "3"), + new HashMap() + { + { + put("__time", 1555934460000L); + put("dim1", "foo"); + put("met1", "3"); + } + }, null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo2", "met1", "4"), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim1", "foo2"); + put("met1", "4"); + } + }, null, null ), data.get(3)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", "5"), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim1", "foo"); + put("met1", "5"); + } + }, null, null ), data.get(4)); @@ -468,31 +634,71 @@ public void testWithNoRollup() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 1L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo"); + put("met1", 1L); + } + }, null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(1).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 2L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo"); + put("met1", 2L); + } + }, null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(2).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 3L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo"); + put("met1", 3L); + } + }, null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo2", "met1", 4L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo2"); + put("met1", 4L); + } + }, null, null ), data.get(3)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "dim2", "bar", "met1", 5L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", "bar"); + put("dim1", "foo"); + put("met1", 5L); + } + }, null, null ), data.get(4)); @@ -531,19 +737,43 @@ public void testWithRollup() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 6L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo"); + put("met1", 6L); + } + }, null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo2", "met1", 4L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo2"); + put("met1", 4L); + } + }, null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "dim2", "bar", "met1", 5L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", "bar"); + put("dim1", "foo"); + put("met1", 5L); + } + }, null, null ), data.get(2)); @@ -585,13 +815,27 @@ public void testWithMoreRollup() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", 11L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim1", "foo"); + put("met1", 11L); + } + }, null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo2", "met1", 4L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim1", "foo2"); + put("met1", 4L); + } + }, null, null ), data.get(1)); @@ -638,13 +882,27 @@ public void testWithMoreRollupCacheReplay() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "met1", 11L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim1", "foo"); + put("met1", 11L); + } + }, null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo2", "met1", 4L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim1", "foo2"); + put("met1", 4L); + } + }, null, null ), data.get(1)); @@ -691,19 +949,43 @@ public void testWithTransformsAutoDimensions() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 6L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo"); + put("met1", 6L); + } + }, null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo2", "met1", 4L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo2"); + put("met1", 4L); + } + }, null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "dim2", "bar", "met1", 5L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", "bar"); + put("dim1", "foo"); + put("met1", 5L); + } + }, null, null ), data.get(2)); @@ -756,13 +1038,27 @@ public void testWithTransformsDimensionsSpec() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1PlusBar", "foobar", "met1", 11L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim1PlusBar", "foobar"); + put("met1", 11L); + } + }, null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1PlusBar", "foo2bar", "met1", 4L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim1PlusBar", "foo2bar"); + put("met1", 4L); + } + }, null, null ), data.get(1)); @@ -805,13 +1101,29 @@ public void testWithFilter() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim2", "", "dim1", "foo", "met1", 6L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", null); + put("dim1", "foo"); + put("met1", 6L); + } + }, null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - ImmutableMap.of("__time", 1555934400000L, "dim1", "foo", "dim2", "bar", "met1", 5L), + new HashMap() + { + { + put("__time", 1555934400000L); + put("dim2", "bar"); + put("dim1", "foo"); + put("met1", 5L); + } + }, null, null ), data.get(1)); From 0f13ecc88c112080b286189733946c69dd9bfd04 Mon Sep 17 00:00:00 2001 From: Junjie Gu Date: Fri, 15 Nov 2019 18:58:01 -0800 Subject: [PATCH 6/6] fix sql compatibility --- .../overlord/sampler/FirehoseSamplerTest.java | 191 ++++++++++-------- 1 file changed, 105 insertions(+), 86 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java index b995427de125..62526cfe327d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java @@ -214,6 +214,7 @@ public void testNoDataSchemaNumRowsCacheReplay() @Test public void testCSVColumnAllNull() { + parserType = ParserType.STR_CSV; final List str_csv_rows = ImmutableList.of( "FirstName,LastName,Number,Gender", "J,G,,Male", @@ -246,11 +247,11 @@ public void testCSVColumnAllNull() Assert.assertEquals(4, (int) response.getNumRowsIndexed()); Assert.assertEquals(4, response.getData().size()); - List data = removeEmptyColumns(response.getData()); + List data = response.getData(); Assert.assertEquals(new SamplerResponseRow( str_csv_rows.get(1).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 0L); @@ -259,13 +260,13 @@ public void testCSVColumnAllNull() put("LastName", "G"); put("Gender", "Male"); } - }, + }), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( str_csv_rows.get(2).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 0L); @@ -274,13 +275,13 @@ public void testCSVColumnAllNull() put("LastName", "Bryant"); put("Gender", "Male"); } - }, + }), null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( str_csv_rows.get(3).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 0L); @@ -289,13 +290,13 @@ public void testCSVColumnAllNull() put("LastName", " Krystal"); put("Gender", "Female"); } - }, + }), null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( str_csv_rows.get(4).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 0L); @@ -304,7 +305,7 @@ public void testCSVColumnAllNull() put("LastName", "Jackson"); put("Gender", "Male"); } - }, + }), null, null ), data.get(3)); @@ -324,11 +325,11 @@ public void testMissingValueTimestampSpec() Assert.assertEquals(6, (int) response.getNumRowsIndexed()); Assert.assertEquals(6, response.getData().size()); - List data = removeEmptyColumns(response.getData()); + List data = response.getData(); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 0L); @@ -337,13 +338,13 @@ public void testMissingValueTimestampSpec() put("dim1", "foo"); put("met1", "1"); } - }, + }), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(1).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 0L); @@ -352,13 +353,13 @@ public void testMissingValueTimestampSpec() put("dim1", "foo"); put("met1", "2"); } - }, + }), null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(2).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 0L); @@ -367,13 +368,13 @@ public void testMissingValueTimestampSpec() put("dim1", "foo"); put("met1", "3"); } - }, + }), null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 0L); @@ -382,13 +383,13 @@ public void testMissingValueTimestampSpec() put("dim1", "foo2"); put("met1", "4"); } - }, + }), null, null ), data.get(3)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 0L); @@ -397,13 +398,13 @@ public void testMissingValueTimestampSpec() put("dim1", "foo"); put("met1", "5"); } - }, + }), null, null ), data.get(4)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(5).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 0L); @@ -412,7 +413,7 @@ public void testMissingValueTimestampSpec() put("dim1", "foo"); put("met1", "6"); } - }, + }), null, null ), data.get(5)); @@ -432,11 +433,11 @@ public void testWithTimestampSpec() Assert.assertEquals(5, (int) response.getNumRowsIndexed()); Assert.assertEquals(6, response.getData().size()); - List data = removeEmptyColumns(response.getData()); + List data = response.getData(); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -444,13 +445,13 @@ public void testWithTimestampSpec() put("dim1", "foo"); put("met1", "1"); } - }, + }), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(1).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -458,13 +459,13 @@ public void testWithTimestampSpec() put("dim1", "foo"); put("met1", "2"); } - }, + }), null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(2).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934460000L); @@ -472,13 +473,13 @@ public void testWithTimestampSpec() put("dim1", "foo"); put("met1", "3"); } - }, + }), null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -486,13 +487,13 @@ public void testWithTimestampSpec() put("dim1", "foo2"); put("met1", "4"); } - }, + }), null, null ), data.get(3)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -500,7 +501,7 @@ public void testWithTimestampSpec() put("dim1", "foo"); put("met1", "5"); } - }, + }), null, null ), data.get(4)); @@ -536,66 +537,66 @@ public void testWithDimensionSpec() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); put("dim1", "foo"); put("met1", "1"); } - }, + }), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(1).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); put("dim1", "foo"); put("met1", "2"); } - }, + }), null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(2).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934460000L); put("dim1", "foo"); put("met1", "3"); } - }, + }), null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); put("dim1", "foo2"); put("met1", "4"); } - }, + }), null, null ), data.get(3)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); put("dim1", "foo"); put("met1", "5"); } - }, + }), null, null ), data.get(4)); @@ -630,11 +631,11 @@ public void testWithNoRollup() Assert.assertEquals(5, (int) response.getNumRowsIndexed()); Assert.assertEquals(6, response.getData().size()); - List data = removeEmptyColumns(response.getData()); + List data = response.getData(); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -642,13 +643,13 @@ public void testWithNoRollup() put("dim1", "foo"); put("met1", 1L); } - }, + }), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(1).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -656,13 +657,13 @@ public void testWithNoRollup() put("dim1", "foo"); put("met1", 2L); } - }, + }), null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(2).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -670,13 +671,13 @@ public void testWithNoRollup() put("dim1", "foo"); put("met1", 3L); } - }, + }), null, null ), data.get(2)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -684,13 +685,13 @@ public void testWithNoRollup() put("dim1", "foo2"); put("met1", 4L); } - }, + }), null, null ), data.get(3)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -698,7 +699,7 @@ public void testWithNoRollup() put("dim1", "foo"); put("met1", 5L); } - }, + }), null, null ), data.get(4)); @@ -733,11 +734,11 @@ public void testWithRollup() Assert.assertEquals(5, (int) response.getNumRowsIndexed()); Assert.assertEquals(4, response.getData().size()); - List data = removeEmptyColumns(response.getData()); + List data = response.getData(); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -745,13 +746,13 @@ public void testWithRollup() put("dim1", "foo"); put("met1", 6L); } - }, + }), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -759,13 +760,13 @@ public void testWithRollup() put("dim1", "foo2"); put("met1", 4L); } - }, + }), null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -773,7 +774,7 @@ public void testWithRollup() put("dim1", "foo"); put("met1", 5L); } - }, + }), null, null ), data.get(2)); @@ -815,27 +816,27 @@ public void testWithMoreRollup() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); put("dim1", "foo"); put("met1", 11L); } - }, + }), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); put("dim1", "foo2"); put("met1", 4L); } - }, + }), null, null ), data.get(1)); @@ -882,27 +883,27 @@ public void testWithMoreRollupCacheReplay() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); put("dim1", "foo"); put("met1", 11L); } - }, + }), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); put("dim1", "foo2"); put("met1", 4L); } - }, + }), null, null ), data.get(1)); @@ -945,11 +946,11 @@ public void testWithTransformsAutoDimensions() Assert.assertEquals(5, (int) response.getNumRowsIndexed()); Assert.assertEquals(4, response.getData().size()); - List data = removeEmptyColumns(response.getData()); + List data = response.getData(); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -957,13 +958,13 @@ public void testWithTransformsAutoDimensions() put("dim1", "foo"); put("met1", 6L); } - }, + }), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -971,13 +972,13 @@ public void testWithTransformsAutoDimensions() put("dim1", "foo2"); put("met1", 4L); } - }, + }), null, null ), data.get(1)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -985,7 +986,7 @@ public void testWithTransformsAutoDimensions() put("dim1", "foo"); put("met1", 5L); } - }, + }), null, null ), data.get(2)); @@ -1038,27 +1039,27 @@ public void testWithTransformsDimensionsSpec() Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); put("dim1PlusBar", "foobar"); put("met1", 11L); } - }, + }), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(3).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); put("dim1PlusBar", "foo2bar"); put("met1", 4L); } - }, + }), null, null ), data.get(1)); @@ -1097,11 +1098,11 @@ public void testWithFilter() Assert.assertEquals(4, (int) response.getNumRowsIndexed()); Assert.assertEquals(3, response.getData().size()); - List data = removeEmptyColumns(response.getData()); + List data = response.getData(); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(0).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -1109,13 +1110,13 @@ public void testWithFilter() put("dim1", "foo"); put("met1", 6L); } - }, + }), null, null ), data.get(0)); Assert.assertEquals(new SamplerResponseRow( getTestRows().get(4).toString(), - new HashMap() + replaceNullValues(new HashMap() { { put("__time", 1555934400000L); @@ -1123,7 +1124,7 @@ public void testWithFilter() put("dim1", "foo"); put("met1", 5L); } - }, + }), null, null ), data.get(1)); @@ -1197,9 +1198,12 @@ private String getUnparseableTimestampString() private List removeEmptyColumns(List rows) { - return rows; + return USE_DEFAULT_VALUE_FOR_NULL + ? rows + : rows.stream().map(x -> x.withParsed(removeEmptyValues(x.getParsed()))).collect(Collectors.toList()); } + @Nullable private Map removeEmptyValues(Map data) { @@ -1209,4 +1213,19 @@ private Map removeEmptyValues(Map data) .filter(x -> !(x.getValue() instanceof String) || !((String) x.getValue()).isEmpty()) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } + + @Nullable + private Map replaceNullValues(Map data) + { + return ParserType.STR_CSV.equals(parserType) + ? USE_DEFAULT_VALUE_FOR_NULL + ? data + : data.entrySet() + .stream() + .collect(Collectors.toMap( + Map.Entry::getKey, + e -> e.getValue() == null ? "" : e.getValue() + )) + : data; + } }