From 0562c996d5b067711048fd676b83f84f575dfc5a Mon Sep 17 00:00:00 2001 From: Atul Mohan Date: Tue, 29 Oct 2019 16:24:24 -0500 Subject: [PATCH 1/4] Handle missing values --- .../druid/java/util/common/parsers/CSVParser.java | 8 +++++++- .../java/util/common/parsers/DelimitedParser.java | 7 ++++++- .../common/parsers/FlatTextFormatParserTest.java | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java index 41c2d18226b9..a16a9622dbfa 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java @@ -20,6 +20,9 @@ package org.apache.druid.java.util.common.parsers; import com.google.common.annotations.VisibleForTesting; +import com.opencsv.CSVParserBuilder; +import com.opencsv.enums.CSVReaderNullFieldIndicator; +import org.apache.druid.common.config.NullHandling; import javax.annotation.Nullable; import java.io.IOException; @@ -28,7 +31,10 @@ public class CSVParser extends AbstractFlatTextFormatParser { - private final com.opencsv.CSVParser parser = new com.opencsv.CSVParser(); + private final com.opencsv.CSVParser parser = NullHandling.replaceWithDefault() + ? new com.opencsv.CSVParser() + : new CSVParserBuilder().withFieldAsNull( + CSVReaderNullFieldIndicator.EMPTY_SEPARATORS).build(); public CSVParser( @Nullable final String listDelimiter, diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java index 61eafa70f19b..aa133bd419b8 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java @@ -22,6 +22,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; +import org.apache.druid.common.config.NullHandling; import javax.annotation.Nullable; import java.util.ArrayList; @@ -94,7 +95,11 @@ private List splitToList(String input) List result = new ArrayList(); while (iterator.hasNext()) { - result.add(iterator.next()); + String splitValue = iterator.next(); + if (!NullHandling.replaceWithDefault() && splitValue.isEmpty()) { + result.add(null); + } + result.add(splitValue); } return Collections.unmodifiableList(result); diff --git a/core/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java b/core/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java index 571a0d359b8b..bda9674cba64 100644 --- a/core/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java +++ b/core/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java @@ -215,6 +215,20 @@ public void testWithoutStartFileFromBeginning() parser.parseToMap(body[0]); } + @Test + public void testWithNullValues() + { + final Parser parser = PARSER_FACTORY.get(format, true, 0); + parser.startFileFromBeginning(); + final String[] body = new String[]{ + concat(format, "time", "value1", "value2"), + concat(format, "hello", "world", "") + }; + Assert.assertNull(parser.parseToMap(body[0])); + final Map jsonMap = parser.parseToMap(body[1]); + Assert.assertNull(jsonMap.get("value2")); + } + private static class FlatTextFormatParserFactory { public Parser get(FlatTextFormat format) From e69c2211482e730270f044809d1200034c1737b8 Mon Sep 17 00:00:00 2001 From: Atul Mohan Date: Fri, 1 Nov 2019 11:12:19 -0500 Subject: [PATCH 2/4] Fix multi value tests --- .../apache/druid/java/util/common/parsers/DelimitedParser.java | 3 ++- .../java/org/apache/druid/query/MultiValuedDimensionTest.java | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java index aa133bd419b8..849d45e85579 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java @@ -98,8 +98,9 @@ private List splitToList(String input) String splitValue = iterator.next(); if (!NullHandling.replaceWithDefault() && splitValue.isEmpty()) { result.add(null); + } else { + result.add(splitValue); } - result.add(splitValue); } return Collections.unmodifiableList(result); diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index b570677a0ed8..93cf2dced8a4 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -161,7 +161,7 @@ public void setup() throws Exception "2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3,u1\tu2", "2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5,u3\tu4", "2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7,u1\tu5", - "2011-01-14T00:00:00.000Z,product_4,,u2" + "2011-01-14T00:00:00.000Z,product_4,\"\",u2" }; for (String row : rows) { From 3d190fdf3e7574fde884e1fcb56ff482b20990a2 Mon Sep 17 00:00:00 2001 From: Atul Mohan Date: Fri, 1 Nov 2019 15:13:06 -0500 Subject: [PATCH 3/4] Fix firehose tests --- .../druid/indexing/overlord/sampler/FirehoseSamplerTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java index 06ab5dc1b58e..15f4eb4b346c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/FirehoseSamplerTest.java @@ -814,9 +814,7 @@ private ParseSpec getParseSpec(TimestampSpec timestampSpec, DimensionsSpec dimen private String getUnparseableTimestampString() { return ParserType.STR_CSV.equals(parserType) - ? (USE_DEFAULT_VALUE_FOR_NULL - ? "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, dim2=null, met1=6}" - : "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, dim2=, met1=6}") + ? "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, dim2=null, met1=6}" : "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, met1=6}"; } From 6cb53b428590e742503e4cee9ca7474127cfbd4f Mon Sep 17 00:00:00 2001 From: Atul Mohan Date: Tue, 19 Nov 2019 14:36:40 -0600 Subject: [PATCH 4/4] Fix conflicts --- .../apache/druid/java/util/common/parsers/CSVParser.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java index 216423f85eba..9b99014dc693 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java @@ -24,6 +24,7 @@ import com.opencsv.RFC4180ParserBuilder; import com.opencsv.enums.CSVReaderNullFieldIndicator; import org.apache.druid.common.config.NullHandling; + import javax.annotation.Nullable; import java.io.IOException; import java.util.Arrays; @@ -32,9 +33,9 @@ public class CSVParser extends AbstractFlatTextFormatParser { private final RFC4180Parser parser = NullHandling.replaceWithDefault() - ? new RFC4180Parser() - : new RFC4180ParserBuilder().withFieldAsNull( - CSVReaderNullFieldIndicator.EMPTY_SEPARATORS).build(); + ? new RFC4180Parser() + : new RFC4180ParserBuilder().withFieldAsNull( + CSVReaderNullFieldIndicator.EMPTY_SEPARATORS).build(); public CSVParser( @Nullable final String listDelimiter,