From 8e4bf81b3f81aa3398ca9358e91d37c9f4089499 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 22 Jun 2017 16:19:45 +0900 Subject: [PATCH 1/3] Reset fieldNames whenever a new file begins --- .../java/util/common/parsers/CSVParser.java | 3 +- .../util/common/parsers/DelimitedParser.java | 3 +- .../util/common/parsers/CSVParserTest.java | 74 ++++++++++++++++++- .../common/parsers/DelimitedParserTest.java | 70 +++++++++++++++++- 4 files changed, 143 insertions(+), 7 deletions(-) diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/CSVParser.java b/java-util/src/main/java/io/druid/java/util/common/parsers/CSVParser.java index 957e72560d2a..40449cfa73ba 100644 --- a/java-util/src/main/java/io/druid/java/util/common/parsers/CSVParser.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/CSVParser.java @@ -113,9 +113,10 @@ public String getListDelimiter() @Override public void startFileFromBeginning() { - supportSkipHeaderRows = true; + fieldNames = null; hasParsedHeader = false; skippedHeaderRows = 0; + supportSkipHeaderRows = true; } @Override diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/DelimitedParser.java b/java-util/src/main/java/io/druid/java/util/common/parsers/DelimitedParser.java index 27114df31e95..72c23628c0dd 100644 --- a/java-util/src/main/java/io/druid/java/util/common/parsers/DelimitedParser.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/DelimitedParser.java @@ -126,9 +126,10 @@ public String getListDelimiter() @Override public void startFileFromBeginning() { - supportSkipHeaderRows = true; + fieldNames = null; hasParsedHeader = false; skippedHeaderRows = 0; + supportSkipHeaderRows = true; } @Override diff --git a/java-util/src/test/java/io/druid/java/util/common/parsers/CSVParserTest.java b/java-util/src/test/java/io/druid/java/util/common/parsers/CSVParserTest.java index 37a589b276a2..f57bd4077a1f 100644 --- a/java-util/src/test/java/io/druid/java/util/common/parsers/CSVParserTest.java +++ b/java-util/src/test/java/io/druid/java/util/common/parsers/CSVParserTest.java @@ -22,12 +22,16 @@ import com.google.common.base.Optional; import com.google.common.collect.ImmutableMap; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import java.util.Map; public class CSVParserTest { + @Rule + public ExpectedException expectedException = ExpectedException.none(); @Test public void testValidHeader() @@ -117,9 +121,71 @@ public void testCSVParserWithSkipHeaderRows() ); } - @Test(expected = UnsupportedOperationException.class) + @Test + public void testCSVParserWithHeaderRow() + { + final Parser csvParser = new CSVParser( + Optional.absent(), + true, + 0 + ); + csvParser.startFileFromBeginning(); + final String[] body = new String[] { + "time,value1,value2", + "hello,world,foo" + }; + Assert.assertNull(csvParser.parse(body[0])); + final Map jsonMap = csvParser.parse(body[1]); + Assert.assertEquals( + "jsonMap", + ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo"), + jsonMap + ); + } + + @Test + public void testCSVParserWithDifferentHeaderRows() + { + final Parser csvParser = new CSVParser( + Optional.absent(), + true, + 0 + ); + csvParser.startFileFromBeginning(); + final String[] body = new String[] { + "time,value1,value2", + "hello,world,foo" + }; + Assert.assertNull(csvParser.parse(body[0])); + Map jsonMap = csvParser.parse(body[1]); + Assert.assertEquals( + "jsonMap", + ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo"), + jsonMap + ); + + csvParser.startFileFromBeginning(); + final String[] body2 = new String[] { + "time,value1,value2,value3", + "hello,world,foo,bar" + }; + Assert.assertNull(csvParser.parse(body2[0])); + jsonMap = csvParser.parse(body2[1]); + Assert.assertEquals( + "jsonMap", + ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo", "value3", "bar"), + jsonMap + ); + } + + @Test public void testCSVParserWithoutStartFileFromBeginning() { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage( + "hasHeaderRow or maxSkipHeaderRows is not supported. Please check the indexTask supports these options." + ); + final int skipHeaderRows = 2; final Parser csvParser = new CSVParser( Optional.absent(), @@ -127,9 +193,9 @@ public void testCSVParserWithoutStartFileFromBeginning() skipHeaderRows ); final String[] body = new String[] { - "header\tline\t1", - "header\tline\t2", - "hello\tworld\tfoo" + "header,line,1", + "header,line,2", + "hello,world,foo" }; csvParser.parse(body[0]); } diff --git a/java-util/src/test/java/io/druid/java/util/common/parsers/DelimitedParserTest.java b/java-util/src/test/java/io/druid/java/util/common/parsers/DelimitedParserTest.java index d91ed25cbbc2..ca913253eb61 100644 --- a/java-util/src/test/java/io/druid/java/util/common/parsers/DelimitedParserTest.java +++ b/java-util/src/test/java/io/druid/java/util/common/parsers/DelimitedParserTest.java @@ -22,12 +22,16 @@ import com.google.common.base.Optional; import com.google.common.collect.ImmutableMap; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import java.util.Map; public class DelimitedParserTest { + @Rule + public ExpectedException expectedException = ExpectedException.none(); @Test public void testValidHeader() @@ -127,9 +131,73 @@ public void testTSVParserWithSkipHeaderRows() ); } - @Test(expected = UnsupportedOperationException.class) + @Test + public void testTSVParserWithHeaderRow() + { + final Parser parser = new DelimitedParser( + Optional.of("\t"), + Optional.absent(), + true, + 0 + ); + parser.startFileFromBeginning(); + final String[] body = new String[] { + "time\tvalue1\tvalue2", + "hello\tworld\tfoo" + }; + Assert.assertNull(parser.parse(body[0])); + final Map jsonMap = parser.parse(body[1]); + Assert.assertEquals( + "jsonMap", + ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo"), + jsonMap + ); + } + + @Test + public void testTSVParserWithDifferentHeaderRows() + { + final Parser csvParser = new DelimitedParser( + Optional.of("\t"), + Optional.absent(), + true, + 0 + ); + csvParser.startFileFromBeginning(); + final String[] body = new String[] { + "time\tvalue1\tvalue2", + "hello\tworld\tfoo" + }; + Assert.assertNull(csvParser.parse(body[0])); + Map jsonMap = csvParser.parse(body[1]); + Assert.assertEquals( + "jsonMap", + ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo"), + jsonMap + ); + + csvParser.startFileFromBeginning(); + final String[] body2 = new String[] { + "time\tvalue1\tvalue2\tvalue3", + "hello\tworld\tfoo\tbar" + }; + Assert.assertNull(csvParser.parse(body2[0])); + jsonMap = csvParser.parse(body2[1]); + Assert.assertEquals( + "jsonMap", + ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo", "value3", "bar"), + jsonMap + ); + } + + @Test public void testTSVParserWithoutStartFileFromBeginning() { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage( + "hasHeaderRow or maxSkipHeaderRows is not supported. Please check the indexTask supports these options." + ); + final int skipHeaderRows = 2; final Parser delimitedParser = new DelimitedParser( Optional.of("\t"), From 0bf7c099b262e233c0d541a498e3dfc9a259c8df Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 22 Jun 2017 17:04:36 +0900 Subject: [PATCH 2/3] Fix test failure --- .../java/io/druid/java/util/common/parsers/CSVParser.java | 4 +++- .../io/druid/java/util/common/parsers/DelimitedParser.java | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/CSVParser.java b/java-util/src/main/java/io/druid/java/util/common/parsers/CSVParser.java index 40449cfa73ba..180b095ae942 100644 --- a/java-util/src/main/java/io/druid/java/util/common/parsers/CSVParser.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/CSVParser.java @@ -113,7 +113,9 @@ public String getListDelimiter() @Override public void startFileFromBeginning() { - fieldNames = null; + if (hasHeaderRow) { + fieldNames = null; + } hasParsedHeader = false; skippedHeaderRows = 0; supportSkipHeaderRows = true; diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/DelimitedParser.java b/java-util/src/main/java/io/druid/java/util/common/parsers/DelimitedParser.java index 72c23628c0dd..c052daee17e0 100644 --- a/java-util/src/main/java/io/druid/java/util/common/parsers/DelimitedParser.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/DelimitedParser.java @@ -126,7 +126,9 @@ public String getListDelimiter() @Override public void startFileFromBeginning() { - fieldNames = null; + if (hasHeaderRow) { + fieldNames = null; + } hasParsedHeader = false; skippedHeaderRows = 0; supportSkipHeaderRows = true; From 38b7fdecf8bf9fc7ff7e508ccbd3bf30a447127e Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 22 Jun 2017 18:05:38 +0900 Subject: [PATCH 3/3] Fix test failure --- .../test/java/io/druid/indexing/common/task/IndexTaskTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java index 2881ab0c5196..d25811517fab 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java @@ -482,7 +482,7 @@ public void testCSVFileWithHeaderColumnOverride() throws Exception Assert.assertEquals(1, segments.size()); - Assert.assertEquals(Arrays.asList("dim"), segments.get(0).getDimensions()); + Assert.assertEquals(Arrays.asList("d"), segments.get(0).getDimensions()); Assert.assertEquals(Arrays.asList("val"), segments.get(0).getMetrics()); Assert.assertEquals(new Interval("2014/P1D"), segments.get(0).getInterval()); }