diff --git a/core/src/main/java/org/apache/druid/data/input/InputFormat.java b/core/src/main/java/org/apache/druid/data/input/InputFormat.java
index d3671f5f7c4f..a3a5dd241176 100644
--- a/core/src/main/java/org/apache/druid/data/input/InputFormat.java
+++ b/core/src/main/java/org/apache/druid/data/input/InputFormat.java
@@ -24,11 +24,11 @@
import com.fasterxml.jackson.annotation.JsonSubTypes.Type;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import org.apache.druid.data.input.impl.CsvInputFormat;
+import org.apache.druid.data.input.impl.DelimitedInputFormat;
import org.apache.druid.data.input.impl.JsonInputFormat;
import org.apache.druid.data.input.impl.NestedInputFormat;
import org.apache.druid.data.input.impl.RegexInputFormat;
import org.apache.druid.data.input.impl.SplittableInputSource;
-import org.apache.druid.data.input.impl.TsvInputFormat;
import org.apache.druid.guice.annotations.UnstableApi;
import java.io.File;
@@ -38,7 +38,7 @@
* InputFormat abstracts the file format of input data.
* It creates a {@link InputEntityReader} to read data and parse it into {@link InputRow}.
* The created InputEntityReader is used by {@link InputSourceReader}.
- *
+ *
* See {@link NestedInputFormat} for nested input formats such as JSON.
*/
@UnstableApi
@@ -47,13 +47,13 @@
@Type(name = "csv", value = CsvInputFormat.class),
@Type(name = "json", value = JsonInputFormat.class),
@Type(name = "regex", value = RegexInputFormat.class),
- @Type(name = "tsv", value = TsvInputFormat.class)
+ @Type(name = "tsv", value = DelimitedInputFormat.class)
})
public interface InputFormat
{
/**
* Trait to indicate that a file can be split into multiple {@link InputSplit}s.
- *
+ *
* This method is not being used anywhere for now, but should be considered
* in {@link SplittableInputSource#createSplits} in the future.
*/
diff --git a/core/src/main/java/org/apache/druid/data/input/IntermediateRowParsingReader.java b/core/src/main/java/org/apache/druid/data/input/IntermediateRowParsingReader.java
index e0506c3ca8b8..c5f2f28bdc7d 100644
--- a/core/src/main/java/org/apache/druid/data/input/IntermediateRowParsingReader.java
+++ b/core/src/main/java/org/apache/druid/data/input/IntermediateRowParsingReader.java
@@ -30,7 +30,7 @@
/**
* {@link InputEntityReader} that parses bytes into some intermediate rows first, and then into {@link InputRow}s.
- * For example, {@link org.apache.druid.data.input.impl.CsvReader} parses bytes into string lines, and then parses
+ * For example, {@link org.apache.druid.data.input.impl.DelimitedValueReader} parses bytes into string lines, and then parses
* those lines into InputRows.
*
* @param type of intermediate row. For example, it can be {@link String} for text formats.
diff --git a/core/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java b/core/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java
index 28aa5c680de8..fe9f438317f8 100644
--- a/core/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java
+++ b/core/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java
@@ -25,7 +25,7 @@
import javax.annotation.Nullable;
import java.util.List;
-public class CsvInputFormat extends SeparateValueInputFormat
+public class CsvInputFormat extends DelimitedInputFormat
{
@JsonCreator
public CsvInputFormat(
@@ -36,6 +36,6 @@ public CsvInputFormat(
@JsonProperty("skipHeaderRows") int skipHeaderRows
)
{
- super(columns, listDelimiter, hasHeaderRow, findColumnsFromHeader, skipHeaderRows, Format.CSV);
+ super(columns, listDelimiter, ",", hasHeaderRow, findColumnsFromHeader, skipHeaderRows);
}
}
diff --git a/core/src/main/java/org/apache/druid/data/input/impl/CsvReader.java b/core/src/main/java/org/apache/druid/data/input/impl/CsvReader.java
deleted file mode 100644
index 92fe72d8248a..000000000000
--- a/core/src/main/java/org/apache/druid/data/input/impl/CsvReader.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.data.input.impl;
-
-import org.apache.druid.data.input.InputEntity;
-import org.apache.druid.data.input.InputRowSchema;
-
-import javax.annotation.Nullable;
-import java.io.File;
-import java.util.List;
-
-public class CsvReader extends SeparateValueReader
-{
- CsvReader(
- InputRowSchema inputRowSchema,
- InputEntity source,
- File temporaryDirectory,
- @Nullable String listDelimiter,
- @Nullable List columns,
- boolean findColumnsFromHeader,
- int skipHeaderRows
- )
- {
- super(
- inputRowSchema,
- source,
- temporaryDirectory,
- listDelimiter,
- columns,
- findColumnsFromHeader,
- skipHeaderRows,
- SeparateValueInputFormat.Format.CSV
- );
- }
-}
diff --git a/core/src/main/java/org/apache/druid/data/input/impl/SeparateValueInputFormat.java b/core/src/main/java/org/apache/druid/data/input/impl/DelimitedInputFormat.java
similarity index 70%
rename from core/src/main/java/org/apache/druid/data/input/impl/SeparateValueInputFormat.java
rename to core/src/main/java/org/apache/druid/data/input/impl/DelimitedInputFormat.java
index 93a13e8b8038..7d6bc012f6ee 100644
--- a/core/src/main/java/org/apache/druid/data/input/impl/SeparateValueInputFormat.java
+++ b/core/src/main/java/org/apache/druid/data/input/impl/DelimitedInputFormat.java
@@ -36,20 +36,19 @@
import java.util.Objects;
/**
- * SeparateValueInputFormat abstracts the (Comma/Tab) Separate Value format of input data.
- * It implements the common logic between {@link CsvInputFormat} and {@link TsvInputFormat}
- * Should never be instantiated
+ * InputFormat for a customizable delimiter-separated value format of input data (the default delimiter is tab, i.e. TSV).
*/
-public abstract class SeparateValueInputFormat implements InputFormat
+public class DelimitedInputFormat implements InputFormat
{
public enum Format
{
CSV(',', "comma"),
- TSV('\t', "tab");
+ TSV('\t', "tab"),
+ CustomizeSV('|', "");
- private final char delimiter;
- private final String literal;
+ private char delimiter;
+ private String literal;
Format(char delimiter, String literal)
{
@@ -62,6 +61,12 @@ public String getDelimiterAsString()
return String.valueOf(delimiter);
}
+ private void setDelimiter(String delimiter, String literal)
+ {
+ this.delimiter = (delimiter != null && delimiter.length() > 0) ? delimiter.charAt(0) : '\t';
+ this.literal = literal != null ? literal : "customize separator: " + delimiter;
+ }
+
public char getDelimiter()
{
return delimiter;
@@ -78,14 +83,15 @@ public String getLiteral()
private final boolean findColumnsFromHeader;
private final int skipHeaderRows;
private final Format format;
-
- protected SeparateValueInputFormat(
- @Nullable List columns,
- @Nullable String listDelimiter,
- @Nullable Boolean hasHeaderRow,
- @Nullable Boolean findColumnsFromHeader,
- int skipHeaderRows,
- Format format
+ private final String delimiter;
+
+ public DelimitedInputFormat(
+ @JsonProperty("columns") @Nullable List columns,
+ @JsonProperty("listDelimiter") @Nullable String listDelimiter,
+ @JsonProperty("delimiter") @Nullable String delimiter,
+ @Deprecated @JsonProperty("hasHeaderRow") @Nullable Boolean hasHeaderRow,
+ @JsonProperty("findColumnsFromHeader") @Nullable Boolean findColumnsFromHeader,
+ @JsonProperty("skipHeaderRows") int skipHeaderRows
)
{
this.listDelimiter = listDelimiter;
@@ -98,8 +104,17 @@ protected SeparateValueInputFormat(
)
).getValue();
this.skipHeaderRows = skipHeaderRows;
- this.format = format;
-
+ this.delimiter = delimiter == null ? "\t" : delimiter;
+ this.format = getFormat(this.delimiter);
+ Preconditions.checkArgument(
+ this.delimiter.length() == 1,
+ "The delimiter should be a single character"
+ );
+ Preconditions.checkArgument(
+ !this.delimiter.equals(listDelimiter),
+ "Cannot have same delimiter and list delimiter of [%s]",
+ this.delimiter
+ );
if (!this.columns.isEmpty()) {
for (String column : this.columns) {
Preconditions.checkArgument(
@@ -117,6 +132,18 @@ protected SeparateValueInputFormat(
}
}
+ private static Format getFormat(String delimiter)
+ {
+ if (",".equals(delimiter)) {
+ return Format.CSV;
+ } else if ("\t".equals(delimiter)) {
+ return Format.TSV;
+ } else {
+ Format.CustomizeSV.setDelimiter(delimiter, null);
+ return Format.CustomizeSV;
+ }
+ }
+
@JsonProperty
public List getColumns()
@@ -151,22 +178,15 @@ public boolean isSplittable()
@Override
public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity source, File temporaryDirectory)
{
- return this.format == Format.TSV ? new TsvReader(
- inputRowSchema,
- source,
- temporaryDirectory,
- listDelimiter,
- columns,
- findColumnsFromHeader,
- skipHeaderRows
- ) : new CsvReader(
+ return new DelimitedValueReader(
inputRowSchema,
source,
temporaryDirectory,
listDelimiter,
columns,
findColumnsFromHeader,
- skipHeaderRows
+ skipHeaderRows,
+ this.format
);
}
@@ -179,7 +199,7 @@ public boolean equals(Object o)
if (o == null || getClass() != o.getClass()) {
return false;
}
- SeparateValueInputFormat format = (SeparateValueInputFormat) o;
+ DelimitedInputFormat format = (DelimitedInputFormat) o;
return findColumnsFromHeader == format.findColumnsFromHeader &&
skipHeaderRows == format.skipHeaderRows &&
Objects.equals(listDelimiter, format.listDelimiter) &&
diff --git a/core/src/main/java/org/apache/druid/data/input/impl/SeparateValueReader.java b/core/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java
similarity index 91%
rename from core/src/main/java/org/apache/druid/data/input/impl/SeparateValueReader.java
rename to core/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java
index 81c6afb22d01..cae21b148ef5 100644
--- a/core/src/main/java/org/apache/druid/data/input/impl/SeparateValueReader.java
+++ b/core/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java
@@ -26,6 +26,7 @@
import com.opencsv.RFC4180Parser;
import com.opencsv.RFC4180ParserBuilder;
import com.opencsv.enums.CSVReaderNullFieldIndicator;
+import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.InputEntity;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
@@ -45,29 +46,30 @@
import java.util.Map;
/**
- * SeparateValueReader abstracts the reader for (Comma/Tab) Separate Value format input data.
- * It implements the common logic between {@link CsvReader} and {@link TsvReader}
- * Should never be instantiated
+ * DelimitedValueReader is the reader for delimiter-separated value format input data (e.g., CSV/TSV).
*/
-public abstract class SeparateValueReader extends TextReader
+public class DelimitedValueReader extends TextReader
{
private final boolean findColumnsFromHeader;
private final int skipHeaderRows;
private final Function multiValueFunction;
@Nullable
private List columns;
- private final SeparateValueInputFormat.Format format;
private final RFC4180Parser parser;
public static RFC4180Parser createOpenCsvParser(char separator)
{
- return new RFC4180ParserBuilder().withFieldAsNull(
+ return NullHandling.replaceWithDefault()
+ ? new RFC4180ParserBuilder()
+ .withSeparator(separator)
+ .build()
+ : new RFC4180ParserBuilder().withFieldAsNull(
CSVReaderNullFieldIndicator.EMPTY_SEPARATORS)
.withSeparator(separator)
.build();
}
- SeparateValueReader(
+ DelimitedValueReader(
InputRowSchema inputRowSchema,
InputEntity source,
File temporaryDirectory,
@@ -75,7 +77,7 @@ public static RFC4180Parser createOpenCsvParser(char separator)
@Nullable List columns,
boolean findColumnsFromHeader,
int skipHeaderRows,
- SeparateValueInputFormat.Format format
+ DelimitedInputFormat.Format format
)
{
super(inputRowSchema, source, temporaryDirectory);
@@ -84,7 +86,6 @@ public static RFC4180Parser createOpenCsvParser(char separator)
final String finalListDelimeter = listDelimiter == null ? Parsers.DEFAULT_LIST_DELIMITER : listDelimiter;
this.multiValueFunction = ParserUtils.getMultiValueFunction(finalListDelimeter, Splitter.on(finalListDelimeter));
this.columns = findColumnsFromHeader ? null : columns; // columns will be overriden by header row
- this.format = format;
this.parser = createOpenCsvParser(format.getDelimiter());
if (this.columns != null) {
diff --git a/core/src/main/java/org/apache/druid/data/input/impl/TsvInputFormat.java b/core/src/main/java/org/apache/druid/data/input/impl/TsvInputFormat.java
deleted file mode 100644
index 8bec9536989e..000000000000
--- a/core/src/main/java/org/apache/druid/data/input/impl/TsvInputFormat.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.data.input.impl;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import javax.annotation.Nullable;
-import java.util.List;
-
-public class TsvInputFormat extends SeparateValueInputFormat
-{
- @JsonCreator
- public TsvInputFormat(
- @JsonProperty("columns") @Nullable List columns,
- @JsonProperty("listDelimiter") @Nullable String listDelimiter,
- @Deprecated @JsonProperty("hasHeaderRow") @Nullable Boolean hasHeaderRow,
- @JsonProperty("findColumnsFromHeader") @Nullable Boolean findColumnsFromHeader,
- @JsonProperty("skipHeaderRows") int skipHeaderRows
- )
- {
- super(columns, listDelimiter, hasHeaderRow, findColumnsFromHeader, skipHeaderRows, Format.TSV);
- }
-}
diff --git a/core/src/main/java/org/apache/druid/data/input/impl/TsvReader.java b/core/src/main/java/org/apache/druid/data/input/impl/TsvReader.java
deleted file mode 100644
index 961f61c634ea..000000000000
--- a/core/src/main/java/org/apache/druid/data/input/impl/TsvReader.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.data.input.impl;
-
-import org.apache.druid.data.input.InputEntity;
-import org.apache.druid.data.input.InputRowSchema;
-
-import javax.annotation.Nullable;
-import java.io.File;
-import java.util.List;
-
-public class TsvReader extends SeparateValueReader
-{
- TsvReader(
- InputRowSchema inputRowSchema,
- InputEntity source,
- File temporaryDirectory,
- @Nullable String listDelimiter,
- @Nullable List columns,
- boolean findColumnsFromHeader,
- int skipHeaderRows
- )
- {
- super(
- inputRowSchema,
- source,
- temporaryDirectory,
- listDelimiter,
- columns,
- findColumnsFromHeader,
- skipHeaderRows,
- SeparateValueInputFormat.Format.TSV
- );
- }
-}
diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java
index d026ad4eaed3..e6da347bdfff 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java
@@ -21,7 +21,7 @@
import com.google.common.annotations.VisibleForTesting;
import com.opencsv.RFC4180Parser;
-import org.apache.druid.data.input.impl.SeparateValueReader;
+import org.apache.druid.data.input.impl.DelimitedValueReader;
import javax.annotation.Nullable;
import java.io.IOException;
@@ -30,7 +30,7 @@
public class CSVParser extends AbstractFlatTextFormatParser
{
- private final RFC4180Parser parser = SeparateValueReader.createOpenCsvParser(',');
+ private final RFC4180Parser parser = DelimitedValueReader.createOpenCsvParser(',');
public CSVParser(
@Nullable final String listDelimiter,
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java b/core/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java
index b09ba86f09e9..ad2316c99938 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java
@@ -49,6 +49,14 @@ public void testComma()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Column[a,] has a comma, it cannot");
- new CsvInputFormat(Collections.singletonList("a,"), ",", null, false, 0);
+ new CsvInputFormat(Collections.singletonList("a,"), "|", null, false, 0);
+ }
+
+ @Test
+ public void testDelimiter()
+ {
+ expectedException.expect(IllegalArgumentException.class);
+ expectedException.expectMessage("Cannot have same delimiter and list delimiter of [,]");
+ new CsvInputFormat(Collections.singletonList("a\t"), ",", null, false, 0);
}
}
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/TsvInputFormatTest.java b/core/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java
similarity index 52%
rename from core/src/test/java/org/apache/druid/data/input/impl/TsvInputFormatTest.java
rename to core/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java
index b160e4bc223c..af06d8a81864 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/TsvInputFormatTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java
@@ -29,18 +29,25 @@
import java.io.IOException;
import java.util.Collections;
-public class TsvInputFormatTest
+public class DelimitedInputFormatTest
{
@Rule
- public ExpectedException expectedException = ExpectedException.none();
+ public final ExpectedException expectedException = ExpectedException.none();
@Test
public void testSerde() throws IOException
{
final ObjectMapper mapper = new ObjectMapper();
- final TsvInputFormat format = new TsvInputFormat(Collections.singletonList("a"), "|", null, true, 10);
+ final DelimitedInputFormat format = new DelimitedInputFormat(
+ Collections.singletonList("a"),
+ "|",
+ null,
+ null,
+ true,
+ 10
+ );
final byte[] bytes = mapper.writeValueAsBytes(format);
- final TsvInputFormat fromJson = (TsvInputFormat) mapper.readValue(bytes, InputFormat.class);
+ final DelimitedInputFormat fromJson = (DelimitedInputFormat) mapper.readValue(bytes, InputFormat.class);
Assert.assertEquals(format, fromJson);
}
@@ -49,6 +56,30 @@ public void testTab()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Column[a\t] has a tab, it cannot");
- new TsvInputFormat(Collections.singletonList("a\t"), ",", null, false, 0);
+ new DelimitedInputFormat(Collections.singletonList("a\t"), ",", null, null, false, 0);
+ }
+
+ @Test
+ public void testDelimiterLength()
+ {
+ expectedException.expect(IllegalArgumentException.class);
+ expectedException.expectMessage("The delimiter should be a single character");
+ new DelimitedInputFormat(Collections.singletonList("a\t"), ",", "null", null, false, 0);
+ }
+
+ @Test
+ public void testDelimiterAndListDelimiter()
+ {
+ expectedException.expect(IllegalArgumentException.class);
+ expectedException.expectMessage("Cannot have same delimiter and list delimiter of [,]");
+ new DelimitedInputFormat(Collections.singletonList("a\t"), ",", ",", null, false, 0);
+ }
+
+ @Test
+ public void testCustomizeSeparator()
+ {
+ expectedException.expect(IllegalArgumentException.class);
+ expectedException.expectMessage("Column[a|] has a customize separator: |, it cannot");
+ new DelimitedInputFormat(Collections.singletonList("a|"), ",", "|", null, false, 0);
}
}
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/TsvReaderTest.java b/core/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java
similarity index 81%
rename from core/src/test/java/org/apache/druid/data/input/impl/TsvReaderTest.java
rename to core/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java
index 8f42e40ffae8..5f3bce44dd31 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/TsvReaderTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java
@@ -42,7 +42,7 @@
import java.util.List;
import java.util.stream.Collectors;
-public class TsvReaderTest
+public class DelimitedReaderTest
{
private static final InputRowSchema INPUT_ROW_SCHEMA = new InputRowSchema(
new TimestampSpec("ts", "auto", null),
@@ -66,7 +66,14 @@ public void testWithoutHeaders() throws IOException
"2019-01-01T00:00:30Z\tname_3\t15"
)
);
- final TsvInputFormat format = new TsvInputFormat(ImmutableList.of("ts", "name", "score"), null, null, false, 0);
+ final DelimitedInputFormat format = new DelimitedInputFormat(
+ ImmutableList.of("ts", "name", "score"),
+ null,
+ null,
+ null,
+ false,
+ 0
+ );
assertResult(source, format);
}
@@ -81,7 +88,7 @@ public void testFindColumn() throws IOException
"2019-01-01T00:00:30Z\tname_3\t15"
)
);
- final TsvInputFormat format = new TsvInputFormat(ImmutableList.of(), null, null, true, 0);
+ final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), null, null, null, true, 0);
assertResult(source, format);
}
@@ -96,7 +103,14 @@ public void testSkipHeaders() throws IOException
"2019-01-01T00:00:30Z\tname_3\t15"
)
);
- final TsvInputFormat format = new TsvInputFormat(ImmutableList.of("ts", "name", "score"), null, null, false, 1);
+ final DelimitedInputFormat format = new DelimitedInputFormat(
+ ImmutableList.of("ts", "name", "score"),
+ null,
+ null,
+ null,
+ false,
+ 1
+ );
assertResult(source, format);
}
@@ -112,7 +126,7 @@ public void testFindColumnAndSkipHeaders() throws IOException
"2019-01-01T00:00:30Z\tname_3\t15"
)
);
- final TsvInputFormat format = new TsvInputFormat(ImmutableList.of(), null, null, true, 1);
+ final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), null, null, null, true, 1);
assertResult(source, format);
}
@@ -127,7 +141,42 @@ public void testMultiValues() throws IOException
"2019-01-01T00:00:30Z\tname_3\t15|3"
)
);
- final TsvInputFormat format = new TsvInputFormat(ImmutableList.of(), "|", null, true, 0);
+ final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), "|", null, null, true, 0);
+ final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null);
+ int numResults = 0;
+ try (CloseableIterator iterator = reader.read()) {
+ while (iterator.hasNext()) {
+ final InputRow row = iterator.next();
+ Assert.assertEquals(
+ DateTimes.of(StringUtils.format("2019-01-01T00:00:%02dZ", (numResults + 1) * 10)),
+ row.getTimestamp()
+ );
+ Assert.assertEquals(
+ StringUtils.format("name_%d", numResults + 1),
+ Iterables.getOnlyElement(row.getDimension("name"))
+ );
+ Assert.assertEquals(
+ ImmutableList.of(Integer.toString((numResults + 1) * 5), Integer.toString(numResults + 1)),
+ row.getDimension("score")
+ );
+ numResults++;
+ }
+ Assert.assertEquals(3, numResults);
+ }
+ }
+
+ @Test
+ public void testCustomizeSeparator() throws IOException
+ {
+ final ByteEntity source = writeData(
+ ImmutableList.of(
+ "ts|name|score",
+ "2019-01-01T00:00:10Z|name_1|5\t1",
+ "2019-01-01T00:00:20Z|name_2|10\t2",
+ "2019-01-01T00:00:30Z|name_3|15\t3"
+ )
+ );
+ final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), "\t", "|", null, true, 0);
final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null);
int numResults = 0;
try (CloseableIterator iterator = reader.read()) {
@@ -218,10 +267,11 @@ public void testQuotes() throws IOException
ImmutableMap.of("Value", "65", "Comment", "Here I write \\n slash n", "Timestamp", "2018-05-09T10:00:00Z")
)
);
- final TsvInputFormat format = new TsvInputFormat(
+ final DelimitedInputFormat format = new DelimitedInputFormat(
ImmutableList.of("Value", "Comment", "Timestamp"),
null,
null,
+ null,
false,
0
);
@@ -252,7 +302,14 @@ public void testRussianTextMess() throws IOException
"2019-01-01T00:00:10Z\tname_1\t\"Как говорится: \\\"\"всё течет\t всё изменяется\\\"\". Украина как всегда обвиняет Россию в собственных проблемах. #ПровокацияКиева\""
)
);
- final TsvInputFormat format = new TsvInputFormat(ImmutableList.of("ts", "name", "Comment"), null, null, false, 0);
+ final DelimitedInputFormat format = new DelimitedInputFormat(
+ ImmutableList.of("ts", "name", "Comment"),
+ null,
+ null,
+ null,
+ false,
+ 0
+ );
final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null);
try (CloseableIterator iterator = reader.read()) {
Assert.assertTrue(iterator.hasNext());
@@ -281,7 +338,7 @@ private ByteEntity writeData(List lines) throws IOException
return new ByteEntity(outputStream.toByteArray());
}
- private void assertResult(ByteEntity source, TsvInputFormat format) throws IOException
+ private void assertResult(ByteEntity source, DelimitedInputFormat format) throws IOException
{
final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null);
int numResults = 0;