-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Add support for headers and skipping thereof for CSV and TSV #4254
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7bd6077
c3965ed
f68be6b
e188eea
96374cb
8bbb5d3
aeb0fc6
7e7d6f0
6ad82f5
d0ca172
1eaffbd
e723389
aadaa7a
ff74a92
92c5b3e
3fdc0d9
7ffa967
d447f37
2b25c90
a9e7679
27d997f
5290907
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,6 @@ | |
| import com.fasterxml.jackson.annotation.JsonProperty; | ||
| import com.google.common.base.Optional; | ||
| import com.google.common.base.Preconditions; | ||
|
|
||
| import io.druid.java.util.common.parsers.CSVParser; | ||
| import io.druid.java.util.common.parsers.Parser; | ||
|
|
||
|
|
@@ -35,26 +34,38 @@ public class CSVParseSpec extends ParseSpec | |
| { | ||
| private final String listDelimiter; | ||
| private final List<String> columns; | ||
| private final boolean hasHeaderRow; | ||
| private final int skipHeaderRows; | ||
|
|
||
| @JsonCreator | ||
| public CSVParseSpec( | ||
| @JsonProperty("timestampSpec") TimestampSpec timestampSpec, | ||
| @JsonProperty("dimensionsSpec") DimensionsSpec dimensionsSpec, | ||
| @JsonProperty("listDelimiter") String listDelimiter, | ||
| @JsonProperty("columns") List<String> columns | ||
| @JsonProperty("columns") List<String> columns, | ||
| @JsonProperty("hasHeaderRow") boolean hasHeaderRow, | ||
| @JsonProperty("skipHeaderRows") int skipHeaderRows | ||
| ) | ||
| { | ||
| super(timestampSpec, dimensionsSpec); | ||
|
|
||
| this.listDelimiter = listDelimiter; | ||
| Preconditions.checkNotNull(columns, "columns"); | ||
| for (String column : columns) { | ||
| Preconditions.checkArgument(!column.contains(","), "Column[%s] has a comma, it cannot", column); | ||
| } | ||
|
|
||
| this.columns = columns; | ||
|
|
||
| verify(dimensionsSpec.getDimensionNames()); | ||
| this.hasHeaderRow = hasHeaderRow; | ||
| this.skipHeaderRows = skipHeaderRows; | ||
|
|
||
| if (columns != null) { | ||
| for (String column : columns) { | ||
| Preconditions.checkArgument(!column.contains(","), "Column[%s] has a comma, it cannot", column); | ||
| } | ||
| verify(dimensionsSpec.getDimensionNames()); | ||
| } else { | ||
| Preconditions.checkArgument( | ||
| hasHeaderRow, | ||
| "If columns field is not set, the first row of your data must have your header" | ||
| + " and hasHeaderRow must be set to true." | ||
| ); | ||
| } | ||
| } | ||
|
|
||
| @JsonProperty | ||
|
|
@@ -69,6 +80,18 @@ public List<String> getColumns() | |
| return columns; | ||
| } | ||
|
|
||
| @JsonProperty | ||
| public boolean isHasHeaderRow() | ||
| { | ||
| return hasHeaderRow; | ||
| } | ||
|
|
||
| @JsonProperty("skipHeaderRows") | ||
| public int getSkipHeaderRows() | ||
| { | ||
| return skipHeaderRows; | ||
| } | ||
|
|
||
| @Override | ||
| public void verify(List<String> usedCols) | ||
| { | ||
|
|
@@ -80,23 +103,23 @@ public void verify(List<String> usedCols) | |
| @Override | ||
| public Parser<String, Object> makeParser() | ||
| { | ||
| return new CSVParser(Optional.fromNullable(listDelimiter), columns); | ||
| return new CSVParser(Optional.fromNullable(listDelimiter), columns, hasHeaderRow, skipHeaderRows); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Optional comment) Note that
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found some more methods which receive an Optional as its parameter like
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Opened an issue: #4275 |
||
| } | ||
|
|
||
| @Override | ||
| public ParseSpec withTimestampSpec(TimestampSpec spec) | ||
| { | ||
| return new CSVParseSpec(spec, getDimensionsSpec(), listDelimiter, columns); | ||
| return new CSVParseSpec(spec, getDimensionsSpec(), listDelimiter, columns, hasHeaderRow, skipHeaderRows); | ||
| } | ||
|
|
||
| @Override | ||
| public ParseSpec withDimensionsSpec(DimensionsSpec spec) | ||
| { | ||
| return new CSVParseSpec(getTimestampSpec(), spec, listDelimiter, columns); | ||
| return new CSVParseSpec(getTimestampSpec(), spec, listDelimiter, columns, hasHeaderRow, skipHeaderRows); | ||
| } | ||
|
|
||
| public ParseSpec withColumns(List<String> cols) | ||
| { | ||
| return new CSVParseSpec(getTimestampSpec(), getDimensionsSpec(), listDelimiter, cols); | ||
| return new CSVParseSpec(getTimestampSpec(), getDimensionsSpec(), listDelimiter, cols, hasHeaderRow, skipHeaderRows); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it backwards compatible to update to add fields to Json form? Maybe need to add legacy constructor without those fields? Also it is used in tranquility: https://github.com/druid-io/tranquility/blob/63eb64e4cf96e62abdb9e784ce7b07c90f83ebb4/server/src/test/scala/com/metamx/tranquility/server/TranquilityServletTest.scala#L277
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just double checked this, an old-format JSON spec that doesn't have the new fields deserializes fine (gets hasHeaderRow=false, skipHeaderRows=0).
Looks like this does need a legacy constructor though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the check! I raised #4388.