From 9eb5323696bec222580f4c31507de92504efa622 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 10:37:53 -0400 Subject: [PATCH 01/24] Add support to optionally parse numbers from strings for text-based parsers. This helps samplers to detect numeric types for text-based formats like csv and tsv. These text-based formats by default parse numbers as strings. This change add a config flag to optionally parse numbers as numbers. Long for integers and Double for floating-point numbers. It falls back to string if it cannot parse. The web-console has some code in the load data flow to parse the sample of data returned by the Druid sampler to further inspect types so it can convert them to specific numeric types, if applicable. After this change, the web-console sampler/other applications can just rely on Druid to do it. --- .../DelimitedInputFormatBenchmark.java | 2 +- .../data/input/aliyun/OssInputSourceTest.java | 4 +- .../segment/MapVirtualColumnTestBase.java | 3 +- .../druid/catalog/storage/CatalogTests.java | 4 +- .../GoogleCloudStorageInputSourceTest.java | 4 +- .../inputsource/hdfs/HdfsInputSourceTest.java | 4 +- .../kafkainput/KafkaInputFormatTest.java | 4 +- .../input/kinesis/KinesisInputFormatTest.java | 4 +- .../namespace/UriExtractionNamespace.java | 9 +-- .../namespace/UriExtractionNamespaceTest.java | 21 +++++++ .../msq/exec/MSQParseExceptionsTest.java | 4 +- .../apache/druid/msq/exec/MSQSelectTest.java | 4 +- .../data/input/s3/S3InputSourceTest.java | 6 +- .../indexer/BatchDeltaIngestionTest.java | 3 +- .../DetermineHashedPartitionsJobTest.java | 3 +- .../indexer/DeterminePartitionsJobTest.java | 3 +- .../DetermineRangePartitionsJobTest.java | 3 +- .../druid/indexer/IndexGeneratorJobTest.java | 12 ++-- .../apache/druid/indexer/JobHelperTest.java | 3 +- ...tasourceRecordReaderSegmentReaderTest.java | 3 +- .../indexer/path/DatasourcePathSpecTest.java | 3 +- .../task/CompactionTaskParallelRunTest.java | 4 +- 
.../common/task/CompactionTaskRunTest.java | 12 ++-- .../indexing/common/task/IndexTaskTest.java | 43 +++++++------- .../common/task/IngestionTestBase.java | 8 +-- ...stractParallelIndexSupervisorTaskTest.java | 7 ++- ...rtitionAdjustingCorePartitionSizeTest.java | 4 +- ...rtitionMultiPhaseParallelIndexingTest.java | 7 ++- .../parallel/HashPartitionTaskKillTest.java | 4 +- ...ultiPhaseParallelIndexingRowStatsTest.java | 3 +- .../ParallelIndexSupervisorTaskSerdeTest.java | 2 +- .../batch/parallel/PartialCompactionTest.java | 4 +- ...rtitionAdjustingCorePartitionSizeTest.java | 4 +- ...rtitionMultiPhaseParallelIndexingTest.java | 7 ++- .../parallel/RangePartitionTaskKillTest.java | 4 +- .../sampler/CsvInputSourceSamplerTest.java | 2 +- .../sampler/InputSourceSamplerTest.java | 5 +- .../RecordSupplierInputSourceTest.java | 4 +- ...SeekableStreamIndexTaskRunnerAuthTest.java | 2 +- .../druid/data/input/impl/CSVParseSpec.java | 20 +++++-- .../druid/data/input/impl/CsvInputFormat.java | 8 ++- .../data/input/impl/DelimitedInputFormat.java | 9 ++- .../data/input/impl/DelimitedParseSpec.java | 36 +++++------ .../data/input/impl/DelimitedValueReader.java | 7 ++- .../data/input/impl/FlatTextInputFormat.java | 13 +++- .../druid/data/input/impl/RegexReader.java | 2 +- .../parsers/AbstractFlatTextFormatParser.java | 5 +- .../java/util/common/parsers/CSVParser.java | 12 ++-- .../util/common/parsers/DelimitedParser.java | 12 ++-- .../java/util/common/parsers/ParserUtils.java | 36 +++++++++-- .../data/input/impl/CSVParseSpecTest.java | 3 +- .../data/input/impl/CsvInputFormatTest.java | 33 +++++++---- .../druid/data/input/impl/CsvReaderTest.java | 16 ++--- .../input/impl/DelimitedInputFormatTest.java | 57 +++++++++++++----- .../input/impl/DelimitedParseSpecTest.java | 9 ++- .../data/input/impl/DelimitedReaderTest.java | 44 +++++++++++--- .../impl/InputEntityIteratingReaderTest.java | 12 ++-- .../druid/data/input/impl/ParseSpecTest.java | 15 +++-- 
.../parsers/FlatTextFormatParserTest.java | 20 +++---- .../util/common/parsers/ParserUtilsTest.java | 59 ++++++++++++++++++- .../druid/query/MultiValuedDimensionTest.java | 3 +- .../druid/query/NestedDataTestUtils.java | 4 +- .../GroupByQueryRunnerFactoryTest.java | 3 +- .../org/apache/druid/segment/TestIndex.java | 3 +- .../model/table/CsvInputFormatTest.java | 2 +- .../model/table/DelimitedInputFormatTest.java | 2 +- .../model/table/ExternalTableTest.java | 2 +- .../table/InlineInputSourceDefnTest.java | 2 +- .../CalciteCatalogIngestionDmlTest.java | 16 ++--- .../sql/calcite/CalciteIngestionDmlTest.java | 2 +- .../sql/calcite/CalciteInsertDmlTest.java | 14 ++--- .../sql/calcite/IngestTableFunctionTest.java | 6 +- .../input-source-step/input-source-step.tsx | 8 +++ 73 files changed, 486 insertions(+), 241 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DelimitedInputFormatBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DelimitedInputFormatBenchmark.java index cabd58a4b7ea..008dcd9600a0 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DelimitedInputFormatBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DelimitedInputFormatBenchmark.java @@ -132,7 +132,7 @@ public void prepareData() throws Exception @Setup(Level.Trial) public void prepareFormat() { - format = new DelimitedInputFormat(fromHeader ? null : COLUMNS, null, "\t", null, fromHeader, fromHeader ? 0 : 1); + format = new DelimitedInputFormat(fromHeader ? null : COLUMNS, null, "\t", null, fromHeader, fromHeader ? 
0 : 1, null); } @Benchmark diff --git a/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java b/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java index 207cddf90b4b..12ad9fc8bf9b 100644 --- a/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java +++ b/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java @@ -537,7 +537,7 @@ public void testReader() throws IOException InputSourceReader reader = inputSource.reader( someSchema, - new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), + new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0, null), temporaryFolder.newFolder() ); @@ -584,7 +584,7 @@ public void testCompressedReader() throws IOException InputSourceReader reader = inputSource.reader( someSchema, - new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), + new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0, null), temporaryFolder.newFolder() ); diff --git a/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnTestBase.java b/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnTestBase.java index 29270c0df734..b7402984ca36 100644 --- a/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnTestBase.java +++ b/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnTestBase.java @@ -53,7 +53,8 @@ static IncrementalIndex generateIndex() throws IOException ",", Arrays.asList("ts", "dim", "keys", "values"), false, - 0 + 0, + null ), "utf8" ); diff --git a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java 
b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java index d89d5f7a20ce..ddac6809eb0e 100644 --- a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java +++ b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java @@ -81,8 +81,8 @@ public static InputFormat csvFormat() null, // listDelimiter false, // hasHeaderRow false, // findColumnsFromHeader - 0 // skipHeaderRows - ); + 0, // skipHeaderRows + null); } public static final ObjectMapper JSON_MAPPER = new DefaultObjectMapper(); diff --git a/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java b/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java index ae968aa3d6fa..77f0b9436fb5 100644 --- a/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java +++ b/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java @@ -406,7 +406,7 @@ public void testReader() throws IOException InputSourceReader reader = inputSource.reader( someSchema, - new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), + new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0, null), null ); @@ -453,7 +453,7 @@ public void testCompressedReader() throws IOException InputSourceReader reader = inputSource.reader( someSchema, - new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), + new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0, null), null ); diff --git a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java 
b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java index 10cff01c2b9f..ada3d6ecf00f 100644 --- a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java +++ b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java @@ -90,8 +90,8 @@ public class HdfsInputSourceTest extends InitializedNullHandlingTest null, false, null, - 0 - ); + 0, + null); public static class ConstructorTest { diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/data/input/kafkainput/KafkaInputFormatTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/data/input/kafkainput/KafkaInputFormatTest.java index adbb7c4b6779..5de194b439ca 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/data/input/kafkainput/KafkaInputFormatTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/data/input/kafkainput/KafkaInputFormatTest.java @@ -712,8 +712,8 @@ public void testValueInCsvFormat() throws IOException null, false, false, - 0 - ), + 0, + null), "kafka.newheader.", "kafka.newkey.key", "kafka.newts.timestamp", diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/data/input/kinesis/KinesisInputFormatTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/data/input/kinesis/KinesisInputFormatTest.java index 130f31681dee..4659ae5f2d57 100644 --- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/data/input/kinesis/KinesisInputFormatTest.java +++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/data/input/kinesis/KinesisInputFormatTest.java @@ -713,8 +713,8 @@ public void testValueInCsvFormat() throws IOException null, false, false, - 0 - ), + 0, + null), "kinesis.newts.partitionKey", "kinesis.newts.timestamp" ); diff --git 
a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java index e5ad9cd658e7..e1c103c0ca23 100644 --- a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java +++ b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java @@ -327,7 +327,7 @@ public CSVFlatDataParser( this.valueColumn, Arrays.toString(columns.toArray()) ); - CSVParser csvParser = new CSVParser(null, columns, hasHeaderRow, skipHeaderRows); + CSVParser csvParser = new CSVParser(null, columns, hasHeaderRow, skipHeaderRows, false); csvParser.startFileFromBeginning(); this.parser = new DelegateParser( csvParser, @@ -355,13 +355,13 @@ public List getColumns() @JsonProperty public String getKeyColumn() { - return this.keyColumn; + return keyColumn; } @JsonProperty public String getValueColumn() { - return this.valueColumn; + return valueColumn; } @Override @@ -431,7 +431,8 @@ public TSVFlatDataParser( StringUtils.emptyToNullNonDruidDataString(delimiter), StringUtils.emptyToNullNonDruidDataString(listDelimiter), hasHeaderRow, - skipHeaderRows + skipHeaderRows, + false ); delegate.startFileFromBeginning(); Preconditions.checkArgument( diff --git a/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java b/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java index dc50126f0661..cd92dcc747b7 100644 --- a/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java +++ b/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java @@ -115,6 
+115,27 @@ public void testCSVWithHeader() // The third row will parse to data Assert.assertEquals(ImmutableMap.of("val2", "val3"), parser.getParser().parseToMap("val1,val2,val3")); } + + @Test + public void testCSVWithHeaderWithNumbers() + { + UriExtractionNamespace.CSVFlatDataParser parser = new UriExtractionNamespace.CSVFlatDataParser( + ImmutableList.of("col1", "col2", "col3"), + "col2", + "col3", + true, + 1 + ); + // parser return empty list as the 1 row header need to be skipped. + Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("row to skip ")); + //Header also need to be skipped. + Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("col1,col2,col3")); + // test the header is parsed + Assert.assertEquals(ImmutableList.of("col1", "col2", "col3"), parser.getParser().getFieldNames()); + // The third row will parse to data + Assert.assertEquals(ImmutableMap.of("val2", "12"), parser.getParser().parseToMap("val1,val2,12")); + } + @Test(expected = IllegalArgumentException.class) public void testBadCSV() { diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQParseExceptionsTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQParseExceptionsTest.java index 879da23977e7..8577dcd18d4e 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQParseExceptionsTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQParseExceptionsTest.java @@ -157,7 +157,7 @@ public void testIngestWithSanitizedNullByte() throws IOException .dataSource( new ExternalDataSource( new LocalInputSource(null, null, ImmutableList.of(toRead), SystemFields.none()), - new CsvInputFormat(null, null, null, true, 0), + new CsvInputFormat(null, null, null, true, 0, null), RowSignature.builder() .add("timestamp", ColumnType.STRING) .add("agent_category", ColumnType.STRING) @@ -255,7 +255,7 @@ public void 
testIngestWithSanitizedNullByteUsingContextParameter() throws IOExce .dataSource( new ExternalDataSource( new LocalInputSource(null, null, ImmutableList.of(toRead), SystemFields.none()), - new CsvInputFormat(null, null, null, true, 0), + new CsvInputFormat(null, null, null, true, 0, null), RowSignature.builder() .add("timestamp", ColumnType.STRING) .add("agent_category", ColumnType.STRING) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java index 45ea0b2357ce..a5822f3a0b7f 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java @@ -1779,7 +1779,7 @@ public void testGroupByWithLimitAndOrdering(String contextName, Map readerIterator = reader.read()) { @@ -1111,7 +1111,7 @@ public void testCompressedReader() throws IOException InputSourceReader reader = inputSource.reader( someSchema, - new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), + new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0, null), temporaryFolder.newFolder() ); diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java index 1e4f62ca6e1f..99e0725de3f8 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java @@ -443,7 +443,8 @@ private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig( null, ImmutableList.of("timestamp", "host", "host2", "visited_num"), false, - 0 + 0, + null ), null ), diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java 
b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java index 24a8ee0ef7eb..a738da7edff8 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java @@ -183,7 +183,8 @@ public DetermineHashedPartitionsJobTest( "index" ), false, - 0 + 0, + null ), null ), diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java index a3c98f29565b..c0126f93d4b5 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java @@ -292,7 +292,8 @@ public DeterminePartitionsJobTest( null, ImmutableList.of("timestamp", "host", "country", "visited_num"), false, - 0 + 0, + false ), null ), diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java index e79d066ab55c..b7e87d38b370 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java @@ -349,7 +349,8 @@ public DetermineRangePartitionsJobTest( null, ImmutableList.of("timestamp", "host", "country", "visited_num"), false, - 0 + 0, + null ), null ), diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java index e14ade454f4c..409b101f326e 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java +++ 
b/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java @@ -149,7 +149,8 @@ public static Collection constructFeed() null, ImmutableList.of("timestamp", "host", "visited_num"), false, - 0 + 0, + null ), null ), @@ -198,7 +199,8 @@ public static Collection constructFeed() null, ImmutableList.of("timestamp", "host", "visited_num"), false, - 0 + 0, + null ) ), null, @@ -246,7 +248,8 @@ public static Collection constructFeed() null, ImmutableList.of("timestamp", "host", "visited_num"), false, - 0 + 0, + null ), null ), @@ -305,7 +308,8 @@ public static Collection constructFeed() null, ImmutableList.of("timestamp", "host", "visited_num"), false, - 0 + 0, + null ) ), null, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java index 7069e9a78de3..10beee54fe91 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java @@ -133,7 +133,8 @@ public void setup() throws Exception null, ImmutableList.of("timestamp", "host", "visited_num"), false, - 0 + 0, + null ), null ), diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/DatasourceRecordReaderSegmentReaderTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/DatasourceRecordReaderSegmentReaderTest.java index e7f1402606ff..8db30a7f3997 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/DatasourceRecordReaderSegmentReaderTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/DatasourceRecordReaderSegmentReaderTest.java @@ -216,7 +216,8 @@ private void createTestIndex(File segmentDir) throws Exception null, ImmutableList.of("timestamp", "host", "visited", "x", "y", "spatial"), false, - 0 + 0, + null ), StandardCharsets.UTF_8.toString() ); diff --git 
a/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/DatasourcePathSpecTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/DatasourcePathSpecTest.java index e8caea0256e0..2caaa2f22b30 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/DatasourcePathSpecTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/DatasourcePathSpecTest.java @@ -318,7 +318,8 @@ private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig() null, ImmutableList.of("timestamp", "host", "visited"), false, - 0 + 0, + null ), null ), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java index 1b742971eb95..06493e67b2a0 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java @@ -928,8 +928,8 @@ private void runIndexTask(@Nullable PartitionsSpec partitionsSpec, boolean appen "|", null, false, - 0 - ), + 0, + null), appendToExisting, null ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java index cdc7390eb2c3..1c8065939cd4 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java @@ -155,7 +155,8 @@ public class CompactionTaskRunTest extends IngestionTestBase "|", Arrays.asList("ts", "dim", "val"), false, - 0 + 0, + null ); private static final List TEST_ROWS = ImmutableList.of( @@ -1638,7 +1639,8 @@ public void testRunWithSpatialDimensions() throws Exception "|", Arrays.asList("ts", 
"dim", "x", "y", "val"), false, - 0 + 0, + null ); Pair indexTaskResult = runIndexTask(null, null, spatialSpec, spatialrows, false); verifySchema(indexTaskResult.rhs); @@ -1763,7 +1765,8 @@ public void testRunWithAutoCastDimensions() throws Exception "|", Arrays.asList("ts", "dim", "x", "y", "val"), false, - 0 + 0, + null ); Pair indexTaskResult = runIndexTask(null, null, spec, rows, false); verifySchema(indexTaskResult.rhs); @@ -1896,7 +1899,8 @@ public void testRunWithAutoCastDimensionsSortByDimension() throws Exception "|", Arrays.asList("ts", "dim", "x", "y", "val"), false, - 0 + 0, + null ); Pair indexTaskResult = runIndexTask(null, null, spec, rows, false); verifySchema(indexTaskResult.rhs); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index d03ccf465e57..b1826ef6eae3 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -156,15 +156,16 @@ public class IndexTaskTest extends IngestionTestBase null, Arrays.asList("ts", "dim", "val"), false, - 0 + 0, + null ); private static final InputFormat DEFAULT_INPUT_FORMAT = new CsvInputFormat( Arrays.asList("ts", "dim", "val"), null, null, false, - 0 - ); + 0, + null); @Parameterized.Parameters(name = "{0}, useInputFormatApi={1}") public static Iterable constructorFeeder() @@ -505,7 +506,7 @@ public void testTransformSpec() throws Exception indexIngestionSpec = createIngestionSpec( DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, - new CsvInputFormat(columns, listDelimiter, null, false, 0), + new CsvInputFormat(columns, listDelimiter, null, false, 0, null), transformSpec, null, tuningConfig, @@ -516,7 +517,7 @@ public void testTransformSpec() throws Exception indexIngestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new 
CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0), + new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0, null), transformSpec, null, tuningConfig, @@ -922,7 +923,7 @@ public void testCSVFileWithHeader() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, null, true, 0), + new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, null, true, 0, null), null, null, tuningConfig, @@ -933,7 +934,7 @@ public void testCSVFileWithHeader() throws Exception ingestionSpec = createIngestionSpec( timestampSpec, DimensionsSpec.EMPTY, - new CsvInputFormat(null, null, null, true, 0), + new CsvInputFormat(null, null, null, true, 0, null), null, null, tuningConfig, @@ -973,7 +974,7 @@ public void testCSVFileWithHeaderColumnOverride() throws Exception ingestionSpec = createIngestionSpec( timestampSpec, DimensionsSpec.EMPTY, - new CsvInputFormat(columns, null, null, true, 0), + new CsvInputFormat(columns, null, null, true, 0, null), null, null, tuningConfig, @@ -984,7 +985,7 @@ public void testCSVFileWithHeaderColumnOverride() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0), + new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0, null), null, null, tuningConfig, @@ -1373,7 +1374,7 @@ public void testIgnoreParseException() throws Exception parseExceptionIgnoreSpec = createIngestionSpec( timestampSpec, DimensionsSpec.EMPTY, - new CsvInputFormat(columns, null, null, true, 0), + new CsvInputFormat(columns, null, null, true, 0, null), null, null, tuningConfig, @@ -1384,7 +1385,7 @@ public void testIgnoreParseException() throws Exception parseExceptionIgnoreSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0), + new 
CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0, null), null, null, tuningConfig, @@ -1423,7 +1424,7 @@ public void testReportParseException() throws Exception indexIngestionSpec = createIngestionSpec( timestampSpec, DimensionsSpec.EMPTY, - new CsvInputFormat(columns, null, null, true, 0), + new CsvInputFormat(columns, null, null, true, 0, null), null, null, tuningConfig, @@ -1434,7 +1435,7 @@ public void testReportParseException() throws Exception indexIngestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0), + new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0, null), null, null, tuningConfig, @@ -1664,7 +1665,7 @@ public void testMultipleParseExceptionsFailure() throws Exception ingestionSpec = createIngestionSpec( timestampSpec, dimensionsSpec, - new CsvInputFormat(columns, null, null, true, 0), + new CsvInputFormat(columns, null, null, true, 0, null), null, null, tuningConfig, @@ -1675,7 +1676,7 @@ public void testMultipleParseExceptionsFailure() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0), + new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0, null), null, null, tuningConfig, @@ -1783,7 +1784,7 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc ingestionSpec = createIngestionSpec( timestampSpec, dimensionsSpec, - new CsvInputFormat(columns, null, null, true, 0), + new CsvInputFormat(columns, null, null, true, 0, null), null, null, tuningConfig, @@ -1794,7 +1795,7 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0), + new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0, null), null, null, 
tuningConfig, @@ -1877,7 +1878,7 @@ public void testCsvWithHeaderOfEmptyColumns() throws Exception ingestionSpec = createIngestionSpec( DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, - new CsvInputFormat(null, null, null, true, 0), + new CsvInputFormat(null, null, null, true, 0, null), null, null, tuningConfig, @@ -1888,7 +1889,7 @@ public void testCsvWithHeaderOfEmptyColumns() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, null, null, true, 0), + new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, null, null, true, 0, null), null, null, tuningConfig, @@ -1947,7 +1948,7 @@ public void testCsvWithHeaderOfEmptyTimestamp() throws Exception ingestionSpec = createIngestionSpec( DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, - new CsvInputFormat(columns, null, null, true, 0), + new CsvInputFormat(columns, null, null, true, 0, null), null, null, tuningConfig, @@ -1958,7 +1959,7 @@ public void testCsvWithHeaderOfEmptyTimestamp() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, null, columns, true, 0), + new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, null, columns, true, 0, null), null, null, tuningConfig, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index 67a0c518f57c..d02024a097de 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -313,8 +313,8 @@ public static InputFormat createInputFormatFromParseSpec(ParseSpec parseSpec) csvParseSpec.getListDelimiter(), getColumnsFromHeader ? null : true, getColumnsFromHeader ? 
true : null, - csvParseSpec.getSkipHeaderRows() - ); + csvParseSpec.getSkipHeaderRows(), + null); } else if (parseSpec instanceof DelimitedParseSpec) { DelimitedParseSpec delimitedParseSpec = (DelimitedParseSpec) parseSpec; boolean getColumnsFromHeader = delimitedParseSpec.isHasHeaderRow() && delimitedParseSpec.getSkipHeaderRows() == 0; @@ -324,8 +324,8 @@ public static InputFormat createInputFormatFromParseSpec(ParseSpec parseSpec) delimitedParseSpec.getDelimiter(), getColumnsFromHeader ? null : true, getColumnsFromHeader ? true : null, - delimitedParseSpec.getSkipHeaderRows() - ); + delimitedParseSpec.getSkipHeaderRows(), + null); } else if (parseSpec instanceof RegexParseSpec) { RegexParseSpec regexParseSpec = (RegexParseSpec) parseSpec; return new RegexInputFormat( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index 1cd6a29e10d2..9d4ad944f44b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -156,15 +156,16 @@ public class AbstractParallelIndexSupervisorTaskTest extends IngestionTestBase null, Arrays.asList("ts", "dim", "val"), false, - 0 + 0, + null ); static final InputFormat DEFAULT_INPUT_FORMAT = new CsvInputFormat( Arrays.asList("ts", "dim", "val"), null, false, false, - 0 - ); + 0, + null); public static final ParallelIndexTuningConfig DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING = TuningConfigBuilder.forParallelIndexTask() .withMaxNumConcurrentSubTasks(2) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java index 0839ed044057..d669b3be034b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java @@ -61,8 +61,8 @@ public class HashPartitionAdjustingCorePartitionSizeTest extends AbstractMultiPh null, false, false, - 0 - ); + 0, + null); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2020-01-01/P1M"); @Parameterized.Parameters(name = "{0}, maxNumConcurrentSubTasks={1}") diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java index cb58d6f79dfc..d269d07acbdd 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java @@ -77,15 +77,16 @@ public class HashPartitionMultiPhaseParallelIndexingTest extends AbstractMultiPh null, Arrays.asList("ts", "dim1", "dim2", "val"), false, - 0 + 0, + null ); private static final InputFormat INPUT_FORMAT = new CsvInputFormat( Arrays.asList("ts", "dim1", "dim2", "val"), null, false, false, - 0 - ); + 0, + null); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2017-12/P1M"); private static final String INPUT_FILTER = "test_*"; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java index b8c59d042a31..040e3a247896 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java @@ -80,8 +80,8 @@ public class HashPartitionTaskKillTest extends AbstractMultiPhaseParallelIndexin null, false, false, - 0 - ); + 0, + null); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2017-12/P1M"); private File inputDir; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingRowStatsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingRowStatsTest.java index 9e910a668157..26f1788cc59c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingRowStatsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingRowStatsTest.java @@ -61,7 +61,8 @@ public class MultiPhaseParallelIndexingRowStatsTest extends AbstractMultiPhasePa null, Arrays.asList("ts", "dim1", "dim2", "val"), false, - 0 + 0, + null ); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2017-12/P1M"); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java index e11fb2ef001c..e025ce14d0a4 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java @@ -183,7 +183,7 @@ private static class ParallelIndexIngestionSpecBuilder private final ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig( new LocalInputSource(new File("tmp"), "test_*"), - new CsvInputFormat(Arrays.asList("ts", "dim", "val"), null, null, false, 0), + new CsvInputFormat(Arrays.asList("ts", "dim", "val"), null, null, false, 0, null), false, null ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java index 9962dfcd821c..f33a21c9a0a7 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java @@ -64,8 +64,8 @@ public class PartialCompactionTest extends AbstractMultiPhaseParallelIndexingTes null, false, false, - 0 - ); + 0, + null); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2017-12/P1M"); private File inputDir; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java index 65b58fb6fd50..715f63b10559 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java @@ -61,8 +61,8 @@ public class RangePartitionAdjustingCorePartitionSizeTest extends AbstractMultiP null, false, false, - 0 - ); + 0, + 
null); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2020-01-01/P1M"); @Parameterized.Parameters(name = "{0}, maxNumConcurrentSubTasks={1}") diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java index 9c04ce6c6ee4..221b36416465 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java @@ -103,15 +103,16 @@ public class RangePartitionMultiPhaseParallelIndexingTest extends AbstractMultiP LIST_DELIMITER, Arrays.asList(TIME, DIM1, DIM2, "val"), false, - 0 + 0, + null ); private static final InputFormat INPUT_FORMAT = new CsvInputFormat( Arrays.asList(TIME, DIM1, DIM2, "val"), LIST_DELIMITER, false, false, - 0 - ); + 0, + null); @Parameterized.Parameters(name = "{0}, useInputFormatApi={1}, maxNumConcurrentSubTasks={2}, useMultiValueDim={3}, intervalToIndex={4}") public static Iterable constructorFeeder() diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionTaskKillTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionTaskKillTest.java index 96494b8ac794..35b33a77f5bf 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionTaskKillTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionTaskKillTest.java @@ -85,8 +85,8 @@ public class RangePartitionTaskKillTest extends AbstractMultiPhaseParallelIndexi LIST_DELIMITER, false, false, - 0 - ); + 0, + null); private File inputDir; diff 
--git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/CsvInputSourceSamplerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/CsvInputSourceSamplerTest.java index e788545507cd..115f27957995 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/CsvInputSourceSamplerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/CsvInputSourceSamplerTest.java @@ -64,7 +64,7 @@ public void testCSVColumnAllNull() "Michael,Jackson,,Male" ); final InputSource inputSource = new InlineInputSource(String.join("\n", strCsvRows)); - final InputFormat inputFormat = new CsvInputFormat(null, null, null, true, 0); + final InputFormat inputFormat = new CsvInputFormat(null, null, null, true, 0, null); final InputSourceSampler inputSourceSampler = new InputSourceSampler(new DefaultObjectMapper()); final SamplerResponse response = inputSourceSampler.sample( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java index 087b12cef40e..b743b79291fe 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java @@ -1460,7 +1460,7 @@ private InputFormat createInputFormat() case STR_JSON: return new JsonInputFormat(null, null, null, null, null); case STR_CSV: - return new CsvInputFormat(ImmutableList.of("t", "dim1", "dim2", "met1"), null, null, false, 0); + return new CsvInputFormat(ImmutableList.of("t", "dim1", "dim2", "met1"), null, null, false, 0, null); default: throw new IAE("Unknown parser type: %s", parserType); } @@ -1480,7 +1480,8 @@ private InputRowParser createInputRowParser(TimestampSpec timestampSpec, Dimensi null, ImmutableList.of("t", 
"dim1", "dim2", "met1"), false, - 0 + 0, + null ) ); default: diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/RecordSupplierInputSourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/RecordSupplierInputSourceTest.java index 75a9bd0831ba..b1176d991252 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/RecordSupplierInputSourceTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/RecordSupplierInputSourceTest.java @@ -80,7 +80,7 @@ public void testRead() throws IOException final List colNames = IntStream.range(0, NUM_COLS) .mapToObj(i -> StringUtils.format("col_%d", i)) .collect(Collectors.toList()); - final InputFormat inputFormat = new CsvInputFormat(colNames, null, null, false, 0); + final InputFormat inputFormat = new CsvInputFormat(colNames, null, null, false, 0, null); final InputSourceReader reader = inputSource.reader( new InputRowSchema( new TimestampSpec("col_0", "auto", null), @@ -114,7 +114,7 @@ public void testReadTimeout() throws IOException final List colNames = IntStream.range(0, NUM_COLS) .mapToObj(i -> StringUtils.format("col_%d", i)) .collect(Collectors.toList()); - final InputFormat inputFormat = new CsvInputFormat(colNames, null, null, false, 0); + final InputFormat inputFormat = new CsvInputFormat(colNames, null, null, false, 0, null); final InputSourceReader reader = inputSource.reader( new InputRowSchema( new TimestampSpec("col_0", "auto", null), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunnerAuthTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunnerAuthTest.java index 0f280059e0a5..c4df93fcd8b2 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunnerAuthTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunnerAuthTest.java @@ -385,7 +385,7 @@ public TestSeekableStreamIndexTaskIOConfig() false, DateTimes.nowUtc().minusDays(2), DateTimes.nowUtc(), - new CsvInputFormat(null, null, true, null, 0) + new CsvInputFormat(null, null, true, null, 0, null) ); } } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/CSVParseSpec.java b/processing/src/main/java/org/apache/druid/data/input/impl/CSVParseSpec.java index 81c8a26ecd3a..e324f8dc2987 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/CSVParseSpec.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/CSVParseSpec.java @@ -27,6 +27,8 @@ import java.util.List; +import javax.annotation.Nullable; + /** */ public class CSVParseSpec extends ParseSpec @@ -35,6 +37,7 @@ public class CSVParseSpec extends ParseSpec private final List columns; private final boolean hasHeaderRow; private final int skipHeaderRows; + private final boolean shouldParseNumbers; @JsonCreator public CSVParseSpec( @@ -43,7 +46,8 @@ public CSVParseSpec( @JsonProperty("listDelimiter") String listDelimiter, @JsonProperty("columns") List columns, @JsonProperty("hasHeaderRow") boolean hasHeaderRow, - @JsonProperty("skipHeaderRows") int skipHeaderRows + @JsonProperty("skipHeaderRows") int skipHeaderRows, + @JsonProperty("shouldParseNumbers") @Nullable Boolean shouldParseNumbers ) { super(timestampSpec, dimensionsSpec); @@ -52,6 +56,7 @@ public CSVParseSpec( this.columns = columns; this.hasHeaderRow = hasHeaderRow; this.skipHeaderRows = skipHeaderRows; + this.shouldParseNumbers = shouldParseNumbers == null ? 
false : shouldParseNumbers; if (columns != null) { for (String column : columns) { @@ -90,21 +95,28 @@ public int getSkipHeaderRows() return skipHeaderRows; } + @JsonProperty("shouldParseNumbers") + public boolean shouldParseNumbers() + { + return shouldParseNumbers; + } + + @Override public Parser makeParser() { - return new CSVParser(listDelimiter, columns, hasHeaderRow, skipHeaderRows); + return new CSVParser(listDelimiter, columns, hasHeaderRow, skipHeaderRows, shouldParseNumbers); } @Override public ParseSpec withTimestampSpec(TimestampSpec spec) { - return new CSVParseSpec(spec, getDimensionsSpec(), listDelimiter, columns, hasHeaderRow, skipHeaderRows); + return new CSVParseSpec(spec, getDimensionsSpec(), listDelimiter, columns, hasHeaderRow, skipHeaderRows, shouldParseNumbers); } @Override public ParseSpec withDimensionsSpec(DimensionsSpec spec) { - return new CSVParseSpec(getTimestampSpec(), spec, listDelimiter, columns, hasHeaderRow, skipHeaderRows); + return new CSVParseSpec(getTimestampSpec(), spec, listDelimiter, columns, hasHeaderRow, skipHeaderRows, shouldParseNumbers); } } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java b/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java index a041e031a3b1..f1dcdb96c61f 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java @@ -47,10 +47,11 @@ public CsvInputFormat( @JsonProperty("listDelimiter") @Nullable String listDelimiter, @Deprecated @JsonProperty("hasHeaderRow") @Nullable Boolean hasHeaderRow, @JsonProperty("findColumnsFromHeader") @Nullable Boolean findColumnsFromHeader, - @JsonProperty("skipHeaderRows") int skipHeaderRows + @JsonProperty("skipHeaderRows") int skipHeaderRows, + @JsonProperty("shouldParseNumbers") @Nullable Boolean shouldParseNumbers ) { - super(columns, listDelimiter, String.valueOf(SEPARATOR), 
hasHeaderRow, findColumnsFromHeader, skipHeaderRows); + super(columns, listDelimiter, String.valueOf(SEPARATOR), hasHeaderRow, findColumnsFromHeader, skipHeaderRows, shouldParseNumbers); } @Override @@ -80,7 +81,8 @@ public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity isFindColumnsFromHeader(), getSkipHeaderRows(), line -> Arrays.asList(parser.parseLine(StringUtils.fromUtf8(line))), - useListBasedInputRows() + useListBasedInputRows(), + shouldParseNumbers() ); } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedInputFormat.java b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedInputFormat.java index 238a2f4dc2ca..8192de2984db 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedInputFormat.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedInputFormat.java @@ -52,7 +52,8 @@ public DelimitedInputFormat( @JsonProperty("delimiter") @Nullable String delimiter, @Deprecated @JsonProperty("hasHeaderRow") @Nullable Boolean hasHeaderRow, @JsonProperty("findColumnsFromHeader") @Nullable Boolean findColumnsFromHeader, - @JsonProperty("skipHeaderRows") int skipHeaderRows + @JsonProperty("skipHeaderRows") int skipHeaderRows, + @JsonProperty("shouldParseNumbers") @Nullable Boolean shouldParseNumbers ) { super( @@ -61,7 +62,8 @@ public DelimitedInputFormat( delimiter == null ? DEFAULT_DELIMITER : delimiter, hasHeaderRow, findColumnsFromHeader, - skipHeaderRows + skipHeaderRows, + shouldParseNumbers ); } @@ -85,7 +87,8 @@ public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity getDelimiter(), useListBasedInputRows() ?
getColumns().size() : DelimitedBytes.UNKNOWN_FIELD_COUNT ), - useListBasedInputRows() + useListBasedInputRows(), + shouldParseNumbers() ); } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedParseSpec.java b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedParseSpec.java index 5940e70e11fd..52d9ddba6516 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedParseSpec.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedParseSpec.java @@ -27,6 +27,8 @@ import java.util.List; +import javax.annotation.Nullable; + /** */ public class DelimitedParseSpec extends ParseSpec @@ -36,6 +38,7 @@ public class DelimitedParseSpec extends ParseSpec private final List columns; private final boolean hasHeaderRow; private final int skipHeaderRows; + private final boolean shouldParseNumbers; @JsonCreator public DelimitedParseSpec( @@ -45,8 +48,9 @@ public DelimitedParseSpec( @JsonProperty("listDelimiter") String listDelimiter, @JsonProperty("columns") List columns, @JsonProperty("hasHeaderRow") boolean hasHeaderRow, - @JsonProperty("skipHeaderRows") int skipHeaderRows - ) + @JsonProperty("skipHeaderRows") int skipHeaderRows, + @JsonProperty("shouldParseNumbers") @Nullable Boolean shouldParseNumbers + ) { super(timestampSpec, dimensionsSpec); @@ -55,6 +59,7 @@ public DelimitedParseSpec( this.columns = columns; this.hasHeaderRow = hasHeaderRow; this.skipHeaderRows = skipHeaderRows; + this.shouldParseNumbers = shouldParseNumbers == null ?
false : shouldParseNumbers; if (columns != null) { for (String column : this.columns) { @@ -69,18 +74,6 @@ public DelimitedParseSpec( } } - @Deprecated - public DelimitedParseSpec( - TimestampSpec timestampSpec, - DimensionsSpec dimensionsSpec, - String delimiter, - String listDelimiter, - List columns - ) - { - this(timestampSpec, dimensionsSpec, delimiter, listDelimiter, columns, false, 0); - } - @JsonProperty("delimiter") public String getDelimiter() { @@ -111,6 +104,12 @@ public int getSkipHeaderRows() return skipHeaderRows; } + @JsonProperty("shouldParseNumbers") + public boolean shouldParseNumbers() + { + return shouldParseNumbers; + } + @Override public Parser makeParser() { @@ -119,7 +118,8 @@ public Parser makeParser() listDelimiter, columns, hasHeaderRow, - skipHeaderRows + skipHeaderRows, + shouldParseNumbers ); } @@ -133,7 +133,8 @@ public ParseSpec withTimestampSpec(TimestampSpec spec) listDelimiter, columns, hasHeaderRow, - skipHeaderRows + skipHeaderRows, + shouldParseNumbers ); } @@ -147,7 +148,8 @@ public ParseSpec withDimensionsSpec(DimensionsSpec spec) listDelimiter, columns, hasHeaderRow, - skipHeaderRows + skipHeaderRows, + shouldParseNumbers ); } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java index 227edffbcb01..b4784a3dc107 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java @@ -68,6 +68,7 @@ public class DelimitedValueReader extends TextReader.Bytes @Nullable private List inputRowDimensions; private final boolean useListBasedInputRows; + private final boolean shouldParseNumbers; interface DelimitedValueParser { @@ -82,14 +83,16 @@ interface DelimitedValueParser boolean findColumnsFromHeader, int skipHeaderRows, DelimitedValueParser parser, - boolean useListBasedInputRows + 
boolean useListBasedInputRows, + boolean shouldParseNumbers ) { super(inputRowSchema, source); this.findColumnsFromHeader = findColumnsFromHeader; this.skipHeaderRows = skipHeaderRows; final String finalListDelimeter = listDelimiter == null ? Parsers.DEFAULT_LIST_DELIMITER : listDelimiter; - this.multiValueFunction = ParserUtils.getMultiValueFunction(finalListDelimeter, Splitter.on(finalListDelimeter)); + this.shouldParseNumbers = shouldParseNumbers; + this.multiValueFunction = ParserUtils.getMultiValueFunction(finalListDelimeter, Splitter.on(finalListDelimeter), shouldParseNumbers); if (!findColumnsFromHeader && columns != null) { // If findColumnsFromHeader, inputRowSignature will be set later. diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/FlatTextInputFormat.java b/processing/src/main/java/org/apache/druid/data/input/impl/FlatTextInputFormat.java index 8e8e052cd000..6e92344dbd90 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/FlatTextInputFormat.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/FlatTextInputFormat.java @@ -46,6 +46,7 @@ public abstract class FlatTextInputFormat implements InputFormat private final String delimiter; private final boolean findColumnsFromHeader; private final int skipHeaderRows; + private final boolean shouldParseNumbers; FlatTextInputFormat( @Nullable List columns, @@ -53,7 +54,8 @@ public abstract class FlatTextInputFormat implements InputFormat String delimiter, @Nullable Boolean hasHeaderRow, @Nullable Boolean findColumnsFromHeader, - int skipHeaderRows + int skipHeaderRows, + @Nullable Boolean shouldParseNumbers ) { this.columns = columns == null ? Collections.emptyList() : columns; @@ -79,6 +81,8 @@ public abstract class FlatTextInputFormat implements InputFormat "Cannot have same delimiter and list delimiter of [%s]", delimiter ); + this.shouldParseNumbers = shouldParseNumbers == null ? 
false : shouldParseNumbers; + if (!this.columns.isEmpty()) { for (String column : this.columns) { Preconditions.checkArgument( @@ -131,6 +135,13 @@ public int getSkipHeaderRows() return skipHeaderRows; } + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + public boolean shouldParseNumbers() + { + return shouldParseNumbers; + } + @Override public long getWeightedSize(String path, long size) { diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java b/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java index aabf4fe1603a..cd08e39aa21f 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java @@ -60,7 +60,7 @@ public class RegexReader extends TextReader.Strings this.pattern = pattern; this.compiledPattern = compiledPattern; final String finalListDelimeter = listDelimiter == null ? Parsers.DEFAULT_LIST_DELIMITER : listDelimiter; - this.multiValueFunction = ParserUtils.getMultiValueFunction(finalListDelimeter, Splitter.on(finalListDelimeter)); + this.multiValueFunction = ParserUtils.getMultiValueFunction(finalListDelimeter, Splitter.on(finalListDelimeter), false); this.columns = columns; } diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java index 1826b73857a9..993af425f35a 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java @@ -64,11 +64,12 @@ public String getDefaultDelimiter() public AbstractFlatTextFormatParser( @Nullable final String listDelimiter, final boolean hasHeaderRow, - final int maxSkipHeaderRows + final int maxSkipHeaderRows, + final boolean 
shouldParseNumbers ) { this.listDelimiter = listDelimiter != null ? listDelimiter : Parsers.DEFAULT_LIST_DELIMITER; - this.valueFunction = ParserUtils.getMultiValueFunction(this.listDelimiter, Splitter.on(this.listDelimiter)); + this.valueFunction = ParserUtils.getMultiValueFunction(this.listDelimiter, Splitter.on(this.listDelimiter), shouldParseNumbers); this.hasHeaderRow = hasHeaderRow; this.maxSkipHeaderRows = maxSkipHeaderRows; diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java index 49f53dacfc78..9195b72fb9b2 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java @@ -35,20 +35,22 @@ public class CSVParser extends AbstractFlatTextFormatParser public CSVParser( @Nullable final String listDelimiter, final boolean hasHeaderRow, - final int maxSkipHeaderRows + final int maxSkipHeaderRows, + final boolean shouldParseNumbers ) { - super(listDelimiter, hasHeaderRow, maxSkipHeaderRows); + super(listDelimiter, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); } public CSVParser( @Nullable final String listDelimiter, final Iterable fieldNames, final boolean hasHeaderRow, - final int maxSkipHeaderRows + final int maxSkipHeaderRows, + final boolean shouldParseNumbers ) { - this(listDelimiter, hasHeaderRow, maxSkipHeaderRows); + this(listDelimiter, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); setFieldNames(fieldNames); } @@ -56,7 +58,7 @@ public CSVParser( @VisibleForTesting CSVParser(@Nullable final String listDelimiter, final String header) { - this(listDelimiter, false, 0); + this(listDelimiter, false, 0, false); setFieldNames(header); } diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java 
b/processing/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java index df75cc3f7343..0f9d3b4748e1 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java @@ -35,10 +35,11 @@ public DelimitedParser( @Nullable final String delimiter, @Nullable final String listDelimiter, final boolean hasHeaderRow, - final int maxSkipHeaderRows + final int maxSkipHeaderRows, + final boolean shouldParseNumbers ) { - super(listDelimiter, hasHeaderRow, maxSkipHeaderRows); + super(listDelimiter, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); final String finalDelimiter = delimiter != null ? delimiter : FlatTextFormat.DELIMITED.getDefaultDelimiter(); Preconditions.checkState( @@ -55,10 +56,11 @@ public DelimitedParser( @Nullable final String listDelimiter, final Iterable fieldNames, final boolean hasHeaderRow, - final int maxSkipHeaderRows + final int maxSkipHeaderRows, + final boolean shouldParseNumbers ) { - this(delimiter, listDelimiter, hasHeaderRow, maxSkipHeaderRows); + this(delimiter, listDelimiter, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); setFieldNames(fieldNames); } @@ -66,7 +68,7 @@ public DelimitedParser( @VisibleForTesting DelimitedParser(@Nullable final String delimiter, @Nullable final String listDelimiter, final String header) { - this(delimiter, listDelimiter, false, 0); + this(delimiter, listDelimiter, false, 0, false); setFieldNames(header); } diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index 045fec91d86e..775155fdc2ed 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -22,6 +22,7 @@ import 
com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Splitter; +import org.apache.commons.lang.math.NumberUtils; import org.apache.druid.common.config.NullHandling; import org.joda.time.DateTimeZone; @@ -54,20 +55,45 @@ public class ParserUtils public static Function getMultiValueFunction( final String listDelimiter, - final Splitter listSplitter + final Splitter listSplitter, + final boolean shouldParseNumbers ) { return (input) -> { - if (input != null && input.contains(listDelimiter)) { - return StreamSupport.stream(listSplitter.split(input).spliterator(), false) - .map(NullHandling::emptyToNullIfNeeded) - .collect(Collectors.toList()); + if (input != null) { + if (input.contains(listDelimiter)) { + return StreamSupport.stream(listSplitter.split(input).spliterator(), false) + .map(NullHandling::emptyToNullIfNeeded) + .map(value -> shouldParseNumbers ? ParserUtils.tryParseStringAsNumber(value) : value) + .collect(Collectors.toList()); + } else { + return shouldParseNumbers ? tryParseStringAsNumber(input) : NullHandling.emptyToNullIfNeeded(input); + } } else { return NullHandling.emptyToNullIfNeeded(input); } }; } + @Nullable + public static Object tryParseStringAsNumber(@Nullable final String input) + { + if (!NumberUtils.isNumber(input)) { + return input; + } + + try { + // see if it's a long, if not try parsing as a double.
+ return Long.parseLong(input); + } catch (NumberFormatException e1) { + try { + return Double.parseDouble(input); + } catch (NumberFormatException e2) { + return input; + } + } + } + public static ArrayList generateFieldNames(int length) { final ArrayList names = new ArrayList<>(length); diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CSVParseSpecTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CSVParseSpecTest.java index c8299085c0ea..6cd350250c08 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CSVParseSpecTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/CSVParseSpecTest.java @@ -40,7 +40,8 @@ public void testComma() ",", Collections.singletonList("a,"), false, - 0 + 0, + null ); } } diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java index 7768e09d9dc3..7510928626d9 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java @@ -44,7 +44,7 @@ public class CsvInputFormatTest extends InitializedNullHandlingTest public void testSerde() throws IOException { final ObjectMapper mapper = new ObjectMapper(); - final CsvInputFormat format = new CsvInputFormat(Collections.singletonList("a"), "|", null, true, 10); + final CsvInputFormat format = new CsvInputFormat(Collections.singletonList("a"), "|", null, true, 10, null); final byte[] bytes = mapper.writeValueAsBytes(format); final CsvInputFormat fromJson = (CsvInputFormat) mapper.readValue(bytes, InputFormat.class); Assert.assertEquals(format, fromJson); @@ -127,12 +127,23 @@ public void testDeserializeWithoutAnyProperties() ); } + @Test + public void testDeserializeWithShouldParseNumbers() throws IOException + { + final ObjectMapper mapper = new ObjectMapper(); + final CsvInputFormat 
inputFormat = (CsvInputFormat) mapper.readValue( + "{\"type\":\"csv\",\"hasHeaderRow\":true,\"shouldParseNumbers\":true}", + InputFormat.class + ); + Assert.assertTrue(inputFormat.shouldParseNumbers()); + } + @Test public void testComma() { expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("Column[a,] cannot have the delimiter[,] in its name"); - new CsvInputFormat(Collections.singletonList("a,"), "|", null, false, 0); + new CsvInputFormat(Collections.singletonList("a,"), "|", null, false, 0, null); } @Test @@ -140,20 +151,20 @@ public void testDelimiter() { expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("Cannot have same delimiter and list delimiter of [,]"); - new CsvInputFormat(Collections.singletonList("a\t"), ",", null, false, 0); + new CsvInputFormat(Collections.singletonList("a\t"), ",", null, false, 0, null); } @Test public void testFindColumnsFromHeaderWithColumnsReturningItsValue() { - final CsvInputFormat format = new CsvInputFormat(Collections.singletonList("a"), null, null, true, 0); + final CsvInputFormat format = new CsvInputFormat(Collections.singletonList("a"), null, null, true, 0, null); Assert.assertTrue(format.isFindColumnsFromHeader()); } @Test public void testFindColumnsFromHeaderWithMissingColumnsReturningItsValue() { - final CsvInputFormat format = new CsvInputFormat(null, null, null, true, 0); + final CsvInputFormat format = new CsvInputFormat(null, null, null, true, 0, null); Assert.assertTrue(format.isFindColumnsFromHeader()); } @@ -162,13 +173,13 @@ public void testMissingFindColumnsFromHeaderWithMissingColumnsThrowingError() { expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("Either [columns] or [findColumnsFromHeader] must be set"); - new CsvInputFormat(null, null, null, null, 0); + new CsvInputFormat(null, null, null, null, 0, null); } @Test public void testMissingFindColumnsFromHeaderWithColumnsReturningFalse() 
{ - final CsvInputFormat format = new CsvInputFormat(Collections.singletonList("a"), null, null, null, 0); + final CsvInputFormat format = new CsvInputFormat(Collections.singletonList("a"), null, null, null, 0, null); Assert.assertFalse(format.isFindColumnsFromHeader()); } @@ -177,20 +188,20 @@ public void testHasHeaderRowWithMissingFindColumnsThrowingError() { expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("Cannot accept both [findColumnsFromHeader] and [hasHeaderRow]"); - new CsvInputFormat(null, null, true, false, 0); + new CsvInputFormat(null, null, true, false, 0, null); } @Test public void testHasHeaderRowWithMissingColumnsReturningItsValue() { - final CsvInputFormat format = new CsvInputFormat(null, null, true, null, 0); + final CsvInputFormat format = new CsvInputFormat(null, null, true, null, 0, null); Assert.assertTrue(format.isFindColumnsFromHeader()); } @Test public void test_getWeightedSize_withoutCompression() { - final CsvInputFormat format = new CsvInputFormat(null, null, true, null, 0); + final CsvInputFormat format = new CsvInputFormat(null, null, true, null, 0, null); final long unweightedSize = 100L; Assert.assertEquals(unweightedSize, format.getWeightedSize("file.csv", unweightedSize)); } @@ -198,7 +209,7 @@ public void test_getWeightedSize_withoutCompression() @Test public void test_getWeightedSize_withGzCompression() { - final CsvInputFormat format = new CsvInputFormat(null, null, true, null, 0); + final CsvInputFormat format = new CsvInputFormat(null, null, true, null, 0, null); final long unweightedSize = 100L; Assert.assertEquals( unweightedSize * CompressionUtils.COMPRESSED_TEXT_WEIGHT_FACTOR, diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java index ae8c8709a1d3..198947239838 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java +++ 
b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java @@ -66,7 +66,7 @@ public void testWithoutHeaders() throws IOException "2019-01-01T00:00:30Z,name_3,15" ) ); - final CsvInputFormat format = new CsvInputFormat(ImmutableList.of("ts", "name", "score"), null, null, false, 0); + final CsvInputFormat format = new CsvInputFormat(ImmutableList.of("ts", "name", "score"), null, null, false, 0, null); assertResult(source, format); } @@ -81,7 +81,7 @@ public void testFindColumn() throws IOException "2019-01-01T00:00:30Z,name_3,15" ) ); - final CsvInputFormat format = new CsvInputFormat(ImmutableList.of(), null, null, true, 0); + final CsvInputFormat format = new CsvInputFormat(ImmutableList.of(), null, null, true, 0, null); assertResult(source, format); } @@ -96,7 +96,7 @@ public void testSkipHeaders() throws IOException "2019-01-01T00:00:30Z,name_3,15" ) ); - final CsvInputFormat format = new CsvInputFormat(ImmutableList.of("ts", "name", "score"), null, null, false, 1); + final CsvInputFormat format = new CsvInputFormat(ImmutableList.of("ts", "name", "score"), null, null, false, 1, null); assertResult(source, format); } @@ -112,7 +112,7 @@ public void testFindColumnAndSkipHeaders() throws IOException "2019-01-01T00:00:30Z,name_3,15" ) ); - final CsvInputFormat format = new CsvInputFormat(ImmutableList.of(), null, null, true, 1); + final CsvInputFormat format = new CsvInputFormat(ImmutableList.of(), null, null, true, 1, null); assertResult(source, format); } @@ -127,7 +127,7 @@ public void testMultiValues() throws IOException "2019-01-01T00:00:30Z,name_3,15|3" ) ); - final CsvInputFormat format = new CsvInputFormat(ImmutableList.of(), "|", null, true, 0); + final CsvInputFormat format = new CsvInputFormat(ImmutableList.of(), "|", null, true, 0, null); final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null); int numResults = 0; try (CloseableIterator iterator = reader.read()) { @@ -227,8 +227,8 @@ public void 
testQuotes() throws IOException null, null, false, - 0 - ); + 0, + null); final InputEntityReader reader = format.createReader( new InputRowSchema( new TimestampSpec("Timestamp", "auto", null), @@ -256,7 +256,7 @@ public void testRussianTextMess() throws IOException "2019-01-01T00:00:10Z,name_1,\"Как говорится: \\\"\"всё течет, всё изменяется\\\"\". Украина как всегда обвиняет Россию в собственных проблемах. #ПровокацияКиева\"" ) ); - final CsvInputFormat format = new CsvInputFormat(ImmutableList.of("ts", "name", "Comment"), null, null, false, 0); + final CsvInputFormat format = new CsvInputFormat(ImmutableList.of("ts", "name", "Comment"), null, null, false, 0, null); final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null); try (CloseableIterator iterator = reader.read()) { Assert.assertTrue(iterator.hasNext()); diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java index 9cf6db93c3f2..36f039bf943f 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java @@ -45,7 +45,8 @@ public void testSerde() throws IOException "delim", null, true, - 10 + 10, + null ); final byte[] bytes = mapper.writeValueAsBytes(format); final DelimitedInputFormat fromJson = (DelimitedInputFormat) mapper.readValue(bytes, InputFormat.class); @@ -57,7 +58,7 @@ public void testTab() { expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("Column[a\t] cannot have the delimiter[\t] in its name"); - new DelimitedInputFormat(Collections.singletonList("a\t"), ",", null, null, false, 0); + new DelimitedInputFormat(Collections.singletonList("a\t"), ",", null, null, false, 0, null); } @Test @@ -65,7 +66,7 @@ public void testDelimiterAndListDelimiter() { 
expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("Cannot have same delimiter and list delimiter of [,]"); - new DelimitedInputFormat(Collections.singletonList("a\t"), ",", ",", null, false, 0); + new DelimitedInputFormat(Collections.singletonList("a\t"), ",", ",", null, false, 0, null); } @Test @@ -73,7 +74,7 @@ public void testCustomizeSeparator() { expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("Column[a|] cannot have the delimiter[|] in its name"); - new DelimitedInputFormat(Collections.singletonList("a|"), ",", "|", null, false, 0); + new DelimitedInputFormat(Collections.singletonList("a|"), ",", "|", null, false, 0, null); } @Test @@ -85,8 +86,8 @@ public void testFindColumnsFromHeaderWithColumnsReturningItsValue() "delim", null, true, - 0 - ); + 0, + null); Assert.assertTrue(format.isFindColumnsFromHeader()); } @@ -99,17 +100,43 @@ public void testFindColumnsFromHeaderWithMissingColumnsReturningItsValue() "delim", null, true, - 0 - ); + 0, + null); Assert.assertTrue(format.isFindColumnsFromHeader()); } + @Test + public void testShouldParseNumbers() + { + final DelimitedInputFormat format = new DelimitedInputFormat( + null, + null, + "delim", + null, + true, + 0, + true + ); + Assert.assertTrue(format.shouldParseNumbers()); + } + + @Test + public void testDeserializeWithShouldParseNumbers() throws IOException + { + final ObjectMapper mapper = new ObjectMapper(); + final DelimitedInputFormat inputFormat = (DelimitedInputFormat) mapper.readValue( + "{\"type\":\"tsv\",\"hasHeaderRow\":true,\"shouldParseNumbers\":true}", + InputFormat.class + ); + Assert.assertTrue(inputFormat.shouldParseNumbers()); + } + @Test public void testMissingFindColumnsFromHeaderWithMissingColumnsThrowingError() { expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("Either [columns] or [findColumnsFromHeader] must be set"); - new DelimitedInputFormat(null, null, 
"delim", null, null, 0); + new DelimitedInputFormat(null, null, "delim", null, null, 0, null); } @Test @@ -121,8 +148,8 @@ public void testMissingFindColumnsFromHeaderWithColumnsReturningFalse() "delim", null, null, - 0 - ); + 0, + null); Assert.assertFalse(format.isFindColumnsFromHeader()); } @@ -131,19 +158,19 @@ public void testHasHeaderRowWithMissingFindColumnsThrowingError() { expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("Cannot accept both [findColumnsFromHeader] and [hasHeaderRow]"); - new DelimitedInputFormat(null, null, "delim", true, false, 0); + new DelimitedInputFormat(null, null, "delim", true, false, 0, null); } @Test public void testHasHeaderRowWithMissingColumnsReturningItsValue() { - final DelimitedInputFormat format = new DelimitedInputFormat(null, null, "delim", true, null, 0); + final DelimitedInputFormat format = new DelimitedInputFormat(null, null, "delim", true, null, 0, null); Assert.assertTrue(format.isFindColumnsFromHeader()); } @Test public void test_getWeightedSize_withoutCompression() { - final DelimitedInputFormat format = new DelimitedInputFormat(null, null, "delim", true, null, 0); + final DelimitedInputFormat format = new DelimitedInputFormat(null, null, "delim", true, null, 0, null); final long unweightedSize = 100L; Assert.assertEquals(unweightedSize, format.getWeightedSize("file.tsv", unweightedSize)); } @@ -151,7 +178,7 @@ public void test_getWeightedSize_withoutCompression() @Test public void test_getWeightedSize_withGzCompression() { - final DelimitedInputFormat format = new DelimitedInputFormat(null, null, "delim", true, null, 0); + final DelimitedInputFormat format = new DelimitedInputFormat(null, null, "delim", true, null, 0, null); final long unweightedSize = 100L; Assert.assertEquals( unweightedSize * CompressionUtils.COMPRESSED_TEXT_WEIGHT_FACTOR, diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedParseSpecTest.java 
b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedParseSpecTest.java index 0d3b48640351..cfc9d75641fc 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedParseSpecTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedParseSpecTest.java @@ -42,7 +42,8 @@ public void testSerde() throws IOException "\u0002", Collections.singletonList("abc"), false, - 0 + 0, + null ); final DelimitedParseSpec serde = jsonMapper.readValue( jsonMapper.writeValueAsString(spec), @@ -72,7 +73,8 @@ public void testComma() null, Collections.singletonList("a,"), false, - 0 + 0, + null ); } @@ -91,7 +93,8 @@ public void testDefaultColumnList() null, null, false, - 0 + 0, + null ); } } diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java index c98d8fff6a85..58c05f43be7c 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java @@ -69,9 +69,32 @@ public void testWithoutHeaders() throws IOException null, null, false, - 0 + 0, + null); + assertResult(source, format); + // use this test! + } + + @Test + public void testWithoutHeadersNumberizeStrings() throws IOException + { + final ByteEntity source = writeData( + ImmutableList.of( + "2019-01-01T00:00:10Z\tname_1\t5", + "2019-01-01T00:00:20Z\tname_2\t10", + "2019-01-01T00:00:30Z\tname_3\t15" + ) ); + final DelimitedInputFormat format = new DelimitedInputFormat( + ImmutableList.of("ts", "name", "score"), + null, + null, + null, + false, + 0, + false); assertResult(source, format); + // use this test! 
} @Test @@ -85,7 +108,7 @@ public void testFindColumn() throws IOException "2019-01-01T00:00:30Z\tname_3\t15" ) ); - final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), null, null, null, true, 0); + final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), null, null, null, true, 0, null); assertResult(source, format); } @@ -106,8 +129,8 @@ public void testSkipHeaders() throws IOException null, null, false, - 1 - ); + 1, + null); assertResult(source, format); } @@ -123,7 +146,7 @@ public void testFindColumnAndSkipHeaders() throws IOException "2019-01-01T00:00:30Z\tname_3\t15" ) ); - final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), null, null, null, true, 1); + final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), null, null, null, true, 1, null); assertResult(source, format); } @@ -138,7 +161,7 @@ public void testMultiValues() throws IOException "2019-01-01T00:00:30Z\tname_3\t15|3" ) ); - final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), "|", null, null, true, 0); + final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), "|", null, null, true, 0, null); final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null); int numResults = 0; try (CloseableIterator iterator = reader.read()) { @@ -173,7 +196,7 @@ public void testCustomizeSeparator() throws IOException "2019-01-01T00:00:30Z|name_3|15\t3" ) ); - final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), "\t", "|", null, true, 0); + final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), "\t", "|", null, true, 0, null); final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null); int numResults = 0; try (CloseableIterator iterator = reader.read()) { @@ -211,8 +234,8 @@ public void testRussianTextMess() throws IOException null, null, false, - 0 - ); + 0, 
+ null); final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null); try (CloseableIterator iterator = reader.read()) { Assert.assertTrue(iterator.hasNext()); @@ -256,6 +279,9 @@ private void assertResult(ByteEntity source, DelimitedInputFormat format) throws StringUtils.format("name_%d", numResults + 1), Iterables.getOnlyElement(row.getDimension("name")) ); + System.out.println("Score" + row.getRaw("score")); + System.out.println("Score instanceof String" + (row.getRaw("score") instanceof String)); + System.out.println("Score instanceof Long" + (row.getRaw("score") instanceof Long)); Assert.assertEquals( Integer.toString((numResults + 1) * 5), Iterables.getOnlyElement(row.getDimension("score")) diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java index 5f1b5f365fb8..75a17ccae11b 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java @@ -84,8 +84,8 @@ public void test() throws IOException null, null, false, - 0 - ), + 0, + null), CloseableIterators.withEmptyBaggage( files.stream().flatMap(file -> ImmutableList.of(new FileEntity(file)).stream()).iterator() ), @@ -152,8 +152,8 @@ public void testSampleWithSystemFields() throws IOException null, null, false, - 0 - ), + 0, + null), CloseableIterators.withEmptyBaggage( files.stream().flatMap(file -> ImmutableList.of(new FileEntity(file)).stream()).iterator() ), @@ -200,8 +200,8 @@ public void testIncorrectURI() throws IOException, URISyntaxException null, null, false, - 0 - ), + 0, + null), CloseableIterators.withEmptyBaggage( ImmutableList.of( new HttpEntity(new URI("testscheme://some/path"), null, null, null) diff --git 
a/processing/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java index 96f7fd9ee394..07d3d0cb5d52 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java @@ -73,7 +73,8 @@ public void testDuplicateNames() " ", Arrays.asList("a", "b"), false, - 0 + 0, + null ); } @@ -95,7 +96,8 @@ public void testDimAndDimExcluOverlap() null, Arrays.asList("a", "B"), false, - 0 + 0, + null ); } @@ -117,7 +119,8 @@ public void testDimExclusionDuplicate() null, Arrays.asList("a", "B"), false, - 0 + 0, + null ); } @@ -137,7 +140,8 @@ public void testDefaultTimestampSpec() null, Arrays.asList("a", "B"), false, - 0 + 0, + null ); } @@ -158,7 +162,8 @@ public void testDimensionSpecRequired() null, Arrays.asList("a", "B"), false, - 0 + 0, + null ); } diff --git a/processing/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java b/processing/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java index d9a498a026df..412e4b6a97c6 100644 --- a/processing/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java +++ b/processing/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java @@ -112,7 +112,7 @@ public void testWithoutHeader() public void testWithSkipHeaderRows() { final int skipHeaderRows = 2; - final Parser parser = PARSER_FACTORY.get(format, false, skipHeaderRows); + final Parser parser = PARSER_FACTORY.get(format, false, skipHeaderRows, false); parser.startFileFromBeginning(); final String[] body = new String[]{ concat(format, "header", "line", "1"), @@ -134,7 +134,7 @@ public void testWithSkipHeaderRows() @Test public void testWithHeaderRow() { - final Parser parser = PARSER_FACTORY.get(format, true, 0); + final Parser parser = PARSER_FACTORY.get(format, 
true, 0, false); parser.startFileFromBeginning(); final String[] body = new String[]{ concat(format, "time", "value1", "value2"), @@ -152,7 +152,7 @@ public void testWithHeaderRow() @Test public void testWithHeaderRowOfEmptyColumns() { - final Parser parser = PARSER_FACTORY.get(format, true, 0); + final Parser parser = PARSER_FACTORY.get(format, true, 0, false); parser.startFileFromBeginning(); final String[] body = new String[]{ concat(format, "time", "", "value2", ""), @@ -170,7 +170,7 @@ public void testWithHeaderRowOfEmptyColumns() @Test public void testWithDifferentHeaderRows() { - final Parser parser = PARSER_FACTORY.get(format, true, 0); + final Parser parser = PARSER_FACTORY.get(format, true, 0, false); parser.startFileFromBeginning(); final String[] body = new String[]{ concat(format, "time", "value1", "value2"), @@ -207,7 +207,7 @@ public void testWithoutStartFileFromBeginning() ); final int skipHeaderRows = 2; - final Parser parser = PARSER_FACTORY.get(format, false, skipHeaderRows); + final Parser parser = PARSER_FACTORY.get(format, false, skipHeaderRows, false); final String[] body = new String[]{ concat(format, "header", "line", "1"), concat(format, "header", "line", "2"), @@ -219,7 +219,7 @@ public void testWithoutStartFileFromBeginning() @Test public void testWithNullValues() { - final Parser parser = PARSER_FACTORY.get(format, true, 0); + final Parser parser = PARSER_FACTORY.get(format, true, 0, false); parser.startFileFromBeginning(); final String[] body = new String[]{ concat(format, "time", "value1", "value2"), @@ -234,16 +234,16 @@ private static class FlatTextFormatParserFactory { public Parser get(FlatTextFormat format) { - return get(format, false, 0); + return get(format, false, 0, false); } - public Parser get(FlatTextFormat format, boolean hasHeaderRow, int maxSkipHeaderRows) + public Parser get(FlatTextFormat format, boolean hasHeaderRow, int maxSkipHeaderRows, boolean shouldParseNumbers) { switch (format) { case CSV: - return new 
CSVParser(null, hasHeaderRow, maxSkipHeaderRows); + return new CSVParser(null, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); case DELIMITED: - return new DelimitedParser("\t", null, hasHeaderRow, maxSkipHeaderRows); + return new DelimitedParser("\t", null, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); default: throw new IAE("Unknown format[%s]", format); } diff --git a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java index 564573302548..5d102423a6ba 100644 --- a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java +++ b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java @@ -19,27 +19,80 @@ package org.apache.druid.java.util.common.parsers; +import com.google.common.base.Splitter; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; -import org.junit.Assert; +import org.apache.druid.common.config.NullHandling; import org.junit.Test; import java.util.Collections; import java.util.List; +import static org.apache.druid.java.util.common.parsers.ParserUtils.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + public class ParserUtilsTest { + static { + NullHandling.initializeForTests(); + } + @Test public void testFindDuplicatesMixedCases() { final List fields = ImmutableList.of("f1", "f2", "F1", "F2", "f3"); - Assert.assertEquals(Collections.emptySet(), ParserUtils.findDuplicates(fields)); + assertEquals(Collections.emptySet(), findDuplicates(fields)); } @Test public void testFindDuplicates() { final List fields = ImmutableList.of("f1", "f2", "F1", "F2", "f1", "F2"); - Assert.assertEquals(ImmutableSet.of("f1", "F2"), ParserUtils.findDuplicates(fields)); + assertEquals(ImmutableSet.of("f1", "F2"), findDuplicates(fields)); + } + + @Test + public void 
testGetMultiValueFunction() + { + assertEquals( + ImmutableList.of("foo", "boo"), + getMultiValueFunction("|", Splitter.on("|"), true).apply("foo|boo") + ); + assertEquals( + ImmutableList.of(1L, 2L, 3L), + getMultiValueFunction("|", Splitter.on("|"), true).apply("1|2|3") + ); + assertEquals( + ImmutableList.of(1L, -2L, 3L, 0L, -2L), + getMultiValueFunction("|", Splitter.on("|"), true).apply("1|-2|3|0|-2") + ); + assertEquals( + 1.23, + getMultiValueFunction("|", Splitter.on("|"), true).apply("1.23") + ); + + // Some mixed types + assertEquals( + ImmutableList.of(-1.23, 3.13, 23L), + getMultiValueFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23") + ); + assertEquals( + ImmutableList.of(-1.23, 3.13, 23L, "foo", -9L), + getMultiValueFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23|foo|-9") + ); + } + + @Test + public void testTryParseStringAsNumber() + { + assertNull(tryParseStringAsNumber(null)); + assertEquals("", tryParseStringAsNumber("")); + assertEquals("a", tryParseStringAsNumber("a")); + assertEquals("ab", tryParseStringAsNumber("ab")); + assertEquals(12L, tryParseStringAsNumber("12")); + assertEquals(12.234, tryParseStringAsNumber("12.234")); + assertEquals(-1L, tryParseStringAsNumber("-1")); + assertEquals(-12.234, tryParseStringAsNumber("-12.234")); } } diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index a7abce4943c9..77649f8fcbbc 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -152,7 +152,8 @@ public void setup() throws Exception "\t", ImmutableList.of("timestamp", "product", "tags", "othertags"), false, - 0 + 0, + null ), "UTF-8" ); diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java 
b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index 23215b95d1e4..0c85c9b169c8 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -147,8 +147,8 @@ public class NestedDataTestUtils null, false, false, - 0 - ); + 0, + null); public static final TransformSpec SIMPLE_DATA_TSV_TRANSFORM = new TransformSpec( null, diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerFactoryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerFactoryTest.java index bc8563552cd1..ff089e82db63 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerFactoryTest.java @@ -153,7 +153,8 @@ private Segment createSegment() throws Exception "\t", ImmutableList.of("timestamp", "product", "tags"), false, - 0 + 0, + null ), "UTF-8" ); diff --git a/processing/src/test/java/org/apache/druid/segment/TestIndex.java b/processing/src/test/java/org/apache/druid/segment/TestIndex.java index d0c93a270d0f..d4a6e644f7be 100644 --- a/processing/src/test/java/org/apache/druid/segment/TestIndex.java +++ b/processing/src/test/java/org/apache/druid/segment/TestIndex.java @@ -473,7 +473,8 @@ public static IncrementalIndex loadIncrementalIndex( "\u0001", Arrays.asList(COLUMNS), false, - 0 + 0, + null ), "utf8" ); diff --git a/server/src/test/java/org/apache/druid/catalog/model/table/CsvInputFormatTest.java b/server/src/test/java/org/apache/druid/catalog/model/table/CsvInputFormatTest.java index b2995f1838be..fb77e6bdb15f 100644 --- a/server/src/test/java/org/apache/druid/catalog/model/table/CsvInputFormatTest.java +++ b/server/src/test/java/org/apache/druid/catalog/model/table/CsvInputFormatTest.java @@ -66,7 +66,7 @@ public void testDefaults() public void testConversion() { 
CsvInputFormat format = new CsvInputFormat( - Collections.singletonList("a"), ";", false, false, 1); + Collections.singletonList("a"), ";", false, false, 1, null); TableMetadata table = TableBuilder.external("foo") .inputSource(toMap(new InlineInputSource("a\n"))) .inputFormat(formatToMap(format)) diff --git a/server/src/test/java/org/apache/druid/catalog/model/table/DelimitedInputFormatTest.java b/server/src/test/java/org/apache/druid/catalog/model/table/DelimitedInputFormatTest.java index 04494ec7c343..158b5855e9ee 100644 --- a/server/src/test/java/org/apache/druid/catalog/model/table/DelimitedInputFormatTest.java +++ b/server/src/test/java/org/apache/druid/catalog/model/table/DelimitedInputFormatTest.java @@ -74,7 +74,7 @@ public void testDefaults() public void testConversion() { DelimitedInputFormat format = new DelimitedInputFormat( - Collections.singletonList("a"), ";", "|", false, false, 1); + Collections.singletonList("a"), ";", "|", false, false, 1, null); TableMetadata table = TableBuilder.external("foo") .inputSource(toMap(new InlineInputSource("a\n"))) .inputFormat(formatToMap(format)) diff --git a/server/src/test/java/org/apache/druid/catalog/model/table/ExternalTableTest.java b/server/src/test/java/org/apache/druid/catalog/model/table/ExternalTableTest.java index 1992f98e2ffe..acbea3547031 100644 --- a/server/src/test/java/org/apache/druid/catalog/model/table/ExternalTableTest.java +++ b/server/src/test/java/org/apache/druid/catalog/model/table/ExternalTableTest.java @@ -122,7 +122,7 @@ public void testValidateSourceAndFormat() { // Format is given without columns: it is validated CsvInputFormat format = new CsvInputFormat( - Collections.singletonList("a"), ";", false, false, 0); + Collections.singletonList("a"), ";", false, false, 0, null); TableMetadata table = TableBuilder.external("foo") .inputSource(toMap(new InlineInputSource("a\n"))) .inputFormat(formatToMap(format)) diff --git 
a/server/src/test/java/org/apache/druid/catalog/model/table/InlineInputSourceDefnTest.java b/server/src/test/java/org/apache/druid/catalog/model/table/InlineInputSourceDefnTest.java index bb3b2354bac9..c80b843ce550 100644 --- a/server/src/test/java/org/apache/druid/catalog/model/table/InlineInputSourceDefnTest.java +++ b/server/src/test/java/org/apache/druid/catalog/model/table/InlineInputSourceDefnTest.java @@ -194,7 +194,7 @@ public void testDefinedTable() { // Define an inline table CsvInputFormat format = new CsvInputFormat( - Collections.singletonList("a"), ";", false, false, 0); + Collections.singletonList("a"), ";", false, false, 0, null); TableMetadata table = TableBuilder.external("foo") .inputSource(toMap(new InlineInputSource("a,b\nc,d"))) .inputFormat(formatToMap(format)) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCatalogIngestionDmlTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCatalogIngestionDmlTest.java index 813722e6990e..60daa769b61b 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCatalogIngestionDmlTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCatalogIngestionDmlTest.java @@ -479,7 +479,7 @@ public void testInsertAddNonDefinedColumnIntoNonSealedCatalogTable() { ExternalDataSource externalDataSource = new ExternalDataSource( new InlineInputSource("2022-12-26T12:34:56,extra,10,\"20\",foo\n"), - new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0), + new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0, null), RowSignature.builder() .add("a", ColumnType.STRING) .add("b", ColumnType.STRING) @@ -541,7 +541,7 @@ public void testInsertTableWithClusteringWithClusteringFromCatalog() { ExternalDataSource externalDataSource = new ExternalDataSource( new InlineInputSource("2022-12-26T12:34:56,extra,10,\"20\",foo\n"), - new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0), + new 
CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0, null), RowSignature.builder() .add("a", ColumnType.STRING) .add("b", ColumnType.STRING) @@ -603,7 +603,7 @@ public void testInsertTableWithClusteringWithClusteringFromQuery() { ExternalDataSource externalDataSource = new ExternalDataSource( new InlineInputSource("2022-12-26T12:34:56,extra,10,\"20\",foo\n"), - new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0), + new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0, null), RowSignature.builder() .add("a", ColumnType.STRING) .add("b", ColumnType.STRING) @@ -665,7 +665,7 @@ public void testInsertTableWithClusteringWithClusteringOnNewColumnFromQuery() { ExternalDataSource externalDataSource = new ExternalDataSource( new InlineInputSource("2022-12-26T12:34:56,extra,10,\"20\",foo\n"), - new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0), + new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0, null), RowSignature.builder() .add("a", ColumnType.STRING) .add("b", ColumnType.STRING) @@ -834,7 +834,7 @@ public void testGroupByInsertAddNonDefinedColumnIntoNonSealedCatalogTable() { ExternalDataSource externalDataSource = new ExternalDataSource( new InlineInputSource("2022-12-26T12:34:56,extra,10,\"20\",foo\n"), - new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0), + new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0, null), RowSignature.builder() .add("a", ColumnType.STRING) .add("b", ColumnType.STRING) @@ -953,7 +953,7 @@ public void testInsertAddNonDefinedColumnIntoSealedCatalogTableAndValidationDisa { ExternalDataSource externalDataSource = new ExternalDataSource( new InlineInputSource("2022-12-26T12:34:56,extra,10,\"20\",foo\n"), - new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0), + new 
CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0, null), RowSignature.builder() .add("a", ColumnType.STRING) .add("b", ColumnType.STRING) @@ -1012,7 +1012,7 @@ public void testInsertWithSourceIntoCatalogTable() { ExternalDataSource externalDataSource = new ExternalDataSource( new InlineInputSource("2022-12-26T12:34:56,extra,10,\"20\",foo\n"), - new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0), + new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0, null), RowSignature.builder() .add("a", ColumnType.STRING) .add("b", ColumnType.STRING) @@ -1077,7 +1077,7 @@ public void testGroupByInsertWithSourceIntoCatalogTable() { ExternalDataSource externalDataSource = new ExternalDataSource( new InlineInputSource("2022-12-26T12:34:56,extra,10,\"20\",foo\n"), - new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0), + new CsvInputFormat(ImmutableList.of("a", "b", "c", "d", "e"), null, false, false, 0, null), RowSignature.builder() .add("a", ColumnType.STRING) .add("b", ColumnType.STRING) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteIngestionDmlTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteIngestionDmlTest.java index e26803fa498a..8edbce199965 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteIngestionDmlTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteIngestionDmlTest.java @@ -116,7 +116,7 @@ public class CalciteIngestionDmlTest extends BaseCalciteQueryTest protected final ExternalDataSource externalDataSource = new ExternalDataSource( new InlineInputSource("a,b,1\nc,d,2\n"), - new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0), + new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0, null), RowSignature.builder() .add("x", ColumnType.STRING) .add("y", ColumnType.STRING) diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java index fbcb0735c86b..61d955c3be7f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java @@ -369,7 +369,7 @@ public void testInsertFromExternalWithSchema() ), Calcites.escapeStringLiteral( queryJsonMapper.writeValueAsString( - new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0) + new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0, null) ) ) ); @@ -414,7 +414,7 @@ public void testInsertFromExternalWithSchemaWithInputsourceSecurity() ), Calcites.escapeStringLiteral( queryJsonMapper.writeValueAsString( - new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0) + new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0, null) ) ) ); @@ -458,7 +458,7 @@ public void testInsertFromExternalFunctionalStyleWithSchemaWithInputsourceSecuri new InlineInputSource("a,b,1\nc,d,2\n") ), queryJsonMapper.writeValueAsString( - new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0) + new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0, null) ) ); } @@ -494,7 +494,7 @@ public void testInsertFromExternalWithoutSecuritySupport() new TestFileInputSource(ImmutableList.of(new File("/tmp/foo.csv").getAbsoluteFile())); final ExternalDataSource externalDataSource = new ExternalDataSource( inputSource, - new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0), + new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0, null), RowSignature.builder() .add("x", ColumnType.STRING) .add("y", ColumnType.STRING) @@ -510,7 +510,7 @@ public void testInsertFromExternalWithoutSecuritySupport() + "inputFormat => '%s'))", queryJsonMapper.writeValueAsString(inputSource), 
queryJsonMapper.writeValueAsString( - new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0) + new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0, null) ) ); } @@ -552,7 +552,7 @@ public void testInsertFromExternalWithoutSecuritySupportWithInputsourceSecurityE queryJsonMapper.writeValueAsString( new TestFileInputSource(ImmutableList.of(new File("/tmp/foo.csv").getAbsoluteFile()))), queryJsonMapper.writeValueAsString( - new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0) + new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0, null) ) ); } @@ -1488,7 +1488,7 @@ public void testInsertOnExternalDataSourceWithIncompatibleTimeColumnSignature() { ExternalDataSource restrictedSignature = new ExternalDataSource( new InlineInputSource("100\nc200\n"), - new CsvInputFormat(ImmutableList.of("__time"), null, false, false, 0), + new CsvInputFormat(ImmutableList.of("__time"), null, false, false, 0, null), RowSignature.builder() .add("__time", ColumnType.STRING) .build() diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java index 7401477e6d79..3e187c3e305f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java @@ -90,7 +90,7 @@ protected static URI toURI(String uri) null, new HttpInputSourceConfig(null, null) ), - new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0), + new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0, null), RowSignature.builder() .add("x", ColumnType.STRING) .add("y", ColumnType.STRING) @@ -264,7 +264,7 @@ public void testHttpFn2() ImmutableMap.of("Accept", "application/ndjson", "a", "b"), new HttpInputSourceConfig(null, null) ), - new CsvInputFormat(ImmutableList.of("timestamp", "isRobot"), null, false, 
false, 0), + new CsvInputFormat(ImmutableList.of("timestamp", "isRobot"), null, false, false, 0, null), RowSignature.builder() .add("timestamp", ColumnType.STRING) .add("isRobot", ColumnType.STRING) @@ -556,7 +556,7 @@ public void testInlineFn() Arrays.asList(new File("/tmp/foo.csv"), new File("/tmp/bar.csv")), SystemFields.none() ), - new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0), + new CsvInputFormat(ImmutableList.of("x", "y", "z"), null, false, false, 0, null), RowSignature.builder() .add("x", ColumnType.STRING) .add("y", ColumnType.STRING) diff --git a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx index 6d483cf44100..881b1e2d8b6b 100644 --- a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx +++ b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx @@ -127,6 +127,13 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS const sampleResponse = await postToSampler(sampleSpec, 'input-source-step'); sampleLines = filterMap(sampleResponse.data, l => (l.input ? l.input.raw : undefined)); + // console.log('sample response: ', sampleResponse); + // console.log('sample response.data: ', sampleResponse.data); + // sampleLines = filterMap(sampleResponse.data, l => (l.input ? l.input.raw : undefined)); + // sampleLines = filterMap(sampleResponse.data, l => + // l.input ? JSON.stringify(l.input) : undefined, + // ); + // console.log('sample lines: ', sampleLines); } else { const tableExpression = externalConfigToTableExpression({ inputSource, @@ -225,6 +232,7 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
{renderIngestionCard('s3')} {renderIngestionCard('azureStorage')} + {renderIngestionCard('delta')} {renderIngestionCard('google')} {renderIngestionCard('hdfs')} {renderIngestionCard('http')} From df2706cd82f40369cf51305fa7c8cc4b1fb48942 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 10:50:11 -0400 Subject: [PATCH 02/24] Remove parameter from deprecated parse specs. --- .../segment/MapVirtualColumnTestBase.java | 3 +-- .../indexer/BatchDeltaIngestionTest.java | 3 +-- .../DetermineHashedPartitionsJobTest.java | 3 +-- .../indexer/DeterminePartitionsJobTest.java | 3 +-- .../DetermineRangePartitionsJobTest.java | 3 +-- .../druid/indexer/IndexGeneratorJobTest.java | 12 ++++------ .../apache/druid/indexer/JobHelperTest.java | 3 +-- ...tasourceRecordReaderSegmentReaderTest.java | 3 +-- .../indexer/path/DatasourcePathSpecTest.java | 3 +-- .../common/task/CompactionTaskRunTest.java | 12 ++++------ .../indexing/common/task/IndexTaskTest.java | 21 ++++++++--------- ...stractParallelIndexSupervisorTaskTest.java | 3 +-- ...rtitionMultiPhaseParallelIndexingTest.java | 3 +-- ...ultiPhaseParallelIndexingRowStatsTest.java | 3 +-- ...rtitionMultiPhaseParallelIndexingTest.java | 3 +-- .../sampler/InputSourceSamplerTest.java | 3 +-- .../druid/data/input/impl/CSVParseSpec.java | 20 ++++------------ .../data/input/impl/DelimitedParseSpec.java | 23 ++++--------------- .../data/input/impl/CSVParseSpecTest.java | 3 +-- .../input/impl/DelimitedParseSpecTest.java | 9 +++----- .../druid/data/input/impl/ParseSpecTest.java | 15 ++++-------- .../druid/query/MultiValuedDimensionTest.java | 3 +-- .../GroupByQueryRunnerFactoryTest.java | 3 +-- .../org/apache/druid/segment/TestIndex.java | 3 +-- 24 files changed, 52 insertions(+), 111 deletions(-) diff --git a/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnTestBase.java 
b/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnTestBase.java index b7402984ca36..29270c0df734 100644 --- a/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnTestBase.java +++ b/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnTestBase.java @@ -53,8 +53,7 @@ static IncrementalIndex generateIndex() throws IOException ",", Arrays.asList("ts", "dim", "keys", "values"), false, - 0, - null + 0 ), "utf8" ); diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java index 99e0725de3f8..1e4f62ca6e1f 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java @@ -443,8 +443,7 @@ private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig( null, ImmutableList.of("timestamp", "host", "host2", "visited_num"), false, - 0, - null + 0 ), null ), diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java index a738da7edff8..24a8ee0ef7eb 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java @@ -183,8 +183,7 @@ public DetermineHashedPartitionsJobTest( "index" ), false, - 0, - null + 0 ), null ), diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java index c0126f93d4b5..a3c98f29565b 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java +++ 
b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java @@ -292,8 +292,7 @@ public DeterminePartitionsJobTest( null, ImmutableList.of("timestamp", "host", "country", "visited_num"), false, - 0, - false + 0 ), null ), diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java index b7e87d38b370..e79d066ab55c 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java @@ -349,8 +349,7 @@ public DetermineRangePartitionsJobTest( null, ImmutableList.of("timestamp", "host", "country", "visited_num"), false, - 0, - null + 0 ), null ), diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java index 409b101f326e..e14ade454f4c 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java @@ -149,8 +149,7 @@ public static Collection constructFeed() null, ImmutableList.of("timestamp", "host", "visited_num"), false, - 0, - null + 0 ), null ), @@ -199,8 +198,7 @@ public static Collection constructFeed() null, ImmutableList.of("timestamp", "host", "visited_num"), false, - 0, - null + 0 ) ), null, @@ -248,8 +246,7 @@ public static Collection constructFeed() null, ImmutableList.of("timestamp", "host", "visited_num"), false, - 0, - null + 0 ), null ), @@ -308,8 +305,7 @@ public static Collection constructFeed() null, ImmutableList.of("timestamp", "host", "visited_num"), false, - 0, - null + 0 ) ), null, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java 
b/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java index 10beee54fe91..7069e9a78de3 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java @@ -133,8 +133,7 @@ public void setup() throws Exception null, ImmutableList.of("timestamp", "host", "visited_num"), false, - 0, - null + 0 ), null ), diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/DatasourceRecordReaderSegmentReaderTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/DatasourceRecordReaderSegmentReaderTest.java index 8db30a7f3997..e7f1402606ff 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/DatasourceRecordReaderSegmentReaderTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/DatasourceRecordReaderSegmentReaderTest.java @@ -216,8 +216,7 @@ private void createTestIndex(File segmentDir) throws Exception null, ImmutableList.of("timestamp", "host", "visited", "x", "y", "spatial"), false, - 0, - null + 0 ), StandardCharsets.UTF_8.toString() ); diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/DatasourcePathSpecTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/DatasourcePathSpecTest.java index 2caaa2f22b30..e8caea0256e0 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/DatasourcePathSpecTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/DatasourcePathSpecTest.java @@ -318,8 +318,7 @@ private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig() null, ImmutableList.of("timestamp", "host", "visited"), false, - 0, - null + 0 ), null ), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java index 1c8065939cd4..cdc7390eb2c3 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java @@ -155,8 +155,7 @@ public class CompactionTaskRunTest extends IngestionTestBase "|", Arrays.asList("ts", "dim", "val"), false, - 0, - null + 0 ); private static final List TEST_ROWS = ImmutableList.of( @@ -1639,8 +1638,7 @@ public void testRunWithSpatialDimensions() throws Exception "|", Arrays.asList("ts", "dim", "x", "y", "val"), false, - 0, - null + 0 ); Pair indexTaskResult = runIndexTask(null, null, spatialSpec, spatialrows, false); verifySchema(indexTaskResult.rhs); @@ -1765,8 +1763,7 @@ public void testRunWithAutoCastDimensions() throws Exception "|", Arrays.asList("ts", "dim", "x", "y", "val"), false, - 0, - null + 0 ); Pair indexTaskResult = runIndexTask(null, null, spec, rows, false); verifySchema(indexTaskResult.rhs); @@ -1899,8 +1896,7 @@ public void testRunWithAutoCastDimensionsSortByDimension() throws Exception "|", Arrays.asList("ts", "dim", "x", "y", "val"), false, - 0, - null + 0 ); Pair indexTaskResult = runIndexTask(null, null, spec, rows, false); verifySchema(indexTaskResult.rhs); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index b1826ef6eae3..69806f12a250 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -156,8 +156,7 @@ public class IndexTaskTest extends IngestionTestBase null, Arrays.asList("ts", "dim", "val"), false, - 0, - null + 0 ); private static final InputFormat DEFAULT_INPUT_FORMAT = new CsvInputFormat( Arrays.asList("ts", "dim", "val"), @@ -517,7 +516,7 @@ public void testTransformSpec() throws Exception indexIngestionSpec = createIngestionSpec( 
jsonMapper, tmpDir, - new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0, null), + new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0), transformSpec, null, tuningConfig, @@ -923,7 +922,7 @@ public void testCSVFileWithHeader() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, null, true, 0, null), + new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, null, true, 0), null, null, tuningConfig, @@ -985,7 +984,7 @@ public void testCSVFileWithHeaderColumnOverride() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0, null), + new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0), null, null, tuningConfig, @@ -1385,7 +1384,7 @@ public void testIgnoreParseException() throws Exception parseExceptionIgnoreSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0, null), + new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0), null, null, tuningConfig, @@ -1435,7 +1434,7 @@ public void testReportParseException() throws Exception indexIngestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0, null), + new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0), null, null, tuningConfig, @@ -1676,7 +1675,7 @@ public void testMultipleParseExceptionsFailure() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0, null), + new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0), null, null, tuningConfig, @@ -1795,7 +1794,7 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() 
throws Exc ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0, null), + new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0), null, null, tuningConfig, @@ -1889,7 +1888,7 @@ public void testCsvWithHeaderOfEmptyColumns() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, null, null, true, 0, null), + new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, null, null, true, 0), null, null, tuningConfig, @@ -1959,7 +1958,7 @@ public void testCsvWithHeaderOfEmptyTimestamp() throws Exception ingestionSpec = createIngestionSpec( jsonMapper, tmpDir, - new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, null, columns, true, 0, null), + new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, null, columns, true, 0), null, null, tuningConfig, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index 9d4ad944f44b..b39e00ba79d4 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -156,8 +156,7 @@ public class AbstractParallelIndexSupervisorTaskTest extends IngestionTestBase null, Arrays.asList("ts", "dim", "val"), false, - 0, - null + 0 ); static final InputFormat DEFAULT_INPUT_FORMAT = new CsvInputFormat( Arrays.asList("ts", "dim", "val"), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java index d269d07acbdd..984cdeb33efa 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java @@ -77,8 +77,7 @@ public class HashPartitionMultiPhaseParallelIndexingTest extends AbstractMultiPh null, Arrays.asList("ts", "dim1", "dim2", "val"), false, - 0, - null + 0 ); private static final InputFormat INPUT_FORMAT = new CsvInputFormat( Arrays.asList("ts", "dim1", "dim2", "val"), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingRowStatsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingRowStatsTest.java index 26f1788cc59c..9e910a668157 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingRowStatsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingRowStatsTest.java @@ -61,8 +61,7 @@ public class MultiPhaseParallelIndexingRowStatsTest extends AbstractMultiPhasePa null, Arrays.asList("ts", "dim1", "dim2", "val"), false, - 0, - null + 0 ); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2017-12/P1M"); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java index 221b36416465..a848c66c807c 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java @@ -103,8 +103,7 @@ public class RangePartitionMultiPhaseParallelIndexingTest extends AbstractMultiP LIST_DELIMITER, Arrays.asList(TIME, DIM1, DIM2, "val"), false, - 0, - null + 0 ); private static final InputFormat INPUT_FORMAT = new CsvInputFormat( Arrays.asList(TIME, DIM1, DIM2, "val"), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java index b743b79291fe..d828338142b6 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerTest.java @@ -1480,8 +1480,7 @@ private InputRowParser createInputRowParser(TimestampSpec timestampSpec, Dimensi null, ImmutableList.of("t", "dim1", "dim2", "met1"), false, - 0, - null + 0 ) ); default: diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/CSVParseSpec.java b/processing/src/main/java/org/apache/druid/data/input/impl/CSVParseSpec.java index e324f8dc2987..2a5adfe0a4b1 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/CSVParseSpec.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/CSVParseSpec.java @@ -27,8 +27,6 @@ import java.util.List; -import javax.annotation.Nullable; - /** */ public class CSVParseSpec extends ParseSpec @@ -37,7 +35,6 @@ public class CSVParseSpec extends ParseSpec private final List columns; private final boolean hasHeaderRow; private final int skipHeaderRows; - private final boolean shouldParseNumbers; @JsonCreator public CSVParseSpec( @@ -46,8 +43,7 @@ public 
CSVParseSpec( @JsonProperty("listDelimiter") String listDelimiter, @JsonProperty("columns") List columns, @JsonProperty("hasHeaderRow") boolean hasHeaderRow, - @JsonProperty("skipHeaderRows") int skipHeaderRows, - @JsonProperty("shouldParseNumbers") @Nullable Boolean shouldParseNumbers + @JsonProperty("skipHeaderRows") int skipHeaderRows ) { super(timestampSpec, dimensionsSpec); @@ -56,7 +52,6 @@ public CSVParseSpec( this.columns = columns; this.hasHeaderRow = hasHeaderRow; this.skipHeaderRows = skipHeaderRows; - this.shouldParseNumbers = shouldParseNumbers == null ? false : shouldParseNumbers; if (columns != null) { for (String column : columns) { @@ -95,28 +90,21 @@ public int getSkipHeaderRows() return skipHeaderRows; } - @JsonProperty("shouldParseNumbers") - public boolean shouldParseNumbers() - { - return shouldParseNumbers; - } - - @Override public Parser makeParser() { - return new CSVParser(listDelimiter, columns, hasHeaderRow, skipHeaderRows, shouldParseNumbers); + return new CSVParser(listDelimiter, columns, hasHeaderRow, skipHeaderRows, false); } @Override public ParseSpec withTimestampSpec(TimestampSpec spec) { - return new CSVParseSpec(spec, getDimensionsSpec(), listDelimiter, columns, hasHeaderRow, skipHeaderRows, shouldParseNumbers); + return new CSVParseSpec(spec, getDimensionsSpec(), listDelimiter, columns, hasHeaderRow, skipHeaderRows); } @Override public ParseSpec withDimensionsSpec(DimensionsSpec spec) { - return new CSVParseSpec(getTimestampSpec(), spec, listDelimiter, columns, hasHeaderRow, skipHeaderRows, shouldParseNumbers); + return new CSVParseSpec(getTimestampSpec(), spec, listDelimiter, columns, hasHeaderRow, skipHeaderRows); } } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedParseSpec.java b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedParseSpec.java index 52d9ddba6516..3327989e9f5e 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedParseSpec.java +++ 
b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedParseSpec.java @@ -27,8 +27,6 @@ import java.util.List; -import javax.annotation.Nullable; - /** */ public class DelimitedParseSpec extends ParseSpec @@ -38,7 +36,6 @@ public class DelimitedParseSpec extends ParseSpec private final List columns; private final boolean hasHeaderRow; private final int skipHeaderRows; - private final boolean shouldParseNumbers; @JsonCreator public DelimitedParseSpec( @@ -48,9 +45,8 @@ public DelimitedParseSpec( @JsonProperty("listDelimiter") String listDelimiter, @JsonProperty("columns") List columns, @JsonProperty("hasHeaderRow") boolean hasHeaderRow, - @JsonProperty("skipHeaderRows") int skipHeaderRows, - @JsonProperty("skipHeaderRows") @Nullable Boolean shouldParseNumbers - ) + @JsonProperty("skipHeaderRows") int skipHeaderRows + ) { super(timestampSpec, dimensionsSpec); @@ -59,7 +55,6 @@ public DelimitedParseSpec( this.columns = columns; this.hasHeaderRow = hasHeaderRow; this.skipHeaderRows = skipHeaderRows; - this.shouldParseNumbers = shouldParseNumbers == null ? 
false : shouldParseNumbers; if (columns != null) { for (String column : this.columns) { @@ -104,12 +99,6 @@ public int getSkipHeaderRows() return skipHeaderRows; } - @JsonProperty("shouldParseNumbers") - public boolean shouldParseNumbers() - { - return shouldParseNumbers; - } - @Override public Parser makeParser() { @@ -119,7 +108,7 @@ public Parser makeParser() columns, hasHeaderRow, skipHeaderRows, - shouldParseNumbers + false ); } @@ -133,8 +122,7 @@ public ParseSpec withTimestampSpec(TimestampSpec spec) listDelimiter, columns, hasHeaderRow, - skipHeaderRows, - shouldParseNumbers + skipHeaderRows ); } @@ -148,8 +136,7 @@ public ParseSpec withDimensionsSpec(DimensionsSpec spec) listDelimiter, columns, hasHeaderRow, - skipHeaderRows, - shouldParseNumbers + skipHeaderRows ); } diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CSVParseSpecTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CSVParseSpecTest.java index 6cd350250c08..c8299085c0ea 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CSVParseSpecTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/CSVParseSpecTest.java @@ -40,8 +40,7 @@ public void testComma() ",", Collections.singletonList("a,"), false, - 0, - null + 0 ); } } diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedParseSpecTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedParseSpecTest.java index cfc9d75641fc..0d3b48640351 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedParseSpecTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedParseSpecTest.java @@ -42,8 +42,7 @@ public void testSerde() throws IOException "\u0002", Collections.singletonList("abc"), false, - 0, - null + 0 ); final DelimitedParseSpec serde = jsonMapper.readValue( jsonMapper.writeValueAsString(spec), @@ -73,8 +72,7 @@ public void testComma() null, Collections.singletonList("a,"), 
false, - 0, - null + 0 ); } @@ -93,8 +91,7 @@ public void testDefaultColumnList() null, null, false, - 0, - null + 0 ); } } diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java index 07d3d0cb5d52..96f7fd9ee394 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java @@ -73,8 +73,7 @@ public void testDuplicateNames() " ", Arrays.asList("a", "b"), false, - 0, - null + 0 ); } @@ -96,8 +95,7 @@ public void testDimAndDimExcluOverlap() null, Arrays.asList("a", "B"), false, - 0, - null + 0 ); } @@ -119,8 +117,7 @@ public void testDimExclusionDuplicate() null, Arrays.asList("a", "B"), false, - 0, - null + 0 ); } @@ -140,8 +137,7 @@ public void testDefaultTimestampSpec() null, Arrays.asList("a", "B"), false, - 0, - null + 0 ); } @@ -162,8 +158,7 @@ public void testDimensionSpecRequired() null, Arrays.asList("a", "B"), false, - 0, - null + 0 ); } diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index 77649f8fcbbc..a7abce4943c9 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -152,8 +152,7 @@ public void setup() throws Exception "\t", ImmutableList.of("timestamp", "product", "tags", "othertags"), false, - 0, - null + 0 ), "UTF-8" ); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerFactoryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerFactoryTest.java index ff089e82db63..bc8563552cd1 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerFactoryTest.java +++ 
b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerFactoryTest.java @@ -153,8 +153,7 @@ private Segment createSegment() throws Exception "\t", ImmutableList.of("timestamp", "product", "tags"), false, - 0, - null + 0 ), "UTF-8" ); diff --git a/processing/src/test/java/org/apache/druid/segment/TestIndex.java b/processing/src/test/java/org/apache/druid/segment/TestIndex.java index d4a6e644f7be..d0c93a270d0f 100644 --- a/processing/src/test/java/org/apache/druid/segment/TestIndex.java +++ b/processing/src/test/java/org/apache/druid/segment/TestIndex.java @@ -473,8 +473,7 @@ public static IncrementalIndex loadIncrementalIndex( "\u0001", Arrays.asList(COLUMNS), false, - 0, - null + 0 ), "utf8" ); From 54847c9eaea611d7c6f9a921d1dc15b6850a132e Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 10:53:43 -0400 Subject: [PATCH 03/24] Remove delta change. --- .../input-source-step/input-source-step.tsx | 8 -------- 1 file changed, 8 deletions(-) diff --git a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx index 881b1e2d8b6b..6d483cf44100 100644 --- a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx +++ b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx @@ -127,13 +127,6 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS const sampleResponse = await postToSampler(sampleSpec, 'input-source-step'); sampleLines = filterMap(sampleResponse.data, l => (l.input ? l.input.raw : undefined)); - // console.log('sample response: ', sampleResponse); - // console.log('sample response.data: ', sampleResponse.data); - // sampleLines = filterMap(sampleResponse.data, l => (l.input ? l.input.raw : undefined)); - // sampleLines = filterMap(sampleResponse.data, l => - // l.input ? 
JSON.stringify(l.input) : undefined, - // ); - // console.log('sample lines: ', sampleLines); } else { const tableExpression = externalConfigToTableExpression({ inputSource, @@ -232,7 +225,6 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
{renderIngestionCard('s3')} {renderIngestionCard('azureStorage')} - {renderIngestionCard('delta')} {renderIngestionCard('google')} {renderIngestionCard('hdfs')} {renderIngestionCard('http')} From b0c538b02ff3000dbcf7c4ce6763407e62e20979 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 10:58:16 -0400 Subject: [PATCH 04/24] checkstyle fix. --- .../apache/druid/catalog/storage/CatalogTests.java | 11 ----------- .../druid/inputsource/hdfs/HdfsInputSourceTest.java | 3 ++- .../data/input/kinesis/KinesisInputFormatTest.java | 3 ++- .../druid/java/util/common/parsers/ParserUtils.java | 8 +++++--- .../java/util/common/parsers/ParserUtilsTest.java | 4 +++- 5 files changed, 12 insertions(+), 17 deletions(-) diff --git a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java index ddac6809eb0e..c1475559aa82 100644 --- a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java +++ b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java @@ -74,17 +74,6 @@ public Access authorize( } } - public static InputFormat csvFormat() - { - return new CsvInputFormat( - Arrays.asList("x", "y", "z"), - null, // listDelimiter - false, // hasHeaderRow - false, // findColumnsFromHeader - 0, // skipHeaderRows - null); - } - public static final ObjectMapper JSON_MAPPER = new DefaultObjectMapper(); public static class DbFixture diff --git a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java index ada3d6ecf00f..562f94e07e1e 100644 --- a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java +++ 
b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java @@ -91,7 +91,8 @@ public class HdfsInputSourceTest extends InitializedNullHandlingTest false, null, 0, - null); + null + ); public static class ConstructorTest { diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/data/input/kinesis/KinesisInputFormatTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/data/input/kinesis/KinesisInputFormatTest.java index 4659ae5f2d57..7e40203c48e0 100644 --- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/data/input/kinesis/KinesisInputFormatTest.java +++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/data/input/kinesis/KinesisInputFormatTest.java @@ -714,7 +714,8 @@ public void testValueInCsvFormat() throws IOException false, false, 0, - null), + null + ), "kinesis.newts.partitionKey", "kinesis.newts.timestamp" ); diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index 775155fdc2ed..846091709fba 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -85,11 +85,13 @@ public static Object tryParseStringAsNumber(@Nullable final String input) try { // see if it's a long, if not try parsing as a double. 
return Long.parseLong(input); - } catch (NumberFormatException e1) { + } + catch (NumberFormatException e1) { try { return Double.parseDouble(input); - } catch (NumberFormatException e2) { - return input; + } + catch (NumberFormatException e2) { + return input; } } } diff --git a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java index 5d102423a6ba..ffd89a29f3fa 100644 --- a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java +++ b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java @@ -28,7 +28,9 @@ import java.util.Collections; import java.util.List; -import static org.apache.druid.java.util.common.parsers.ParserUtils.*; +import static org.apache.druid.java.util.common.parsers.ParserUtils.findDuplicates; +import static org.apache.druid.java.util.common.parsers.ParserUtils.getMultiValueFunction; +import static org.apache.druid.java.util.common.parsers.ParserUtils.tryParseStringAsNumber; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; From 7336e7985ac7bddf44a3c9092c50e4809b3d481a Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 11:02:31 -0400 Subject: [PATCH 05/24] Remove unneeded test --- .../namespace/UriExtractionNamespaceTest.java | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java b/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java index cd92dcc747b7..61d90b38a987 100644 --- a/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java +++ 
b/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java @@ -116,26 +116,6 @@ public void testCSVWithHeader() Assert.assertEquals(ImmutableMap.of("val2", "val3"), parser.getParser().parseToMap("val1,val2,val3")); } - @Test - public void testCSVWithHeaderWithNumbers() - { - UriExtractionNamespace.CSVFlatDataParser parser = new UriExtractionNamespace.CSVFlatDataParser( - ImmutableList.of("col1", "col2", "col3"), - "col2", - "col3", - true, - 1 - ); - // parser return empty list as the 1 row header need to be skipped. - Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("row to skip ")); - //Header also need to be skipped. - Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("col1,col2,col3")); - // test the header is parsed - Assert.assertEquals(ImmutableList.of("col1", "col2", "col3"), parser.getParser().getFieldNames()); - // The third row will parse to data - Assert.assertEquals(ImmutableMap.of("val2", "12"), parser.getParser().parseToMap("val1,val2,12")); - } - @Test(expected = IllegalArgumentException.class) public void testBadCSV() { From 423d437b278c10ddbe2c3bbb18419449a5690f67 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 11:04:39 -0400 Subject: [PATCH 06/24] More checkstyle fixes. 
--- .../data/input/kafkainput/KafkaInputFormatTest.java | 3 ++- .../common/task/CompactionTaskParallelRunTest.java | 3 ++- .../druid/indexing/common/task/IndexTaskTest.java | 3 ++- .../indexing/common/task/IngestionTestBase.java | 6 ++++-- ...HashPartitionMultiPhaseParallelIndexingTest.java | 3 ++- ...angePartitionAdjustingCorePartitionSizeTest.java | 3 ++- .../data/input/impl/DelimitedInputFormatTest.java | 9 ++++++--- .../druid/data/input/impl/DelimitedReaderTest.java | 13 +++++++++---- .../input/impl/InputEntityIteratingReaderTest.java | 9 ++++++--- .../org/apache/druid/query/NestedDataTestUtils.java | 3 ++- 10 files changed, 37 insertions(+), 18 deletions(-) diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/data/input/kafkainput/KafkaInputFormatTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/data/input/kafkainput/KafkaInputFormatTest.java index 5de194b439ca..a29654cfd738 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/data/input/kafkainput/KafkaInputFormatTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/data/input/kafkainput/KafkaInputFormatTest.java @@ -713,7 +713,8 @@ public void testValueInCsvFormat() throws IOException false, false, 0, - null), + null + ), "kafka.newheader.", "kafka.newkey.key", "kafka.newts.timestamp", diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java index 06493e67b2a0..c87a3ef7be3a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java @@ -929,7 +929,8 @@ private void runIndexTask(@Nullable PartitionsSpec partitionsSpec, boolean appen null, false, 0, - null), + 
null + ), appendToExisting, null ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index 69806f12a250..8092bbd6e435 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -164,7 +164,8 @@ public class IndexTaskTest extends IngestionTestBase null, false, 0, - null); + null + ); @Parameterized.Parameters(name = "{0}, useInputFormatApi={1}") public static Iterable constructorFeeder() diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index d02024a097de..472b74cd412a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -314,7 +314,8 @@ public static InputFormat createInputFormatFromParseSpec(ParseSpec parseSpec) getColumnsFromHeader ? null : true, getColumnsFromHeader ? true : null, csvParseSpec.getSkipHeaderRows(), - null); + null + ); } else if (parseSpec instanceof DelimitedParseSpec) { DelimitedParseSpec delimitedParseSpec = (DelimitedParseSpec) parseSpec; boolean getColumnsFromHeader = delimitedParseSpec.isHasHeaderRow() && delimitedParseSpec.getSkipHeaderRows() == 0; @@ -325,7 +326,8 @@ public static InputFormat createInputFormatFromParseSpec(ParseSpec parseSpec) getColumnsFromHeader ? null : true, getColumnsFromHeader ? 
true : null, delimitedParseSpec.getSkipHeaderRows(), - null); + null + ); } else if (parseSpec instanceof RegexParseSpec) { RegexParseSpec regexParseSpec = (RegexParseSpec) parseSpec; return new RegexInputFormat( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java index 984cdeb33efa..53df996e8054 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java @@ -85,7 +85,8 @@ public class HashPartitionMultiPhaseParallelIndexingTest extends AbstractMultiPh false, false, 0, - null); + null + ); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2017-12/P1M"); private static final String INPUT_FILTER = "test_*"; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java index 715f63b10559..efa8991b206a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java @@ -62,7 +62,8 @@ public class RangePartitionAdjustingCorePartitionSizeTest extends AbstractMultiP false, false, 0, - null); + null + ); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2020-01-01/P1M"); @Parameterized.Parameters(name = "{0}, maxNumConcurrentSubTasks={1}") diff --git 
a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java index 36f039bf943f..55c752508d68 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java @@ -87,7 +87,8 @@ public void testFindColumnsFromHeaderWithColumnsReturningItsValue() null, true, 0, - null); + null + ); Assert.assertTrue(format.isFindColumnsFromHeader()); } @@ -101,7 +102,8 @@ public void testFindColumnsFromHeaderWithMissingColumnsReturningItsValue() null, true, 0, - null); + null + ); Assert.assertTrue(format.isFindColumnsFromHeader()); } @@ -149,7 +151,8 @@ public void testMissingFindColumnsFromHeaderWithColumnsReturningFalse() null, null, 0, - null); + null + ); Assert.assertFalse(format.isFindColumnsFromHeader()); } diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java index 58c05f43be7c..f5c7f3d73563 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java @@ -70,7 +70,8 @@ public void testWithoutHeaders() throws IOException null, false, 0, - null); + null + ); assertResult(source, format); // use this test! } @@ -92,7 +93,8 @@ public void testWithoutHeadersNumberizeStrings() throws IOException null, false, 0, - false); + false + ); assertResult(source, format); // use this test! 
} @@ -130,7 +132,8 @@ public void testSkipHeaders() throws IOException null, false, 1, - null); + null + ); assertResult(source, format); } @@ -235,7 +238,8 @@ public void testRussianTextMess() throws IOException null, false, 0, - null); + null + ); final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null); try (CloseableIterator iterator = reader.read()) { Assert.assertTrue(iterator.hasNext()); @@ -279,6 +283,7 @@ private void assertResult(ByteEntity source, DelimitedInputFormat format) throws StringUtils.format("name_%d", numResults + 1), Iterables.getOnlyElement(row.getDimension("name")) ); + // fixme System.out.println("Score" + row.getRaw("score")); System.out.println("Score instanceof String" + (row.getRaw("score") instanceof String)); System.out.println("Score instanceof Long" + (row.getRaw("score") instanceof Long)); diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java index 75a17ccae11b..ad1422126dde 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java @@ -85,7 +85,8 @@ public void test() throws IOException null, false, 0, - null), + null + ), CloseableIterators.withEmptyBaggage( files.stream().flatMap(file -> ImmutableList.of(new FileEntity(file)).stream()).iterator() ), @@ -153,7 +154,8 @@ public void testSampleWithSystemFields() throws IOException null, false, 0, - null), + null + ), CloseableIterators.withEmptyBaggage( files.stream().flatMap(file -> ImmutableList.of(new FileEntity(file)).stream()).iterator() ), @@ -201,7 +203,8 @@ public void testIncorrectURI() throws IOException, URISyntaxException null, false, 0, - null), + null + ), CloseableIterators.withEmptyBaggage( ImmutableList.of( new HttpEntity(new 
URI("testscheme://some/path"), null, null, null) diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index 0c85c9b169c8..9efeeb642e51 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -148,7 +148,8 @@ public class NestedDataTestUtils false, false, 0, - null); + null + ); public static final TransformSpec SIMPLE_DATA_TSV_TRANSFORM = new TransformSpec( null, From 5d6247130f3024cdea3876d6e042ca262184d062 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 12:21:07 -0400 Subject: [PATCH 07/24] Fix and update test. --- .../java/util/common/parsers/ParserUtils.java | 4 +- .../data/input/impl/DelimitedReaderTest.java | 39 +++++++++++-------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index 846091709fba..4bc2f12a55d1 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -66,8 +66,10 @@ public static Function getMultiValueFunction( .map(NullHandling::emptyToNullIfNeeded) .map(value -> shouldParseNumbers ? 
ParserUtils.tryParseStringAsNumber(value) : value) .collect(Collectors.toList()); - } else { + } else if (shouldParseNumbers) { return tryParseStringAsNumber(input); + } else { + return input; } } else { return NullHandling.emptyToNullIfNeeded(input); diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java index f5c7f3d73563..dc1ef1cda359 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java @@ -58,13 +58,13 @@ public void testWithoutHeaders() throws IOException { final ByteEntity source = writeData( ImmutableList.of( - "2019-01-01T00:00:10Z\tname_1\t5", - "2019-01-01T00:00:20Z\tname_2\t10", - "2019-01-01T00:00:30Z\tname_3\t15" + "2019-01-01T00:00:10Z\tname_1\t5\t0.0", + "2019-01-01T00:00:20Z\tname_2\t10\t1.0", + "2019-01-01T00:00:30Z\tname_3\t15\t2.0" ) ); final DelimitedInputFormat format = new DelimitedInputFormat( - ImmutableList.of("ts", "name", "score"), + ImmutableList.of("ts", "name", "score", "rating_dbl"), null, null, null, @@ -73,30 +73,28 @@ public void testWithoutHeaders() throws IOException null ); assertResult(source, format); - // use this test! 
} @Test - public void testWithoutHeadersNumberizeStrings() throws IOException + public void testParseNumbers() throws IOException { final ByteEntity source = writeData( ImmutableList.of( - "2019-01-01T00:00:10Z\tname_1\t5", - "2019-01-01T00:00:20Z\tname_2\t10", - "2019-01-01T00:00:30Z\tname_3\t15" + "2019-01-01T00:00:10Z\tname_1\t5\t0.0", + "2019-01-01T00:00:20Z\tname_2\t10\t1.0", + "2019-01-01T00:00:30Z\tname_3\t15\t2.0" ) ); final DelimitedInputFormat format = new DelimitedInputFormat( - ImmutableList.of("ts", "name", "score"), + ImmutableList.of("ts", "name", "score", "rating_dbl"), null, null, null, false, 0, - false + true ); assertResult(source, format); - // use this test! } @Test @@ -283,10 +281,19 @@ private void assertResult(ByteEntity source, DelimitedInputFormat format) throws StringUtils.format("name_%d", numResults + 1), Iterables.getOnlyElement(row.getDimension("name")) ); - // fixme - System.out.println("Score" + row.getRaw("score")); - System.out.println("Score instanceof String" + (row.getRaw("score") instanceof String)); - System.out.println("Score instanceof Long" + (row.getRaw("score") instanceof Long)); + if (format.shouldParseNumbers()) { + Assert.assertEquals(((numResults + 1) * 5L), row.getRaw("score")); + Assert.assertTrue((row.getRaw("score") instanceof Long)); + if (format.getColumns().contains("rating_dbl")) { + Assert.assertEquals(numResults * 1.0, row.getRaw("rating_dbl")); + } + } else { + Assert.assertEquals(Integer.toString((numResults + 1) * 5), row.getRaw("score")); + Assert.assertTrue((row.getRaw("score") instanceof String)); + if (format.getColumns().contains("rating_dbl")) { + Assert.assertEquals(Double.toString(numResults * 1.0), row.getRaw("rating_dbl")); + } + } Assert.assertEquals( Integer.toString((numResults + 1) * 5), Iterables.getOnlyElement(row.getDimension("score")) From f43d544ae81377cd636d54d9493043b0fec0e5a4 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 13:02:17 -0400 Subject: 
[PATCH 08/24] Fix and add test in CsvReaderTest. --- .../druid/data/input/impl/CsvInputFormat.java | 2 +- .../druid/data/input/impl/CsvReaderTest.java | 42 +++++++++++++++++-- .../data/input/impl/DelimitedReaderTest.java | 2 - 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java b/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java index f1dcdb96c61f..17f730dbba82 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java @@ -82,7 +82,7 @@ public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity getSkipHeaderRows(), line -> Arrays.asList(parser.parseLine(StringUtils.fromUtf8(line))), useListBasedInputRows(), - true + shouldParseNumbers() ); } diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java index 198947239838..27a03aa38e99 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java @@ -61,15 +61,37 @@ public void testWithoutHeaders() throws IOException { final ByteEntity source = writeData( ImmutableList.of( - "2019-01-01T00:00:10Z,name_1,5", - "2019-01-01T00:00:20Z,name_2,10", - "2019-01-01T00:00:30Z,name_3,15" + "2019-01-01T00:00:10Z,name_1,5,0.0", + "2019-01-01T00:00:20Z,name_2,10,1.0", + "2019-01-01T00:00:30Z,name_3,15,2.0" ) ); final CsvInputFormat format = new CsvInputFormat(ImmutableList.of("ts", "name", "score"), null, null, false, 0, null); assertResult(source, format); } + @Test + public void testParseNumbers() throws IOException + { + final ByteEntity source = writeData( + ImmutableList.of( + "2019-01-01T00:00:10Z,name_1,5,0.0", + "2019-01-01T00:00:20Z,name_2,10,1.0", + 
"2019-01-01T00:00:30Z,name_3,15,2.0" + ) + ); + final CsvInputFormat format = new CsvInputFormat( + ImmutableList.of("ts", "name", "score", "rating_dbl"), + null, + null, + false, + 0, + true + ); + + assertResult(source, format); + } + @Test public void testFindColumn() throws IOException { @@ -228,7 +250,8 @@ public void testQuotes() throws IOException null, false, 0, - null); + false + ); final InputEntityReader reader = format.createReader( new InputRowSchema( new TimestampSpec("Timestamp", "auto", null), @@ -300,6 +323,17 @@ private void assertResult(ByteEntity source, CsvInputFormat format) throws IOExc StringUtils.format("name_%d", numResults + 1), Iterables.getOnlyElement(row.getDimension("name")) ); + if (format.shouldParseNumbers()) { + Assert.assertEquals(((numResults + 1) * 5L), row.getRaw("score")); + if (format.getColumns().contains("rating_dbl")) { + Assert.assertEquals(numResults * 1.0, row.getRaw("rating_dbl")); + } + } else { + Assert.assertEquals(Integer.toString((numResults + 1) * 5), row.getRaw("score")); + if (format.getColumns().contains("rating_dbl")) { + Assert.assertEquals(Double.toString(numResults * 1.0), row.getRaw("rating_dbl")); + } + } Assert.assertEquals( Integer.toString((numResults + 1) * 5), Iterables.getOnlyElement(row.getDimension("score")) diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java index dc1ef1cda359..5c97e2db3b46 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java @@ -283,13 +283,11 @@ private void assertResult(ByteEntity source, DelimitedInputFormat format) throws ); if (format.shouldParseNumbers()) { Assert.assertEquals(((numResults + 1) * 5L), row.getRaw("score")); - Assert.assertTrue((row.getRaw("score") instanceof Long)); if 
(format.getColumns().contains("rating_dbl")) { Assert.assertEquals(numResults * 1.0, row.getRaw("rating_dbl")); } } else { Assert.assertEquals(Integer.toString((numResults + 1) * 5), row.getRaw("score")); - Assert.assertTrue((row.getRaw("score") instanceof String)); if (format.getColumns().contains("rating_dbl")) { Assert.assertEquals(Double.toString(numResults * 1.0), row.getRaw("rating_dbl")); } From 43c5a85da3c430e7b11fbe2b729341305adb3ad6 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 13:57:20 -0400 Subject: [PATCH 09/24] Javadocs and more test cases. --- .../data/input/impl/DelimitedValueReader.java | 14 +- .../druid/data/input/impl/RegexReader.java | 10 +- .../parsers/AbstractFlatTextFormatParser.java | 10 +- .../java/util/common/parsers/ParserUtils.java | 17 ++- .../util/common/parsers/ParserUtilsTest.java | 121 ++++++++++++++++-- 5 files changed, 149 insertions(+), 23 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java index b4784a3dc107..32d3cb31dac4 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java @@ -51,7 +51,7 @@ public class DelimitedValueReader extends TextReader.Bytes { private final boolean findColumnsFromHeader; private final int skipHeaderRows; - private final Function multiValueFunction; + private final Function multiValueParseFunction; private final DelimitedValueParser parser; /** @@ -68,7 +68,6 @@ public class DelimitedValueReader extends TextReader.Bytes @Nullable private List inputRowDimensions; private final boolean useListBasedInputRows; - private final boolean shouldParseNumbers; interface DelimitedValueParser { @@ -91,8 +90,11 @@ interface DelimitedValueParser this.findColumnsFromHeader = findColumnsFromHeader; 
this.skipHeaderRows = skipHeaderRows; final String finalListDelimeter = listDelimiter == null ? Parsers.DEFAULT_LIST_DELIMITER : listDelimiter; - this.shouldParseNumbers = shouldParseNumbers; - this.multiValueFunction = ParserUtils.getMultiValueFunction(finalListDelimeter, Splitter.on(finalListDelimeter), shouldParseNumbers); + this.multiValueParseFunction = ParserUtils.getMultiValueAndParseNumbersFunction( + finalListDelimeter, + Splitter.on(finalListDelimeter), + shouldParseNumbers + ); if (!findColumnsFromHeader && columns != null) { // If findColumnsFromHeader, inputRowSignature will be set later. @@ -137,7 +139,7 @@ public List> toMap(byte[] intermediateRow) throws IOExceptio private List readLineAsList(byte[] line) throws IOException { final List parsed = parser.parseLine(line); - return new ArrayList<>(Lists.transform(parsed, multiValueFunction)); + return new ArrayList<>(Lists.transform(parsed, multiValueParseFunction)); } private Map readLineAsMap(byte[] line) throws IOException @@ -145,7 +147,7 @@ private Map readLineAsMap(byte[] line) throws IOException final List parsed = parser.parseLine(line); return Utils.zipMapPartial( Preconditions.checkNotNull(inputRowSignature, "inputRowSignature").getColumnNames(), - Iterables.transform(parsed, multiValueFunction) + Iterables.transform(parsed, multiValueParseFunction) ); } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java b/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java index cd08e39aa21f..1124ac6584d6 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java @@ -43,7 +43,7 @@ public class RegexReader extends TextReader.Strings { private final String pattern; private final Pattern compiledPattern; - private final Function multiValueFunction; + private final Function multiValueParseFunction; private List columns; @@ -60,7 +60,11 @@ public 
class RegexReader extends TextReader.Strings this.pattern = pattern; this.compiledPattern = compiledPattern; final String finalListDelimeter = listDelimiter == null ? Parsers.DEFAULT_LIST_DELIMITER : listDelimiter; - this.multiValueFunction = ParserUtils.getMultiValueFunction(finalListDelimeter, Splitter.on(finalListDelimeter), false); + this.multiValueParseFunction = ParserUtils.getMultiValueAndParseNumbersFunction( + finalListDelimeter, + Splitter.on(finalListDelimeter), + false + ); this.columns = columns; } @@ -94,7 +98,7 @@ private Map parseLine(String line) columns = ParserUtils.generateFieldNames(matcher.groupCount()); } - return Utils.zipMapPartial(columns, Iterables.transform(values, multiValueFunction)); + return Utils.zipMapPartial(columns, Iterables.transform(values, multiValueParseFunction)); } catch (Exception e) { throw new ParseException(line, e, "Unable to parse row [%s]", line); diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java index 993af425f35a..67634a3de3b8 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java @@ -52,7 +52,7 @@ public String getDefaultDelimiter() } private final String listDelimiter; - private final Function valueFunction; + private final Function multiValueParseFunction; private final boolean hasHeaderRow; private final int maxSkipHeaderRows; @@ -69,7 +69,11 @@ public AbstractFlatTextFormatParser( ) { this.listDelimiter = listDelimiter != null ? 
listDelimiter : Parsers.DEFAULT_LIST_DELIMITER; - this.valueFunction = ParserUtils.getMultiValueFunction(this.listDelimiter, Splitter.on(this.listDelimiter), shouldParseNumbers); + this.multiValueParseFunction = ParserUtils.getMultiValueAndParseNumbersFunction( + this.listDelimiter, + Splitter.on(this.listDelimiter), + shouldParseNumbers + ); this.hasHeaderRow = hasHeaderRow; this.maxSkipHeaderRows = maxSkipHeaderRows; @@ -143,7 +147,7 @@ public Map parseToMap(final String input) setFieldNames(ParserUtils.generateFieldNames(values.size())); } - return Utils.zipMapPartial(fieldNames, Iterables.transform(values, valueFunction)); + return Utils.zipMapPartial(fieldNames, Iterables.transform(values, multiValueParseFunction)); } catch (Exception e) { throw new ParseException(input, e, "Unable to parse row [%s]", input); diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index 4bc2f12a55d1..40e00a785694 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -53,7 +53,13 @@ public class ParserUtils } } - public static Function getMultiValueFunction( + /** + * @return a function that processes a given string input by splitting it into multiple values + * using the {@code listSplitter} if the {@code listDelimiter} is present in the input. If {@code shouldParseNumbers} + * is enabled, the function will also try to parse any numeric values present in the input -- integers as {@code Long} + * and floating-point numbers as {@code Double}. 
+ */ + public static Function getMultiValueAndParseNumbersFunction( final String listDelimiter, final Splitter listSplitter, final boolean shouldParseNumbers @@ -77,15 +83,20 @@ public static Function getMultiValueFunction( }; } + /** + * Attempts to parse the input string into a numeric value, if applicable. If the input is a number, the method first + * tries to parse the input number as a {@code Long}. If parsing as a {@code Long} fails, it then attempts to parse + * the input number as a {@code Double}. For all other scenarios, the input is returned as-is as a {@code String} type. + */ + @VisibleForTesting @Nullable - public static Object tryParseStringAsNumber(@Nullable final String input) + static Object tryParseStringAsNumber(@Nullable final String input) { if (!NumberUtils.isNumber(input)) { return input; } try { - // see if it's a long, if not try parsing as a double. return Long.parseLong(input); } catch (NumberFormatException e1) { diff --git a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java index ffd89a29f3fa..87d2723e57ee 100644 --- a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java +++ b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java @@ -29,7 +29,7 @@ import java.util.List; import static org.apache.druid.java.util.common.parsers.ParserUtils.findDuplicates; -import static org.apache.druid.java.util.common.parsers.ParserUtils.getMultiValueFunction; +import static org.apache.druid.java.util.common.parsers.ParserUtils.getMultiValueAndParseNumbersFunction; import static org.apache.druid.java.util.common.parsers.ParserUtils.tryParseStringAsNumber; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; @@ -55,33 +55,138 @@ public void testFindDuplicates() } @Test - public void testGetMultiValueFunction() + public void 
testMultiValueParseFunctionWithParseNumbersEnabled() { assertEquals( ImmutableList.of("foo", "boo"), - getMultiValueFunction("|", Splitter.on("|"), true).apply("foo|boo") + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("foo|boo") ); assertEquals( ImmutableList.of(1L, 2L, 3L), - getMultiValueFunction("|", Splitter.on("|"), true).apply("1|2|3") + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|2|3") ); assertEquals( ImmutableList.of(1L, -2L, 3L, 0L, -2L), - getMultiValueFunction("|", Splitter.on("|"), true).apply("1|-2|3|0|-2") + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|-2|3|0|-2") ); assertEquals( 1.23, - getMultiValueFunction("|", Splitter.on("|"), true).apply("1.23") + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1.23") ); // Some mixed types assertEquals( ImmutableList.of(-1.23, 3.13, 23L), - getMultiValueFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23") + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23") ); assertEquals( ImmutableList.of(-1.23, 3.13, 23L, "foo", -9L), - getMultiValueFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23|foo|-9") + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23|foo|-9") + ); + } + + @Test + public void testMultiValueParseFunctionWithParseNumbersDisabled() + { + assertEquals( + ImmutableList.of("foo", "boo"), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("foo|boo") + ); + assertEquals( + ImmutableList.of("1", "2", "3"), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1|2|3") + ); + assertEquals( + ImmutableList.of("1", "-2", "3", "0", "-2"), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1|-2|3|0|-2") + ); + assertEquals( + "1.23", + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1.23") + ); + + // Some mixed types 
+ assertEquals( + ImmutableList.of("-1.23", "3.13", "23"), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("-1.23|3.13|23") + ); + assertEquals( + ImmutableList.of("-1.23", "3.13", "23", "foo", "-9"), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("-1.23|3.13|23|foo|-9") + ); + } + + @Test + public void testInputWithoutDelimiterAndNumberParsingEnabled() + { + assertEquals( + "foo|boo", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("foo|boo") + ); + assertEquals( + "1|2|3", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1|2|3") + ); + assertEquals( + "1|-2|3|0|-2", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1|-2|3|0|-2") + ); + assertEquals( + 1.23, + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1.23") + ); + + // Some mixed types + assertEquals( + "-1.23|3.13|23", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-1.23|3.13|23") + ); + assertEquals( + "-1.23|3.13|23|foo|-9", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-1.23|3.13|23|foo|-9") + ); + } + + @Test + public void testInputWithoutDelimiterAndNumberParsingDisabled() + { + assertEquals( + "foo|boo", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("foo|boo") + ); + assertEquals( + "100", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("100") + ); + assertEquals( + "1.23", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("1.23") + ); + assertEquals( + "-2.0", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("-2.0") + ); + assertEquals( + "1|2|3", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("1|2|3") + ); + assertEquals( + "1|-2|3|0|-2", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("1|-2|3|0|-2") + ); + assertEquals( + 
"-1.0|-2.2|3.1|0.2|-2.1", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("-1.0|-2.2|3.1|0.2|-2.1") + ); + + // Some mixed types + assertEquals( + "-1.23|3.13|23", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("-1.23|3.13|23") + ); + assertEquals( + "-1.23|3.13|23|foo|-9", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("-1.23|3.13|23|foo|-9") ); } From 5dd1fff9f2711ba236d720129970c3c6436ac089 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 16:34:52 -0400 Subject: [PATCH 10/24] Cleanup, checkstyle fix and some test fixes. --- .../druid/catalog/storage/CatalogTests.java | 4 --- ...stractParallelIndexSupervisorTaskTest.java | 3 +- ...rtitionAdjustingCorePartitionSizeTest.java | 3 +- .../batch/parallel/PartialCompactionTest.java | 3 +- .../java/util/common/parsers/ParserUtils.java | 31 ++++++++++++++----- .../util/common/parsers/ParserUtilsTest.java | 2 +- 6 files changed, 31 insertions(+), 15 deletions(-) diff --git a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java index c1475559aa82..caab4a3dad8e 100644 --- a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java +++ b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/storage/CatalogTests.java @@ -23,8 +23,6 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.catalog.storage.sql.CatalogManager; import org.apache.druid.catalog.storage.sql.SQLCatalogManager; -import org.apache.druid.data.input.InputFormat; -import org.apache.druid.data.input.impl.CsvInputFormat; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.metadata.TestDerbyConnector.DerbyConnectorRule; import org.apache.druid.server.security.Access; @@ -35,8 +33,6 @@ import 
org.apache.druid.server.security.Resource; import org.apache.druid.server.security.ResourceType; -import java.util.Arrays; - public class CatalogTests { public static final String TEST_AUTHORITY = "test"; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index b39e00ba79d4..d96a4d2a37e9 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -164,7 +164,8 @@ public class AbstractParallelIndexSupervisorTaskTest extends IngestionTestBase false, false, 0, - null); + null + ); public static final ParallelIndexTuningConfig DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING = TuningConfigBuilder.forParallelIndexTask() .withMaxNumConcurrentSubTasks(2) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java index d669b3be034b..075dae22f5cf 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java @@ -62,7 +62,8 @@ public class HashPartitionAdjustingCorePartitionSizeTest extends AbstractMultiPh false, false, 0, - null); + null + ); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2020-01-01/P1M"); @Parameterized.Parameters(name = "{0}, maxNumConcurrentSubTasks={1}") diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java index f33a21c9a0a7..9ef316e8aaa2 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java @@ -65,7 +65,8 @@ public class PartialCompactionTest extends AbstractMultiPhaseParallelIndexingTes false, false, 0, - null); + null + ); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2017-12/P1M"); private File inputDir; diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index 40e00a785694..db6441d05f86 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -66,17 +66,34 @@ public static Function getMultiValueAndParseNumbersFunction( ) { return (input) -> { - if (input != null) { - if (input.contains(listDelimiter)) { + if (input == null) { + return NullHandling.emptyToNullIfNeeded(input); + } + + if (input.contains(listDelimiter)) { return StreamSupport.stream(listSplitter.split(input).spliterator(), false) .map(NullHandling::emptyToNullIfNeeded) .map(value -> shouldParseNumbers ? ParserUtils.tryParseStringAsNumber(value) : value) .collect(Collectors.toList()); - } else if (shouldParseNumbers) { - return tryParseStringAsNumber(input); } else { - return input; - } + return shouldParseNumbers ? 
+ tryParseStringAsNumber(input) : + NullHandling.emptyToNullIfNeeded(input); + + } + }; + } + + public static Function getMultiValueFunctionOld( + final String listDelimiter, + final Splitter listSplitter + ) + { + return (input) -> { + if (input != null && input.contains(listDelimiter)) { + return StreamSupport.stream(listSplitter.split(input).spliterator(), false) + .map(NullHandling::emptyToNullIfNeeded) + .collect(Collectors.toList()); } else { return NullHandling.emptyToNullIfNeeded(input); } @@ -93,7 +110,7 @@ public static Function getMultiValueAndParseNumbersFunction( static Object tryParseStringAsNumber(@Nullable final String input) { if (!NumberUtils.isNumber(input)) { - return input; + return NullHandling.emptyToNullIfNeeded(input); } try { diff --git a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java index 87d2723e57ee..2e5140019c24 100644 --- a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java +++ b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java @@ -194,7 +194,7 @@ public void testInputWithoutDelimiterAndNumberParsingDisabled() public void testTryParseStringAsNumber() { assertNull(tryParseStringAsNumber(null)); - assertEquals("", tryParseStringAsNumber("")); + assertEquals(NullHandling.emptyToNullIfNeeded(""), tryParseStringAsNumber("")); assertEquals("a", tryParseStringAsNumber("a")); assertEquals("ab", tryParseStringAsNumber("ab")); assertEquals(12L, tryParseStringAsNumber("12")); From 60762093304e4b6f1f30d93cefe56fb2614dd5db Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 17:35:20 -0400 Subject: [PATCH 11/24] Refactor and more tests. 
--- .../java/util/common/parsers/ParserUtils.java | 16 --- .../util/common/parsers/ParserUtilsTest.java | 126 +++++++++++++----- 2 files changed, 89 insertions(+), 53 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index db6441d05f86..af199e07c321 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -84,22 +84,6 @@ public static Function getMultiValueAndParseNumbersFunction( }; } - public static Function getMultiValueFunctionOld( - final String listDelimiter, - final Splitter listSplitter - ) - { - return (input) -> { - if (input != null && input.contains(listDelimiter)) { - return StreamSupport.stream(listSplitter.split(input).spliterator(), false) - .map(NullHandling::emptyToNullIfNeeded) - .collect(Collectors.toList()); - } else { - return NullHandling.emptyToNullIfNeeded(input); - } - }; - } - /** * Attempts to parse the input string into a numeric value, if applicable. If the input is a number, the method first * tries to parse the input number as a {@code Long}. 
If parsing as a {@code Long} fails, it then attempts to parse diff --git a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java index 2e5140019c24..855d1091dc64 100644 --- a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java +++ b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java @@ -55,42 +55,31 @@ public void testFindDuplicates() } @Test - public void testMultiValueParseFunctionWithParseNumbersEnabled() + public void testInputWithDelimiterAndParserDisabled() { assertEquals( ImmutableList.of("foo", "boo"), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("foo|boo") + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("foo|boo") ); assertEquals( - ImmutableList.of(1L, 2L, 3L), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|2|3") + ImmutableList.of("1", "2", "3"), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1|2|3") ); assertEquals( - ImmutableList.of(1L, -2L, 3L, 0L, -2L), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|-2|3|0|-2") + ImmutableList.of("1", "-2", "3", "0", "-2"), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1|-2|3|0|-2") ); assertEquals( - 1.23, - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1.23") - ); - - // Some mixed types - assertEquals( - ImmutableList.of(-1.23, 3.13, 23L), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23") + "100", + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("100") ); assertEquals( - ImmutableList.of(-1.23, 3.13, 23L, "foo", -9L), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23|foo|-9") + "1.23", + 
getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1.23") ); - } - - @Test - public void testMultiValueParseFunctionWithParseNumbersDisabled() - { assertEquals( - ImmutableList.of("foo", "boo"), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("foo|boo") + "-2.0", + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("-2.0") ); assertEquals( ImmutableList.of("1", "2", "3"), @@ -101,8 +90,8 @@ public void testMultiValueParseFunctionWithParseNumbersDisabled() getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1|-2|3|0|-2") ); assertEquals( - "1.23", - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1.23") + ImmutableList.of("-1.0", "-2.2", "3.1", "0.2", "-2.1"), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("-1.0|-2.2|3.1|0.2|-2.1") ); // Some mixed types @@ -117,33 +106,53 @@ public void testMultiValueParseFunctionWithParseNumbersDisabled() } @Test - public void testInputWithoutDelimiterAndNumberParsingEnabled() + public void testInputWithDelimiterAndParserEnabled() { assertEquals( - "foo|boo", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("foo|boo") + ImmutableList.of("foo", "boo"), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("foo|boo") ); assertEquals( - "1|2|3", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1|2|3") + ImmutableList.of(1L, 2L, 3L), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|2|3") ); assertEquals( - "1|-2|3|0|-2", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1|-2|3|0|-2") + ImmutableList.of(1L, -2L, 3L, 0L, -2L), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|-2|3|0|-2") + ); + assertEquals( + 100L, + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("100") ); assertEquals( 1.23, - 
getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1.23") + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1.23") + ); + assertEquals( + -2.0, + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-2.0") + ); + assertEquals( + ImmutableList.of(1L, 2L, 3L), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|2|3") + ); + assertEquals( + ImmutableList.of(1L, -2L, 3L, 0L, -2L), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|-2|3|0|-2") + ); + assertEquals( + ImmutableList.of(-1.0, -2.2, 3.1, 0.2, -2.1), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-1.0|-2.2|3.1|0.2|-2.1") ); // Some mixed types assertEquals( - "-1.23|3.13|23", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-1.23|3.13|23") + ImmutableList.of(-1.23, 3.13, 23L), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23") ); assertEquals( - "-1.23|3.13|23|foo|-9", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-1.23|3.13|23|foo|-9") + ImmutableList.of(-1.23, 3.13, 23L, "foo", -9L), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23|foo|-9") ); } @@ -190,6 +199,49 @@ public void testInputWithoutDelimiterAndNumberParsingDisabled() ); } + @Test + public void testInputWithoutDelimiterAndNumberParsingEnabled() + { + assertEquals( + "foo|boo", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("foo|boo") + ); + assertEquals( + 100L, + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("100") + ); + assertEquals( + 1.23, + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1.23") + ); + assertEquals( + -2.0, + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-2.0") + ); + assertEquals( + "1|2|3", + getMultiValueAndParseNumbersFunction("$", 
Splitter.on("$"), true).apply("1|2|3") + ); + assertEquals( + "1|-2|3|0|-2", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1|-2|3|0|-2") + ); + assertEquals( + "-1.0|-2.2|3.1|0.2|-2.1", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-1.0|-2.2|3.1|0.2|-2.1") + ); + + // Some mixed types + assertEquals( + "-1.23|3.13|23", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-1.23|3.13|23") + ); + assertEquals( + "-1.23|3.13|23|foo|-9", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-1.23|3.13|23|foo|-9") + ); + } + @Test public void testTryParseStringAsNumber() { From c0ac9ae33f954d1b36a646fd3b229bde2a1c1cb5 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 17:41:19 -0400 Subject: [PATCH 12/24] Add some null and empty "" cases. --- .../java/util/common/parsers/ParserUtils.java | 10 +++---- .../util/common/parsers/ParserUtilsTest.java | 28 +++++++++++++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index af199e07c321..59da13d21c52 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -71,11 +71,11 @@ public static Function getMultiValueAndParseNumbersFunction( } if (input.contains(listDelimiter)) { - return StreamSupport.stream(listSplitter.split(input).spliterator(), false) - .map(NullHandling::emptyToNullIfNeeded) - .map(value -> shouldParseNumbers ? ParserUtils.tryParseStringAsNumber(value) : value) - .collect(Collectors.toList()); - } else { + return StreamSupport.stream(listSplitter.split(input).spliterator(), false) + .map(NullHandling::emptyToNullIfNeeded) + .map(value -> shouldParseNumbers ? 
ParserUtils.tryParseStringAsNumber(value) : value) + .collect(Collectors.toList()); + } else { return shouldParseNumbers ? tryParseStringAsNumber(input) : NullHandling.emptyToNullIfNeeded(input); diff --git a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java index 855d1091dc64..1167103bfd56 100644 --- a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java +++ b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java @@ -57,6 +57,13 @@ public void testFindDuplicates() @Test public void testInputWithDelimiterAndParserDisabled() { + assertNull( + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply(null) + ); + assertEquals( + NullHandling.emptyToNullIfNeeded(""), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("") + ); assertEquals( ImmutableList.of("foo", "boo"), getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("foo|boo") @@ -108,6 +115,13 @@ public void testInputWithDelimiterAndParserDisabled() @Test public void testInputWithDelimiterAndParserEnabled() { + assertNull( + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply(null) + ); + assertEquals( + NullHandling.emptyToNullIfNeeded(""), + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("") + ); assertEquals( ImmutableList.of("foo", "boo"), getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("foo|boo") @@ -159,6 +173,13 @@ public void testInputWithDelimiterAndParserEnabled() @Test public void testInputWithoutDelimiterAndNumberParsingDisabled() { + assertNull( + getMultiValueAndParseNumbersFunction("|", Splitter.on("$"), false).apply(null) + ); + assertEquals( + NullHandling.emptyToNullIfNeeded(""), + getMultiValueAndParseNumbersFunction("|", Splitter.on("$"), false).apply("") + ); assertEquals( 
"foo|boo", getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("foo|boo") @@ -202,6 +223,13 @@ public void testInputWithoutDelimiterAndNumberParsingDisabled() @Test public void testInputWithoutDelimiterAndNumberParsingEnabled() { + assertNull( + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply(null) + ); + assertEquals( + NullHandling.emptyToNullIfNeeded(""), + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("") + ); assertEquals( "foo|boo", getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("foo|boo") From 8e4dc2cccb91c155629e96f27c49d2f7ee9b8040 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 19:32:17 -0400 Subject: [PATCH 13/24] Add public-facing docs. --- docs/ingestion/data-formats.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/ingestion/data-formats.md b/docs/ingestion/data-formats.md index 96d8597e7f34..8fc23b48feb1 100644 --- a/docs/ingestion/data-formats.md +++ b/docs/ingestion/data-formats.md @@ -125,6 +125,8 @@ Configure the CSV `inputFormat` to load CSV data as follows: | columns | JSON array | Specifies the columns of the data. The columns should be in the same order with the columns of your data. | yes if `findColumnsFromHeader` is false or missing | | findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) | | skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. 
| no (default = 0) | +| shouldParseNumbers | Boolean | If this is set, the task will attempt to parse numeric strings into long or double data types, in that order. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) | + For example: @@ -150,6 +152,8 @@ Configure the TSV `inputFormat` to load TSV data as follows: | columns | JSON array | Specifies the columns of the data. The columns should be in the same order with the columns of your data. | yes if `findColumnsFromHeader` is false or missing | | findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) | | skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) | +| shouldParseNumbers | Boolean | If this is set, the task will attempt to parse numeric strings into long or double data types, in that order. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) | + Be sure to change the `delimiter` to the appropriate delimiter for your data. Like CSV, you must specify the columns and which subset of the columns you want indexed. From 7d6389e33b35d5b5d9ab394c902fa5aecf91b7bf Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 19:38:17 -0400 Subject: [PATCH 14/24] More new lines. 
--- .../common/task/batch/parallel/HashPartitionTaskKillTest.java | 3 ++- .../parallel/RangePartitionMultiPhaseParallelIndexingTest.java | 3 ++- .../common/task/batch/parallel/RangePartitionTaskKillTest.java | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java index 040e3a247896..fa24213e1974 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionTaskKillTest.java @@ -81,7 +81,8 @@ public class HashPartitionTaskKillTest extends AbstractMultiPhaseParallelIndexin false, false, 0, - null); + null + ); private static final Interval INTERVAL_TO_INDEX = Intervals.of("2017-12/P1M"); private File inputDir; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java index a848c66c807c..3de358fdb5fd 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java @@ -111,7 +111,8 @@ public class RangePartitionMultiPhaseParallelIndexingTest extends AbstractMultiP false, false, 0, - null); + null + ); @Parameterized.Parameters(name = "{0}, useInputFormatApi={1}, maxNumConcurrentSubTasks={2}, useMultiValueDim={3}, intervalToIndex={4}") public static Iterable constructorFeeder() diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionTaskKillTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionTaskKillTest.java index 35b33a77f5bf..4cb7cd5aa51e 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionTaskKillTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionTaskKillTest.java @@ -86,7 +86,8 @@ public class RangePartitionTaskKillTest extends AbstractMultiPhaseParallelIndexi false, false, 0, - null); + null + ); private File inputDir; From c8f8325ebfca57ae46fb09873fdabd18f44b1e2a Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 19:40:08 -0400 Subject: [PATCH 15/24] Remove VisibleForTesting package method as there's sufficient coverage. --- .../java/util/common/parsers/ParserUtils.java | 3 +-- .../java/util/common/parsers/ParserUtilsTest.java | 14 -------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index 59da13d21c52..a5df6d99040a 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -89,9 +89,8 @@ public static Function getMultiValueAndParseNumbersFunction( * tries to parse the input number as a {@code Long}. If parsing as a {@code Long} fails, it then attempts to parse * the input number as a {@code Double}. For all other scenarios, the input is returned as-is as a {@code String} type. 
*/ - @VisibleForTesting @Nullable - static Object tryParseStringAsNumber(@Nullable final String input) + private static Object tryParseStringAsNumber(@Nullable final String input) { if (!NumberUtils.isNumber(input)) { return NullHandling.emptyToNullIfNeeded(input); diff --git a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java index 1167103bfd56..17388657539e 100644 --- a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java +++ b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java @@ -30,7 +30,6 @@ import static org.apache.druid.java.util.common.parsers.ParserUtils.findDuplicates; import static org.apache.druid.java.util.common.parsers.ParserUtils.getMultiValueAndParseNumbersFunction; -import static org.apache.druid.java.util.common.parsers.ParserUtils.tryParseStringAsNumber; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; @@ -269,17 +268,4 @@ public void testInputWithoutDelimiterAndNumberParsingEnabled() getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-1.23|3.13|23|foo|-9") ); } - - @Test - public void testTryParseStringAsNumber() - { - assertNull(tryParseStringAsNumber(null)); - assertEquals(NullHandling.emptyToNullIfNeeded(""), tryParseStringAsNumber("")); - assertEquals("a", tryParseStringAsNumber("a")); - assertEquals("ab", tryParseStringAsNumber("ab")); - assertEquals(12L, tryParseStringAsNumber("12")); - assertEquals(12.234, tryParseStringAsNumber("12.234")); - assertEquals(-1L, tryParseStringAsNumber("-1")); - assertEquals(-12.234, tryParseStringAsNumber("-12.234")); - } } From 4e12258c972c6e1a402881e477a667f9dbc69a21 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 20:23:22 -0400 Subject: [PATCH 16/24] spelling. 
--- website/.spelling | 1 + 1 file changed, 1 insertion(+) diff --git a/website/.spelling b/website/.spelling index 8175755f8046..3b69542dc9fc 100644 --- a/website/.spelling +++ b/website/.spelling @@ -229,6 +229,7 @@ SSD SSDs SSL Samza +shouldParseNumbers Splunk SqlParameter SslContextFactory From 220f93d30dfa916484891e043b0fd9a93b920865 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 20:47:02 -0400 Subject: [PATCH 17/24] Tests for CsvReader and DelimmitedReader. --- .../druid/data/input/impl/CsvReaderTest.java | 112 ++++++++++++++++- .../data/input/impl/DelimitedReaderTest.java | 114 ++++++++++++++++++ 2 files changed, 225 insertions(+), 1 deletion(-) diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java index 27a03aa38e99..bb95ed4042d9 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java @@ -250,7 +250,117 @@ public void testQuotes() throws IOException null, false, 0, - false + null + ); + final InputEntityReader reader = format.createReader( + new InputRowSchema( + new TimestampSpec("Timestamp", "auto", null), + new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("Timestamp"))), + ColumnsFilter.all() + ), + source, + null + ); + + try (CloseableIterator iterator = reader.read()) { + final Iterator expectedRowIterator = expectedResults.iterator(); + while (iterator.hasNext()) { + Assert.assertTrue(expectedRowIterator.hasNext()); + Assert.assertEquals(expectedRowIterator.next(), iterator.next()); + } + } + } + + @Test + public void testParseNumericData() throws IOException + { + final ByteEntity source = writeData( + ImmutableList.of( + "3,1.0,2,1|2|3,2018-05-05T10:00:00Z", + "34,-2.0,foo,1,2018-05-06T10:00:00Z", + "343,3.0,bar,2|3|4,2018-05-07T10:00:00Z", + 
"545,-4.0,7,2,2018-05-08T10:00:00Z", + "65,5.0,baz,3|4,2018-05-09T10:00:00Z" + ) + ); + final RowSignature signature = + RowSignature.builder() + .add("Value1", null) + .add("Value2", null) + .add("Value3", null) + .add("Value4", null) + .add("Timestamp", null) + .build(); + + final List expectedResults = ImmutableList.of( + new ListBasedInputRow( + signature, + DateTimes.of("2018-05-05T10:00:00Z"), + ImmutableList.of("Timestamp"), + ImmutableList.of( + 3L, + 1.0, + 2L, + ImmutableList.of(1L, 2L, 3L), + "2018-05-05T10:00:00Z" + ) + ), + new ListBasedInputRow( + signature, + DateTimes.of("2018-05-06T10:00:00Z"), + ImmutableList.of("Timestamp"), + ImmutableList.of( + 34L, + -2.0, + "foo", + 1L, + "2018-05-06T10:00:00Z" + ) + ), + new ListBasedInputRow( + signature, + DateTimes.of("2018-05-07T10:00:00Z"), + ImmutableList.of("Timestamp"), + ImmutableList.of( + 343L, + 3.0, + "bar", + ImmutableList.of(2L, 3L, 4L), + "2018-05-07T10:00:00Z" + ) + ), + new ListBasedInputRow( + signature, + DateTimes.of("2018-05-08T10:00:00Z"), + ImmutableList.of("Timestamp"), + ImmutableList.of( + 545L, + -4.0, + 7L, + 2L, + "2018-05-08T10:00:00Z" + ) + ), + new ListBasedInputRow( + signature, + DateTimes.of("2018-05-09T10:00:00Z"), + ImmutableList.of("Timestamp"), + ImmutableList.of( + 65L, + 5.0, + "baz", + ImmutableList.of(3L, 4L), + "2018-05-09T10:00:00Z" + ) + ) + ); + final CsvInputFormat format = new CsvInputFormat( + ImmutableList.of("Value1", "Value2", "Value3", "Value4", "Timestamp"), + "|", + null, + false, + 0, + true ); final InputEntityReader reader = format.createReader( new InputRowSchema( diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java index 5c97e2db3b46..b10806c96206 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java +++ 
b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java @@ -26,9 +26,11 @@ import org.apache.druid.data.input.InputEntityReader; import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.InputRowSchema; +import org.apache.druid.data.input.ListBasedInputRow; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.segment.column.RowSignature; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -36,6 +38,7 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.stream.Collectors; @@ -186,6 +189,117 @@ public void testMultiValues() throws IOException } } + @Test + public void testParseNumericData() throws IOException + { + final ByteEntity source = writeData( + ImmutableList.of( + "3\t1.0\t2\t1|2|3\t2018-05-05T10:00:00Z", + "34\t-2.0\tfoo\t1\t2018-05-06T10:00:00Z", + "343\t3.0\tbar\t2|3|4\t2018-05-07T10:00:00Z", + "545\t-4.0\t7\t2\t2018-05-08T10:00:00Z", + "65\t5.0\tbaz\t3|4\t2018-05-09T10:00:00Z" + ) + ); + final RowSignature signature = + RowSignature.builder() + .add("Value1", null) + .add("Value2", null) + .add("Value3", null) + .add("Value4", null) + .add("Timestamp", null) + .build(); + + final List expectedResults = ImmutableList.of( + new ListBasedInputRow( + signature, + DateTimes.of("2018-05-05T10:00:00Z"), + ImmutableList.of("Timestamp"), + ImmutableList.of( + 3L, + 1.0, + 2L, + ImmutableList.of(1L, 2L, 3L), + "2018-05-05T10:00:00Z" + ) + ), + new ListBasedInputRow( + signature, + DateTimes.of("2018-05-06T10:00:00Z"), + ImmutableList.of("Timestamp"), + ImmutableList.of( + 34L, + -2.0, + "foo", + 1L, + "2018-05-06T10:00:00Z" + ) + ), + new ListBasedInputRow( + signature, + DateTimes.of("2018-05-07T10:00:00Z"), + 
ImmutableList.of("Timestamp"), + ImmutableList.of( + 343L, + 3.0, + "bar", + ImmutableList.of(2L, 3L, 4L), + "2018-05-07T10:00:00Z" + ) + ), + new ListBasedInputRow( + signature, + DateTimes.of("2018-05-08T10:00:00Z"), + ImmutableList.of("Timestamp"), + ImmutableList.of( + 545L, + -4.0, + 7L, + 2L, + "2018-05-08T10:00:00Z" + ) + ), + new ListBasedInputRow( + signature, + DateTimes.of("2018-05-09T10:00:00Z"), + ImmutableList.of("Timestamp"), + ImmutableList.of( + 65L, + 5.0, + "baz", + ImmutableList.of(3L, 4L), + "2018-05-09T10:00:00Z" + ) + ) + ); + final DelimitedInputFormat format = new DelimitedInputFormat( + ImmutableList.of("Value1", "Value2", "Value3", "Value4", "Timestamp"), + "|", + null, + false, + null, + 0, + true + ); + final InputEntityReader reader = format.createReader( + new InputRowSchema( + new TimestampSpec("Timestamp", "auto", null), + new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("Timestamp"))), + ColumnsFilter.all() + ), + source, + null + ); + + try (CloseableIterator iterator = reader.read()) { + final Iterator expectedRowIterator = expectedResults.iterator(); + while (iterator.hasNext()) { + Assert.assertTrue(expectedRowIterator.hasNext()); + Assert.assertEquals(expectedRowIterator.next(), iterator.next()); + } + } + } + @Test public void testCustomizeSeparator() throws IOException { From c2d591ef8598dbdd60159535a62c47b3cbf37afb Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 21:04:29 -0400 Subject: [PATCH 18/24] Add boolean values to the test data -- retain them as stringy bools. 
--- .../druid/data/input/impl/CsvReaderTest.java | 20 ++++++++++++------- .../data/input/impl/DelimitedReaderTest.java | 18 +++++++++++------ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java index bb95ed4042d9..b119a2fd5ced 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java @@ -272,15 +272,15 @@ public void testQuotes() throws IOException } @Test - public void testParseNumericData() throws IOException + public void testDataWithParseNumbersEnabled() throws IOException { final ByteEntity source = writeData( ImmutableList.of( - "3,1.0,2,1|2|3,2018-05-05T10:00:00Z", - "34,-2.0,foo,1,2018-05-06T10:00:00Z", - "343,3.0,bar,2|3|4,2018-05-07T10:00:00Z", - "545,-4.0,7,2,2018-05-08T10:00:00Z", - "65,5.0,baz,3|4,2018-05-09T10:00:00Z" + "3,1.0,2,1|2|3,false,2018-05-05T10:00:00Z", + "34,-2.0,foo,1,true,2018-05-06T10:00:00Z", + "343,3.0,bar,2|3|4,false,2018-05-07T10:00:00Z", + "545,-4.0,7,2,true,2018-05-08T10:00:00Z", + "65,5.0,baz,3|4,false,2018-05-09T10:00:00Z" ) ); final RowSignature signature = @@ -289,6 +289,7 @@ public void testParseNumericData() throws IOException .add("Value2", null) .add("Value3", null) .add("Value4", null) + .add("Value5", null) .add("Timestamp", null) .build(); @@ -302,6 +303,7 @@ public void testParseNumericData() throws IOException 1.0, 2L, ImmutableList.of(1L, 2L, 3L), + "false", "2018-05-05T10:00:00Z" ) ), @@ -314,6 +316,7 @@ public void testParseNumericData() throws IOException -2.0, "foo", 1L, + "true", "2018-05-06T10:00:00Z" ) ), @@ -326,6 +329,7 @@ public void testParseNumericData() throws IOException 3.0, "bar", ImmutableList.of(2L, 3L, 4L), + "false", "2018-05-07T10:00:00Z" ) ), @@ -338,6 +342,7 @@ public void testParseNumericData() throws IOException 
-4.0, 7L, 2L, + "true", "2018-05-08T10:00:00Z" ) ), @@ -350,12 +355,13 @@ public void testParseNumericData() throws IOException 5.0, "baz", ImmutableList.of(3L, 4L), + "false", "2018-05-09T10:00:00Z" ) ) ); final CsvInputFormat format = new CsvInputFormat( - ImmutableList.of("Value1", "Value2", "Value3", "Value4", "Timestamp"), + ImmutableList.of("Value1", "Value2", "Value3", "Value4", "Value5", "Timestamp"), "|", null, false, diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java index b10806c96206..eb676a72a30f 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java @@ -190,15 +190,15 @@ public void testMultiValues() throws IOException } @Test - public void testParseNumericData() throws IOException + public void testDataWithParseNumbersEnabled() throws IOException { final ByteEntity source = writeData( ImmutableList.of( - "3\t1.0\t2\t1|2|3\t2018-05-05T10:00:00Z", - "34\t-2.0\tfoo\t1\t2018-05-06T10:00:00Z", - "343\t3.0\tbar\t2|3|4\t2018-05-07T10:00:00Z", - "545\t-4.0\t7\t2\t2018-05-08T10:00:00Z", - "65\t5.0\tbaz\t3|4\t2018-05-09T10:00:00Z" + "3\t1.0\t2\t1|2|3\tfalse\t2018-05-05T10:00:00Z", + "34\t-2.0\tfoo\t1\ttrue\t2018-05-06T10:00:00Z", + "343\t3.0\tbar\t2|3|4\tfalse\t2018-05-07T10:00:00Z", + "545\t-4.0\t7\t2\ttrue\t2018-05-08T10:00:00Z", + "65\t5.0\tbaz\t3|4\tfalse\t2018-05-09T10:00:00Z" ) ); final RowSignature signature = @@ -207,6 +207,7 @@ public void testParseNumericData() throws IOException .add("Value2", null) .add("Value3", null) .add("Value4", null) + .add("Value5", null) .add("Timestamp", null) .build(); @@ -220,6 +221,7 @@ public void testParseNumericData() throws IOException 1.0, 2L, ImmutableList.of(1L, 2L, 3L), + "false", "2018-05-05T10:00:00Z" ) ), @@ -232,6 +234,7 @@ public void 
testParseNumericData() throws IOException -2.0, "foo", 1L, + "true", "2018-05-06T10:00:00Z" ) ), @@ -244,6 +247,7 @@ public void testParseNumericData() throws IOException 3.0, "bar", ImmutableList.of(2L, 3L, 4L), + "false", "2018-05-07T10:00:00Z" ) ), @@ -256,6 +260,7 @@ public void testParseNumericData() throws IOException -4.0, 7L, 2L, + "true", "2018-05-08T10:00:00Z" ) ), @@ -268,6 +273,7 @@ public void testParseNumericData() throws IOException 5.0, "baz", ImmutableList.of(3L, 4L), + "false", "2018-05-09T10:00:00Z" ) ) From 5338d8d220f8eabae381c608dc8d3da6a6074298 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Mon, 16 Sep 2024 21:53:29 -0400 Subject: [PATCH 19/24] fix up. --- .../org/apache/druid/data/input/impl/DelimitedReaderTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java index eb676a72a30f..51786cde7dbe 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java @@ -279,7 +279,7 @@ public void testDataWithParseNumbersEnabled() throws IOException ) ); final DelimitedInputFormat format = new DelimitedInputFormat( - ImmutableList.of("Value1", "Value2", "Value3", "Value4", "Timestamp"), + ImmutableList.of("Value1", "Value2", "Value3", "Value4", "Value5", "Timestamp"), "|", null, false, From 94c35f0f3de253388f14c22f7adee7db48c53e7a Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Tue, 17 Sep 2024 15:11:19 -0700 Subject: [PATCH 20/24] Use tryParse instead of nested exception handling. 
--- .../java/util/common/parsers/ParserUtils.java | 19 ++++++++++--------- .../util/common/parsers/ParserUtilsTest.java | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index a5df6d99040a..d8c7131da07a 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -22,6 +22,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Splitter; +import com.google.common.primitives.Doubles; +import com.google.common.primitives.Longs; import org.apache.commons.lang.math.NumberUtils; import org.apache.druid.common.config.NullHandling; import org.joda.time.DateTimeZone; @@ -96,17 +98,16 @@ private static Object tryParseStringAsNumber(@Nullable final String input) return NullHandling.emptyToNullIfNeeded(input); } - try { - return Long.parseLong(input); + final Long l = Longs.tryParse(input); + if (l != null) { + return l; } - catch (NumberFormatException e1) { - try { - return Double.parseDouble(input); - } - catch (NumberFormatException e2) { - return input; - } + final Double d = Doubles.tryParse(input); + if (d != null) { + return d; } + // fall back to given input if we cannot parse the input as a Long & Double for whatever reason + return input; } public static ArrayList generateFieldNames(int length) diff --git a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java index 17388657539e..bed2eb980f44 100644 --- a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java +++ 
b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java @@ -87,6 +87,10 @@ public void testInputWithDelimiterAndParserDisabled() "-2.0", getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("-2.0") ); + assertEquals( + "1e2", + getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1e2") + ); assertEquals( ImmutableList.of("1", "2", "3"), getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1|2|3") @@ -145,6 +149,10 @@ public void testInputWithDelimiterAndParserEnabled() -2.0, getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-2.0") ); + assertEquals( + 100.0, + getMultiValueAndParseNumbersFunction("$", Splitter.on("|"), true).apply("1e2") + ); assertEquals( ImmutableList.of(1L, 2L, 3L), getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|2|3") @@ -195,6 +203,10 @@ public void testInputWithoutDelimiterAndNumberParsingDisabled() "-2.0", getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("-2.0") ); + assertEquals( + "1e2", + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("1e2") + ); assertEquals( "1|2|3", getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("1|2|3") @@ -245,6 +257,10 @@ public void testInputWithoutDelimiterAndNumberParsingEnabled() -2.0, getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-2.0") ); + assertEquals( + 100.0, + getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1e2") + ); assertEquals( "1|2|3", getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1|2|3") From e6a2b598c3a61a2796749ddb93f3dd682f80adc0 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Wed, 18 Sep 2024 20:33:23 -0700 Subject: [PATCH 21/24] Review: rename to getTransformationFunction. 
--- .../data/input/impl/DelimitedValueReader.java | 8 +- .../druid/data/input/impl/RegexReader.java | 6 +- .../parsers/AbstractFlatTextFormatParser.java | 6 +- .../java/util/common/parsers/ParserUtils.java | 11 +- .../util/common/parsers/ParserUtilsTest.java | 106 +++++++++--------- 5 files changed, 69 insertions(+), 68 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java index 32d3cb31dac4..70618e815bf4 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java @@ -51,7 +51,7 @@ public class DelimitedValueReader extends TextReader.Bytes { private final boolean findColumnsFromHeader; private final int skipHeaderRows; - private final Function multiValueParseFunction; + private final Function transformationFunction; private final DelimitedValueParser parser; /** @@ -90,7 +90,7 @@ interface DelimitedValueParser this.findColumnsFromHeader = findColumnsFromHeader; this.skipHeaderRows = skipHeaderRows; final String finalListDelimeter = listDelimiter == null ? 
Parsers.DEFAULT_LIST_DELIMITER : listDelimiter; - this.multiValueParseFunction = ParserUtils.getMultiValueAndParseNumbersFunction( + this.transformationFunction = ParserUtils.getTransformationFunction( finalListDelimeter, Splitter.on(finalListDelimeter), shouldParseNumbers @@ -139,7 +139,7 @@ public List> toMap(byte[] intermediateRow) throws IOExceptio private List readLineAsList(byte[] line) throws IOException { final List parsed = parser.parseLine(line); - return new ArrayList<>(Lists.transform(parsed, multiValueParseFunction)); + return new ArrayList<>(Lists.transform(parsed, transformationFunction)); } private Map readLineAsMap(byte[] line) throws IOException @@ -147,7 +147,7 @@ private Map readLineAsMap(byte[] line) throws IOException final List parsed = parser.parseLine(line); return Utils.zipMapPartial( Preconditions.checkNotNull(inputRowSignature, "inputRowSignature").getColumnNames(), - Iterables.transform(parsed, multiValueParseFunction) + Iterables.transform(parsed, transformationFunction) ); } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java b/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java index 1124ac6584d6..66f4d25b474e 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java @@ -43,7 +43,7 @@ public class RegexReader extends TextReader.Strings { private final String pattern; private final Pattern compiledPattern; - private final Function multiValueParseFunction; + private final Function transformationFunction; private List columns; @@ -60,7 +60,7 @@ public class RegexReader extends TextReader.Strings this.pattern = pattern; this.compiledPattern = compiledPattern; final String finalListDelimeter = listDelimiter == null ? 
Parsers.DEFAULT_LIST_DELIMITER : listDelimiter; - this.multiValueParseFunction = ParserUtils.getMultiValueAndParseNumbersFunction( + this.transformationFunction = ParserUtils.getTransformationFunction( finalListDelimeter, Splitter.on(finalListDelimeter), false @@ -98,7 +98,7 @@ private Map parseLine(String line) columns = ParserUtils.generateFieldNames(matcher.groupCount()); } - return Utils.zipMapPartial(columns, Iterables.transform(values, multiValueParseFunction)); + return Utils.zipMapPartial(columns, Iterables.transform(values, transformationFunction)); } catch (Exception e) { throw new ParseException(line, e, "Unable to parse row [%s]", line); diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java index 67634a3de3b8..3152c97aaf71 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java @@ -52,7 +52,7 @@ public String getDefaultDelimiter() } private final String listDelimiter; - private final Function multiValueParseFunction; + private final Function transformationFunction; private final boolean hasHeaderRow; private final int maxSkipHeaderRows; @@ -69,7 +69,7 @@ public AbstractFlatTextFormatParser( ) { this.listDelimiter = listDelimiter != null ? 
listDelimiter : Parsers.DEFAULT_LIST_DELIMITER; - this.multiValueParseFunction = ParserUtils.getMultiValueAndParseNumbersFunction( + this.transformationFunction = ParserUtils.getTransformationFunction( this.listDelimiter, Splitter.on(this.listDelimiter), shouldParseNumbers @@ -147,7 +147,7 @@ public Map parseToMap(final String input) setFieldNames(ParserUtils.generateFieldNames(values.size())); } - return Utils.zipMapPartial(fieldNames, Iterables.transform(values, multiValueParseFunction)); + return Utils.zipMapPartial(fieldNames, Iterables.transform(values, transformationFunction)); } catch (Exception e) { throw new ParseException(input, e, "Unable to parse row [%s]", input); diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index d8c7131da07a..c35e1f96c1b0 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -56,12 +56,13 @@ public class ParserUtils } /** - * @return a function that processes a given string input by splitting it into multiple values - * using the {@code listSplitter} if thge {@code list delimiter} is present in the input. If {@code shouldParseNumbers} - * is enabled, the function will also try to parse any numeric values present in the input -- integers as {@code Long} - * and floating-point numbers as {@code Double}. + * @return a transformation function on an input value. The function does the following transformations on the input + * string: + *
+   * <ul>
+   * <li>Split it into multiple values using the {@code listSplitter} if the {@code list delimiter} is present in the input.</li>
+   * <li>If {@code shouldParseNumbers} is true, the function will also try to parse any numeric values present in the input -- integers as {@code Long} and floating-point numbers as {@code Double}.</li>
+   * </ul>
  • */ - public static Function getMultiValueAndParseNumbersFunction( + public static Function getTransformationFunction( final String listDelimiter, final Splitter listSplitter, final boolean shouldParseNumbers diff --git a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java index bed2eb980f44..caf4fc030dac 100644 --- a/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java +++ b/processing/src/test/java/org/apache/druid/java/util/common/parsers/ParserUtilsTest.java @@ -29,7 +29,7 @@ import java.util.List; import static org.apache.druid.java.util.common.parsers.ParserUtils.findDuplicates; -import static org.apache.druid.java.util.common.parsers.ParserUtils.getMultiValueAndParseNumbersFunction; +import static org.apache.druid.java.util.common.parsers.ParserUtils.getTransformationFunction; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; @@ -57,61 +57,61 @@ public void testFindDuplicates() public void testInputWithDelimiterAndParserDisabled() { assertNull( - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply(null) + getTransformationFunction("|", Splitter.on("|"), true).apply(null) ); assertEquals( NullHandling.emptyToNullIfNeeded(""), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("") + getTransformationFunction("|", Splitter.on("|"), true).apply("") ); assertEquals( ImmutableList.of("foo", "boo"), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("foo|boo") + getTransformationFunction("|", Splitter.on("|"), false).apply("foo|boo") ); assertEquals( ImmutableList.of("1", "2", "3"), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1|2|3") + getTransformationFunction("|", Splitter.on("|"), false).apply("1|2|3") ); assertEquals( ImmutableList.of("1", "-2", "3", "0", "-2"), - 
getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1|-2|3|0|-2") + getTransformationFunction("|", Splitter.on("|"), false).apply("1|-2|3|0|-2") ); assertEquals( "100", - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("100") + getTransformationFunction("|", Splitter.on("|"), false).apply("100") ); assertEquals( "1.23", - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1.23") + getTransformationFunction("|", Splitter.on("|"), false).apply("1.23") ); assertEquals( "-2.0", - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("-2.0") + getTransformationFunction("|", Splitter.on("|"), false).apply("-2.0") ); assertEquals( "1e2", - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1e2") + getTransformationFunction("|", Splitter.on("|"), false).apply("1e2") ); assertEquals( ImmutableList.of("1", "2", "3"), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1|2|3") + getTransformationFunction("|", Splitter.on("|"), false).apply("1|2|3") ); assertEquals( ImmutableList.of("1", "-2", "3", "0", "-2"), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("1|-2|3|0|-2") + getTransformationFunction("|", Splitter.on("|"), false).apply("1|-2|3|0|-2") ); assertEquals( ImmutableList.of("-1.0", "-2.2", "3.1", "0.2", "-2.1"), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("-1.0|-2.2|3.1|0.2|-2.1") + getTransformationFunction("|", Splitter.on("|"), false).apply("-1.0|-2.2|3.1|0.2|-2.1") ); // Some mixed types assertEquals( ImmutableList.of("-1.23", "3.13", "23"), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), false).apply("-1.23|3.13|23") + getTransformationFunction("|", Splitter.on("|"), false).apply("-1.23|3.13|23") ); assertEquals( ImmutableList.of("-1.23", "3.13", "23", "foo", "-9"), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), 
false).apply("-1.23|3.13|23|foo|-9") + getTransformationFunction("|", Splitter.on("|"), false).apply("-1.23|3.13|23|foo|-9") ); } @@ -119,61 +119,61 @@ public void testInputWithDelimiterAndParserDisabled() public void testInputWithDelimiterAndParserEnabled() { assertNull( - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply(null) + getTransformationFunction("|", Splitter.on("|"), true).apply(null) ); assertEquals( NullHandling.emptyToNullIfNeeded(""), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("") + getTransformationFunction("|", Splitter.on("|"), true).apply("") ); assertEquals( ImmutableList.of("foo", "boo"), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("foo|boo") + getTransformationFunction("|", Splitter.on("|"), true).apply("foo|boo") ); assertEquals( ImmutableList.of(1L, 2L, 3L), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|2|3") + getTransformationFunction("|", Splitter.on("|"), true).apply("1|2|3") ); assertEquals( ImmutableList.of(1L, -2L, 3L, 0L, -2L), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|-2|3|0|-2") + getTransformationFunction("|", Splitter.on("|"), true).apply("1|-2|3|0|-2") ); assertEquals( 100L, - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("100") + getTransformationFunction("|", Splitter.on("|"), true).apply("100") ); assertEquals( 1.23, - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1.23") + getTransformationFunction("|", Splitter.on("|"), true).apply("1.23") ); assertEquals( -2.0, - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-2.0") + getTransformationFunction("|", Splitter.on("|"), true).apply("-2.0") ); assertEquals( 100.0, - getMultiValueAndParseNumbersFunction("$", Splitter.on("|"), true).apply("1e2") + getTransformationFunction("$", Splitter.on("|"), true).apply("1e2") ); assertEquals( ImmutableList.of(1L, 2L, 
3L), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|2|3") + getTransformationFunction("|", Splitter.on("|"), true).apply("1|2|3") ); assertEquals( ImmutableList.of(1L, -2L, 3L, 0L, -2L), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("1|-2|3|0|-2") + getTransformationFunction("|", Splitter.on("|"), true).apply("1|-2|3|0|-2") ); assertEquals( ImmutableList.of(-1.0, -2.2, 3.1, 0.2, -2.1), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-1.0|-2.2|3.1|0.2|-2.1") + getTransformationFunction("|", Splitter.on("|"), true).apply("-1.0|-2.2|3.1|0.2|-2.1") ); // Some mixed types assertEquals( ImmutableList.of(-1.23, 3.13, 23L), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23") + getTransformationFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23") ); assertEquals( ImmutableList.of(-1.23, 3.13, 23L, "foo", -9L), - getMultiValueAndParseNumbersFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23|foo|-9") + getTransformationFunction("|", Splitter.on("|"), true).apply("-1.23|3.13|23|foo|-9") ); } @@ -181,53 +181,53 @@ public void testInputWithDelimiterAndParserEnabled() public void testInputWithoutDelimiterAndNumberParsingDisabled() { assertNull( - getMultiValueAndParseNumbersFunction("|", Splitter.on("$"), false).apply(null) + getTransformationFunction("|", Splitter.on("$"), false).apply(null) ); assertEquals( NullHandling.emptyToNullIfNeeded(""), - getMultiValueAndParseNumbersFunction("|", Splitter.on("$"), false).apply("") + getTransformationFunction("|", Splitter.on("$"), false).apply("") ); assertEquals( "foo|boo", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("foo|boo") + getTransformationFunction("$", Splitter.on("$"), false).apply("foo|boo") ); assertEquals( "100", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("100") + getTransformationFunction("$", Splitter.on("$"), 
false).apply("100") ); assertEquals( "1.23", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("1.23") + getTransformationFunction("$", Splitter.on("$"), false).apply("1.23") ); assertEquals( "-2.0", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("-2.0") + getTransformationFunction("$", Splitter.on("$"), false).apply("-2.0") ); assertEquals( "1e2", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("1e2") + getTransformationFunction("$", Splitter.on("$"), false).apply("1e2") ); assertEquals( "1|2|3", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("1|2|3") + getTransformationFunction("$", Splitter.on("$"), false).apply("1|2|3") ); assertEquals( "1|-2|3|0|-2", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("1|-2|3|0|-2") + getTransformationFunction("$", Splitter.on("$"), false).apply("1|-2|3|0|-2") ); assertEquals( "-1.0|-2.2|3.1|0.2|-2.1", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("-1.0|-2.2|3.1|0.2|-2.1") + getTransformationFunction("$", Splitter.on("$"), false).apply("-1.0|-2.2|3.1|0.2|-2.1") ); // Some mixed types assertEquals( "-1.23|3.13|23", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("-1.23|3.13|23") + getTransformationFunction("$", Splitter.on("$"), false).apply("-1.23|3.13|23") ); assertEquals( "-1.23|3.13|23|foo|-9", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), false).apply("-1.23|3.13|23|foo|-9") + getTransformationFunction("$", Splitter.on("$"), false).apply("-1.23|3.13|23|foo|-9") ); } @@ -235,53 +235,53 @@ public void testInputWithoutDelimiterAndNumberParsingDisabled() public void testInputWithoutDelimiterAndNumberParsingEnabled() { assertNull( - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply(null) + getTransformationFunction("$", Splitter.on("$"), true).apply(null) ); assertEquals( 
NullHandling.emptyToNullIfNeeded(""), - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("") + getTransformationFunction("$", Splitter.on("$"), true).apply("") ); assertEquals( "foo|boo", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("foo|boo") + getTransformationFunction("$", Splitter.on("$"), true).apply("foo|boo") ); assertEquals( 100L, - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("100") + getTransformationFunction("$", Splitter.on("$"), true).apply("100") ); assertEquals( 1.23, - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1.23") + getTransformationFunction("$", Splitter.on("$"), true).apply("1.23") ); assertEquals( -2.0, - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-2.0") + getTransformationFunction("$", Splitter.on("$"), true).apply("-2.0") ); assertEquals( 100.0, - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1e2") + getTransformationFunction("$", Splitter.on("$"), true).apply("1e2") ); assertEquals( "1|2|3", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1|2|3") + getTransformationFunction("$", Splitter.on("$"), true).apply("1|2|3") ); assertEquals( "1|-2|3|0|-2", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("1|-2|3|0|-2") + getTransformationFunction("$", Splitter.on("$"), true).apply("1|-2|3|0|-2") ); assertEquals( "-1.0|-2.2|3.1|0.2|-2.1", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-1.0|-2.2|3.1|0.2|-2.1") + getTransformationFunction("$", Splitter.on("$"), true).apply("-1.0|-2.2|3.1|0.2|-2.1") ); // Some mixed types assertEquals( "-1.23|3.13|23", - getMultiValueAndParseNumbersFunction("$", Splitter.on("$"), true).apply("-1.23|3.13|23") + getTransformationFunction("$", Splitter.on("$"), true).apply("-1.23|3.13|23") ); assertEquals( "-1.23|3.13|23|foo|-9", - getMultiValueAndParseNumbersFunction("$", 
Splitter.on("$"), true).apply("-1.23|3.13|23|foo|-9") + getTransformationFunction("$", Splitter.on("$"), true).apply("-1.23|3.13|23|foo|-9") ); } } From 6dd398665c48f71cbb6652222000ea02b520d6b0 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Wed, 18 Sep 2024 20:43:25 -0700 Subject: [PATCH 22/24] Rename shouldParseNumbers to tryParseNumbers. --- docs/ingestion/data-formats.md | 4 ++-- .../apache/druid/data/input/impl/CsvInputFormat.java | 6 +++--- .../druid/data/input/impl/DelimitedInputFormat.java | 6 +++--- .../druid/data/input/impl/DelimitedValueReader.java | 4 ++-- .../druid/data/input/impl/FlatTextInputFormat.java | 10 +++++----- .../common/parsers/AbstractFlatTextFormatParser.java | 4 ++-- .../druid/java/util/common/parsers/CSVParser.java | 8 ++++---- .../java/util/common/parsers/DelimitedParser.java | 8 ++++---- .../druid/java/util/common/parsers/ParserUtils.java | 8 ++++---- .../druid/data/input/impl/CsvInputFormatTest.java | 4 ++-- .../apache/druid/data/input/impl/CsvReaderTest.java | 2 +- .../data/input/impl/DelimitedInputFormatTest.java | 6 +++--- .../druid/data/input/impl/DelimitedReaderTest.java | 2 +- .../util/common/parsers/FlatTextFormatParserTest.java | 6 +++--- website/.spelling | 2 +- 15 files changed, 40 insertions(+), 40 deletions(-) diff --git a/docs/ingestion/data-formats.md b/docs/ingestion/data-formats.md index 8fc23b48feb1..bfdb59b8d2dd 100644 --- a/docs/ingestion/data-formats.md +++ b/docs/ingestion/data-formats.md @@ -125,7 +125,7 @@ Configure the CSV `inputFormat` to load CSV data as follows: | columns | JSON array | Specifies the columns of the data. The columns should be in the same order with the columns of your data. | yes if `findColumnsFromHeader` is false or missing | | findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. 
For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) | | skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) | -| shouldParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) | +| tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) | For example: @@ -152,7 +152,7 @@ Configure the TSV `inputFormat` to load TSV data as follows: | columns | JSON array | Specifies the columns of the data. The columns should be in the same order with the columns of your data. | yes if `findColumnsFromHeader` is false or missing | | findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) | | skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) | -| shouldParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. If the value cannot be parsed as a number, it is retained as a string. 
| no (default = false) | +| tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) | Be sure to change the `delimiter` to the appropriate delimiter for your data. Like CSV, you must specify the columns and which subset of the columns you want indexed. diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java b/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java index 17f730dbba82..023e209b8b93 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/CsvInputFormat.java @@ -48,10 +48,10 @@ public CsvInputFormat( @Deprecated @JsonProperty("hasHeaderRow") @Nullable Boolean hasHeaderRow, @JsonProperty("findColumnsFromHeader") @Nullable Boolean findColumnsFromHeader, @JsonProperty("skipHeaderRows") int skipHeaderRows, - @JsonProperty("shouldParseNumbers") @Nullable Boolean shouldParseNumbers + @JsonProperty("tryParseNumbers") @Nullable Boolean tryParseNumbers ) { - super(columns, listDelimiter, String.valueOf(SEPARATOR), hasHeaderRow, findColumnsFromHeader, skipHeaderRows, shouldParseNumbers); + super(columns, listDelimiter, String.valueOf(SEPARATOR), hasHeaderRow, findColumnsFromHeader, skipHeaderRows, tryParseNumbers); } @Override @@ -82,7 +82,7 @@ public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity getSkipHeaderRows(), line -> Arrays.asList(parser.parseLine(StringUtils.fromUtf8(line))), useListBasedInputRows(), - shouldParseNumbers() + shouldTryParseNumbers() ); } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedInputFormat.java b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedInputFormat.java index 8192de2984db..06590584b862 100644 --- 
a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedInputFormat.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedInputFormat.java @@ -53,7 +53,7 @@ public DelimitedInputFormat( @Deprecated @JsonProperty("hasHeaderRow") @Nullable Boolean hasHeaderRow, @JsonProperty("findColumnsFromHeader") @Nullable Boolean findColumnsFromHeader, @JsonProperty("skipHeaderRows") int skipHeaderRows, - @JsonProperty("shouldParseNumbers") @Nullable Boolean shouldParseNumbers + @JsonProperty("tryParseNumbers") @Nullable Boolean tryParseNumbers ) { super( @@ -63,7 +63,7 @@ public DelimitedInputFormat( hasHeaderRow, findColumnsFromHeader, skipHeaderRows, - shouldParseNumbers + tryParseNumbers ); } @@ -88,7 +88,7 @@ public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity useListBasedInputRows() ? getColumns().size() : DelimitedBytes.UNKNOWN_FIELD_COUNT ), useListBasedInputRows(), - shouldParseNumbers() + shouldTryParseNumbers() ); } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java index 70618e815bf4..01d2565993f4 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/DelimitedValueReader.java @@ -83,7 +83,7 @@ interface DelimitedValueParser int skipHeaderRows, DelimitedValueParser parser, boolean useListBasedInputRows, - boolean shouldParseNumbers + boolean tryParseNumbers ) { super(inputRowSchema, source); @@ -93,7 +93,7 @@ interface DelimitedValueParser this.transformationFunction = ParserUtils.getTransformationFunction( finalListDelimeter, Splitter.on(finalListDelimeter), - shouldParseNumbers + tryParseNumbers ); if (!findColumnsFromHeader && columns != null) { diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/FlatTextInputFormat.java 
b/processing/src/main/java/org/apache/druid/data/input/impl/FlatTextInputFormat.java index 6e92344dbd90..39086b6838e5 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/FlatTextInputFormat.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/FlatTextInputFormat.java @@ -46,7 +46,7 @@ public abstract class FlatTextInputFormat implements InputFormat private final String delimiter; private final boolean findColumnsFromHeader; private final int skipHeaderRows; - private final boolean shouldParseNumbers; + private final boolean tryParseNumbers; FlatTextInputFormat( @Nullable List columns, @@ -55,7 +55,7 @@ public abstract class FlatTextInputFormat implements InputFormat @Nullable Boolean hasHeaderRow, @Nullable Boolean findColumnsFromHeader, int skipHeaderRows, - @Nullable Boolean shouldParseNumbers + @Nullable Boolean tryParseNumbers ) { this.columns = columns == null ? Collections.emptyList() : columns; @@ -81,7 +81,7 @@ public abstract class FlatTextInputFormat implements InputFormat "Cannot have same delimiter and list delimiter of [%s]", delimiter ); - this.shouldParseNumbers = shouldParseNumbers == null ? false : shouldParseNumbers; + this.tryParseNumbers = tryParseNumbers == null ? 
false : tryParseNumbers; if (!this.columns.isEmpty()) { for (String column : this.columns) { @@ -137,9 +137,9 @@ public int getSkipHeaderRows() @JsonProperty @JsonInclude(JsonInclude.Include.NON_DEFAULT) - public boolean shouldParseNumbers() + public boolean shouldTryParseNumbers() { - return shouldParseNumbers; + return tryParseNumbers; } @Override diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java index 3152c97aaf71..4e9ff77cdf4e 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/AbstractFlatTextFormatParser.java @@ -65,14 +65,14 @@ public AbstractFlatTextFormatParser( @Nullable final String listDelimiter, final boolean hasHeaderRow, final int maxSkipHeaderRows, - final boolean shouldParseNumbers + final boolean tryParseNumbers ) { this.listDelimiter = listDelimiter != null ? 
listDelimiter : Parsers.DEFAULT_LIST_DELIMITER; this.transformationFunction = ParserUtils.getTransformationFunction( this.listDelimiter, Splitter.on(this.listDelimiter), - shouldParseNumbers + tryParseNumbers ); this.hasHeaderRow = hasHeaderRow; diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java index 9195b72fb9b2..e4c1a748610b 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java @@ -36,10 +36,10 @@ public CSVParser( @Nullable final String listDelimiter, final boolean hasHeaderRow, final int maxSkipHeaderRows, - final boolean shouldParseNumbers + final boolean tryParseNumbers ) { - super(listDelimiter, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); + super(listDelimiter, hasHeaderRow, maxSkipHeaderRows, tryParseNumbers); } public CSVParser( @@ -47,10 +47,10 @@ public CSVParser( final Iterable fieldNames, final boolean hasHeaderRow, final int maxSkipHeaderRows, - final boolean shouldParseNumbers + final boolean tryParseNumbers ) { - this(listDelimiter, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); + this(listDelimiter, hasHeaderRow, maxSkipHeaderRows, tryParseNumbers); setFieldNames(fieldNames); } diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java index 0f9d3b4748e1..c69af8974f8c 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/DelimitedParser.java @@ -36,10 +36,10 @@ public DelimitedParser( @Nullable final String listDelimiter, final boolean hasHeaderRow, final int maxSkipHeaderRows, - final boolean shouldParseNumbers + final boolean 
tryParseNumbers ) { - super(listDelimiter, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); + super(listDelimiter, hasHeaderRow, maxSkipHeaderRows, tryParseNumbers); final String finalDelimiter = delimiter != null ? delimiter : FlatTextFormat.DELIMITED.getDefaultDelimiter(); Preconditions.checkState( @@ -57,10 +57,10 @@ public DelimitedParser( final Iterable fieldNames, final boolean hasHeaderRow, final int maxSkipHeaderRows, - final boolean shouldParseNumbers + final boolean tryParseNumbers ) { - this(delimiter, listDelimiter, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); + this(delimiter, listDelimiter, hasHeaderRow, maxSkipHeaderRows, tryParseNumbers); setFieldNames(fieldNames); } diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index c35e1f96c1b0..c9d8844cec15 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -59,13 +59,13 @@ public class ParserUtils * @return a transformation function on an input value. The function does the following transformations on the input * string: *
  • Split it into multiple values using the {@code listSplitter} if the {@code list delimiter} is present in the input.
  • - *
  • If {@code shouldParseNumbers} is true, the function will also try to parse any numeric values present in the input + *
  • If {@code tryParseNumbers} is true, the function will also try to parse any numeric values present in the input * -- integers as {@code Long} and floating-point numbers as {@code Double}.
  • */ public static Function getTransformationFunction( final String listDelimiter, final Splitter listSplitter, - final boolean shouldParseNumbers + final boolean tryParseNumbers ) { return (input) -> { @@ -76,10 +76,10 @@ public static Function getTransformationFunction( if (input.contains(listDelimiter)) { return StreamSupport.stream(listSplitter.split(input).spliterator(), false) .map(NullHandling::emptyToNullIfNeeded) - .map(value -> shouldParseNumbers ? ParserUtils.tryParseStringAsNumber(value) : value) + .map(value -> tryParseNumbers ? ParserUtils.tryParseStringAsNumber(value) : value) .collect(Collectors.toList()); } else { - return shouldParseNumbers ? + return tryParseNumbers ? tryParseStringAsNumber(input) : NullHandling.emptyToNullIfNeeded(input); diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java index 7510928626d9..581c67eb0d03 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java @@ -132,10 +132,10 @@ public void testDeserializeWithShouldParseNumbers() throws IOException { final ObjectMapper mapper = new ObjectMapper(); final CsvInputFormat inputFormat = (CsvInputFormat) mapper.readValue( - "{\"type\":\"csv\",\"hasHeaderRow\":true,\"shouldParseNumbers\":true}", + "{\"type\":\"csv\",\"hasHeaderRow\":true,\"tryParseNumbers\":true}", InputFormat.class ); - Assert.assertTrue(inputFormat.shouldParseNumbers()); + Assert.assertTrue(inputFormat.tryParseNumbers()); } @Test diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java index b119a2fd5ced..bb378153a343 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java +++ 
b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java @@ -439,7 +439,7 @@ private void assertResult(ByteEntity source, CsvInputFormat format) throws IOExc StringUtils.format("name_%d", numResults + 1), Iterables.getOnlyElement(row.getDimension("name")) ); - if (format.shouldParseNumbers()) { + if (format.tryParseNumbers()) { Assert.assertEquals(((numResults + 1) * 5L), row.getRaw("score")); if (format.getColumns().contains("rating_dbl")) { Assert.assertEquals(numResults * 1.0, row.getRaw("rating_dbl")); diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java index 55c752508d68..790e290e813c 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java @@ -119,7 +119,7 @@ public void testShouldParseNumbers() 0, true ); - Assert.assertTrue(format.shouldParseNumbers()); + Assert.assertTrue(format.tryParseNumbers()); } @Test @@ -127,10 +127,10 @@ public void testDeserializeWithShouldParseNumbers() throws IOException { final ObjectMapper mapper = new ObjectMapper(); final DelimitedInputFormat inputFormat = (DelimitedInputFormat) mapper.readValue( - "{\"type\":\"tsv\",\"hasHeaderRow\":true,\"shouldParseNumbers\":true}", + "{\"type\":\"tsv\",\"hasHeaderRow\":true,\"tryParseNumbers\":true}", InputFormat.class ); - Assert.assertTrue(inputFormat.shouldParseNumbers()); + Assert.assertTrue(inputFormat.tryParseNumbers()); } @Test diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java index 51786cde7dbe..fcac942cbf14 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java +++ 
b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java @@ -401,7 +401,7 @@ private void assertResult(ByteEntity source, DelimitedInputFormat format) throws StringUtils.format("name_%d", numResults + 1), Iterables.getOnlyElement(row.getDimension("name")) ); - if (format.shouldParseNumbers()) { + if (format.tryParseNumbers()) { Assert.assertEquals(((numResults + 1) * 5L), row.getRaw("score")); if (format.getColumns().contains("rating_dbl")) { Assert.assertEquals(numResults * 1.0, row.getRaw("rating_dbl")); diff --git a/processing/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java b/processing/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java index 412e4b6a97c6..1b69d0bdec44 100644 --- a/processing/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java +++ b/processing/src/test/java/org/apache/druid/java/util/common/parsers/FlatTextFormatParserTest.java @@ -237,13 +237,13 @@ public Parser get(FlatTextFormat format) return get(format, false, 0, false); } - public Parser get(FlatTextFormat format, boolean hasHeaderRow, int maxSkipHeaderRows, boolean shouldParseNumbers) + public Parser get(FlatTextFormat format, boolean hasHeaderRow, int maxSkipHeaderRows, boolean tryParseNumbers) { switch (format) { case CSV: - return new CSVParser(null, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); + return new CSVParser(null, hasHeaderRow, maxSkipHeaderRows, tryParseNumbers); case DELIMITED: - return new DelimitedParser("\t", null, hasHeaderRow, maxSkipHeaderRows, shouldParseNumbers); + return new DelimitedParser("\t", null, hasHeaderRow, maxSkipHeaderRows, tryParseNumbers); default: throw new IAE("Unknown format[%s]", format); } diff --git a/website/.spelling b/website/.spelling index 3b69542dc9fc..592bac1b99c3 100644 --- a/website/.spelling +++ b/website/.spelling @@ -229,7 +229,7 @@ SSD SSDs SSL Samza -shouldParseNumbers +tryParseNumbers Splunk 
SqlParameter SslContextFactory From c2c3ee92052e1346c265ef70970369a6a266d73e Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Wed, 18 Sep 2024 20:48:49 -0700 Subject: [PATCH 23/24] Better docs --- docs/ingestion/data-formats.md | 5 +++-- .../druid/java/util/common/parsers/ParserUtils.java | 10 +++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/ingestion/data-formats.md b/docs/ingestion/data-formats.md index bfdb59b8d2dd..01c97b47e853 100644 --- a/docs/ingestion/data-formats.md +++ b/docs/ingestion/data-formats.md @@ -125,7 +125,8 @@ Configure the CSV `inputFormat` to load CSV data as follows: | columns | JSON array | Specifies the columns of the data. The columns should be in the same order with the columns of your data. | yes if `findColumnsFromHeader` is false or missing | | findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) | | skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) | -| tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) | +| tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. This parsing also applies to values separated by `listDelimiter`. If the value cannot be parsed as a number, it is retained as a string. 
| no (default = false) | + For example: @@ -152,7 +153,7 @@ Configure the TSV `inputFormat` to load TSV data as follows: | columns | JSON array | Specifies the columns of the data. The columns should be in the same order with the columns of your data. | yes if `findColumnsFromHeader` is false or missing | | findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) | | skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) | -| tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) | +| tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. This parsing also applies to values separated by `listDelimiter`. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) | Be sure to change the `delimiter` to the appropriate delimiter for your data. Like CSV, you must specify the columns and which subset of the columns you want indexed. 
diff --git a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java index c9d8844cec15..b716737d15e9 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/parsers/ParserUtils.java @@ -56,11 +56,11 @@ public class ParserUtils } /** - * @return a transformation function on an input value. The function does the following transformations on the input - * string: - *
  • Split it into multiple values using the {@code listSplitter} if the {@code list delimiter} is present in the input.
  • - *
  • If {@code tryParseNumbers} is true, the function will also try to parse any numeric values present in the input - * -- integers as {@code Long} and floating-point numbers as {@code Double}.
  • + * @return a transformation function on an input value. The function performs the following transformations on the input string: + *
  • Splits it into multiple values using the {@code listSplitter} if the {@code list delimiter} is present in the input.
  • + *
  • If {@code tryParseNumbers} is true, the function will also attempt to parse any numeric values present in the input: + * integers as {@code Long} and floating-point numbers as {@code Double}. If the input is not a number or parsing fails, the input + * is returned as-is as a string.
  • */ public static Function getTransformationFunction( final String listDelimiter, From 153bff5c396a174a4086da36e72758094c656b6e Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Wed, 18 Sep 2024 20:51:56 -0700 Subject: [PATCH 24/24] Rename fix and change .spelling entry order. --- docs/ingestion/data-formats.md | 3 - .../data/input/impl/CsvInputFormatTest.java | 4 +- .../druid/data/input/impl/CsvReaderTest.java | 2 +- .../input/impl/DelimitedInputFormatTest.java | 8 +-- .../data/input/impl/DelimitedReaderTest.java | 2 +- .../sql/calcite/IngestTableFunctionTest.java | 56 +++++++++---------- website/.spelling | 2 +- 7 files changed, 37 insertions(+), 40 deletions(-) diff --git a/docs/ingestion/data-formats.md b/docs/ingestion/data-formats.md index 01c97b47e853..320f2e5976b5 100644 --- a/docs/ingestion/data-formats.md +++ b/docs/ingestion/data-formats.md @@ -127,8 +127,6 @@ Configure the CSV `inputFormat` to load CSV data as follows: | skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) | | tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. This parsing also applies to values separated by `listDelimiter`. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) | - - For example: ```json @@ -155,7 +153,6 @@ Configure the TSV `inputFormat` to load TSV data as follows: | skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) | | tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. This parsing also applies to values separated by `listDelimiter`. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) | - Be sure to change the `delimiter` to the appropriate delimiter for your data. 
Like CSV, you must specify the columns and which subset of the columns you want indexed. For example: diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java index 581c67eb0d03..0a65862579d5 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/CsvInputFormatTest.java @@ -128,14 +128,14 @@ public void testDeserializeWithoutAnyProperties() } @Test - public void testDeserializeWithShouldParseNumbers() throws IOException + public void testDeserializeWithTryParseNumbers() throws IOException { final ObjectMapper mapper = new ObjectMapper(); final CsvInputFormat inputFormat = (CsvInputFormat) mapper.readValue( "{\"type\":\"csv\",\"hasHeaderRow\":true,\"tryParseNumbers\":true}", InputFormat.class ); - Assert.assertTrue(inputFormat.tryParseNumbers()); + Assert.assertTrue(inputFormat.shouldTryParseNumbers()); } @Test diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java index bb378153a343..70d6c7b705d9 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/CsvReaderTest.java @@ -439,7 +439,7 @@ private void assertResult(ByteEntity source, CsvInputFormat format) throws IOExc StringUtils.format("name_%d", numResults + 1), Iterables.getOnlyElement(row.getDimension("name")) ); - if (format.tryParseNumbers()) { + if (format.shouldTryParseNumbers()) { Assert.assertEquals(((numResults + 1) * 5L), row.getRaw("score")); if (format.getColumns().contains("rating_dbl")) { Assert.assertEquals(numResults * 1.0, row.getRaw("rating_dbl")); diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java 
b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java index 790e290e813c..8f4c01bcd491 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedInputFormatTest.java @@ -108,7 +108,7 @@ public void testFindColumnsFromHeaderWithMissingColumnsReturningItsValue() } @Test - public void testShouldParseNumbers() + public void testTryParseNumbers() { final DelimitedInputFormat format = new DelimitedInputFormat( null, @@ -119,18 +119,18 @@ public void testShouldParseNumbers() 0, true ); - Assert.assertTrue(format.tryParseNumbers()); + Assert.assertTrue(format.shouldTryParseNumbers()); } @Test - public void testDeserializeWithShouldParseNumbers() throws IOException + public void testDeserializeWithTryParseNumbers() throws IOException { final ObjectMapper mapper = new ObjectMapper(); final DelimitedInputFormat inputFormat = (DelimitedInputFormat) mapper.readValue( "{\"type\":\"tsv\",\"hasHeaderRow\":true,\"tryParseNumbers\":true}", InputFormat.class ); - Assert.assertTrue(inputFormat.tryParseNumbers()); + Assert.assertTrue(inputFormat.shouldTryParseNumbers()); } @Test diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java index fcac942cbf14..aaa6490b2985 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/DelimitedReaderTest.java @@ -401,7 +401,7 @@ private void assertResult(ByteEntity source, DelimitedInputFormat format) throws StringUtils.format("name_%d", numResults + 1), Iterables.getOnlyElement(row.getDimension("name")) ); - if (format.tryParseNumbers()) { + if (format.shouldTryParseNumbers()) { Assert.assertEquals(((numResults + 1) * 5L), row.getRaw("score")); if 
(format.getColumns().contains("rating_dbl")) { Assert.assertEquals(numResults * 1.0, row.getRaw("rating_dbl")); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java index 6081d86ab633..c5caa3258ce9 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java @@ -146,7 +146,7 @@ public void testHttpExtern() .columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("httpExtern") .verify(); } @@ -158,11 +158,11 @@ public void testHttpExtern() public void testHttpFunction() { String extern = "TABLE(http(" - + "userName => 'bob'," - + "password => 'secret'," - + "uris => ARRAY['http://foo.com/bar.csv']," - + "format => 'csv'))" - + " (x VARCHAR, y VARCHAR, z BIGINT)"; + + "userName => 'bob'," + + "password => 'secret'," + + "uris => ARRAY['http://foo.com/bar.csv']," + + "format => 'csv'))" + + " (x VARCHAR, y VARCHAR, z BIGINT)"; testIngestionQuery() .sql("INSERT INTO dst SELECT * FROM %s PARTITIONED BY ALL TIME", extern) .authentication(CalciteTests.SUPER_USER_AUTH_RESULT) @@ -247,7 +247,7 @@ public void testHttpExternByName() .columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("httpExtern") .verify(); } @@ -277,7 +277,7 @@ public void testHttpFn() .columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("httpExtern") .verify(); } @@ -301,9 +301,9 @@ public void testHttpFn2() .build() ); RowSignature expectedSig = RowSignature.builder() - .add("__time", ColumnType.LONG) - .add("isRobot", ColumnType.STRING) - .build(); + .add("__time", ColumnType.LONG) + .add("isRobot", ColumnType.STRING) + .build(); 
testIngestionQuery() .sql("INSERT INTO w000\n" + "SELECT\n" + @@ -328,7 +328,7 @@ public void testHttpFn2() .virtualColumns(expressionVirtualColumn("v0", "timestamp_parse(\"timestamp\",null,'UTC')", ColumnType.LONG)) .columns("isRobot", "v0") .build() - ) + ) .verify(); } @@ -394,7 +394,7 @@ public void testHttpFnWithParameters() testIngestionQuery() .sql("INSERT INTO dst SELECT *\n" + "FROM TABLE(http(userName => 'bob',\n" + - " password => 'secret',\n" + + " password => 'secret',\n" + " uris => ?,\n" + " format => 'csv'))\n" + " EXTEND (x VARCHAR, y VARCHAR, z BIGINT)\n" + @@ -410,7 +410,7 @@ public void testHttpFnWithParameters() .columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("httpExtern") .verify(); } @@ -437,11 +437,11 @@ public void testHttpJson() .add("c", ColumnType.FLOAT_ARRAY) .add("d", ColumnType.DOUBLE_ARRAY) .build() - ); + ); testIngestionQuery() .sql("INSERT INTO dst SELECT *\n" + "FROM TABLE(http(userName => 'bob',\n" + - " password => 'secret',\n" + + " password => 'secret',\n" + " uris => ARRAY['http://foo.com/bar.json'],\n" + " format => 'json'))\n" + " EXTEND (x VARCHAR, y VARCHAR, z TYPE('COMPLEX'), a VARCHAR ARRAY, b BIGINT ARRAY, c FLOAT ARRAY, d DOUBLE ARRAY)\n" + @@ -456,7 +456,7 @@ public void testHttpJson() .columns("a", "b", "c", "d", "x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .verify(); } @@ -478,7 +478,7 @@ public void testInlineExtern() .columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("insertFromExternal") .verify(); } @@ -546,7 +546,7 @@ public void testInlineExternWithExtend() .columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("insertFromExternal") .verify(); } @@ -574,7 +574,7 @@ public void testInlineFn() 
.columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("insertFromExternal") .verify(); } @@ -597,7 +597,7 @@ public void testLocalExtern() .columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("localExtern") .verify(); } @@ -625,7 +625,7 @@ public void testLocalFilesFn() .columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("localExtern") .verify(); } @@ -653,7 +653,7 @@ public void testLocalFnOmitExtend() .columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("localExtern") .verify(); } @@ -672,7 +672,7 @@ public void testLocalFnWithAlias() " (x VARCHAR, y VARCHAR, z BIGINT)\n" + " As myTable\n" + "PARTITIONED BY ALL TIME" - ) + ) .authentication(CalciteTests.SUPER_USER_AUTH_RESULT) .expectTarget("dst", localDataSource.getSignature()) .expectResources(dataSourceWrite("dst"), Externals.EXTERNAL_RESOURCE_ACTION) @@ -683,7 +683,7 @@ public void testLocalFnWithAlias() .columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("localExtern") .verify(); } @@ -702,7 +702,7 @@ public void testLocalFnNotNull() " (x VARCHAR NOT NULL, y VARCHAR NOT NULL, z BIGINT NOT NULL)\n" + " As myTable\n" + "PARTITIONED BY ALL TIME" - ) + ) .authentication(CalciteTests.SUPER_USER_AUTH_RESULT) .expectTarget("dst", localDataSource.getSignature()) .expectResources(dataSourceWrite("dst"), Externals.EXTERNAL_RESOURCE_ACTION) @@ -713,7 +713,7 @@ public void testLocalFnNotNull() .columns("x", "y", "z") .context(CalciteIngestionDmlTest.PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() - ) + ) .expectLogicalPlanFrom("localExtern") .verify(); } @@ -779,4 +779,4 @@ public void configure(Binder binder) 
}); } } -} \ No newline at end of file +} diff --git a/website/.spelling b/website/.spelling index 6d2aa4e24bf3..ebfbaa6a8755 100644 --- a/website/.spelling +++ b/website/.spelling @@ -229,7 +229,6 @@ SSD SSDs SSL Samza -tryParseNumbers Splunk SqlParameter SslContextFactory @@ -243,6 +242,7 @@ TLS tls TopN TopNs +tryParseNumbers UI UIs UPSERT