From 89354f986e73d2421d955bb1ae12fca6ef279d2c Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Fri, 26 Jun 2020 15:54:29 -1000 Subject: [PATCH 01/15] Add integration tests for Avro OCF InputFormat --- .../parallelized/ITAvroInputFormatTest.java | 127 ++++++++++++++++++ .../avro/wikipedia_index_data1.avro | Bin 0 -> 1066 bytes .../avro/wikipedia_index_data2.avro | Bin 0 -> 1055 bytes .../avro/wikipedia_index_data3.avro | Bin 0 -> 1151 bytes ...kipedia_local_input_source_index_task.json | 82 +++++++++++ 5 files changed, 209 insertions(+) create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITAvroInputFormatTest.java create mode 100644 integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data1.avro create mode 100644 integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data2.avro create mode 100644 integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data3.avro create mode 100644 integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json diff --git a/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITAvroInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITAvroInputFormatTest.java new file mode 100644 index 000000000000..a95e3bf95c02 --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITAvroInputFormatTest.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.parallelized; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.apache.druid.tests.indexer.AbstractITBatchIndexTest; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +import java.io.Closeable; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.function.Function; + +@Test(groups = TestNGGroup.BATCH_INDEX) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITAvroInputFormatTest extends AbstractITBatchIndexTest +{ + private static final String INDEX_TASK = "/indexer/wikipedia_local_input_source_index_task.json"; + private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; + + @Test + public void testIndexDataIngestionSpecWithoutSchema() throws Exception + { + List fieldList = ImmutableList.of( + ImmutableMap.of("name", "timestamp", "type", "string"), + ImmutableMap.of("name", "page", "type", "string"), + ImmutableMap.of("name", "language", "type", "string"), + ImmutableMap.of("name", "user", "type", "string"), + ImmutableMap.of("name", "unpatrolled", "type", "string"), + ImmutableMap.of("name", "newPage", "type", "string"), + ImmutableMap.of("name", "robot", "type", "string"), + ImmutableMap.of("name", "anonymous", "type", "string"), + ImmutableMap.of("name", "namespace", "type", "string"), + ImmutableMap.of("name", "continent", "type", "string"), + ImmutableMap.of("name", "country", "type", "string"), + ImmutableMap.of("name", "region", "type", "string"), + ImmutableMap.of("name", "city", "type", "string"), + ImmutableMap.of("name", "added", "type", "int"), + ImmutableMap.of("name", "deleted", "type", "int"), + ImmutableMap.of("name", "delta", "type", "int") + ); + Map schema = ImmutableMap.of("namespace", "org.apache.druid.data.input", + "type", "record", + "name", "wikipedia", + "fields", fieldList); + Map arvoInputFormatMap = ImmutableMap.of("type", "avro_ocf", "schema", schema); + doIndexTest(arvoInputFormatMap); + } + + @Test + public void testIndexDataIngestionSpecWithSchema() throws Exception + { + Map arvoInputFormatMap = ImmutableMap.of("type", "avro_ocf"); + doIndexTest(arvoInputFormatMap); + } + + private void doIndexTest(Map arvoInputFormatMap) throws Exception + { + final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID(); + try ( + final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); + ) { + final Function sqlInputSourcePropsTransform = spec -> { + try { + spec = StringUtils.replace( + spec, + "%%PARTITIONS_SPEC%%", + jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null)) + ); + spec = StringUtils.replace( + spec, + "%%INPUT_SOURCE_FILTER%%", + "*.avro" + ); + spec = StringUtils.replace( + spec, + "%%INPUT_SOURCE_BASE_DIR%%", + "/resources/data/batch_index/avro" + ); + spec = StringUtils.replace( + spec, + "%%AVRO_INPUT_FORMAT%%", + jsonMapper.writeValueAsString(arvoInputFormatMap) + ); + return spec; + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + indexDatasource, + INDEX_TASK, + sqlInputSourcePropsTransform, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + } + } +} \ No newline at end of file diff --git a/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data1.avro b/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data1.avro new file mode 100644 index 0000000000000000000000000000000000000000..5ed0d60703560ba020d2e803dfc0bd682af94db1 GIT binary patch literal 1066 zcmah|PiqrF6gM?$ETNPl5`u?i5%kc8Y_?L}TdaRhS}_V5QFxhoO~%a5EN^BS*3yeN zZ#{@Nzkr{`&me*qKY<5N&L(L|i``!4-+S|Zf9B23;C-cf9rj4K3>#qzf3NO`HqS6j zLPDmAhwGt&wAfF?SY*fr%#Tb4tT@*+Y=K{ou*Jy<-r``PB~<3D&)gL#xe1du`$u)G zNYPkGGasEp8B0P9!-MtXqzr~w7MGxg?wlNFkjiQ}m4PKKB^D~rq@r;0>>MQ016_zc zvj!-w@>DxhmQ?sWGqt1=)5;2ksuYfCr!3`VV1z@VRapiTwzL9pzG$5AlolS4qeNRC zw?!w-Et}qcdvI;qicE84HRYtd@af*q5Aye|Z(E1y%~$XCeqH{4>2{|b#T)Ir8*%ia z9VP8f636|<<2*BY@W8hPQWe#Ta!evn&630Eu)GV>;A~t0vckukPc_+5&`ps9gXT*m zJmsKg!J=8c4|&ifgk}#^@Q6S$Vf>Fcy!dtL1cf+>JIUsq{*9jR;xSV2&=HQolUOzx zBA-E?CbD+?oz}X}_diGh3#)BV76*>uK>fKh#k4b!{!;yk_6jF|DvckN)5iY;mk(!F literal 0 HcmV?d00001 diff --git a/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data2.avro b/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data2.avro new file mode 100644 index 0000000000000000000000000000000000000000..aa25803d5d221f9c851b63dab653ecb504c04150 GIT binary patch literal 1055 zcmaizL2J}d5XZHyvIbeJ6e)X8tazBdOq16pGvU1#Yb-q$ zJc@TeL=pT5#qZ#U@b1Z%?pk7@>2306GQXMs`@h%QyH`8+;Dh3!7AfZN=jMTDssg>x zQz%%)-iAkz*WdecPZk(USRUyDM19UG8G?;d9LB1Y@h}DhLrFzxY~_x`EY@E5?%-b^ zBXiUS^1^19C~cIa#5>$LD=HwtwtNPZl$X?$8dY15k_s@2vkdDkFC2w%|IH;N#oJsj zdtnbCrYh%Bw-wboFGOwWB$$jOq*k2Zl8ma_URP_mD$Pf5dd&-~n-XbH|Vl?1nDDbPpR zi$GQ-(AS>xx%VG$6g~{X(c}60lq)vqM%IVi*@e}Gg;6yKjqp6|g?%vOyaY1rM7&q= z%G#YJXKUaGkspNqC>)RH>(8q~SMF1bLDfO(Q7JNn1*QR5tk(l&q|NtVu`;PU$x%rG m{kJ6TRk$+)2KDL_sN9J{=+A)MX9`G2ZD0DAYu{GRtNj;6QDa{K literal 0 HcmV?d00001 diff --git a/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data3.avro b/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data3.avro new file mode 100644 index 0000000000000000000000000000000000000000..2e0f9f2e1d08ffc5b8295d8847dae7dc5253356a GIT binary patch literal 1151 zcma)+yKWOf6ozqRz^p9GL=mJw$ZQ2tV)+s)WIM~E0ihu51|y2&ofGfGvu9Rw!C4|V zCL-GP`m?gm8|P9!We$vI`&PP zpda`OL#dd*^lY6EfNW|t^!6sNI2+d2zMV2pY50%&Znh_M<~UJgqM7$t?kOy#9pA)LOrf~4d~ zW@0bR0VFAnrPX;sna>MTb25dJ3@4;qTqrFWmFB?|j=7|~h>)Ay48Yi`aolZb|w?7hYQTe&lp`4x;Hh&gYQI6Aj9JKd__j`)RT2gJ0CXtj>d>2)n=*0P~=K@!d z7iG?QUN7PV<$eTo#47{fsRjC2@jg_}@tmJd+OCr(Q)#d^a92$h523oarKtW7_SC%z U Date: Fri, 26 Jun 2020 15:57:28 -1000 Subject: [PATCH 02/15] Add integration tests for Avro OCF InputFormat --- .../apache/druid/tests/parallelized/ITAvroInputFormatTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITAvroInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITAvroInputFormatTest.java index a95e3bf95c02..1516775cff69 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITAvroInputFormatTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITAvroInputFormatTest.java @@ -124,4 +124,4 @@ private void doIndexTest(Map arvoInputFormatMap) throws Exception ); } } -} \ No newline at end of file +} From 2b5d4d599ad46999dc2f917c722e28c96f48f96f Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Fri, 26 Jun 2020 22:05:38 -1000 Subject: [PATCH 03/15] add tests --- .../DoublesSketchComplexMetricSerde.java | 3 +- integration-tests/README.md | 4 +- integration-tests/script/copy_resources.sh | 4 +- .../druid/tests/hadoop/ITHadoopIndexTest.java | 4 +- .../indexer/AbstractITBatchIndexTest.java | 3 + ...ractLocalInputSourceParallelIndexTest.java | 91 +++++++++++++++++++ ...ITLocalInputSourceAllInputFormatTest.java} | 91 +++++++------------ .../{tsv => hadoop_tsv}/batch_hadoop.data | 0 .../hadoop/batch_hadoop_indexer.json | 2 +- ...kipedia_local_input_source_index_task.json | 2 +- 10 files changed, 136 insertions(+), 68 deletions(-) create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractLocalInputSourceParallelIndexTest.java rename integration-tests/src/test/java/org/apache/druid/tests/parallelized/{ITAvroInputFormatTest.java => ITLocalInputSourceAllInputFormatTest.java} (53%) rename integration-tests/src/test/resources/data/batch_index/{tsv => hadoop_tsv}/batch_hadoop.data (100%) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java index e2c922e88b48..67424385f64c 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java @@ -19,6 +19,7 @@ package org.apache.druid.query.aggregation.datasketches.quantiles; +import org.apache.commons.lang.StringUtils; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.quantiles.DoublesSketch; import org.apache.datasketches.quantiles.UpdateDoublesSketch; @@ -77,7 +78,7 @@ public Object extractValue(final InputRow inputRow, final String metricName) // This corresponds to "A" in base64, so it is not a digit if (objectString.isEmpty()) { return DoublesSketchOperations.EMPTY_SKETCH; - } else if (Character.isDigit(objectString.charAt(0))) { + } else if (StringUtils.isNumeric(objectString)) { try { double doubleValue = Double.parseDouble(objectString); UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build(); diff --git a/integration-tests/README.md b/integration-tests/README.md index d5476cda8a39..9893de95ad4b 100644 --- a/integration-tests/README.md +++ b/integration-tests/README.md @@ -270,8 +270,8 @@ credentials/configs may need to be set in the same file as your Druid's Hadoop c If you are running ITHadoopIndexTest with your own Druid + Hadoop cluster, please follow the below steps: - Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/ -- Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS - at /batch_index/tsv/ +- Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/hadoop_tsv to your HDFS + at /batch_index/hadoop_tsv/ If using the Docker-based Hadoop container, the steps above are automatically done by the integration tests. When running the Hadoop tests, you must set `-Dextra.datasource.name.suffix=''`, due to https://github.com/apache/druid/issues/9788. diff --git a/integration-tests/script/copy_resources.sh b/integration-tests/script/copy_resources.sh index eb3a1b594d64..cb8055981ea0 100755 --- a/integration-tests/script/copy_resources.sh +++ b/integration-tests/script/copy_resources.sh @@ -50,10 +50,10 @@ mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service # For druid-parquet-extensions mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions -mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions +cp $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions # For druid-orc-extensions mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions -mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions +cp $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions # Pull Hadoop dependency if needed if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ] diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java index 1d37e1db553a..23b8ac04047e 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java @@ -42,8 +42,8 @@ * 1) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json * located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/ * If using the Docker-based Hadoop container, this is automatically done by the integration tests. - * 2) Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS - * at /batch_index/tsv/ + * 2) Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/hadoop_tsv to your HDFS + * at /batch_index/hadoop_tsv/ * If using the Docker-based Hadoop container, this is automatically done by the integration tests. * 2) Provide -Doverride.config.path= with HDFS configs set. See * integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide. diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java index 5c64dcd626b0..993ee6bac341 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java @@ -50,6 +50,9 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest { public enum InputFormatDetails { + AVRO("avro_ocf", ".avro", "/avro"), + CSV("csv", ".csv", "/csv"), + TSV("tsv", ".tsv", "/tsv"), ORC("orc", ".orc", "/orc"), JSON("json", ".json", "/json"), PARQUET("parquet", ".parquet", "/parquet"); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractLocalInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractLocalInputSourceParallelIndexTest.java new file mode 100644 index 000000000000..a00bb3d83d47 --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractLocalInputSourceParallelIndexTest.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.indexer; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.java.util.common.StringUtils; + +import javax.annotation.Nonnull; +import java.io.Closeable; +import java.util.Map; +import java.util.UUID; +import java.util.function.Function; + +public abstract class AbstractLocalInputSourceParallelIndexTest extends AbstractITBatchIndexTest +{ + private static final String INDEX_TASK = "/indexer/wikipedia_local_input_source_index_task.json"; + private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; + + public void doIndexTest(InputFormatDetails inputFormatDetails) throws Exception + { + doIndexTest(inputFormatDetails, ImmutableMap.of()); + } + + public void doIndexTest(InputFormatDetails inputFormatDetails, @Nonnull Map extraInputFormatMap) throws Exception + { + final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID(); + Map inputFormatMap = new ImmutableMap.Builder().putAll(extraInputFormatMap) + .put("type", inputFormatDetails.getInputFormatType()) + .build(); + try ( + final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); + ) { + final Function sqlInputSourcePropsTransform = spec -> { + try { + spec = StringUtils.replace( + spec, + "%%PARTITIONS_SPEC%%", + jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null)) + ); + spec = StringUtils.replace( + spec, + "%%INPUT_SOURCE_FILTER%%", + "*" + inputFormatDetails.getFileExtension() + ); + spec = StringUtils.replace( + spec, + "%%INPUT_SOURCE_BASE_DIR%%", + "/resources/data/batch_index" + inputFormatDetails.getFolderSuffix() + ); + spec = StringUtils.replace( + spec, + "%%INPUT_FORMAT%%", + jsonMapper.writeValueAsString(inputFormatMap) + ); + return spec; + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + indexDatasource, + INDEX_TASK, + sqlInputSourcePropsTransform, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + } + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITAvroInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITLocalInputSourceAllInputFormatTest.java similarity index 53% rename from integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITAvroInputFormatTest.java rename to integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITLocalInputSourceAllInputFormatTest.java index 1516775cff69..9cb8d09c1061 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITAvroInputFormatTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITLocalInputSourceAllInputFormatTest.java @@ -21,29 +21,21 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; -import org.apache.druid.tests.indexer.AbstractITBatchIndexTest; +import org.apache.druid.tests.indexer.AbstractLocalInputSourceParallelIndexTest; import org.testng.annotations.Guice; import org.testng.annotations.Test; -import java.io.Closeable; import java.util.List; import java.util.Map; -import java.util.UUID; -import java.util.function.Function; @Test(groups = TestNGGroup.BATCH_INDEX) @Guice(moduleFactory = DruidTestModuleFactory.class) -public class ITAvroInputFormatTest extends AbstractITBatchIndexTest +public class ITLocalInputSourceAllInputFormatTest extends AbstractLocalInputSourceParallelIndexTest { - private static final String INDEX_TASK = "/indexer/wikipedia_local_input_source_index_task.json"; - private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; - @Test - public void testIndexDataIngestionSpecWithoutSchema() throws Exception + public void testAvroInputFormatIndexDataIngestionSpecWithoutSchema() throws Exception { List fieldList = ImmutableList.of( ImmutableMap.of("name", "timestamp", "type", "string"), @@ -67,61 +59,42 @@ public void testIndexDataIngestionSpecWithoutSchema() throws Exception "type", "record", "name", "wikipedia", "fields", fieldList); - Map arvoInputFormatMap = ImmutableMap.of("type", "avro_ocf", "schema", schema); - doIndexTest(arvoInputFormatMap); + doIndexTest(InputFormatDetails.AVRO, ImmutableMap.of("schema", schema)); } @Test - public void testIndexDataIngestionSpecWithSchema() throws Exception + public void testAvroInputFormatIndexDataIngestionSpecWithSchema() throws Exception { - Map arvoInputFormatMap = ImmutableMap.of("type", "avro_ocf"); - doIndexTest(arvoInputFormatMap); + doIndexTest(InputFormatDetails.AVRO); } - private void doIndexTest(Map arvoInputFormatMap) throws Exception + @Test + public void testJsonInputFormatIndexDataIngestionSpecWithSchema() throws Exception { - final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID(); - try ( - final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); - ) { - final Function sqlInputSourcePropsTransform = spec -> { - try { - spec = StringUtils.replace( - spec, - "%%PARTITIONS_SPEC%%", - jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null)) - ); - spec = StringUtils.replace( - spec, - "%%INPUT_SOURCE_FILTER%%", - "*.avro" - ); - spec = StringUtils.replace( - spec, - "%%INPUT_SOURCE_BASE_DIR%%", - "/resources/data/batch_index/avro" - ); - spec = StringUtils.replace( - spec, - "%%AVRO_INPUT_FORMAT%%", - jsonMapper.writeValueAsString(arvoInputFormatMap) - ); - return spec; - } - catch (Exception e) { - throw new RuntimeException(e); - } - }; + doIndexTest(InputFormatDetails.JSON); + } - doIndexTest( - indexDatasource, - INDEX_TASK, - sqlInputSourcePropsTransform, - INDEX_QUERIES_RESOURCE, - false, - true, - true - ); - } + @Test + public void testTsvInputFormatIndexDataIngestionSpecWithSchema() throws Exception + { + doIndexTest(InputFormatDetails.TSV, ImmutableMap.of("findColumnsFromHeader", true)); + } + + @Test + public void testParquetInputFormatIndexDataIngestionSpecWithSchema() throws Exception + { + doIndexTest(InputFormatDetails.PARQUET); + } + + @Test + public void testOrcInputFormatIndexDataIngestionSpecWithSchema() throws Exception + { + doIndexTest(InputFormatDetails.ORC); + } + + @Test + public void testCsvInputFormatIndexDataIngestionSpecWithSchema() throws Exception + { + doIndexTest(InputFormatDetails.CSV, ImmutableMap.of("findColumnsFromHeader", true)); } } diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/batch_hadoop.data b/integration-tests/src/test/resources/data/batch_index/hadoop_tsv/batch_hadoop.data similarity index 100% rename from integration-tests/src/test/resources/data/batch_index/tsv/batch_hadoop.data rename to integration-tests/src/test/resources/data/batch_index/hadoop_tsv/batch_hadoop.data diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json b/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json index 465a22393793..a6710db6e27b 100644 --- a/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json +++ b/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json @@ -53,7 +53,7 @@ "type": "hadoop", "inputSpec": { "type": "static", - "paths": "/batch_index/tsv/batch_hadoop.data" + "paths": "/batch_index/hadoop_tsv/batch_hadoop.data" } }, "tuningConfig": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json index 8ab615b2cc22..e1d47571fc2d 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json @@ -71,7 +71,7 @@ "filter" : "%%INPUT_SOURCE_FILTER%%", "baseDir": "%%INPUT_SOURCE_BASE_DIR%%" }, - "inputFormat": %%AVRO_INPUT_FORMAT%% + "inputFormat": %%INPUT_FORMAT%% }, "tuningConfig": { "type": "index_parallel", From aaf60b92201029c31b229a385225069ff293eef9 Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Fri, 26 Jun 2020 23:16:45 -1000 Subject: [PATCH 04/15] fix bug --- .../DoublesSketchComplexMetricSerde.java | 4 +- .../DoublesSketchComplexMetricSerdeTest.java | 41 ++++++++++++++++++- .../batch_index/csv/wikipedia_index_data1.csv | 4 ++ .../batch_index/csv/wikipedia_index_data2.csv | 4 ++ .../batch_index/csv/wikipedia_index_data3.csv | 5 +++ .../batch_index/tsv/wikipedia_index_data1.tsv | 4 ++ .../batch_index/tsv/wikipedia_index_data2.tsv | 4 ++ .../batch_index/tsv/wikipedia_index_data3.tsv | 5 +++ 8 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data1.csv create mode 100644 integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data2.csv create mode 100644 integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data3.csv create mode 100644 integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv create mode 100644 integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv create mode 100644 integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java index 67424385f64c..f669d4c7ffbf 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java @@ -19,7 +19,7 @@ package org.apache.druid.query.aggregation.datasketches.quantiles; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.math.NumberUtils; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.quantiles.DoublesSketch; import org.apache.datasketches.quantiles.UpdateDoublesSketch; @@ -78,7 +78,7 @@ public Object extractValue(final InputRow inputRow, final String metricName) // This corresponds to "A" in base64, so it is not a digit if (objectString.isEmpty()) { return DoublesSketchOperations.EMPTY_SKETCH; - } else if (StringUtils.isNumeric(objectString)) { + } else if (NumberUtils.isParsable(objectString)) { try { double doubleValue = Double.parseDouble(objectString); UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build(); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java index 5280fbd441f0..e198c7704257 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java @@ -42,7 +42,33 @@ public void testExtractorOnEmptyString() } @Test - public void testExtractorOnNumber() + public void testExtractorOnPositiveNumber() + { + final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); + final ComplexMetricExtractor extractor = serde.getExtractor(); + final DoublesSketch sketch = (DoublesSketch) extractor.extractValue( + new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "777")), + "foo" + ); + Assert.assertEquals(1, sketch.getRetainedItems()); + Assert.assertEquals(777d, sketch.getMaxValue(), 0.01d); + } + + @Test + public void testExtractorOnNegativeNumber() + { + final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); + final ComplexMetricExtractor extractor = serde.getExtractor(); + final DoublesSketch sketch = (DoublesSketch) extractor.extractValue( + new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "-133")), + "foo" + ); + Assert.assertEquals(1, sketch.getRetainedItems()); + Assert.assertEquals(-133d, sketch.getMaxValue(), 0.01d); + } + + @Test + public void testExtractorOnDecimalNumber() { final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); final ComplexMetricExtractor extractor = serde.getExtractor(); @@ -53,4 +79,17 @@ public void testExtractorOnNumber() Assert.assertEquals(1, sketch.getRetainedItems()); Assert.assertEquals(3.1d, sketch.getMaxValue(), 0.01d); } + + @Test + public void testExtractorOnLeadingDecimalNumber() + { + final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); + final ComplexMetricExtractor extractor = serde.getExtractor(); + final DoublesSketch sketch = (DoublesSketch) extractor.extractValue( + new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", ".1")), + "foo" + ); + Assert.assertEquals(1, sketch.getRetainedItems()); + Assert.assertEquals(0.1d, sketch.getMaxValue(), 0.01d); + } } diff --git a/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data1.csv b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data1.csv new file mode 100644 index 000000000000..c2e2b0714d8d --- /dev/null +++ b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data1.csv @@ -0,0 +1,4 @@ +timestamp,page,language,user,unpatrolled,newPage,robot,anonymous,namespace,continent,country,region,city,added,deleted,delta +2013-08-31T01:02:33Z,Gypsy Danger,en,nuclear,TRUE,TRUE,FALSE,FALSE,article,North America,United States,Bay Area,San Francisco,57,200,-143 +2013-08-31T03:32:45Z,Striker Eureka,en,speed,FALSE,TRUE,TRUE,FALSE,wikipedia,Australia,Australia,Cantebury,Syndey,459,129,330 +2013-08-31T07:11:21Z,Cherno Alpha,ru,masterYi,FALSE,TRUE,TRUE,FALSE,article,Asia,Russia,Oblast,Moscow,123,12,111 diff --git a/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data2.csv b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data2.csv new file mode 100644 index 000000000000..cbed2815db9c --- /dev/null +++ b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data2.csv @@ -0,0 +1,4 @@ +timestamp,page,language,user,unpatrolled,newPage,robot,anonymous,namespace,continent,country,region,city,added,deleted,delta +2013-08-31T11:58:39Z,Crimson Typhoon,zh,triplets,TRUE,FALSE,TRUE,FALSE,wikipedia,Asia,China,Shanxi,Taiyuan,905,5,900 +2013-08-31T12:41:27Z,Coyote Tango,ja,stringer,TRUE,FALSE,TRUE,FALSE,wikipedia,Asia,Japan,Kanto,Tokyo,1,10,-9 +2013-09-01T01:02:33Z,Gypsy Danger,en,nuclear,TRUE,TRUE,FALSE,FALSE,article,North America,United States,Bay Area,San Francisco,57,200,-143 diff --git a/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data3.csv b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data3.csv new file mode 100644 index 000000000000..51d6d21d0d20 --- /dev/null +++ b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data3.csv @@ -0,0 +1,5 @@ +timestamp,page,language,user,unpatrolled,newPage,robot,anonymous,namespace,continent,country,region,city,added,deleted,delta +2013-09-01T03:32:45Z,Striker Eureka,en,speed,FALSE,TRUE,TRUE,FALSE,wikipedia,Australia,Australia,Cantebury,Syndey,459,129,330 +2013-09-01T07:11:21Z,Cherno Alpha,ru,masterYi,FALSE,TRUE,TRUE,FALSE,article,Asia,Russia,Oblast,Moscow,123,12,111 +2013-09-01T11:58:39Z,Crimson Typhoon,zh,triplets,TRUE,FALSE,TRUE,FALSE,wikipedia,Asia,China,Shanxi,Taiyuan,905,5,900 +2013-09-01T12:41:27Z,Coyote Tango,ja,stringer,TRUE,FALSE,TRUE,FALSE,wikipedia,Asia,Japan,Kanto,Tokyo,1,10,-9 diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv new file mode 100644 index 000000000000..7302aa96d124 --- /dev/null +++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv @@ -0,0 +1,4 @@ +timestamp page language user unpatrolled newPage robot anonymous namespace continent country region city added deleted delta +2013-08-31T01:02:33Z Gypsy Danger en nuclear true true false false article North America United States Bay Area San Francisco 57 200 -143 +2013-08-31T03:32:45Z Striker Eureka en speed false true true false wikipedia Australia Australia Cantebury Syndey 459 129 330 +2013-08-31T07:11:21Z Cherno Alpha ru masterYi false true true false article Asia Russia Oblast Moscow 123 12 111 \ No newline at end of file diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv new file mode 100644 index 000000000000..967afe50db79 --- /dev/null +++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv @@ -0,0 +1,4 @@ +timestamp page language user unpatrolled newPage robot anonymous namespace continent country region city added deleted delta +2013-08-31T11:58:39Z Crimson Typhoon zh triplets true false true false wikipedia Asia China Shanxi Taiyuan 905 5 900 +2013-08-31T12:41:27Z Coyote Tango ja stringer true false true false wikipedia Asia Japan Kanto Tokyo 1 10 -9 +2013-09-01T01:02:33Z Gypsy Danger en nuclear true true false false article North America United States Bay Area San Francisco 57 200 -143 \ No newline at end of file diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv new file mode 100644 index 000000000000..ed26c950543c --- /dev/null +++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv @@ -0,0 +1,5 @@ +timestamp page language user unpatrolled newPage robot anonymous namespace continent country region city added deleted delta +2013-09-01T03:32:45Z Striker Eureka en speed false true true false wikipedia Australia Australia Cantebury Syndey 459 129 330 +2013-09-01T07:11:21Z Cherno Alpha ru masterYi false true true false article Asia Russia Oblast Moscow 123 12 111 +2013-09-01T11:58:39Z Crimson Typhoon zh triplets true false true false wikipedia Asia China Shanxi Taiyuan 905 5 900 +2013-09-01T12:41:27Z Coyote Tango ja stringer true false true false wikipedia Asia Japan Kanto Tokyo 1 10 -9 \ No newline at end of file From 4b5d37790bc0859c926c97ee1f5b9c1e08335a61 Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Fri, 26 Jun 2020 23:19:52 -1000 Subject: [PATCH 05/15] fix bug --- .../indexer/wikipedia_local_input_source_index_task.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json index e1d47571fc2d..67650f8e1be5 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json @@ -76,6 +76,10 @@ "tuningConfig": { "type": "index_parallel", "maxNumConcurrentSubTasks": 10, + "splitHintSpec": { + "type": "maxSize", + "maxSplitSize": 1 + }, "partitionsSpec": %%PARTITIONS_SPEC%% } } From 61ff5f2cfbfd13f625eab7c1a703eddaa4a09851 Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Fri, 26 Jun 2020 23:50:59 -1000 Subject: [PATCH 06/15] fix failing tests --- .../resources/data/batch_index/tsv/wikipedia_index_data1.tsv | 2 +- .../resources/data/batch_index/tsv/wikipedia_index_data2.tsv | 2 +- .../resources/data/batch_index/tsv/wikipedia_index_data3.tsv | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv index 7302aa96d124..7e7654557273 100644 --- a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv +++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv @@ -1,4 +1,4 @@ timestamp page language user unpatrolled newPage robot anonymous namespace continent country region city added deleted delta 2013-08-31T01:02:33Z Gypsy Danger en nuclear true true false false article North America United States Bay Area San Francisco 57 200 -143 2013-08-31T03:32:45Z Striker Eureka en speed false true true false wikipedia Australia Australia Cantebury Syndey 459 129 330 -2013-08-31T07:11:21Z Cherno Alpha ru masterYi false true true false article Asia Russia Oblast Moscow 123 12 111 \ No newline at end of file +2013-08-31T07:11:21Z Cherno Alpha ru masterYi false true true false article Asia Russia Oblast Moscow 123 12 111 diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv index 967afe50db79..28159ce4e355 100644 --- a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv +++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv @@ -1,4 +1,4 @@ timestamp page language user unpatrolled newPage robot anonymous namespace continent country region city added deleted delta 2013-08-31T11:58:39Z Crimson Typhoon zh triplets true false true false wikipedia Asia China Shanxi Taiyuan 905 5 900 2013-08-31T12:41:27Z Coyote Tango ja stringer true false true false wikipedia Asia Japan Kanto Tokyo 1 10 -9 -2013-09-01T01:02:33Z Gypsy Danger en nuclear true true false false article North America United States Bay Area San Francisco 57 200 -143 \ No newline at end of file +2013-09-01T01:02:33Z Gypsy Danger en nuclear true true false false article North America United States Bay Area San Francisco 57 200 -143 diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv index ed26c950543c..6e0c5a28a18b 100644 --- a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv +++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv @@ -2,4 +2,4 @@ 2013-09-01T03:32:45Z Striker Eureka en speed false true true false wikipedia Australia Australia Cantebury Syndey 459 129 330 2013-09-01T07:11:21Z Cherno Alpha ru masterYi false true true false article Asia Russia Oblast Moscow 123 12 111 2013-09-01T11:58:39Z Crimson Typhoon zh triplets true false true false wikipedia Asia China Shanxi Taiyuan 905 5 900 -2013-09-01T12:41:27Z Coyote Tango ja stringer true false true false wikipedia Asia Japan Kanto Tokyo 1 10 -9 \ No newline at end of file +2013-09-01T12:41:27Z Coyote Tango ja stringer true false true false wikipedia Asia Japan Kanto Tokyo 1 10 -9 From cb12d6c5ec50d487865716757183ad05b71020a9 Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Fri, 26 Jun 2020 23:59:49 -1000 Subject: [PATCH 07/15] add comments --- integration-tests/script/copy_resources.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/integration-tests/script/copy_resources.sh b/integration-tests/script/copy_resources.sh index cb8055981ea0..0324a69781da 100755 --- a/integration-tests/script/copy_resources.sh +++ b/integration-tests/script/copy_resources.sh @@ -49,9 +49,11 @@ mv $SHARED_DIR/docker/lib/druid-hdfs-storage-* $SHARED_DIR/docker/extensions/dru mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service # For druid-parquet-extensions +# Using cp so that this extensions is included when running Druid without loadList and as a option for the loadList mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions cp $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions # For druid-orc-extensions +# Using cp so that this extensions is included when running Druid without loadList and as a option for the loadList mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions cp $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions From cf2ccd1ffa9cceb63ad8862adbd71678f7958868 Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Sat, 27 Jun 2020 11:01:01 -1000 Subject: [PATCH 08/15] address comments --- .travis.yml | 17 +++++++++++++++-- extensions-core/datasketches/pom.xml | 4 ++++ .../org/apache/druid/tests/TestNGGroup.java | 2 ++ .../ITLocalInputSourceAllInputFormatTest.java | 6 +++--- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9b3c755107c6..9baa26d7a99f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -313,6 +313,14 @@ jobs: docker exec -it druid-$v sh -c 'dmesg | tail -3' ; done + - &integration_input_format + name: "(Compile=openjdk8, Run=openjdk8) input format integration test" + jdk: openjdk8 + services: *integration_test_services + env: TESTNG_GROUPS='-Dgroups=input-format' JVM_RUNTIME='-Djvm.runtime=8' + script: *run_integration_test + after_failure: *integration_test_diags + - &integration_perfect_rollup_parallel_batch_index name: "(Compile=openjdk8, Run=openjdk8) perfect rollup parallel batch index integration test" jdk: openjdk8 @@ -389,7 +397,7 @@ jobs: name: "(Compile=openjdk8, Run=openjdk8) other integration test" jdk: openjdk8 services: *integration_test_services - env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=8' + env: TESTNG_GROUPS='-DexcludedGroups=batch-index,input-format,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=8' script: *run_integration_test after_failure: *integration_test_diags # END - Integration tests for Compile with Java 8 and Run with Java 8 @@ -400,6 +408,11 @@ jobs: jdk: openjdk8 env: TESTNG_GROUPS='-Dgroups=batch-index' JVM_RUNTIME='-Djvm.runtime=11' + - <<: *integration_input_format + name: "(Compile=openjdk8, Run=openjdk11) input format integration test" + jdk: openjdk8 + env: TESTNG_GROUPS='-Dgroups=input-format' JVM_RUNTIME='-Djvm.runtime=11' + - <<: *integration_perfect_rollup_parallel_batch_index name: "(Compile=openjdk8, Run=openjdk11) perfect rollup parallel batch index integration test" jdk: openjdk8 @@ -423,7 +436,7 @@ jobs: - <<: *integration_tests name: "(Compile=openjdk8, Run=openjdk11) other integration test" jdk: openjdk8 - env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=11' + env: TESTNG_GROUPS='-DexcludedGroups=batch-index,input-format,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=11' # END - Integration tests for Compile with Java 8 and Run with Java 11 - name: "security vulnerabilities" diff --git a/extensions-core/datasketches/pom.xml b/extensions-core/datasketches/pom.xml index 4b969818c290..2f82ec8c2e25 100644 --- a/extensions-core/datasketches/pom.xml +++ b/extensions-core/datasketches/pom.xml @@ -60,6 +60,10 @@ org.apache.commons commons-math3 + + org.apache.commons + commons-lang3 + org.apache.druid druid-core diff --git a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java index 58a3a1b07a1d..468a785c5bdc 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java @@ -27,6 +27,8 @@ public class TestNGGroup { public static final String BATCH_INDEX = "batch-index"; + public static final String INPUT_FORMAT = "input-format"; + public static final String KAFKA_INDEX = "kafka-index"; public static final String KAFKA_INDEX_SLOW = "kafka-index-slow"; diff --git a/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITLocalInputSourceAllInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITLocalInputSourceAllInputFormatTest.java index 9cb8d09c1061..f510fe1bd840 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITLocalInputSourceAllInputFormatTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITLocalInputSourceAllInputFormatTest.java @@ -30,12 +30,12 @@ import java.util.List; import java.util.Map; -@Test(groups = TestNGGroup.BATCH_INDEX) +@Test(groups = TestNGGroup.INPUT_FORMAT) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITLocalInputSourceAllInputFormatTest extends AbstractLocalInputSourceParallelIndexTest { @Test - public void testAvroInputFormatIndexDataIngestionSpecWithoutSchema() throws Exception + public void testAvroInputFormatIndexDataIngestionSpecWithSchema() throws Exception { List fieldList = ImmutableList.of( ImmutableMap.of("name", "timestamp", "type", "string"), @@ -63,7 +63,7 @@ public void testAvroInputFormatIndexDataIngestionSpecWithoutSchema() throws Exce } @Test - public void testAvroInputFormatIndexDataIngestionSpecWithSchema() throws Exception + public void testAvroInputFormatIndexDataIngestionSpecWithoutSchema() throws Exception { doIndexTest(InputFormatDetails.AVRO); } From c1a80b03a6c19e64b0465c2179aceb3157abbceb Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Mon, 29 Jun 2020 14:09:08 -0700 Subject: [PATCH 09/15] address comments --- extensions-core/datasketches/pom.xml | 4 ---- .../DoublesSketchComplexMetricSerde.java | 18 ++++++------------ .../ITLocalInputSourceAllInputFormatTest.java | 2 +- 3 files changed, 7 insertions(+), 17 deletions(-) rename integration-tests/src/test/java/org/apache/druid/tests/{parallelized => indexer}/ITLocalInputSourceAllInputFormatTest.java (98%) diff --git a/extensions-core/datasketches/pom.xml b/extensions-core/datasketches/pom.xml index 2f82ec8c2e25..4b969818c290 100644 --- a/extensions-core/datasketches/pom.xml +++ b/extensions-core/datasketches/pom.xml @@ -60,10 +60,6 @@ org.apache.commons commons-math3 - - org.apache.commons - commons-lang3 - org.apache.druid druid-core diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java index f669d4c7ffbf..d97b5f8c6dd3 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java @@ -19,12 +19,11 @@ package org.apache.druid.query.aggregation.datasketches.quantiles; -import org.apache.commons.lang3.math.NumberUtils; +import com.google.common.primitives.Doubles; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.quantiles.DoublesSketch; import org.apache.datasketches.quantiles.UpdateDoublesSketch; import org.apache.druid.data.input.InputRow; -import org.apache.druid.java.util.common.IAE; import org.apache.druid.segment.GenericColumnSerializer; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.data.GenericIndexed; @@ -76,18 +75,13 @@ public Object extractValue(final InputRow inputRow, final String metricName) // Autodetection of the input format: empty string, number, or base64 encoded sketch // A serialized DoublesSketch, as currently implemented, always has 0 in the first 6 bits. // This corresponds to "A" in base64, so it is not a digit + final Double doubleValue; if (objectString.isEmpty()) { return DoublesSketchOperations.EMPTY_SKETCH; - } else if (NumberUtils.isParsable(objectString)) { - try { - double doubleValue = Double.parseDouble(objectString); - UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build(); - sketch.update(doubleValue); - return sketch; - } - catch (NumberFormatException e) { - throw new IAE("Expected a string with a number, received value " + objectString); - } + } else if ((doubleValue = Doubles.tryParse(objectString)) != null) { + UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build(); + sketch.update(doubleValue); + return sketch; } } else if (object instanceof Number) { // this is for reindexing UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build(); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITLocalInputSourceAllInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java similarity index 98% rename from integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITLocalInputSourceAllInputFormatTest.java rename to integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java index f510fe1bd840..6d2bcfdafc0e 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/parallelized/ITLocalInputSourceAllInputFormatTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.tests.parallelized; +package org.apache.druid.tests.indexer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; From 69e22a795df721e4f71e22e7386c3b8dff395931 Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Mon, 29 Jun 2020 14:14:51 -0700 Subject: [PATCH 10/15] address comments --- .../tests/indexer/ITLocalInputSourceAllInputFormatTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java index 6d2bcfdafc0e..a0c1014105a9 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java @@ -23,7 +23,6 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; -import org.apache.druid.tests.indexer.AbstractLocalInputSourceParallelIndexTest; import org.testng.annotations.Guice; import org.testng.annotations.Test; From ca475d1e1ef4887a1915756cbe3859181e225029 Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Mon, 29 Jun 2020 14:46:19 -0700 Subject: [PATCH 11/15] fix test data --- .../resources/data/batch_index/tsv/wikipedia_index_data1.tsv | 2 +- .../resources/data/batch_index/tsv/wikipedia_index_data2.tsv | 4 ++-- .../resources/data/batch_index/tsv/wikipedia_index_data3.tsv | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv index 7e7654557273..b13d9adca1fc 100644 --- a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv +++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv @@ -1,4 +1,4 @@ -timestamp page language user unpatrolled newPage robot anonymous namespace continent country region city added deleted delta +timestamp page language user unpatrolled newPage robot anonymous namespace continent country region city added deleted delta 2013-08-31T01:02:33Z Gypsy Danger en nuclear true true false false article North America United States Bay Area San Francisco 57 200 -143 2013-08-31T03:32:45Z Striker Eureka en speed false true true false wikipedia Australia Australia Cantebury Syndey 459 129 330 2013-08-31T07:11:21Z Cherno Alpha ru masterYi false true true false article Asia Russia Oblast Moscow 123 12 111 diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv index 28159ce4e355..e271291911c2 100644 --- a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv +++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv @@ -1,4 +1,4 @@ -timestamp page language user unpatrolled newPage robot anonymous namespace continent country region city added deleted delta +timestamp page language user unpatrolled newPage robot anonymous namespace continent country region city added deleted delta 2013-08-31T11:58:39Z Crimson Typhoon zh triplets true false true false wikipedia Asia China Shanxi Taiyuan 905 5 900 2013-08-31T12:41:27Z Coyote Tango ja stringer true false true false wikipedia Asia Japan Kanto Tokyo 1 10 -9 -2013-09-01T01:02:33Z Gypsy Danger en nuclear true true false false article North America United States Bay Area San Francisco 57 200 -143 +2013-09-01T01:02:33Z Gypsy Danger en nuclear true true false false article North America United States Bay Area San Francisco 57 200 -143 \ No newline at end of file diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv index 6e0c5a28a18b..b91eae4fde86 100644 --- a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv +++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv @@ -1,5 +1,5 @@ -timestamp page language user unpatrolled newPage robot anonymous namespace continent country region city added deleted delta +timestamp page language user unpatrolled newPage robot anonymous namespace continent country region city added deleted delta 2013-09-01T03:32:45Z Striker Eureka en speed false true true false wikipedia Australia Australia Cantebury Syndey 459 129 330 2013-09-01T07:11:21Z Cherno Alpha ru masterYi false true true false article Asia Russia Oblast Moscow 123 12 111 2013-09-01T11:58:39Z Crimson Typhoon zh triplets true false true false wikipedia Asia China Shanxi Taiyuan 905 5 900 -2013-09-01T12:41:27Z Coyote Tango ja stringer true false true false wikipedia Asia Japan Kanto Tokyo 1 10 -9 +2013-09-01T12:41:27Z Coyote Tango ja stringer true false true false wikipedia Asia Japan Kanto Tokyo 1 10 -9 \ No newline at end of file From a93ee2fcdecaab33122aacb5c93a36fb6fb5a091 Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Tue, 30 Jun 2020 12:39:41 -0700 Subject: [PATCH 12/15] reduce resource needed for IT --- integration-tests/docker/environment-configs/middlemanager | 3 +-- .../wikipedia_parallel_druid_input_source_index_task.json | 2 +- .../test/resources/indexer/wikipedia_parallel_index_task.json | 2 +- .../indexer/wikipedia_parallel_ingest_segment_index_task.json | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/integration-tests/docker/environment-configs/middlemanager b/integration-tests/docker/environment-configs/middlemanager index 0ca4dbcdcdf0..6f8485fd0ca1 100644 --- a/integration-tests/docker/environment-configs/middlemanager +++ b/integration-tests/docker/environment-configs/middlemanager @@ -31,10 +31,9 @@ druid_indexer_runner_javaOptsArray=["-server", "-Xmx256m", "-Xms256m", "-XX:NewS druid_indexer_fork_property_druid_processing_buffer_sizeBytes=25000000 druid_indexer_fork_property_druid_processing_numThreads=1 -druid_indexer_fork_server_http_numThreads=20 druid_selectors_indexing_serviceName=druid/overlord druid_indexer_task_chathandler_type=announce druid_auth_basic_common_cacheDirectory=/tmp/authCache/middleManager druid_startup_logging_logProperties=true druid_server_https_crlPath=/tls/revocations.crl -druid_worker_capacity=20 \ No newline at end of file +druid_worker_capacity=10 \ No newline at end of file diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json index 91702a413574..84df16c4cb71 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json @@ -51,7 +51,7 @@ }, "tuningConfig": { "type": "index_parallel", - "maxNumConcurrentSubTasks": 10, + "maxNumConcurrentSubTasks": 4, "forceGuaranteedRollup": "%%FORCE_GUARANTEED_ROLLUP%%", "partitionsSpec": %%PARTITIONS_SPEC%%, "splitHintSpec": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json index 4781d39b0249..23092690d7ad 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json @@ -64,7 +64,7 @@ }, "tuningConfig": { "type": "index_parallel", - "maxNumConcurrentSubTasks": 10, + "maxNumConcurrentSubTasks": 4, "forceGuaranteedRollup": "%%FORCE_GUARANTEED_ROLLUP%%", "splitHintSpec": { "type": "maxSize", diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json index 87cb498f3c4b..87b9010c816c 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json @@ -57,7 +57,7 @@ }, "tuningConfig": { "type": "index_parallel", - "maxNumConcurrentSubTasks": 10, + "maxNumConcurrentSubTasks": 4, "forceGuaranteedRollup": "%%FORCE_GUARANTEED_ROLLUP%%", "splitHintSpec": { "type": "maxSize", From 72b78702bb28f2fb647fe2f58ecff2a419851b6d Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Tue, 30 Jun 2020 17:36:34 -0700 Subject: [PATCH 13/15] remove bug fix --- .../DoublesSketchComplexMetricSerde.java | 17 +++++--- .../DoublesSketchComplexMetricSerdeTest.java | 41 +------------------ .../docker/environment-configs/middlemanager | 3 +- .../ITLocalInputSourceAllInputFormatTest.java | 7 +--- ...arallel_druid_input_source_index_task.json | 2 +- .../wikipedia_parallel_index_task.json | 2 +- ...ia_parallel_ingest_segment_index_task.json | 2 +- 7 files changed, 18 insertions(+), 56 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java index d97b5f8c6dd3..e2c922e88b48 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java @@ -19,11 +19,11 @@ package org.apache.druid.query.aggregation.datasketches.quantiles; -import com.google.common.primitives.Doubles; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.quantiles.DoublesSketch; import org.apache.datasketches.quantiles.UpdateDoublesSketch; import org.apache.druid.data.input.InputRow; +import org.apache.druid.java.util.common.IAE; import org.apache.druid.segment.GenericColumnSerializer; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.data.GenericIndexed; @@ -75,13 +75,18 @@ public Object extractValue(final InputRow inputRow, final String metricName) // Autodetection of the input format: empty string, number, or base64 encoded sketch // A serialized DoublesSketch, as currently implemented, always has 0 in the first 6 bits. // This corresponds to "A" in base64, so it is not a digit - final Double doubleValue; if (objectString.isEmpty()) { return DoublesSketchOperations.EMPTY_SKETCH; - } else if ((doubleValue = Doubles.tryParse(objectString)) != null) { - UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build(); - sketch.update(doubleValue); - return sketch; + } else if (Character.isDigit(objectString.charAt(0))) { + try { + double doubleValue = Double.parseDouble(objectString); + UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build(); + sketch.update(doubleValue); + return sketch; + } + catch (NumberFormatException e) { + throw new IAE("Expected a string with a number, received value " + objectString); + } } } else if (object instanceof Number) { // this is for reindexing UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build(); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java index e198c7704257..5280fbd441f0 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java @@ -42,33 +42,7 @@ public void testExtractorOnEmptyString() } @Test - public void testExtractorOnPositiveNumber() - { - final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); - final ComplexMetricExtractor extractor = serde.getExtractor(); - final DoublesSketch sketch = (DoublesSketch) extractor.extractValue( - new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "777")), - "foo" - ); - Assert.assertEquals(1, sketch.getRetainedItems()); - Assert.assertEquals(777d, sketch.getMaxValue(), 0.01d); - } - - @Test - public void testExtractorOnNegativeNumber() - { - final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); - final ComplexMetricExtractor extractor = serde.getExtractor(); - final DoublesSketch sketch = (DoublesSketch) extractor.extractValue( - new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "-133")), - "foo" - ); - Assert.assertEquals(1, sketch.getRetainedItems()); - Assert.assertEquals(-133d, sketch.getMaxValue(), 0.01d); - } - - @Test - public void testExtractorOnDecimalNumber() + public void testExtractorOnNumber() { final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); final ComplexMetricExtractor extractor = serde.getExtractor(); @@ -79,17 +53,4 @@ public void testExtractorOnDecimalNumber() Assert.assertEquals(1, sketch.getRetainedItems()); Assert.assertEquals(3.1d, sketch.getMaxValue(), 0.01d); } - - @Test - public void testExtractorOnLeadingDecimalNumber() - { - final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); - final ComplexMetricExtractor extractor = serde.getExtractor(); - final DoublesSketch sketch = (DoublesSketch) extractor.extractValue( - new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", ".1")), - "foo" - ); - Assert.assertEquals(1, sketch.getRetainedItems()); - Assert.assertEquals(0.1d, sketch.getMaxValue(), 0.01d); - } } diff --git a/integration-tests/docker/environment-configs/middlemanager b/integration-tests/docker/environment-configs/middlemanager index 6f8485fd0ca1..0ca4dbcdcdf0 100644 --- a/integration-tests/docker/environment-configs/middlemanager +++ b/integration-tests/docker/environment-configs/middlemanager @@ -31,9 +31,10 @@ druid_indexer_runner_javaOptsArray=["-server", "-Xmx256m", "-Xms256m", "-XX:NewS druid_indexer_fork_property_druid_processing_buffer_sizeBytes=25000000 druid_indexer_fork_property_druid_processing_numThreads=1 +druid_indexer_fork_server_http_numThreads=20 druid_selectors_indexing_serviceName=druid/overlord druid_indexer_task_chathandler_type=announce druid_auth_basic_common_cacheDirectory=/tmp/authCache/middleManager druid_startup_logging_logProperties=true druid_server_https_crlPath=/tls/revocations.crl -druid_worker_capacity=10 \ No newline at end of file +druid_worker_capacity=20 \ No newline at end of file diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java index a0c1014105a9..4a4720928f7d 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java @@ -24,6 +24,7 @@ import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import org.testng.annotations.Guice; +import org.testng.annotations.Ignore; import org.testng.annotations.Test; import java.util.List; @@ -73,12 +74,6 @@ public void testJsonInputFormatIndexDataIngestionSpecWithSchema() throws Excepti doIndexTest(InputFormatDetails.JSON); } - @Test - public void testTsvInputFormatIndexDataIngestionSpecWithSchema() throws Exception - { - doIndexTest(InputFormatDetails.TSV, ImmutableMap.of("findColumnsFromHeader", true)); - } - @Test public void testParquetInputFormatIndexDataIngestionSpecWithSchema() throws Exception { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json index 84df16c4cb71..91702a413574 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_druid_input_source_index_task.json @@ -51,7 +51,7 @@ }, "tuningConfig": { "type": "index_parallel", - "maxNumConcurrentSubTasks": 4, + "maxNumConcurrentSubTasks": 10, "forceGuaranteedRollup": "%%FORCE_GUARANTEED_ROLLUP%%", "partitionsSpec": %%PARTITIONS_SPEC%%, "splitHintSpec": { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json index 23092690d7ad..4781d39b0249 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json @@ -64,7 +64,7 @@ }, "tuningConfig": { "type": "index_parallel", - "maxNumConcurrentSubTasks": 4, + "maxNumConcurrentSubTasks": 10, "forceGuaranteedRollup": "%%FORCE_GUARANTEED_ROLLUP%%", "splitHintSpec": { "type": "maxSize", diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json index 87b9010c816c..87cb498f3c4b 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json @@ -57,7 +57,7 @@ }, "tuningConfig": { "type": "index_parallel", - "maxNumConcurrentSubTasks": 4, + "maxNumConcurrentSubTasks": 10, "forceGuaranteedRollup": "%%FORCE_GUARANTEED_ROLLUP%%", "splitHintSpec": { "type": "maxSize", From 3a1588c048e066e9a3e17c98402ded406bfecb37 Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Tue, 30 Jun 2020 17:38:40 -0700 Subject: [PATCH 14/15] fix checkstyle --- .../tests/indexer/ITLocalInputSourceAllInputFormatTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java index 4a4720928f7d..29d75d327fdb 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java @@ -24,7 +24,6 @@ import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import org.testng.annotations.Guice; -import org.testng.annotations.Ignore; import org.testng.annotations.Test; import java.util.List; From bd0dda5a7f69af8d1d8cb05f28ad90df9fbabc73 Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Tue, 30 Jun 2020 22:44:54 -0700 Subject: [PATCH 15/15] add bug fix --- .../DoublesSketchComplexMetricSerde.java | 17 +++----- .../DoublesSketchComplexMetricSerdeTest.java | 41 ++++++++++++++++++- .../ITLocalInputSourceAllInputFormatTest.java | 6 +++ 3 files changed, 52 insertions(+), 12 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java index e2c922e88b48..d97b5f8c6dd3 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java @@ -19,11 +19,11 @@ package org.apache.druid.query.aggregation.datasketches.quantiles; +import com.google.common.primitives.Doubles; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.quantiles.DoublesSketch; import org.apache.datasketches.quantiles.UpdateDoublesSketch; import org.apache.druid.data.input.InputRow; -import org.apache.druid.java.util.common.IAE; import org.apache.druid.segment.GenericColumnSerializer; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.data.GenericIndexed; @@ -75,18 +75,13 @@ public Object extractValue(final InputRow inputRow, final String metricName) // Autodetection of the input format: empty string, number, or base64 encoded sketch // A serialized DoublesSketch, as currently implemented, always has 0 in the first 6 bits. // This corresponds to "A" in base64, so it is not a digit + final Double doubleValue; if (objectString.isEmpty()) { return DoublesSketchOperations.EMPTY_SKETCH; - } else if (Character.isDigit(objectString.charAt(0))) { - try { - double doubleValue = Double.parseDouble(objectString); - UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build(); - sketch.update(doubleValue); - return sketch; - } - catch (NumberFormatException e) { - throw new IAE("Expected a string with a number, received value " + objectString); - } + } else if ((doubleValue = Doubles.tryParse(objectString)) != null) { + UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build(); + sketch.update(doubleValue); + return sketch; } } else if (object instanceof Number) { // this is for reindexing UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build(); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java index 5280fbd441f0..e198c7704257 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java @@ -42,7 +42,33 @@ public void testExtractorOnEmptyString() } @Test - public void testExtractorOnNumber() + public void testExtractorOnPositiveNumber() + { + final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); + final ComplexMetricExtractor extractor = serde.getExtractor(); + final DoublesSketch sketch = (DoublesSketch) extractor.extractValue( + new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "777")), + "foo" + ); + Assert.assertEquals(1, sketch.getRetainedItems()); + Assert.assertEquals(777d, sketch.getMaxValue(), 0.01d); + } + + @Test + public void testExtractorOnNegativeNumber() + { + final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); + final ComplexMetricExtractor extractor = serde.getExtractor(); + final DoublesSketch sketch = (DoublesSketch) extractor.extractValue( + new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "-133")), + "foo" + ); + Assert.assertEquals(1, sketch.getRetainedItems()); + Assert.assertEquals(-133d, sketch.getMaxValue(), 0.01d); + } + + @Test + public void testExtractorOnDecimalNumber() { final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); final ComplexMetricExtractor extractor = serde.getExtractor(); @@ -53,4 +79,17 @@ public void testExtractorOnNumber() Assert.assertEquals(1, sketch.getRetainedItems()); Assert.assertEquals(3.1d, sketch.getMaxValue(), 0.01d); } + + @Test + public void testExtractorOnLeadingDecimalNumber() + { + final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde(); + final ComplexMetricExtractor extractor = serde.getExtractor(); + final DoublesSketch sketch = (DoublesSketch) extractor.extractValue( + new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", ".1")), + "foo" + ); + Assert.assertEquals(1, sketch.getRetainedItems()); + Assert.assertEquals(0.1d, sketch.getMaxValue(), 0.01d); + } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java index 29d75d327fdb..a0c1014105a9 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java @@ -73,6 +73,12 @@ public void testJsonInputFormatIndexDataIngestionSpecWithSchema() throws Excepti doIndexTest(InputFormatDetails.JSON); } + @Test + public void testTsvInputFormatIndexDataIngestionSpecWithSchema() throws Exception + { + doIndexTest(InputFormatDetails.TSV, ImmutableMap.of("findColumnsFromHeader", true)); + } + @Test public void testParquetInputFormatIndexDataIngestionSpecWithSchema() throws Exception {