From 744f00b68c10ef0ba5ef3db08dfb5f75cca20703 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Thu, 10 Dec 2020 09:44:05 -0600 Subject: [PATCH 1/3] Make some additions to IT suite to make Hadoop related testing more understandable --- integration-tests/README.md | 21 ++++++++++++++----- integration-tests/pom.xml | 2 ++ .../script/docker_build_containers.sh | 2 +- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/integration-tests/README.md b/integration-tests/README.md index 58ecf2fd5944..13a05e2908b7 100644 --- a/integration-tests/README.md +++ b/integration-tests/README.md @@ -109,7 +109,7 @@ docker-compose -f docker-compose.druid-hadoop.yml up 1. Build druid-cluster, druid-hadoop docker images. From root module run maven command: ``` -mvn clean install -pl integration-tests -P integration-tests -Ddocker.run.skip=true -Dmaven.test.skip=true +mvn clean install -pl integration-tests -P integration-tests -Ddocker.run.skip=true -Dmaven.test.skip=true -Ddocker.build.hadoop=true ``` 2. Run druid cluster by docker-compose: @@ -147,6 +147,7 @@ You need to build druid containers only once, after you can skip docker build st up the docker containers (Druid, Kafka, Hadoop, MYSQL, zookeeper, etc). Please make sure that you actually do have these containers already running if using this flag. Additionally, please make sure that the running containers are in the same state that the setup script (run_cluster.sh) would have brought it up in. +- -Ddocker.build.hadoop=true to build the hadoop image when either running integration tests or when building the integration test docker images without running the tests. ### Debugging Druid while running tests @@ -283,7 +284,7 @@ of the integration test run discussed above. This is because druid test clusters might not, in general, have access to hadoop. This also applies to integration test that uses Hadoop HDFS as an inputSource or as a deep storage. 
To run integration test that uses Hadoop, you will have to run a Hadoop cluster. This can be done in two ways: -1) Run Druid Docker test clusters with Hadoop container by passing -Dstart.hadoop.docker=true to the mvn command. +1) Run Druid Docker test clusters with Hadoop container by passing -Dstart.hadoop.docker=true to the mvn command. If you have not already built the hadoop image, you will also need to add -Ddocker.build.hadoop=true to the mvn command. 2) Run your own Druid + Hadoop cluster and specified Hadoop configs in the configuration file (CONFIG_FILE). Currently, hdfs-deep-storage and other -deep-storage integration test groups can only be run with @@ -302,12 +303,22 @@ If using the Docker-based Hadoop container, the steps above are automatically do When running the Hadoop tests, you must set `-Dextra.datasource.name.suffix=''`, due to https://github.com/apache/druid/issues/9788. -Run the test using mvn (using the bundled Docker-based Hadoop cluster): +Option 1: Run the test using mvn (using the bundled Docker-based Hadoop cluster and building docker images at runtime): ``` - mvn verify -P integration-tests -Dit.test=ITHadoopIndexTest -Dstart.hadoop.docker=true -Doverride.config.path=docker/environment-configs/override-examples/hdfs -Dextra.datasource.name.suffix='' + mvn verify -P integration-tests -Dit.test=ITHadoopIndexTest -Dstart.hadoop.docker=true -Ddocker.build.hadoop=true -Doverride.config.path=docker/environment-configs/override-examples/hdfs -Dextra.datasource.name.suffix='' ``` -Run the test using mvn (using config file for existing Hadoop cluster): +Option 2: Run the test using mvn (using the bundled Docker-based hadoop cluster and not building images at runtime): +``` + mvn verify -P integration-tests -Dit.test=ITHadoopIndexTest -Dstart.hadoop.docker=true -Ddocker.build.skip=true -Doverride.config.path=docker/environment-configs/override-examples/hdfs -Dextra.datasource.name.suffix='' +``` + +Option 3: Run the test using mvn (using the 
bundled Docker-based hadoop cluster and when you have already started all containers): +``` + mvn verify -P integration-tests -Dit.test=ITHadoopIndexTest -Ddocker.run.skip=true -Ddocker.build.skip=true -Doverride.config.path=docker/environment-configs/override-examples/hdfs -Dextra.datasource.name.suffix='' +``` + +Option 4: Run the test using mvn (using config file for existing Hadoop cluster): ``` mvn verify -P int-tests-config-file -Dit.test=ITHadoopIndexTest -Dextra.datasource.name.suffix='' ``` diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml index 57f78b0029d9..ecfe21b0cb60 100644 --- a/integration-tests/pom.xml +++ b/integration-tests/pom.xml @@ -370,6 +370,7 @@ false false false + false middleManager @@ -391,6 +392,7 @@ pre-integration-test + ${docker.build.hadoop} ${start.hadoop.docker} ${jvm.runtime} ${groups} diff --git a/integration-tests/script/docker_build_containers.sh b/integration-tests/script/docker_build_containers.sh index 033dcef8ee15..b578ca7ab2ca 100755 --- a/integration-tests/script/docker_build_containers.sh +++ b/integration-tests/script/docker_build_containers.sh @@ -42,7 +42,7 @@ else fi # Build Hadoop docker if needed -if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ] +if [ -n "$DRUID_INTEGRATION_TEST_BUILD_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_BUILD_HADOOP_DOCKER" == true ] then docker build -t druid-it/hadoop:2.8.5 $HADOOP_DOCKER_DIR fi From e6966e275930a7d44133ea0091a98141f012f11f Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Thu, 10 Dec 2020 15:42:59 -0600 Subject: [PATCH 2/3] add start.hadoop.docker to mvn arg tips in doc --- integration-tests/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/integration-tests/README.md b/integration-tests/README.md index 13a05e2908b7..435f82bf52d7 100644 --- a/integration-tests/README.md +++ b/integration-tests/README.md @@ -148,6 +148,7 @@ up the docker containers (Druid, Kafka, 
Hadoop, MYSQL, zookeeper, etc). Please m these containers already running if using this flag. Additionally, please make sure that the running containers are in the same state that the setup script (run_cluster.sh) would have brought it up in. - -Ddocker.build.hadoop=true to build the hadoop image when either running integration tests or when building the integration test docker images without running the tests. +- -Dstart.hadoop.docker=true to start the hadoop container when you need to run integration tests that use the local hadoop docker container. ### Debugging Druid while running tests From 1324ab9dbfbfb759ad9a6928ed7c1f9b2ddeabf5 Mon Sep 17 00:00:00 2001 From: "Lucas.Capistrant" Date: Fri, 11 Dec 2020 17:28:02 -0600 Subject: [PATCH 3/3] fix issues preventing ITHadoopIndexTest from running in local mode --- .../java/org/apache/druid/testing/utils/ITRetryUtil.java | 2 +- .../src/test/resources/hadoop/batch_hadoop_queries.json | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java b/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java index 4be7f4f3be81..c6baa86b6ac1 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java @@ -30,7 +30,7 @@ public class ITRetryUtil private static final Logger LOG = new Logger(ITRetryUtil.class); - public static final int DEFAULT_RETRY_COUNT = 150; // 5 minutes + public static final int DEFAULT_RETRY_COUNT = 300; // 10 minutes public static final long DEFAULT_RETRY_SLEEP = TimeUnit.SECONDS.toMillis(2); diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json index d59e7ae0dc2a..2a390b2ec1f7 100644 --- a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json +++ 
b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json @@ -18,6 +18,7 @@ "type": "STRING", "size": 0, "hasMultipleValues": false, + "hasNulls": false, "minValue": "location_1", "maxValue": "location_5", "cardinality": 5, @@ -27,6 +28,7 @@ "type": "thetaSketch", "size": 0, "hasMultipleValues": false, + "hasNulls": true, "minValue": null, "maxValue": null, "cardinality": null, @@ -36,6 +38,7 @@ "type": "thetaSketch", "size": 0, "hasMultipleValues": false, + "hasNulls": true, "minValue": null, "maxValue": null, "cardinality": null, @@ -45,6 +48,7 @@ "type": "LONG", "size": 0, "hasMultipleValues": false, + "hasNulls": false, "minValue": null, "maxValue": null, "cardinality": null, @@ -54,6 +58,7 @@ "type": "STRING", "size": 0, "hasMultipleValues": false, + "hasNulls": false, "minValue": "product_1", "maxValue": "product_9", "cardinality": 15,