19 changes: 16 additions & 3 deletions .travis.yml
@@ -324,6 +324,14 @@ jobs:
script: *run_integration_test
after_failure: *integration_test_diags

- &integration_security
name: "(Compile=openjdk8, Run=openjdk8) security integration test"
jdk: openjdk8
services: *integration_test_services
env: TESTNG_GROUPS='-Dgroups=security' JVM_RUNTIME='-Djvm.runtime=8'
script: *run_integration_test
after_failure: *integration_test_diags

- &integration_realtime_index
name: "(Compile=openjdk8, Run=openjdk8) realtime index integration test"
jdk: openjdk8
@@ -336,7 +344,7 @@ jobs:
name: "(Compile=openjdk8, Run=openjdk8) other integration test"
jdk: openjdk8
services: *integration_test_services
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index' JVM_RUNTIME='-Djvm.runtime=8'
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage' JVM_RUNTIME='-Djvm.runtime=8'
script: *run_integration_test
after_failure: *integration_test_diags
# END - Integration tests for Compile with Java 8 and Run with Java 8
@@ -362,6 +370,11 @@ jobs:
jdk: openjdk8
env: TESTNG_GROUPS='-Dgroups=query' JVM_RUNTIME='-Djvm.runtime=11'

- <<: *integration_security
name: "(Compile=openjdk8, Run=openjdk11) security integration test"
jdk: openjdk8
env: TESTNG_GROUPS='-Dgroups=security' JVM_RUNTIME='-Djvm.runtime=11'

- <<: *integration_realtime_index
name: "(Compile=openjdk8, Run=openjdk11) realtime index integration test"
jdk: openjdk8
@@ -370,9 +383,9 @@ jobs:
- <<: *integration_tests
name: "(Compile=openjdk8, Run=openjdk11) other integration test"
jdk: openjdk8
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index' JVM_RUNTIME='-Djvm.runtime=11'
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage' JVM_RUNTIME='-Djvm.runtime=11'
# END - Integration tests for Compile with Java 8 and Run with Java 11

- name: "security vulnerabilities"
stage: cron
install: skip
46 changes: 42 additions & 4 deletions integration-tests/README.md
@@ -48,9 +48,10 @@ export DOCKER_IP=127.0.0.1

## Running tests

To run all the tests using docker and mvn run the following command:
To run all tests from a test group using Docker and mvn, run the following command:
(the list of test groups can be found in integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java)
```
mvn verify -P integration-tests
mvn verify -P integration-tests -Dgroups=<test_group>
```
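
For example, to run just the "query" group (an illustrative invocation; the group name comes from TestNGGroup.java):
```
mvn verify -P integration-tests -Dgroups=query
```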

To run only a single test using mvn run the following command:
@@ -61,6 +62,12 @@ To run only a single test using mvn run the following command:
Add `-rf :druid-integration-tests` when running integration tests a second (or later) time without changing the code
of core modules in between; this skips the up-to-date checks for the whole module dependency tree.
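
For instance, a repeat run of the same test group could skip those checks like this (illustrative):
```
mvn verify -P integration-tests -Dgroups=<test_group> -rf :druid-integration-tests
```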

Integration tests can also be run with either Java 8 or Java 11 by adding -Djvm.runtime=# to the mvn command, where #
can be either 8 or 11.
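
For example, to run a test group on a Java 11 runtime (an illustrative invocation):
```
mvn verify -P integration-tests -Dgroups=<test_group> -Djvm.runtime=11
```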

Druid's configuration (when using Docker) can be overridden by providing -Doverride.config.path=<PATH_TO_FILE>.
The file must contain one property per line; each key must start with druid_ and use snake case.
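
A minimal sketch of such an override file, assuming you are pointing deep storage at S3 (hypothetical values; complete examples live in integration-tests/docker/environment-configs/override-examples/):
```
druid_storage_type=s3
druid_storage_bucket=my-test-bucket
druid_s3_accessKey=<YOUR_ACCESS_KEY>
druid_s3_secretKey=<YOUR_SECRET_KEY>
```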

Running Tests Using A Configuration File for Any Cluster
-------------------

@@ -79,23 +86,54 @@ To run tests on any druid cluster that is already running, create a configuratio
"coordinator_port": "<coordinator_port>",
"middlemanager_host": "<middle_manager_ip>",
"zookeeper_hosts": "<comma-separated list of zookeeper_ip:zookeeper_port>",
"cloud_bucket": "<(optional) cloud_bucket for test data if running cloud integration test>",
"cloud_path": "<(optional) cloud_path for test data if running cloud integration test>",
}

Set the environment variable CONFIG_FILE to the name of the configuration file:
```
export CONFIG_FILE=<config file name>
```

To run all the tests using mvn run the following command:
To run all tests from a test group using mvn, run the following command:
(the list of test groups can be found in integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java)
```
mvn verify -P int-tests-config-file
mvn verify -P int-tests-config-file -Dgroups=<test_group>
```

To run only a single test using mvn run the following command:
```
mvn verify -P int-tests-config-file -Dit.test=<test_name>
```

Running a Test That Uses Cloud
-------------------
Integration tests that index from cloud storage or use cloud storage as deep storage are not run as part
of the integration test runs discussed above. Running these tests requires you to provide your own
cloud storage.

Currently, the integration tests support Google Cloud Storage, Amazon S3, and Microsoft Azure.
These can be run by providing "gcs-deep-storage", "s3-deep-storage", or "azure-deep-storage"
to -Dgroups for Google Cloud Storage, Amazon S3, and Microsoft Azure respectively. Note that only
one group should be run per mvn command.

In addition to specifying -Dgroups on the mvn command, the following will need to be provided:
1) Set the bucket and path for your test data. This can be done by setting -Ddruid.test.config.cloudBucket and
-Ddruid.test.config.cloudPath on the mvn command, or by setting "cloud_bucket" and "cloud_path" in the config file (see the snippet after this list).
2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
located in integration-tests/src/test/resources/data/batch_index to your cloud storage at the location set in step 1.
3) Provide -Doverride.config.path=<PATH_TO_FILE> with your cloud credentials/configs set. See the
integration-tests/docker/environment-configs/override-examples/ directory for the env vars to provide for each cloud storage type.
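
For the config-file route in step 1, the relevant entries would look like this (values illustrative, matching the example command below):
```
"cloud_bucket": "test-bucket",
"cloud_path": "test-data-folder/"
```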

When running against Google Cloud Storage, in addition to the above, you will also have to:
1) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> pointing to the folder that contains your GOOGLE_APPLICATION_CREDENTIALS file

For example, to run the integration tests for Google Cloud Storage:
```
mvn verify -P integration-tests -Dgroups=gcs-deep-storage -Doverride.config.path=<PATH_TO_FILE> -Dresource.file.dir.path=<PATH_TO_FOLDER> -Ddruid.test.config.cloudBucket=test-bucket -Ddruid.test.config.cloudPath=test-data-folder/
```


Running a Test That Uses Hadoop
-------------------

8 changes: 3 additions & 5 deletions integration-tests/docker/Dockerfile
@@ -46,11 +46,7 @@ RUN ln -sf /usr/share/java/mysql-connector-java.jar /usr/local/druid/lib/mysql-c
RUN find /var/lib/mysql -type f -exec touch {} \; && service mysql start \
&& java -cp "/usr/local/druid/lib/*" -Ddruid.metadata.storage.type=mysql org.apache.druid.cli.Main tools metadata-init --connectURI="jdbc:mysql://localhost:3306/druid" --user=druid --password=diurd \
&& /etc/init.d/mysql stop
ADD sample-data.sql sample-data.sql
# touch is needed because OverlayFS's copy-up operation breaks POSIX standards. See https://github.com/docker/for-linux/issues/72.
RUN find /var/lib/mysql -type f -exec touch {} \; && service mysql start \
&& cat sample-data.sql | mysql -u root druid \
&& /etc/init.d/mysql stop
ADD test-data /test-data

# Setup supervisord
ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf
@@ -101,6 +97,8 @@ WORKDIR /var/lib/druid
ENTRYPOINT /tls/generate-server-certs-and-keystores.sh \
# Create druid service config files with all the config variables
&& . /druid.sh; setupConfig \
# Some test groups require pre-existing data to be set up
&& . /druid.sh; setupData \
# Export the service config file path to use in supervisord conf file
&& export DRUID_COMMON_CONF_DIR="$(. /druid.sh; getConfPath ${DRUID_SERVICE})" \
# Export the common config file path to use in supervisord conf file
31 changes: 26 additions & 5 deletions integration-tests/docker/druid.sh
@@ -17,7 +17,8 @@

set -e

getConfPath() {
getConfPath()
{
cluster_conf_base=/tmp/conf/druid/cluster
case "$1" in
_common) echo $cluster_conf_base/_common ;;
@@ -31,7 +32,8 @@ getConfPath() {
}

# Delete the old key (if existing) and append new key=value
setKey() {
setKey()
{
service="$1"
key="$2"
value="$3"
@@ -45,7 +47,8 @@ setKey() {
echo "Setting $key=$value in $service_conf"
}

setupConfig() {
setupConfig()
{
echo "$(date -Is) configuring service $DRUID_SERVICE"

# We put all the config in /tmp/conf to allow for a
@@ -63,7 +66,6 @@ setupConfig() {
setKey $DRUID_SERVICE druid.host $(resolveip -s $HOSTNAME)
setKey $DRUID_SERVICE druid.worker.ip $(resolveip -s $HOSTNAME)


# Write out all the environment variables starting with druid_ to druid service config file
# This will replace _ with . in the key
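# For example, druid_storage_type=s3 becomes druid.storage.type=s3 in the service config file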
env | grep ^druid_ | while read evar;
@@ -73,4 +75,23 @@
var=$(echo "$evar" | sed -e 's?^\([^=]*\)=.*?\1?g' -e 's?_?.?g')
setKey $DRUID_SERVICE "$var" "$val"
done
}
}

setupData()
Contributor (author): This will work for how our Travis CI is set up, but if a user tries running manually locally with:
1. -DexcludedGroups=not-query
2. without -Dgroups and without -DexcludedGroups
then the data won't be set up.

Contributor (author): Added comments indicating the above.

Member: Ah, one more thing I forgot about, please update the docs in https://github.com/apache/druid/blob/master/integration-tests/README.md to include instructions for how to supply credentials and run these tests.

Contributor (author): Done

{
# The "query" and "security" test groups require data to be setup before running the tests.
# In particular, they requires segments to be download from a pre-existing s3 bucket.
# This is done by using the loadSpec put into metadatastore and s3 credientials set below.
if [ "$DRUID_INTEGRATION_TEST_GROUP" = "query" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "security" ]; then
Member: Hmm, this doesn't necessarily need to be changed now, but I think longer term we are going to need a mapping of test groups to configurations, and I'm not sure it should be encoded in a file in the container. This part should maybe be repurposed to allow running initialization scripts that configurations bring with them to the container, with this logic moved into a script that initializes the legacy integration test environment.

Contributor (author): I think that's a good idea. Should definitely revisit this.

# touch is needed because OverlayFS's copy-up operation breaks POSIX standards. See https://github.com/docker/for-linux/issues/72.
find /var/lib/mysql -type f -exec touch {} \; && service mysql start \
&& cat /test-data/${DRUID_INTEGRATION_TEST_GROUP}-sample-data.sql | mysql -u root druid && /etc/init.d/mysql stop
# The S3 credentials below are needed to access the pre-existing S3 bucket
setKey $DRUID_SERVICE druid.s3.accessKey AKIAJI7DG7CDECGBQ6NA
setKey $DRUID_SERVICE druid.s3.secretKey OBaLISDFjKLajSTrJ53JoTtzTZLjPlRePcwa+Pjv
setKey $DRUID_SERVICE druid.extensions.loadList [\"druid-s3-extensions\"]
# The region of the sample data s3 blobs needed for these test groups
export AWS_REGION=us-east-1
fi
}

2 changes: 2 additions & 0 deletions integration-tests/docker/environment-configs/common
@@ -26,6 +26,8 @@ COMMON_DRUID_JAVA_OPTS=-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Dlog4j.configu
DRUID_DEP_LIB_DIR=/shared/hadoop_xml/*:/shared/docker/lib/*:/usr/local/druid/lib/mysql-connector-java.jar

# Druid configs
druid_extensions_loadList=[]
druid_extensions_directory=/shared/docker/extensions
druid_auth_authenticator_basic_authorizerName=basic
druid_auth_authenticator_basic_initialAdminPassword=priest
druid_auth_authenticator_basic_initialInternalClientPassword=warlock
2 changes: 0 additions & 2 deletions integration-tests/docker/environment-configs/historical
@@ -24,8 +24,6 @@ DRUID_LOG_PATH=/shared/logs/historical.log
SERVICE_DRUID_JAVA_OPTS=-server -Xmx512m -Xms512m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:+UseG1GC

# Druid configs
druid_s3_accessKey=AKIAJI7DG7CDECGBQ6NA
druid_s3_secretKey=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
druid_processing_buffer_sizeBytes=25000000
druid_processing_numThreads=2
druid_query_groupBy_maxOnDiskStorage=300000000
2 changes: 0 additions & 2 deletions integration-tests/docker/environment-configs/middlemanager
@@ -32,8 +32,6 @@ druid_indexer_runner_javaOptsArray=["-server", "-Xmx256m", "-Xms256m", "-XX:NewS
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=25000000
druid_indexer_fork_property_druid_processing_numThreads=1
druid_indexer_fork_server_http_numThreads=20
druid_s3_accessKey=AKIAJI7DG7CDECGBQ6NA
druid_s3_secretKey=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
druid_selectors_indexing_serviceName=druid/overlord
druid_indexer_task_chathandler_type=announce
druid_auth_basic_common_cacheDirectory=/tmp/authCache/middleManager
28 changes: 28 additions & 0 deletions integration-tests/docker/environment-configs/override-examples/azure
@@ -0,0 +1,28 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
#
druid_storage_type=azure
druid_azure_account=<OVERRIDE_THIS>
druid_azure_key=<OVERRIDE_THIS>
druid_azure_container=<OVERRIDE_THIS>
druid_extensions_loadList=["druid-azure-extensions"]
Member: This approach of using overrides seems like it is going to be somewhat brittle and hard to maintain. For example, if I want to ingest data from S3 into HDFS deep storage, or use any other combination of extensions, I'm not sure how well this approach will hold up; I guess we'll need separate override files for each configuration? That said, maybe it is fine until we determine a better solution to integration test config management.

Contributor (author): The override-examples directory is meant to be a general guideline of what to override. You may override more or less depending on which test you are running or writing, and you may combine multiple files from the override-examples directory if you are running a combination of extensions (like ingesting data from S3 into HDFS deep storage). Maybe we can revisit this later for a more user-friendly and easier approach.

29 changes: 29 additions & 0 deletions integration-tests/docker/environment-configs/override-examples/gcs
@@ -0,0 +1,29 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> and <YOUR_GOOGLE_CREDENTIALS_FILE_NAME> with your cloud configs/credentials
#
druid_storage_type=google
druid_google_bucket=<OVERRIDE_THIS>
druid_google_prefix=<OVERRIDE_THIS>
druid_extensions_loadList=["druid-google-extensions"]
GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/<YOUR_GOOGLE_CREDENTIALS_FILE_NAME>

30 changes: 30 additions & 0 deletions integration-tests/docker/environment-configs/override-examples/s3
@@ -0,0 +1,30 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
#
druid_storage_type=s3
druid_storage_bucket=<OVERRIDE_THIS>
druid_storage_baseKey=druid/segments
druid_s3_accessKey=<OVERRIDE_THIS>
druid_s3_secretKey=<OVERRIDE_THIS>
AWS_REGION=<OVERRIDE_THIS>
druid_extensions_loadList=["druid-s3-extensions"]
1 change: 0 additions & 1 deletion integration-tests/docker/service-supervisords/druid.conf
@@ -4,4 +4,3 @@ redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=%(ENV_DRUID_LOG_PATH)s
environment=AWS_REGION=us-east-1