diff --git a/.travis.yml b/.travis.yml index e7adb9dcd092..f0daa7f106c5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -324,6 +324,14 @@ jobs: script: *run_integration_test after_failure: *integration_test_diags + - &integration_security + name: "(Compile=openjdk8, Run=openjdk8) security integration test" + jdk: openjdk8 + services: *integration_test_services + env: TESTNG_GROUPS='-Dgroups=security' JVM_RUNTIME='-Djvm.runtime=8' + script: *run_integration_test + after_failure: *integration_test_diags + - &integration_realtime_index name: "(Compile=openjdk8, Run=openjdk8) realtime index integration test" jdk: openjdk8 @@ -336,7 +344,7 @@ jobs: name: "(Compile=openjdk8, Run=openjdk8) other integration test" jdk: openjdk8 services: *integration_test_services - env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index' JVM_RUNTIME='-Djvm.runtime=8' + env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage' JVM_RUNTIME='-Djvm.runtime=8' script: *run_integration_test after_failure: *integration_test_diags # END - Integration tests for Compile with Java 8 and Run with Java 8 @@ -362,6 +370,11 @@ jobs: jdk: openjdk8 env: TESTNG_GROUPS='-Dgroups=query' JVM_RUNTIME='-Djvm.runtime=11' + - <<: *integration_security + name: "(Compile=openjdk8, Run=openjdk11) security integration test" + jdk: openjdk8 + env: TESTNG_GROUPS='-Dgroups=security' JVM_RUNTIME='-Djvm.runtime=11' + - <<: *integration_realtime_index name: "(Compile=openjdk8, Run=openjdk11) realtime index integration test" jdk: openjdk8 @@ -370,9 +383,9 @@ jobs: - <<: *integration_tests name: "(Compile=openjdk8, Run=openjdk11) other integration test" jdk: openjdk8 - env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index' JVM_RUNTIME='-Djvm.runtime=11' + env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage' JVM_RUNTIME='-Djvm.runtime=11' # END - Integration tests for Compile with Java 8 and Run with Java 11 - + - name: "security vulnerabilities" stage: cron install: skip diff --git a/integration-tests/README.md b/integration-tests/README.md index 47f7eb7993a2..5f8901a2f224 100644 --- a/integration-tests/README.md +++ b/integration-tests/README.md @@ -48,9 +48,10 @@ export DOCKER_IP=127.0.0.1 ## Running tests -To run all the tests using docker and mvn run the following command: +To run all tests from a test group using docker and mvn run the following command: +(list of test groups can be found at integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java) ``` - mvn verify -P integration-tests + mvn verify -P integration-tests -Dgroups= ``` To run only a single test using mvn run the following command: @@ -61,6 +62,12 @@ To run only a single test using mvn run the following command: Add `-rf :druid-integration-tests` when running integration tests for the second time or later without changing the code of core modules in between to skip up-to-date checks for the whole module dependency tree. +Integration tests can also be run with either Java 8 or Java 11 by adding -Djvm.runtime=# to the mvn command, where # +can be either 8 or 11. + +Druid's configuration (using Docker) can be overridden by providing -Doverride.config.path=.
+The file must contain one property per line; each key must start with druid_ and use snake case. + Running Tests Using A Configuration File for Any Cluster ------------------- @@ -79,6 +86,8 @@ To run tests on any druid cluster that is already running, create a configuratio "coordinator_port": "", "middlemanager_host": "", "zookeeper_hosts": "", + "cloud_bucket": "<(optional) cloud_bucket for test data if running cloud integration test>", + "cloud_path": "<(optional) cloud_path for test data if running cloud integration test>", } Set the environment variable CONFIG_FILE to the name of the configuration file: @@ -86,9 +95,10 @@ Set the environment variable CONFIG_FILE to the name of the configuration file: export CONFIG_FILE= ``` -To run all the tests using mvn run the following command: +To run all tests from a test group using mvn run the following command: +(list of test groups can be found at integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java) ``` - mvn verify -P int-tests-config-file + mvn verify -P int-tests-config-file -Dgroups= ``` To run only a single test using mvn run the following command: @@ -96,6 +106,34 @@ To run only a single test using mvn run the following command: mvn verify -P int-tests-config-file -Dit.test= ``` +Running a Test That Uses Cloud +------------------- +The integration tests that index from Cloud or use Cloud as deep storage are not run as part +of the integration test run discussed above. Running these tests requires the user to provide +their own Cloud storage. + +Currently, the integration tests support Google Cloud Storage, Amazon S3, and Microsoft Azure. +These can be run by providing "gcs-deep-storage", "s3-deep-storage", or "azure-deep-storage" +to -Dgroups for Google Cloud Storage, Amazon S3, and Microsoft Azure respectively. Note that only +one group should be run per mvn command. + +In addition to specifying -Dgroups on the mvn command, the following will need to be provided: +1) Set the bucket and path for your test data. This can be done by setting -Ddruid.test.config.cloudBucket and +-Ddruid.test.config.cloudPath in the mvn command or setting "cloud_bucket" and "cloud_path" in the config file. +2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json +located in integration-tests/src/test/resources/data/batch_index to your Cloud storage at the location set in step 1. +3) Provide -Doverride.config.path= with your Cloud credentials/configs set. See the +integration-tests/docker/environment-configs/override-examples/ directory for env vars to provide for each Cloud storage.
+ +For running Google Cloud Storage, in addition to the above, you will also have to: +1) Provide -Dresource.file.dir.path= with the folder that contains your GOOGLE_APPLICATION_CREDENTIALS file + +For example, to run the integration tests for Google Cloud Storage: +``` + mvn verify -P integration-tests -Dgroups=gcs-deep-storage -Doverride.config.path= -Dresource.file.dir.path= -Ddruid.test.config.cloudBucket=test-bucket -Ddruid.test.config.cloudPath=test-data-folder/ +``` + + Running a Test That Uses Hadoop ------------------- diff --git a/integration-tests/docker/Dockerfile b/integration-tests/docker/Dockerfile index 8c5b51051015..dbd8a3618241 100644 --- a/integration-tests/docker/Dockerfile +++ b/integration-tests/docker/Dockerfile @@ -46,11 +46,7 @@ RUN ln -sf /usr/share/java/mysql-connector-java.jar /usr/local/druid/lib/mysql-c RUN find /var/lib/mysql -type f -exec touch {} \; && service mysql start \ && java -cp "/usr/local/druid/lib/*" -Ddruid.metadata.storage.type=mysql org.apache.druid.cli.Main tools metadata-init --connectURI="jdbc:mysql://localhost:3306/druid" --user=druid --password=diurd \ && /etc/init.d/mysql stop -ADD sample-data.sql sample-data.sql -# touch is needed because OverlayFS's copy-up operation breaks POSIX standards. See https://github.com/docker/for-linux/issues/72. -RUN find /var/lib/mysql -type f -exec touch {} \; && service mysql start \ && cat sample-data.sql | mysql -u root druid \ && /etc/init.d/mysql stop +ADD test-data /test-data # Setup supervisord ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf @@ -101,6 +97,8 @@ WORKDIR /var/lib/druid ENTRYPOINT /tls/generate-server-certs-and-keystores.sh \ # Create druid service config files with all the config variables && . /druid.sh; setupConfig \ + # Some test groups require pre-existing data to be set up + && . /druid.sh; setupData \ # Export the service config file path to use in supervisord conf file && export DRUID_COMMON_CONF_DIR="$(. /druid.sh; getConfPath ${DRUID_SERVICE})" \ # Export the common config file path to use in supervisord conf file diff --git a/integration-tests/docker/druid.sh b/integration-tests/docker/druid.sh index 9ac25498b76b..a7632b738928 100755 --- a/integration-tests/docker/druid.sh +++ b/integration-tests/docker/druid.sh @@ -17,7 +17,8 @@ set -e -getConfPath() { +getConfPath() +{ cluster_conf_base=/tmp/conf/druid/cluster case "$1" in _common) echo $cluster_conf_base/_common ;; @@ -31,7 +32,8 @@ getConfPath() { } # Delete the old key (if existing) and append new key=value -setKey() { +setKey() +{ service="$1" key="$2" value="$3" @@ -45,7 +47,8 @@ setKey() { echo "Setting $key=$value in $service_conf" } -setupConfig() { +setupConfig() +{ echo "$(date -Is) configuring service $DRUID_SERVICE" # We put all the config in /tmp/conf to allow for a @@ -63,7 +66,6 @@ setupConfig() { setKey $DRUID_SERVICE druid.host $(resolveip -s $HOSTNAME) setKey $DRUID_SERVICE druid.worker.ip $(resolveip -s $HOSTNAME) - # Write out all the environment variables starting with druid_ to druid service config file # This will replace _ with . in the key env | grep ^druid_ | while read evar; @@ -73,4 +75,23 @@ var=$(echo "$evar" | sed -e 's?^\([^=]*\)=.*?\1?g' -e 's?_?.?g') setKey $DRUID_SERVICE "$var" "$val" done -} \ No newline at end of file +} + +setupData() +{ + # The "query" and "security" test groups require data to be set up before running the tests. + # In particular, they require segments to be downloaded from a pre-existing s3 bucket.
+ # This is done using the loadSpec entries put into the metadata store and the s3 credentials set below. + if [ "$DRUID_INTEGRATION_TEST_GROUP" = "query" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "security" ]; then + # touch is needed because OverlayFS's copy-up operation breaks POSIX standards. See https://github.com/docker/for-linux/issues/72. + find /var/lib/mysql -type f -exec touch {} \; && service mysql start \ + && cat /test-data/${DRUID_INTEGRATION_TEST_GROUP}-sample-data.sql | mysql -u root druid && /etc/init.d/mysql stop + # The s3 credentials below are needed to access the pre-existing s3 bucket + setKey $DRUID_SERVICE druid.s3.accessKey AKIAJI7DG7CDECGBQ6NA + setKey $DRUID_SERVICE druid.s3.secretKey OBaLISDFjKLajSTrJ53JoTtzTZLjPlRePcwa+Pjv + setKey $DRUID_SERVICE druid.extensions.loadList [\"druid-s3-extensions\"] + # The region of the sample data s3 blobs needed for these test groups + export AWS_REGION=us-east-1 + fi +} + diff --git a/integration-tests/docker/environment-configs/common b/integration-tests/docker/environment-configs/common index 9dda72a4e4e7..bd3b6459c27c 100644 --- a/integration-tests/docker/environment-configs/common +++ b/integration-tests/docker/environment-configs/common @@ -26,6 +26,8 @@ COMMON_DRUID_JAVA_OPTS=-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Dlog4j.configu DRUID_DEP_LIB_DIR=/shared/hadoop_xml/*:/shared/docker/lib/*:/usr/local/druid/lib/mysql-connector-java.jar # Druid configs +druid_extensions_loadList=[] +druid_extensions_directory=/shared/docker/extensions druid_auth_authenticator_basic_authorizerName=basic druid_auth_authenticator_basic_initialAdminPassword=priest druid_auth_authenticator_basic_initialInternalClientPassword=warlock diff --git a/integration-tests/docker/environment-configs/historical b/integration-tests/docker/environment-configs/historical index 39bad971f8ff..1f74b0ce2678 100644 --- a/integration-tests/docker/environment-configs/historical +++ b/integration-tests/docker/environment-configs/historical @@ -24,8 +24,6 @@ DRUID_LOG_PATH=/shared/logs/historical.log SERVICE_DRUID_JAVA_OPTS=-server -Xmx512m -Xms512m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:+UseG1GC # Druid configs -druid_s3_accessKey=AKIAJI7DG7CDECGBQ6NA -druid_s3_secretKey=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 druid_processing_buffer_sizeBytes=25000000 druid_processing_numThreads=2 druid_query_groupBy_maxOnDiskStorage=300000000 diff --git a/integration-tests/docker/environment-configs/middlemanager b/integration-tests/docker/environment-configs/middlemanager index feed0d7df252..c37c3fee8859 100644 --- a/integration-tests/docker/environment-configs/middlemanager +++ b/integration-tests/docker/environment-configs/middlemanager @@ -32,8 +32,6 @@ druid_indexer_runner_javaOptsArray=["-server", "-Xmx256m", "-Xms256m", "-XX:NewS druid_indexer_fork_property_druid_processing_buffer_sizeBytes=25000000 druid_indexer_fork_property_druid_processing_numThreads=1 druid_indexer_fork_server_http_numThreads=20 -druid_s3_accessKey=AKIAJI7DG7CDECGBQ6NA -druid_s3_secretKey=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 druid_selectors_indexing_serviceName=druid/overlord druid_indexer_task_chathandler_type=announce druid_auth_basic_common_cacheDirectory=/tmp/authCache/middleManager diff --git a/integration-tests/docker/environment-configs/override-examples/azure b/integration-tests/docker/environment-configs/override-examples/azure new file mode 100644 index 000000000000..275489c8c17f --- /dev/null +++ b/integration-tests/docker/environment-configs/override-examples/azure @@ -0,0 +1,28 @@ +# +# Licensed to
the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# +# Example of override config file to provide. +# Please replace with your cloud configs/credentials +# +druid_storage_type=azure +druid_azure_account= +druid_azure_key= +druid_azure_container= +druid_extensions_loadList=["druid-azure-extensions"] diff --git a/integration-tests/docker/environment-configs/override-examples/gcs b/integration-tests/docker/environment-configs/override-examples/gcs new file mode 100644 index 000000000000..745053da59fc --- /dev/null +++ b/integration-tests/docker/environment-configs/override-examples/gcs @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# +# Example of override config file to provide. +# Please replace and with your cloud configs/credentials +# +druid_storage_type=google +druid_google_bucket= +druid_google_prefix= +druid_extensions_loadList=["druid-google-extensions"] +GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/ + diff --git a/integration-tests/docker/environment-configs/override-examples/s3 b/integration-tests/docker/environment-configs/override-examples/s3 new file mode 100644 index 000000000000..cdca76490625 --- /dev/null +++ b/integration-tests/docker/environment-configs/override-examples/s3 @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +# +# Example of override config file to provide. +# Please replace with your cloud configs/credentials +# +druid_storage_type=s3 +druid_storage_bucket= +druid_storage_baseKey=druid/segments +druid_s3_accessKey= +druid_s3_secretKey= +AWS_REGION= +druid_extensions_loadList=["druid-s3-extensions"] \ No newline at end of file diff --git a/integration-tests/docker/service-supervisords/druid.conf b/integration-tests/docker/service-supervisords/druid.conf index 5aea9941a0c7..721844ea6974 100644 --- a/integration-tests/docker/service-supervisords/druid.conf +++ b/integration-tests/docker/service-supervisords/druid.conf @@ -4,4 +4,3 @@ redirect_stderr=true priority=100 autorestart=false stdout_logfile=%(ENV_DRUID_LOG_PATH)s -environment=AWS_REGION=us-east-1 diff --git a/integration-tests/docker/sample-data.sql b/integration-tests/docker/test-data/query-sample-data.sql similarity index 80% rename from integration-tests/docker/sample-data.sql rename to integration-tests/docker/test-data/query-sample-data.sql index 69bf6ea012bc..18ab48ad556b 100644 --- a/integration-tests/docker/sample-data.sql +++ b/integration-tests/docker/test-data/query-sample-data.sql @@ -18,5 +18,3 @@ INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,ver INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9','twitterstream','2013-05-13T00:03:48.807Z','2013-01-03T00:00:00.000Z','2013-01-04T00:00:00.000Z',0,'2013-01-04T04:09:13.590Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z\",\"version\":\"2013-01-04T04:09:13.590Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z/2013-01-04T04:09:13.590Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":411651320,\"identifier\":\"twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9\"}'); INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES 
('wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','wikipedia_editstream','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"wikipedia_editstream\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}'); INSERT INTO druid_segments (id, dataSource, created_date, start, end, partitioned, version, used, payload) VALUES ('wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z', 'wikipedia', '2013-08-08T21:26:23.799Z', '2013-08-01T00:00:00.000Z', '2013-08-02T00:00:00.000Z', '0', '2013-08-08T21:22:48.989Z', '1', '{\"dataSource\":\"wikipedia\",\"interval\":\"2013-08-01T00:00:00.000Z/2013-08-02T00:00:00.000Z\",\"version\":\"2013-08-08T21:22:48.989Z\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia/20130801T000000.000Z_20130802T000000.000Z/2013-08-08T21_22_48.989Z/0/index.zip\"},\"dimensions\":\"dma_code,continent_code,geo,area_code,robot,country_name,network,city,namespace,anonymous,unpatrolled,page,postal_code,language,newpage,user,region_lookup\",\"metrics\":\"count,delta,variation,added,deleted\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":24664730,\"identifier\":\"wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z\"}'); -INSERT INTO druid_tasks (id, created_date, datasource, payload, status_payload, active) VALUES ('index_auth_test_2030-04-30T01:13:31.893Z', '2030-04-30T01:13:31.893Z', 'auth_test', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"created_date\":\"2030-04-30T01:13:31.893Z\",\"datasource\":\"auth_test\",\"active\":0}', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"status\":\"SUCCESS\",\"duration\":1}', 0); -INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES 
('auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','auth_test','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"auth_test\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}'); diff --git a/integration-tests/docker/test-data/security-sample-data.sql b/integration-tests/docker/test-data/security-sample-data.sql new file mode 100644 index 000000000000..f9edf756de9f --- /dev/null +++ b/integration-tests/docker/test-data/security-sample-data.sql @@ -0,0 +1,17 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+ +INSERT INTO druid_tasks (id, created_date, datasource, payload, status_payload, active) VALUES ('index_auth_test_2030-04-30T01:13:31.893Z', '2030-04-30T01:13:31.893Z', 'auth_test', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"created_date\":\"2030-04-30T01:13:31.893Z\",\"datasource\":\"auth_test\",\"active\":0}', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"status\":\"SUCCESS\",\"duration\":1}', 0); +INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','auth_test','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"auth_test\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}'); diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml index 5ce34b109aa3..e13358ce9663 100644 --- a/integration-tests/pom.xml +++ b/integration-tests/pom.xml @@ -43,6 +43,18 @@ ${project.parent.version} runtime + + org.apache.druid.extensions + druid-azure-extensions + ${project.parent.version} + runtime + + + org.apache.druid.extensions + druid-google-extensions + ${project.parent.version} + runtime + org.apache.druid.extensions druid-datasketches @@ -268,6 +280,7 @@ false + @@ -287,6 +300,7 @@ ${jvm.runtime} ${groups} ${override.config.path} + ${resource.file.dir.path}> ${project.basedir}/run_cluster.sh diff --git a/integration-tests/run_cluster.sh b/integration-tests/run_cluster.sh index 7dabd69ba003..41d2fb9a9ca7 100755 --- a/integration-tests/run_cluster.sh +++ b/integration-tests/run_cluster.sh @@ -50,12 +50,25 @@ mkdir -p $SHARED_DIR/hadoop-dependencies mkdir -p $SHARED_DIR/logs mkdir -p $SHARED_DIR/tasklogs + mkdir -p $SHARED_DIR/docker/extensions + mkdir -p $SHARED_DIR/docker/credentials # install druid jars rm -rf $SHARED_DIR/docker cp -R docker $SHARED_DIR/docker mvn -B dependency:copy-dependencies -DoutputDirectory=$SHARED_DIR/docker/lib + # move extensions into a seperate extension folder + # For druid-s3-extensions + mkdir -p $SHARED_DIR/docker/extensions/druid-s3-extensions + mv $SHARED_DIR/docker/lib/druid-s3-extensions-* $SHARED_DIR/docker/extensions/druid-s3-extensions + # For druid-azure-extensions + mkdir -p $SHARED_DIR/docker/extensions/druid-azure-extensions + mv $SHARED_DIR/docker/lib/druid-azure-extensions-* $SHARED_DIR/docker/extensions/druid-azure-extensions + # For druid-google-extensions + mkdir -p $SHARED_DIR/docker/extensions/druid-google-extensions + mv $SHARED_DIR/docker/lib/druid-google-extensions-* $SHARED_DIR/docker/extensions/druid-google-extensions + # Pull Hadoop dependency if needed if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ] then @@ -73,8 +86,14 @@ cp 
../examples/quickstart/tutorial/wikiticker-2015-09-12-sampled.json.gz $SHARED_DIR/wikiticker-it/wikiticker-2015-09-12-sampled.json.gz cp docker/wiki-simple-lookup.json $SHARED_DIR/wikiticker-it/wiki-simple-lookup.json + # copy other files if needed + if [ -n "$DRUID_INTEGRATION_TEST_RESOURCE_FILE_DIR_PATH" ] + then + cp -a $DRUID_INTEGRATION_TEST_RESOURCE_FILE_DIR_PATH/. $SHARED_DIR/docker/credentials/ + fi + # setup all enviornment variables to be pass to the containers - COMMON_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/common" + COMMON_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/common -e DRUID_INTEGRATION_TEST_GROUP" BROKER_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/broker" COORDINATOR_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/coordinator" HISTORICAL_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/historical" @@ -89,6 +108,12 @@ if [ -z "$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH" ] then echo "\$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH is not set. No override config file provided" + if [ "$DRUID_INTEGRATION_TEST_GROUP" = "s3-deep-storage" ] || \ + [ "$DRUID_INTEGRATION_TEST_GROUP" = "gcs-deep-storage" ] || \ + [ "$DRUID_INTEGRATION_TEST_GROUP" = "azure-deep-storage" ]; then + echo "Test group $DRUID_INTEGRATION_TEST_GROUP requires override config file. Stopping test..." + exit 1 + fi else echo "\$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH is set with value ${DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH}" OVERRIDE_ENV="--env-file=$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH" diff --git a/integration-tests/src/main/java/org/apache/druid/testing/ConfigFileConfigProvider.java b/integration-tests/src/main/java/org/apache/druid/testing/ConfigFileConfigProvider.java index 976eb894f99f..769b71216114 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/ConfigFileConfigProvider.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/ConfigFileConfigProvider.java @@ -55,6 +55,8 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide private Map props = null; private String username; private String password; + private String cloudBucket; + private String cloudPath; @JsonCreator ConfigFileConfigProvider(@JsonProperty("configFile") String configFile) @@ -188,6 +190,9 @@ private void loadProperties(String configFile) password = props.get("password"); + cloudBucket = props.get("cloud_bucket"); + cloudPath = props.get("cloud_path"); + LOG.info("router: [%s], [%s]", routerUrl, routerTLSUrl); LOG.info("broker: [%s], [%s]", brokerUrl, brokerTLSUrl); LOG.info("historical: [%s], [%s]", historicalUrl, historicalTLSUrl); @@ -337,6 +342,18 @@ public String getPassword() return password; } + @Override + public String getCloudBucket() + { + return cloudBucket; + } + + @Override + public String getCloudPath() + { + return cloudPath; + } + @Override public Map getProperties() { diff --git a/integration-tests/src/main/java/org/apache/druid/testing/DockerConfigProvider.java b/integration-tests/src/main/java/org/apache/druid/testing/DockerConfigProvider.java index 7cd8d9363b48..83d80e7870a8 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/DockerConfigProvider.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/DockerConfigProvider.java @@ -40,6 +40,12 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider @JsonProperty private String extraDatasourceNameSuffix = ""; + @JsonProperty + private String cloudPath; + + @JsonProperty + private String cloudBucket; + @Override public IntegrationTestingConfig get() { @@ 
-211,6 +217,18 @@ public String getExtraDatasourceNameSuffix() { return extraDatasourceNameSuffix; } + + @Override + public String getCloudBucket() + { + return cloudBucket; + } + + @Override + public String getCloudPath() + { + return cloudPath; + } }; } } diff --git a/integration-tests/src/main/java/org/apache/druid/testing/IntegrationTestingConfig.java b/integration-tests/src/main/java/org/apache/druid/testing/IntegrationTestingConfig.java index f4e745fca946..d178f90dd1d4 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/IntegrationTestingConfig.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/IntegrationTestingConfig.java @@ -84,4 +84,8 @@ default String getKafkaInternalHost() boolean manageKafkaTopic(); String getExtraDatasourceNameSuffix(); + + String getCloudBucket(); + + String getCloudPath(); } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java index ad8a1454a6d0..0903e3bc3dd8 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java @@ -30,7 +30,23 @@ public class TestNGGroup public static final String KAFKA_INDEX = "kafka-index"; public static final String OTHER_INDEX = "other-index"; public static final String PERFECT_ROLLUP_PARALLEL_BATCH_INDEX = "perfect-rollup-parallel-batch-index"; + // This group can only be run individually using -Dgroups=query since it requires specific test data setup. public static final String QUERY = "query"; public static final String REALTIME_INDEX = "realtime-index"; + // This group can only be run individually using -Dgroups=security since it requires specific test data setup. public static final String SECURITY = "security"; + // This group is not part of CI. To run this group, s3 configs/credentials for your s3 bucket must be provided in a file. + // The path of the file must then be passed to mvn with -Doverride.config.path= + // See integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide. + public static final String S3_DEEP_STORAGE = "s3-deep-storage"; + // This group is not part of CI. To run this group, gcs configs/credentials for your gcs bucket must be provided in a file. + // The path of the file must then be passed to mvn with -Doverride.config.path= + // See integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide. + // The path to the folder that contains your GOOGLE_APPLICATION_CREDENTIALS file must also be passed + // to mvn with -Dresource.file.dir.path= + public static final String GCS_DEEP_STORAGE = "gcs-deep-storage"; + // This group is not part of CI. To run this group, azure configs/credentials for your azure account must be provided in a file. + // The path of the file must then be passed to mvn with -Doverride.config.path= + // See integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
+ public static final String AZURE_DEEP_STORAGE = "azure-deep-storage"; } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureParallelIndexTest.java new file mode 100644 index 000000000000..97a8254647f2 --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureParallelIndexTest.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.indexer; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +import java.io.Closeable; +import java.util.List; +import java.util.UUID; +import java.util.function.Function; + +/** + * IMPORTANT: + * To run this test, you must: + * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and + * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file. + * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json + * located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1. + * 3) Provide -Doverride.config.path= with Azure credentials/configs set. See + * integration-tests/docker/environment-configs/override-examples/azure for env vars to provide. 
+ */ +@Test(groups = TestNGGroup.AZURE_DEEP_STORAGE) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITAzureParallelIndexTest extends AbstractITBatchIndexTest +{ + private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json"; + private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; + private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID(); + private static final String INPUT_SOURCE_URIS_KEY = "uris"; + private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes"; + private static final String INPUT_SOURCE_OBJECTS_KEY = "objects"; + private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json"; + private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json"; + private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json"; + + @DataProvider + public static Object[][] resources() + { + return new Object[][]{ + {new Pair<>(INPUT_SOURCE_URIS_KEY, + ImmutableList.of( + "azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1, + "azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2, + "azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3 + ) + )}, + {new Pair<>(INPUT_SOURCE_PREFIXES_KEY, + ImmutableList.of( + "azure://%%BUCKET%%/%%PATH%%" + ) + )}, + {new Pair<>(INPUT_SOURCE_OBJECTS_KEY, + ImmutableList.of( + ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_1), + ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_2), + ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_3) + ) + )} + }; + } + + @Test(dataProvider = "resources") + public void testAzureIndexData(Pair azureInputSource) throws Exception + { + try ( + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + ) { + final Function azurePropsTransform = spec -> { + try { + String inputSourceValue = jsonMapper.writeValueAsString(azureInputSource.rhs); + inputSourceValue = StringUtils.replace( + inputSourceValue, + "%%BUCKET%%", + config.getCloudBucket() + ); + inputSourceValue = StringUtils.replace( + inputSourceValue, + "%%PATH%%", + config.getCloudPath() + ); + + spec = StringUtils.replace( + spec, + "%%PARTITIONS_SPEC%%", + jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null)) + ); + spec = StringUtils.replace( + spec, + "%%INPUT_SOURCE_TYPE%%", + "azure" + ); + spec = StringUtils.replace( + spec, + "%%INPUT_SOURCE_PROPERTY_KEY%%", + azureInputSource.lhs + ); + return StringUtils.replace( + spec, + "%%INPUT_SOURCE_PROPERTY_VALUE%%", + inputSourceValue + ); + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK, + azurePropsTransform, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + } + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsParallelIndexTest.java new file mode 100644 index 000000000000..3cbf1ad26f0d --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsParallelIndexTest.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.indexer; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +import java.io.Closeable; +import java.util.List; +import java.util.UUID; +import java.util.function.Function; + +/** + * IMPORTANT: + * To run this test, you must: + * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and + * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file. + * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json + * located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1. + * 3) Provide -Doverride.config.path= with gcs configs set. See + * integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide. 
+ * 4) Provide -Dresource.file.dir.path= with folder that contains GOOGLE_APPLICATION_CREDENTIALS file + */ +@Test(groups = TestNGGroup.GCS_DEEP_STORAGE) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITGcsParallelIndexTest extends AbstractITBatchIndexTest +{ + private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json"; + private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; + private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID(); + private static final String INPUT_SOURCE_URIS_KEY = "uris"; + private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes"; + private static final String INPUT_SOURCE_OBJECTS_KEY = "objects"; + private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json"; + private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json"; + private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json"; + + @DataProvider + public static Object[][] resources() + { + return new Object[][]{ + {new Pair<>(INPUT_SOURCE_URIS_KEY, + ImmutableList.of( + "gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1, + "gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2, + "gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3 + ) + )}, + {new Pair<>(INPUT_SOURCE_PREFIXES_KEY, + ImmutableList.of( + "gs://%%BUCKET%%/%%PATH%%" + ) + )}, + {new Pair<>(INPUT_SOURCE_OBJECTS_KEY, + ImmutableList.of( + ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_1), + ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_2), + ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_3) + ) + )} + }; + } + + @Test(dataProvider = "resources") + public void testGcsIndexData(Pair gcsInputSource) throws Exception + { + try ( + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + ) { + final Function gcsPropsTransform = spec -> { + try { + String inputSourceValue = jsonMapper.writeValueAsString(gcsInputSource.rhs); + inputSourceValue = StringUtils.replace( + inputSourceValue, + "%%BUCKET%%", + config.getCloudBucket() + ); + inputSourceValue = StringUtils.replace( + inputSourceValue, + "%%PATH%%", + config.getCloudPath() + ); + + spec = StringUtils.replace( + spec, + "%%PARTITIONS_SPEC%%", + jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null)) + ); + spec = StringUtils.replace( + spec, + "%%INPUT_SOURCE_TYPE%%", + "google" + ); + spec = StringUtils.replace( + spec, + "%%INPUT_SOURCE_PROPERTY_KEY%%", + gcsInputSource.lhs + ); + return StringUtils.replace( + spec, + "%%INPUT_SOURCE_PROPERTY_VALUE%%", + inputSourceValue + ); + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK, + gcsPropsTransform, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + } + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ParallelIndexTest.java new file mode 100644 index 000000000000..8e6381d6473c --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ParallelIndexTest.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.indexer; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +import java.io.Closeable; +import java.util.List; +import java.util.UUID; +import java.util.function.Function; + +/** + * IMPORTANT: + * To run this test, you must: + * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and + * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file. + * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json + * located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1. + * 3) Provide -Doverride.config.path= with s3 credentials/configs set. See + * integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide. 
+ */ +@Test(groups = TestNGGroup.S3_DEEP_STORAGE) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITS3ParallelIndexTest extends AbstractITBatchIndexTest +{ + private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json"; + private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; + private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID(); + private static final String INPUT_SOURCE_URIS_KEY = "uris"; + private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes"; + private static final String INPUT_SOURCE_OBJECTS_KEY = "objects"; + private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json"; + private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json"; + private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json"; + + @DataProvider + public static Object[][] resources() + { + return new Object[][]{ + {new Pair<>(INPUT_SOURCE_URIS_KEY, + ImmutableList.of( + "s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1, + "s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2, + "s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3 + ) + )}, + {new Pair<>(INPUT_SOURCE_PREFIXES_KEY, + ImmutableList.of( + "s3://%%BUCKET%%/%%PATH%%" + ) + )}, + {new Pair<>(INPUT_SOURCE_OBJECTS_KEY, + ImmutableList.of( + ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_1), + ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_2), + ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_3) + ) + )} + }; + } + + @Test(dataProvider = "resources") + public void testS3IndexData(Pair s3InputSource) throws Exception + { + try ( + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + ) { + final Function s3PropsTransform = spec -> { + try { + String inputSourceValue = jsonMapper.writeValueAsString(s3InputSource.rhs); + inputSourceValue = StringUtils.replace( + inputSourceValue, + "%%BUCKET%%", + config.getCloudBucket() + ); + inputSourceValue = StringUtils.replace( + inputSourceValue, + "%%PATH%%", + config.getCloudPath() + ); + + spec = StringUtils.replace( + spec, + "%%PARTITIONS_SPEC%%", + jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null)) + ); + spec = StringUtils.replace( + spec, + "%%INPUT_SOURCE_TYPE%%", + "s3" + ); + spec = StringUtils.replace( + spec, + "%%INPUT_SOURCE_PROPERTY_KEY%%", + s3InputSource.lhs + ); + return StringUtils.replace( + spec, + "%%INPUT_SOURCE_PROPERTY_VALUE%%", + inputSourceValue + ); + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK, + s3PropsTransform, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + } + } +} diff --git a/integration-tests/src/test/resources/indexer/wikipedia_cloud_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_cloud_index_task.json new file mode 100644 index 000000000000..23f93cd1f9f8 --- /dev/null +++ b/integration-tests/src/test/resources/indexer/wikipedia_cloud_index_task.json @@ -0,0 +1,87 @@ +{ + "type": "index_parallel", + "spec": { + "dataSchema": { + "dataSource": "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, + 
"metricsSpec": [ + { + "type": "count", + "name": "count" + }, + { + "type": "doubleSum", + "name": "added", + "fieldName": "added" + }, + { + "type": "doubleSum", + "name": "deleted", + "fieldName": "deleted" + }, + { + "type": "doubleSum", + "name": "delta", + "fieldName": "delta" + }, + { + "name": "thetaSketch", + "type": "thetaSketch", + "fieldName": "user" + }, + { + "name": "quantilesDoublesSketch", + "type": "quantilesDoublesSketch", + "fieldName": "delta" + }, + { + "name": "HLLSketchBuild", + "type": "HLLSketchBuild", + "fieldName": "user" + } + ], + "granularitySpec": { + "segmentGranularity": "DAY", + "queryGranularity": "second", + "intervals" : [ "2013-08-31/2013-09-02" ] + } + }, + "ioConfig": { + "type": "index_parallel", + "inputSource": { + "type": "%%INPUT_SOURCE_TYPE%%", + "%%INPUT_SOURCE_PROPERTY_KEY%%": %%INPUT_SOURCE_PROPERTY_VALUE%% + }, + "inputFormat": { + "type": "json" + } + }, + "tuningConfig": { + "type": "index_parallel", + "maxNumConcurrentSubTasks": 10, + "partitionsSpec": %%PARTITIONS_SPEC%%, + "splitHintSpec": { + "type": "maxSize", + "maxSplitSize": 1 + } + } + } +} \ No newline at end of file