diff --git a/distribution/pom.xml b/distribution/pom.xml index d16b3b8fa86d..d58e9f2782d7 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -38,24 +38,69 @@ druid-services ${project.parent.version} - - io.druid - extensions-distribution - ${project.parent.version} - extensions-repo - zip - + + org.codehaus.mojo + exec-maven-plugin + + + install + + exec + + + java + + -classpath + + -Ddruid.extensions.loadList=[] + io.druid.cli.Main + tools + pull-deps + --clean + --defaultVersion + ${project.parent.version} + -c + io.druid.extensions:druid-examples + -c + io.druid.extensions:druid-azure-extensions + -c + io.druid.extensions:druid-cassandra-storage + -c + io.druid.extensions:druid-hdfs-storage + -c + io.druid.extensions:druid-histogram + -c + io.druid.extensions:druid-kafka-eight + -c + io.druid.extensions:druid-kafka-eight-simple-consumer + -c + io.druid.extensions:druid-kafka-extraction-namespace + -c + io.druid.extensions:mysql-metadata-storage + -c + io.druid.extensions:druid-namespace-lookup + -c + io.druid.extensions:postgresql-metadata-storage + -c + io.druid.extensions:druid-rabbitmq + -c + io.druid.extensions:druid-s3-extensions + + + + + org.apache.maven.plugins maven-assembly-plugin distro-assembly - package + install single @@ -67,6 +112,20 @@ + + mysql-distro-assembly + install + + single + + + mysql-metdata-storage + posix + + src/assembly/mysql_assembly.xml + + + @@ -81,6 +140,20 @@ + + org.apache.maven.plugins + maven-clean-plugin + + + + ${project.basedir}/druid_extensions + + + ${project.basedir}/hadoop_druid_dependencies + + + + diff --git a/distribution/src/assembly/assembly.xml b/distribution/src/assembly/assembly.xml index 6ea46b3a7ee6..caba48d4e2bc 100644 --- a/distribution/src/assembly/assembly.xml +++ b/distribution/src/assembly/assembly.xml @@ -24,6 +24,23 @@ tar.gz + + druid_extensions + + */* + + + mysql-metadata-storage/** + + druid_extensions + + + hadoop_druid_dependencies + + */*/* + + hadoop_druid_dependencies + 
../examples/config diff --git a/distribution/src/assembly/mysql_assembly.xml b/distribution/src/assembly/mysql_assembly.xml new file mode 100644 index 000000000000..8335ea3fea6d --- /dev/null +++ b/distribution/src/assembly/mysql_assembly.xml @@ -0,0 +1,35 @@ + + + + + bin + + tar.gz + + + + druid_extensions/mysql-metadata-storage + + * + + ./ + + + diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index 708755e59311..b15f86e84764 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -21,10 +21,9 @@ Many of Druid's external dependencies can be plugged in as modules. Extensions c |Property|Description|Default| |--------|-----------|-------| -|`druid.extensions.remoteRepositories`|This is a JSON Array list of remote repositories to load dependencies from. If this is not set to '[]', Druid will try to download extensions at the specified remote repository.|["http://repo1.maven.org/maven2/", "https://metamx.artifactoryonline.com/metamx/pub-libs-releases-local"]| -|`druid.extensions.localRepository`|. The way maven gets dependencies is that it downloads them to a "local repository" on your local disk and then collects the paths to each of the jars. This specifies the directory to consider the "local repository". If this is set, remoteRepositories is not required.|`~/.m2/repository`| -|`druid.extensions.coordinates`|This is a JSON array of "groupId:artifactId[:version]" maven coordinates. For artifacts without version specified, Druid will append the default version. Notice: extensions explicitly specified in this property will have precedence over ones included in the classpath when Druid loads extensions. 
If there are duplicate extensions, Druid will only load ones explicitly specified here|[]| -|`druid.extensions.defaultVersion`|Version to use for extension artifacts without version information.|`druid-server` artifact version.| +|`druid.extensions.directory`|The root extension directory where user can put extensions related files. Druid will load extensions stored under this directory.|`druid_extensions` (This is a relative path to Druid's working directory)| +|`druid.extensions.hadoopDependenciesDir`|The root hadoop dependencies directory where user can put hadoop related dependencies files. Druid will load the dependencies based on the hadoop coordinate specified in the hadoop index task.|`hadoop_druid_dependencies` (This is a relative path to Druid's working directory)| +|`druid.extensions.loadList`|A JSON array of extensions to load from extension directories by Druid. If it is not specified, its value will be `null` and Druid will load all the extensions under `druid.extensions.directory`. If its value is empty list `[]`, then no extensions will be loaded at all.|null| |`druid.extensions.searchCurrentClassloader`|This is a boolean flag that determines if Druid will search the main classloader for extensions. It defaults to true but can be turned off if you have reason to not automatically add all modules on the classpath.|true| ### Zookeeper diff --git a/docs/content/dependencies/metadata-storage.md b/docs/content/dependencies/metadata-storage.md index f26d6cfae095..9ab710c86383 100644 --- a/docs/content/dependencies/metadata-storage.md +++ b/docs/content/dependencies/metadata-storage.md @@ -15,8 +15,13 @@ The following metadata storage engines are supported: * MySQL (io.druid.extensions:mysql-metadata-storage) * PostgreSQL (io.druid.extensions:postgresql-metadata-storage) -To choose a metadata storage, set the `druid.extensions` configuration to -include the extension for the metadata storage you plan to use. +To choose a metadata storage, + +1. 
Make sure Druid can pick up the extension files from either classpath or +extensions directory, see [Including Extensions](../operations/including-extensions.html) for more information. + +2. set the `druid.extensions` configuration to include the extension for the +metadata storage you plan to use. See below. ## Setting up MySQL @@ -55,13 +60,18 @@ include the extension for the metadata storage you plan to use. with the hostname of the database. ```properties - druid.extensions.coordinates=[\"io.druid.extensions:mysql-metadata-storage"] + druid.extensions.loadList=["mysql-metadata-storage"] druid.metadata.storage.type=mysql druid.metadata.storage.connector.connectURI=jdbc:mysql:///druid_test druid.metadata.storage.connector.user=druid druid.metadata.storage.connector.password=diurd ``` + Note: metadata storage extension is not packaged within the main Druid tarball, it is + packaged in a separate tarball that can be downloaded from [here](http://druid.io/downloads.html). + However, you can always get it using [pull-deps](../pull-deps.html), or you can even build + it from source code, see [Build from Source](../development/build.html) + ## Setting up PostgreSQL 1. Install PostgreSQL @@ -97,7 +107,7 @@ include the extension for the metadata storage you plan to use. with the hostname of the database. 
```properties - druid.extensions.coordinates=[\"io.druid.extensions:postgresql-metadata-storage"] + druid.extensions.loadList=["postgresql-metadata-storage"] druid.metadata.storage.type=postgresql druid.metadata.storage.connector.connectURI=jdbc:postgresql:///druid_test druid.metadata.storage.connector.user=druid diff --git a/docs/content/development/build.md b/docs/content/development/build.md index d88aad1c24b6..3db65fd7a930 100644 --- a/docs/content/development/build.md +++ b/docs/content/development/build.md @@ -16,11 +16,14 @@ To do so, run these commands: ``` git clone git@github.com:druid-io/druid.git cd druid -mvn clean package +mvn clean install ``` This will compile the project and create the Druid binary distribution tar under -`services/target/druid-VERSION-bin.tar.gz`. +`distribution/target/druid-VERSION-bin.tar.gz`. + +This will also create a tarball that contains `mysql-metadata-storage` extension under + `distribution/target/mysql-metdata-storage-bin.tar.gz`. If you want Druid to load `mysql-metadata-storage`, you can first untar `druid-VERSION-bin.tar.gz`, then go to ```druid-/druid_extensions```, untar `mysql-metdata-storage-bin.tar.gz` there. Now just specify `mysql-metadata-storage` in `druid.extensions.loadList` so that Druid will pick it up. See [Including Extensions](../operations/including-extensions.html) for more information. 
You can find the example executables in the examples/bin directory: diff --git a/docs/content/ingestion/batch-ingestion.md b/docs/content/ingestion/batch-ingestion.md index 027230a1bd35..9f621de9bb3c 100644 --- a/docs/content/ingestion/batch-ingestion.md +++ b/docs/content/ingestion/batch-ingestion.md @@ -413,7 +413,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon ### Running the Task -The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopDruidIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally. +The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopDruidIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `metadataUpdateSpec`. The Indexing Service takes care of setting these fields internally. To run the task: diff --git a/docs/content/misc/tasks.md b/docs/content/misc/tasks.md index 8c116e248f24..8d3cc0cc9a63 100644 --- a/docs/content/misc/tasks.md +++ b/docs/content/misc/tasks.md @@ -123,7 +123,7 @@ The indexSpec is optional and default parameters will be used if not specified. |dimensionCompression|compression format for dimension columns (currently only affects single-value dimensions, multi-value dimensions are always uncompressed)|`"uncompressed"`, `"lz4"`, `"lzf"`|`"lz4"`|no| |metricCompression|compression format for metric columns, defaults to LZ4|`"lz4"`, `"lzf"`|`"lz4"`|no| -### Index Hadoop Task +### Hadoop Index Task The Hadoop Index Task is used to index larger data sets that require the parallelization and processing power of a Hadoop cluster. 
@@ -138,14 +138,17 @@ The Hadoop Index Task is used to index larger data sets that require the paralle |--------|-----------|---------| |type|The task type, this should always be "index_hadoop".|yes| |spec|A Hadoop Index Spec. See [Batch Ingestion](../ingestion/batch-ingestion.html)|yes| -|hadoopCoordinates|The Maven \:\:\ of Hadoop to use. The default is "org.apache.hadoop:hadoop-client:2.3.0".|no| +|hadoopDependencyCoordinates|A JSON array of Hadoop dependency coordinates that Druid will use, this property will override the default Hadoop coordinates. Once specified, Druid will look for those Hadoop dependencies from the location specified by `druid.extensions.hadoopDependenciesDir`|no| +|classpathPrefix|Classpath that will be pre-appended for the peon process.|no| +The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopDruidIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `metadataUpdateSpec`. The Indexing Service takes care of setting these fields internally. -The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopDruidIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally. +Note: Before using Hadoop Index Task, please make sure to include Hadoop dependencies so that Druid knows where to pick them up during runtime, see [Include Hadoop Dependencies](../operations/other-hadoop.html). +Druid uses hadoop-client 2.3.0 as the default Hadoop version, you can get it from the released Druid tarball(under folder ```hadoop_druid_dependencies```) or use [pull-deps](../pull-deps.html). #### Using your own Hadoop distribution -Druid is compiled against Apache hadoop-client 2.3.0. 
However, if you happen to use a different flavor of hadoop that is API compatible with hadoop-client 2.3.0, you should only have to change the hadoopCoordinates property to point to the maven artifact used by your distribution. For non-API compatible versions, please see [here](../operations/other-hadoop.html). +Druid is compiled against Apache hadoop-client 2.3.0. However, if you happen to use a different flavor of Hadoop that is API compatible with hadoop-client 2.3.0, you should first make sure Druid knows where to pick it up, then you should only have to change the `hadoopDependencyCoordinates` property to point to the list of maven artifact used by your distribution. For non-API compatible versions and more information, please see [here](../operations/other-hadoop.html). #### Resolving dependency conflicts running HadoopIndexTask diff --git a/docs/content/operations/including-extensions.md b/docs/content/operations/including-extensions.md index d40d8ebb16a1..40588e7174cf 100644 --- a/docs/content/operations/including-extensions.md +++ b/docs/content/operations/including-extensions.md @@ -13,22 +13,58 @@ Druid extensions can be specified in the `common.runtime.properties`. There are If you add your extension jar to the classpath at runtime, Druid will load it into the system. This mechanism is relatively easy to reason about, but it also means that you have to ensure that all dependency jars on the classpath are compatible. That is, Druid makes no provisions while using this method to maintain class loader isolation so you must make sure that the jars on your classpath are mutually compatible. -### Specify maven coordinates +### Add to the extension directory -Druid has the ability to automatically load extension jars from maven at runtime. With this mechanism, Druid also loads up the dependencies of the extension jar into an isolated class loader. 
That means that your extension can depend on a different version of a library that Druid also uses and both can co-exist. +If you don't want to fiddle with classpath, you can create an extension directory and tell Druid to load extensions from there. -### I want classloader isolation, but I don't want my production machines downloading their own dependencies. What should I do? +To let Druid load your extensions, follow the steps below -If you want to take advantage of the maven-based classloader isolation but you are also rightly frightened by the prospect of each of your production machines downloading their own dependencies on deploy, this section is for you. +1) Specify `druid.extensions.directory` (root directory for normal Druid extensions). If you don't specify it, Druid will use its default value, see [Configuration](../configuration/index.html). -The trick to doing this is +2) Prepare normal extension directories under root extension directory. Under the root extension directory, you should create sub-directories for each extension you might want to load. Inside each sub-directory, you can put extension related files in it. (If you don't want to manually setup the extension directory, Druid also provides a [pull-deps](../pull-deps.html) tool that can help you generate these directories automatically) -1) Specify a local directory for `druid.extensions.localRepository` +Example: -2) Run the `tools pull-deps` command to pull all the specified dependencies down into your local repository +Suppose you specify `druid.extensions.directory=/usr/local/druid/druid_extensions`, and want Druid to load normal extensions ```druid-examples```, ```druid-kafka-eight``` and ```mysql-metadata-storage```. 
-3) Bundle up the local repository along with your other Druid stuff into whatever you use for a deployable artifact +Then under ```druid_extensions```, it should look like this, -4) Run Your druid processes with `druid.extensions.remoteRepositories=[]` and a local repository set to wherever your bundled "local" repository is located +``` +druid_extensions/ +├── druid-examples +│   ├── commons-beanutils-1.8.3.jar +│   ├── commons-digester-1.8.jar +│   ├── commons-logging-1.1.1.jar +│   ├── commons-validator-1.4.0.jar +│   ├── druid-examples-0.8.0-rc1.jar +│   ├── twitter4j-async-3.0.3.jar +│   ├── twitter4j-core-3.0.3.jar +│   └── twitter4j-stream-3.0.3.jar +├── druid-kafka-eight +│   ├── druid-kafka-eight-0.7.3.jar +│   ├── jline-0.9.94.jar +│   ├── jopt-simple-3.2.jar +│   ├── kafka-clients-0.8.2.1.jar +│   ├── kafka_2.10-0.8.2.1.jar +│   ├── log4j-1.2.16.jar +│   ├── lz4-1.3.0.jar +│   ├── metrics-core-2.2.0.jar +│   ├── netty-3.7.0.Final.jar +│   ├── scala-library-2.10.4.jar +│   ├── slf4j-log4j12-1.6.1.jar +│   ├── snappy-java-1.1.1.6.jar +│   ├── zkclient-0.3.jar +│   └── zookeeper-3.4.6.jar +└── mysql-metadata-storage + ├── jdbi-2.32.jar + ├── mysql-connector-java-5.1.34.jar + └── mysql-metadata-storage-0.8.0-rc1.jar +``` -The Druid processes will then only load up jars from the local repository and will not try to go out onto the internet to find the maven dependencies. +As you can see, under ```druid_extensions``` there are three sub-directories ```druid-examples```, ```druid-kafka-eight``` and ```mysql-metadata-storage```, each sub-directory denotes an extension that Druid might load. + +3) Tell Druid which extensions to load. Now you have prepared your extension directories, if you want Druid to load a specific list of extensions under root extension directory, you need to specify `druid.extensions.loadList`. 
Using the example above, if you want Druid to load ```druid-kafka-eight``` and ```mysql-metadata-storage```, you can specify `druid.extensions.loadList=["druid-kafka-eight", "mysql-metadata-storage"]`. + +If you specify `druid.extensions.loadList=[]`, Druid won't load any extension from file system. + +If you don't specify `druid.extensions.loadList`, Druid will load all the extensions under root extension directory. diff --git a/docs/content/operations/other-hadoop.md b/docs/content/operations/other-hadoop.md index 1026a54ddb21..d686b159446a 100644 --- a/docs/content/operations/other-hadoop.md +++ b/docs/content/operations/other-hadoop.md @@ -1,21 +1,72 @@ --- layout: doc_page --- -Working with different versions of Hadoop may require a bit of extra work for the time being. We will make changes to support different Hadoop versions in the near future. If you have problems outside of these instructions, please feel free to contact us in IRC or on the [forum](https://groups.google.com/forum/#!forum/druid-development). +# Work with different versions of Hadoop -Working with Hadoop 2.x ------------------------ -The default version of Hadoop bundled with Druid is 2.3. This should work out of the box. +## Include Hadoop dependencies -To override the default Hadoop version, both the Hadoop Index Task and the standalone Hadoop indexer support the parameter `hadoopDependencyCoordinates`. You can pass another set of Hadoop coordinates through this parameter (e.g. You can specify coordinates for Hadoop 2.4.0 as `["org.apache.hadoop:hadoop-client:2.4.0"]`). +There are two different ways to let Druid pick up your Hadoop version, choose the one that fits your need. + +### Add your Hadoop dependencies to the Hadoop dependencies directory + +You can create a Hadoop dependency directory and tell Druid to load your Hadoop jars from there. 
+ +To make this work, follow the steps below + +(1) Specify `druid.extensions.hadoopDependenciesDir` (root directory for Hadoop related dependencies). If you don't specify it, Druid will use its default value, see [Configuration](../configuration/index.html). + +(2) Set-up Hadoop dependencies directories under root Hadoop dependency directory. Under the root directory, you should create sub-directories for each Hadoop dependencies. Inside each sub-directory, created a sub-sub-directory whose name is the version of Hadoop it contains, and inside that sub-sub-directory, put Hadoop jars in it. This file structure is almost same as normal Druid extensions described in [Including-Extensions](../including-extensions.html), except that there is an extra layer of folder that specifies the version of Hadoop. (If you don't want to manually setup this directory, Druid also provides a [pull-deps](../pull-deps.html) tool that can help you generate these directories automatically) + +Example: + +Suppose you specify `druid.extensions.hadoopDependenciesDir=/usr/local/druid/hadoop_druid_dependencies`, and you want to prepare both `hadoop-client` 2.3.0 and 2.4.0 for Druid, + +Then you can either use [pull-deps](../pull-deps.html) or manually set up Hadoop dependencies directories such that under ```hadoop_druid_dependencies```, it looks like this, + +``` +hadoop_druid_dependencies/ +└── hadoop-client + ├── 2.3.0 + │   ├── activation-1.1.jar + │   ├── avro-1.7.4.jar + │   ├── commons-beanutils-1.7.0.jar + │   ├── commons-beanutils-core-1.8.0.jar + │   ├── commons-cli-1.2.jar + │   ├── commons-codec-1.4.jar + ..... lots of jars + └── 2.4.0 + ├── activation-1.1.jar + ├── avro-1.7.4.jar + ├── commons-beanutils-1.7.0.jar + ├── commons-beanutils-core-1.8.0.jar + ├── commons-cli-1.2.jar + ├── commons-codec-1.4.jar + ..... lots of jars +``` + +As you can see, under ```hadoop-client```, there are two sub-directories, each denotes a version of ```hadoop-client```. 
During runtime, Druid will look for these directories and load appropriate ```hadoop-client``` based on `hadoopDependencyCoordinates` passed to [Hadoop Index Task](../misc/tasks.html). + +### Append your Hadoop jars to the Druid classpath + +If you really don't like the way above, and you just want to use one specific Hadoop version, and don't want Druid to work with different Hadoop versions, then you can + +(1) Set `druid.indexer.task.defaultHadoopCoordinates=[]`. `druid.indexer.task.defaultHadoopCoordinates` specifies the default Hadoop coordinates that Druid uses. Its default value is `["org.apache.hadoop:hadoop-client:2.3.0"]`. By setting it to an empty list, Druid will not load any other Hadoop dependencies except the ones specified in the classpath. + +(2) Append your Hadoop jars to the classpath, Druid will load them into the system. This mechanism is relatively easy to reason about, but it also means that you have to ensure that all dependency jars on the classpath are compatible. That is, Druid makes no provisions while using this method to maintain class loader isolation so you must make sure that the jars on your classpath are mutually compatible. + +## Working with Hadoop 2.x + +The default version of Hadoop bundled with Druid is 2.3. + +To override the default Hadoop version, both the Hadoop Index Task and the standalone Hadoop indexer support the parameter `hadoopDependencyCoordinates` (see [Hadoop Index Task](../misc/tasks.html)). You can pass another set of Hadoop coordinates through this parameter (e.g. You can specify coordinates for Hadoop 2.4.0 as `["org.apache.hadoop:hadoop-client:2.4.0"]`), which will overwrite the default Hadoop coordinates Druid uses. The Hadoop Index Task takes this parameter as part of the task JSON and the standalone Hadoop indexer takes this parameter as a command line argument. If you are still having problems, include all relevant hadoop jars at the beginning of the classpath of your indexing or historical nodes. 
-Working with CDH ----------------- -Members of the community have reported dependency conflicts between the version of Jackson used in CDH and Druid. Currently, our best workaround is to edit Druid's pom.xml dependencies to match the version of Jackson in your hadoop version and recompile Druid. +## Working with CDH + +Members of the community have reported dependency conflicts between the version of Jackson used in CDH and Druid. Currently, our best workaround is to edit Druid's pom.xml dependencies to match the version of Jackson in your Hadoop version and recompile Druid. For more about building Druid, please see [Building Druid](../development/build.html). @@ -29,7 +80,7 @@ Another workaround solution is to build a custom fat jar of Druid using [sbt](ht You can always add more building targets or remove the ones you don't need. -(4) In the same directory creat a new directory named 'project'. +(4) In the same directory create a new directory named 'project'. (5) Put the druid source code into 'druid_build/project'. @@ -42,10 +93,10 @@ addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.13.0") (8) In the 'druid_build/target/scala-2.10' folder, you will find the fat jar you just build. -(9) Make sure the jars you've uploaded has been completely removed. The hdfs directory is by default '/tmp/druid-indexing/classpath'. +(9) Make sure the jars you've uploaded has been completely removed. The HDFS directory is by default '/tmp/druid-indexing/classpath'. (10) Include the fat jar in the classpath when you start the indexing service. Make sure you've removed 'lib/*' from your classpath because now the fat jar includes all you need. -Working with Hadoop 1.x and older ---------------------------------- +## Working with Hadoop 1.x and older + We recommend recompiling Druid with your particular version of Hadoop by changing the dependencies in Druid's pom.xml files. 
Make sure to also either override the default `hadoopDependencyCoordinates` in the code or pass your Hadoop version in as part of indexing. diff --git a/docs/content/operations/pull-deps.md b/docs/content/operations/pull-deps.md new file mode 100644 index 000000000000..493be449fded --- /dev/null +++ b/docs/content/operations/pull-deps.md @@ -0,0 +1,94 @@ +--- +layout: doc_page +--- +# pull-deps Tool + +`pull-deps` is a tool that can pull down dependencies to the local repository and lay dependencies out into the extension directory as needed. + +`pull-deps` has several command line options, they are as follows: + +`-c` or `--coordinate` (Can be specified multiple times) + + Extension coordinate to pull down, followed by a maven coordinate, e.g. io.druid.extensions:mysql-metadata-storage + +`-h` or `--hadoop-coordinate` (Can be specified multiple times) + + Hadoop dependency to pull down, followed by a maven coordinate, e.g. org.apache.hadoop:hadoop-client:2.4.0 + +`--no-default-hadoop` + + Don't pull down the default hadoop coordinate, i.e., org.apache.hadoop:hadoop-client:2.3.0. If `-h` option is supplied, then default hadoop coordinate will not be downloaded. + +`--clean` + + Remove existing extension and hadoop dependencies directories before pulling down dependencies. + +`-l` or `--localRepository` + + A local repository that Maven will use to put downloaded files. Then pull-deps will lay these files out into the extensions directory as needed. + +`-r` or `--remoteRepositories` + + A JSON Array list of remote repositories to load dependencies from. + +`-d` or `--defaultVersion` + + Version to use for extension coordinate that doesn't have version information. 
For example, if extension coordinate is `io.druid.extensions:mysql-metadata-storage`, and default version is `0.8.0`, then this coordinate will be treated as `io.druid.extensions:mysql-metadata-storage:0.8.0` + +To run `pull-deps`, you should + +1) Specify `druid.extensions.directory` and `druid.extensions.hadoopDependenciesDir`, these two properties tell `pull-deps` where to put extensions. If you don't specify them, default values will be used, see [Configuration](../configuration/index.html). + +2) Tell `pull-deps` what to download using `-c` or `-h` option, which are followed by a maven coordinate. + +Example: + +Suppose you want to download ```druid-examples```, ```mysql-metadata-storage``` and ```hadoop-client```(both 2.3.0 and 2.4.0) with a specific version, you can run `pull-deps` command with `-c io.druid.extensions:druid-examples:0.8.0`, `-c io.druid.extensions:mysql-metadata-storage:0.8.0`, `-h org.apache.hadoop:hadoop-client:2.3.0` and `-h org.apache.hadoop:hadoop-client:2.4.0`, an example command would be: + +```java -classpath "/my/druid/library/*" io.druid.cli.Main tools pull-deps --clean -c io.druid.extensions:mysql-metadata-storage:0.8.0 -c io.druid.extensions:druid-examples:0.8.0 -h org.apache.hadoop:hadoop-client:2.3.0 -h org.apache.hadoop:hadoop-client:2.4.0``` + +Because `--clean` is supplied, this command will first remove the directories specified at `druid.extensions.directory` and `druid.extensions.hadoopDependenciesDir`, then recreate them and start downloading the extensions there. 
After finishing downloading, if you go to the extension directories you specified, you will see + +``` +tree druid_extensions +druid_extensions +├── druid-examples +│   ├── commons-beanutils-1.8.3.jar +│   ├── commons-digester-1.8.jar +│   ├── commons-logging-1.1.1.jar +│   ├── commons-validator-1.4.0.jar +│   ├── druid-examples-0.8.0.jar +│   ├── twitter4j-async-3.0.3.jar +│   ├── twitter4j-core-3.0.3.jar +│   └── twitter4j-stream-3.0.3.jar +└── mysql-metadata-storage + ├── jdbi-2.32.jar + ├── mysql-connector-java-5.1.34.jar + └── mysql-metadata-storage-0.8.0.jar +``` + +``` +tree hadoop_druid_dependencies +hadoop_druid_dependencies/ +└── hadoop-client + ├── 2.3.0 + │   ├── activation-1.1.jar + │   ├── avro-1.7.4.jar + │   ├── commons-beanutils-1.7.0.jar + │   ├── commons-beanutils-core-1.8.0.jar + │   ├── commons-cli-1.2.jar + │   ├── commons-codec-1.4.jar + ..... lots of jars + └── 2.4.0 + ├── activation-1.1.jar + ├── avro-1.7.4.jar + ├── commons-beanutils-1.7.0.jar + ├── commons-beanutils-core-1.8.0.jar + ├── commons-cli-1.2.jar + ├── commons-codec-1.4.jar + ..... lots of jars +``` + +Note that if you specify `--defaultVersion`, you don't have to put version information in the coordinate. 
For example, if you want both `druid-examples` and `mysql-metadata-storage` to use version `0.8.0`, you can change the command above to + +```java -classpath "/my/druid/library/*" io.druid.cli.Main tools pull-deps --defaultVersion 0.8.0 --clean -c io.druid.extensions:mysql-metadata-storage -c io.druid.extensions:druid-examples -h org.apache.hadoop:hadoop-client:2.3.0 -h org.apache.hadoop:hadoop-client:2.4.0``` diff --git a/docs/content/tutorials/firewall.md b/docs/content/tutorials/firewall.md index 2de0de4abcbe..ce6301a06683 100644 --- a/docs/content/tutorials/firewall.md +++ b/docs/content/tutorials/firewall.md @@ -4,23 +4,7 @@ layout: doc_page What to Do When You Have a Firewall ----------------------------------- -When you are behind a firewall, the Maven Druid dependencies will not be accessible, as well as the IRC wikipedia channels that feed realtime data into Druid. To workaround those two challenges, you will need to: - -1. Make the Maven Druid dependencies available offline -2. Make the Wikipedia example GeoLite DB dependency available offline - -## Making Maven Druid Dependencies Available Offline -1. Extract Druid to a machine that has internet access; e.g. `/Users/foo/druid-` -2. Create a repository directory to download the dependencies to; e.g. `/Users/foo/druid-\repo` -3. Create property `druid.extensions.localRepository=`*`path to repo directory`* in the *`Druid Directory`*`\config\_common/common.runtime.properties` file; e.g. `druid.extensions.localRepository=/Users/foo/druid-/repo` -4. From within Druid directory, run the `pull-deps` command to download all Druid dependencies to the repository specified in the `common.runtime.properties` file: - - ``` - java -classpath "config\_common;lib\*" io.druid.cli.Main tools pull-deps - ``` - -5. Once all dependencies have been downloaded successfully, replicate the `repo` directory to the machine behind the firewall; e.g. `/opt/druid-/repo` -6. 
Create property `druid.extensions.localRepository=`*`path to repo directory`* in the *`Druid Directory`*`/config/_common/common.runtime.properties` file; e.g. `druid.extensions.localRepository=/opt/druid-/repo` +When you are behind a firewall, if the IRC wikipedia channels that feed realtime data into Druid are not accessible, then there is nothing you can do. If IRC channels are accessible, but downloading Geolite DB from maxmind is firewalled, you can workaround this challenge by making GeoLite DB dependency available offline, see below. ## Making the Wikipedia Example GeoLite DB Dependency Available Offline 1. Download GeoLite2 City DB from http://dev.maxmind.com/geoip/geoip2/geolite2/ diff --git a/docs/content/tutorials/tutorial-a-first-look-at-druid.md b/docs/content/tutorials/tutorial-a-first-look-at-druid.md index 99df1bce73f5..37a058de08e1 100644 --- a/docs/content/tutorials/tutorial-a-first-look-at-druid.md +++ b/docs/content/tutorials/tutorial-a-first-look-at-druid.md @@ -74,6 +74,8 @@ You should see a bunch of files: * run_example_server.sh * run_example_client.sh * LICENSE, config, examples, lib directories +* druid_extensions (This folder contains all the extensions that could be loaded by Druid. Note that extension `mysql-metadata-storage` is packaged in a separate tarball that can be downloaded from [here](http://druid.io/downloads.html). See [Including Extensions](../operations/including-extensions.html) for more information about loading extensions. 
+* hadoop_druid_dependencies (This folder contains hadoop-client:2.3.0, see [Different Hadoop Versions](../operations/other-hadoop.html) for more information about how Druid picks up Hadoop dependencies) ## External Dependencies diff --git a/docs/content/tutorials/tutorial-loading-batch-data.md b/docs/content/tutorials/tutorial-loading-batch-data.md index ed6ccacb9059..0432d73d01aa 100644 --- a/docs/content/tutorials/tutorial-loading-batch-data.md +++ b/docs/content/tutorials/tutorial-loading-batch-data.md @@ -66,7 +66,7 @@ Note: If Zookeeper isn't running, you'll have to start it again as described in To start the Indexing Service: ```bash -java -Xmx2g -Duser.timezone=UTC -Dfile.encoding=UTF-8 -classpath config/_common:config/overlord:lib/*: io.druid.cli.Main server overlord +java -Xmx2g -Duser.timezone=UTC -Dfile.encoding=UTF-8 -classpath config/_common:config/overlord:lib/* io.druid.cli.Main server overlord ``` To start the Coordinator Node: @@ -247,7 +247,7 @@ Most common data ingestion problems are around timestamp formats and other malfo Druid is designed for large data volumes, and most real-world data sets require batch indexing be done through a Hadoop job. -For this tutorial, we used [Hadoop 2.3.0](https://archive.apache.org/dist/hadoop/core/hadoop-2.3.0/). There are many pages on the Internet showing how to set up a single-node (standalone) Hadoop cluster, which is all that's needed for this example. +For this tutorial, we used [Hadoop 2.3.0](https://archive.apache.org/dist/hadoop/core/hadoop-2.3.0/), which is included under ```hadoop_druid_dependencies```. There are many pages on the Internet showing how to set up a single-node (standalone) Hadoop cluster, which is all that's needed for this example. For more information about how Druid picks up your Hadoop version, see [here](../operations/other-hadoop.html). Before indexing the data, make sure you have a valid Hadoop cluster running. 
To build our Druid segment, we are going to submit a [Hadoop index task](../misc/tasks.html) to the indexing service. The grammar for the Hadoop index task is very similar to the index task of the last tutorial. The tutorial Hadoop index task should be located at: diff --git a/examples/bin/run_druid_server.sh b/examples/bin/run_druid_server.sh index ecc818664da0..bfbc8276463b 100755 --- a/examples/bin/run_druid_server.sh +++ b/examples/bin/run_druid_server.sh @@ -28,7 +28,6 @@ cd ${CURR_DIR} # start process JAVA_ARGS="${JAVA_ARGS} -Xmx512m -Duser.timezone=UTC -Dfile.encoding=UTF-8" -JAVA_ARGS="${JAVA_ARGS} -Ddruid.extensions.localRepository=${MAVEN_DIR}" DRUID_CP="${SCRIPT_DIR}/config/_common" DRUID_CP="${DRUID_CP}:${SCRIPT_DIR}/config/$SERVER_TYPE" diff --git a/examples/bin/run_example_server.sh b/examples/bin/run_example_server.sh index 61db9709bde6..eb0893ede5ac 100755 --- a/examples/bin/run_example_server.sh +++ b/examples/bin/run_example_server.sh @@ -37,8 +37,6 @@ fi # start process JAVA_ARGS="-Xmx512m -Duser.timezone=UTC -Dfile.encoding=UTF-8" JAVA_ARGS="${JAVA_ARGS} -Ddruid.realtime.specFile=${SPEC_FILE}" -JAVA_ARGS="${JAVA_ARGS} -Ddruid.extensions.localRepository=${MAVEN_DIR}" -JAVA_ARGS="${JAVA_ARGS} -Ddruid.extensions.remoteRepositories=[]" JAVA_ARGS="${JAVA_ARGS} -Ddruid.publish.type=noop" DRUID_CP=${EXAMPLE_LOC} diff --git a/examples/config/_common/common.runtime.properties b/examples/config/_common/common.runtime.properties index 554b6d93417d..3928823df4c2 100644 --- a/examples/config/_common/common.runtime.properties +++ b/examples/config/_common/common.runtime.properties @@ -15,10 +15,14 @@ # limitations under the License. 
# -# Extensions (no deep storage model is listed - using local fs for deep storage - not recommended for production) -# Also, for production to use mysql add, "io.druid.extensions:mysql-metadata-storage" -druid.extensions.coordinates=["io.druid.extensions:druid-examples","io.druid.extensions:druid-kafka-eight"] -druid.extensions.localRepository=extensions-repo +# Extensions specified in the load list will be loaded by Druid (no deep storage model is listed - using local fs +# for deep storage - not recommended for production) +# Also, use mysql for production, add "mysql-metadata-storage" + +# If you specify `druid.extensions.loadList=[]`, Druid won't load any extension from file system. +# If you don't specify `druid.extensions.loadList`, Druid will load all the extensions under root extension directory. +# More info: http://druid.io/docs/latest/operations/including-extensions.html +druid.extensions.loadList=["druid-examples","druid-kafka-eight"] # Zookeeper druid.zk.service.host=localhost diff --git a/extensions-distribution/pom.xml b/extensions-distribution/pom.xml deleted file mode 100644 index 3c510a0c0a0e..000000000000 --- a/extensions-distribution/pom.xml +++ /dev/null @@ -1,146 +0,0 @@ - - - - - 4.0.0 - - pom - - extensions-distribution - extensions-distribution - extensions-distribution - - - druid - io.druid - 0.9.0-SNAPSHOT - - - - - - - io.druid.extensions - druid-examples - ${project.parent.version} - true - - - - io.druid.extensions - druid-namespace-lookup - ${project.parent.version} - true - - - - io.druid.extensions - druid-cassandra-storage - ${project.parent.version} - true - - - - io.druid.extensions - druid-kafka-eight - ${project.parent.version} - true - - - - io.druid.extensions - druid-azure-extensions - ${project.parent.version} - true - - - - io.druid.extensions - mysql-metadata-storage - ${project.parent.version} - true - - - - io.druid.extensions - druid-hdfs-storage - ${project.parent.version} - true - - - - io.druid.extensions - 
postgresql-metadata-storage - ${project.parent.version} - true - - - - io.druid.extensions - druid-kafka-extraction-namespace - ${project.parent.version} - true - - - - io.druid.extensions - druid-rabbitmq - ${project.parent.version} - true - - - - io.druid.extensions - druid-s3-extensions - ${project.parent.version} - true - - - - io.druid.extensions - druid-histogram - ${project.parent.version} - true - - - - - - - org.apache.maven.plugins - maven-assembly-plugin - - - distro-assembly - package - - single - - - extensions-repo - - src/assembly/assembly.xml - - - - - - - - - diff --git a/extensions-distribution/src/assembly/assembly.xml b/extensions-distribution/src/assembly/assembly.xml deleted file mode 100644 index abd5bb5e3e79..000000000000 --- a/extensions-distribution/src/assembly/assembly.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - - extensions-repo - - zip - - - - - io.druid.extensions:* - - - - diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java index 82efbb06be42..f3487158c8a2 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java @@ -29,7 +29,6 @@ import io.druid.guice.GuiceInjectors; import io.druid.indexing.common.TaskToolbox; import io.druid.initialization.Initialization; -import io.tesla.aether.internal.DefaultTeslaAether; import java.io.File; import java.lang.reflect.InvocationTargetException; @@ -76,14 +75,10 @@ protected ClassLoader buildClassLoader(final TaskToolbox toolbox) throws Excepti ? 
hadoopDependencyCoordinates : toolbox.getConfig().getDefaultHadoopCoordinates(); - final DefaultTeslaAether aetherClient = Initialization.getAetherClient(extensionsConfig); - final List extensionURLs = Lists.newArrayList(); - for (String coordinate : extensionsConfig.getCoordinates()) { - final ClassLoader coordinateLoader = Initialization.getClassLoaderForCoordinates( - aetherClient, coordinate, extensionsConfig.getDefaultVersion() - ); - extensionURLs.addAll(Arrays.asList(((URLClassLoader) coordinateLoader).getURLs())); + for (final File extension : Initialization.getExtensionFilesToLoad(extensionsConfig)) { + final ClassLoader extensionLoader = Initialization.getClassLoaderForExtension(extension); + extensionURLs.addAll(Arrays.asList(((URLClassLoader) extensionLoader).getURLs())); } final List nonHadoopURLs = Lists.newArrayList(); @@ -91,11 +86,14 @@ protected ClassLoader buildClassLoader(final TaskToolbox toolbox) throws Excepti final List driverURLs = Lists.newArrayList(); driverURLs.addAll(nonHadoopURLs); + // put hadoop dependencies last to avoid jets3t & apache.httpcore version conflicts - for (String hadoopDependencyCoordinate : finalHadoopDependencyCoordinates) { - final ClassLoader hadoopLoader = Initialization.getClassLoaderForCoordinates( - aetherClient, hadoopDependencyCoordinate, extensionsConfig.getDefaultVersion() - ); + for (final File hadoopDependency : + Initialization.getHadoopDependencyFilesToLoad( + finalHadoopDependencyCoordinates, + extensionsConfig + )) { + final ClassLoader hadoopLoader = Initialization.getClassLoaderForExtension(hadoopDependency); driverURLs.addAll(Arrays.asList(((URLClassLoader) hadoopLoader).getURLs())); } diff --git a/pom.xml b/pom.xml index 816be54d14c4..b58ea5b6f21d 100644 --- a/pom.xml +++ b/pom.xml @@ -105,7 +105,6 @@ extensions/namespace-lookup extensions/kafka-extraction-namespace - extensions-distribution distribution diff --git a/processing/src/main/java/io/druid/guice/ExtensionsConfig.java 
b/processing/src/main/java/io/druid/guice/ExtensionsConfig.java index 89f75da01150..073fde1b8226 100644 --- a/processing/src/main/java/io/druid/guice/ExtensionsConfig.java +++ b/processing/src/main/java/io/druid/guice/ExtensionsConfig.java @@ -18,7 +18,6 @@ package io.druid.guice; import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.collect.ImmutableList; import javax.validation.constraints.NotNull; import java.util.List; @@ -27,54 +26,37 @@ */ public class ExtensionsConfig { - public static final String PACKAGE_VERSION = ExtensionsConfig.class.getPackage().getImplementationVersion(); - @JsonProperty @NotNull private boolean searchCurrentClassloader = true; @JsonProperty - @NotNull - private List coordinates = ImmutableList.of(); + private String directory = "druid_extensions"; - // default version to use for extensions without version info @JsonProperty - private String defaultVersion; + private String hadoopDependenciesDir = "hadoop_druid_dependencies"; @JsonProperty - @NotNull - private String localRepository = String.format("%s/%s", System.getProperty("user.home"), ".m2/repository"); - - @JsonProperty - @NotNull - private List remoteRepositories = ImmutableList.of( - "https://repo1.maven.org/maven2/", - "https://metamx.artifactoryonline.com/metamx/pub-libs-releases-local" - ); + private List loadList; public boolean searchCurrentClassloader() { return searchCurrentClassloader; } - public List getCoordinates() - { - return coordinates; - } - - public String getDefaultVersion() + public String getDirectory() { - return defaultVersion != null ? 
defaultVersion : PACKAGE_VERSION; + return directory; } - public String getLocalRepository() + public String getHadoopDependenciesDir() { - return localRepository; + return hadoopDependenciesDir; } - public List getRemoteRepositories() + public List getLoadList() { - return remoteRepositories; + return loadList; } @Override @@ -82,10 +64,9 @@ public String toString() { return "ExtensionsConfig{" + "searchCurrentClassloader=" + searchCurrentClassloader + - ", coordinates=" + coordinates + - ", defaultVersion='" + getDefaultVersion() + '\'' + - ", localRepository='" + localRepository + '\'' + - ", remoteRepositories=" + remoteRepositories + + ", directory='" + directory + '\'' + + ", hadoopDependenciesDir='" + hadoopDependenciesDir + '\'' + + ", loadList=" + loadList + '}'; } } diff --git a/server/src/main/java/io/druid/initialization/Initialization.java b/server/src/main/java/io/druid/initialization/Initialization.java index e98c2d99236f..8c2de783804f 100644 --- a/server/src/main/java/io/druid/initialization/Initialization.java +++ b/server/src/main/java/io/druid/initialization/Initialization.java @@ -28,7 +28,6 @@ import com.google.inject.Module; import com.google.inject.util.Modules; import com.metamx.common.ISE; -import com.metamx.common.StringUtils; import com.metamx.common.logger.Logger; import io.druid.curator.CuratorModule; import io.druid.curator.discovery.DiscoveryModule; @@ -57,27 +56,11 @@ import io.druid.server.initialization.EmitterModule; import io.druid.server.initialization.jetty.JettyServerModule; import io.druid.server.metrics.MetricsModule; -import io.tesla.aether.Repository; -import io.tesla.aether.TeslaAether; -import io.tesla.aether.internal.DefaultTeslaAether; -import org.eclipse.aether.artifact.Artifact; +import org.apache.commons.io.FileUtils; import org.eclipse.aether.artifact.DefaultArtifact; -import org.eclipse.aether.collection.CollectRequest; -import org.eclipse.aether.graph.Dependency; -import org.eclipse.aether.graph.DependencyFilter; 
-import org.eclipse.aether.graph.DependencyNode; -import org.eclipse.aether.resolution.DependencyRequest; -import org.eclipse.aether.resolution.DependencyResolutionException; -import org.eclipse.aether.util.artifact.JavaScopes; -import org.eclipse.aether.util.filter.DependencyFilterUtils; -import java.io.IOException; -import java.io.OutputStream; -import java.io.PrintStream; -import java.io.UnsupportedEncodingException; +import java.io.File; import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; import java.net.URL; import java.net.URLClassLoader; import java.util.Collection; @@ -94,10 +77,6 @@ public class Initialization private static final Logger log = new Logger(Initialization.class); private static final Map loadersMap = Maps.newHashMap(); - private static final Set exclusions = Sets.newHashSet( - "io.druid", - "com.metamx.druid" - ); private final static Map extensionsMap = Maps.newHashMap(); /** @@ -132,9 +111,9 @@ static Map getLoadersMap() } /** - * Look for extension modules for the given class from both classpath and druid.extensions.coordinates. - * Extensions explicitly specified in druid.extensions.coordinates will be loaded first, if there is a duplicate - * extension from classpath, it will be ignored. + * Look for extension modules for the given class from both classpath and extensions directory. A user should never + * put the same two extensions in classpath and extensions directory, if he/she does that, the one that is in the + * classpath will be loaded, the other will be ignored. 
* * @param config Extensions configuration * @param clazz The class of extension module (e.g., DruidModule) @@ -143,25 +122,39 @@ static Map getLoadersMap() */ public synchronized static Collection getFromExtensions(ExtensionsConfig config, Class clazz) { - final TeslaAether aether = getAetherClient(config); final Set retVal = Sets.newHashSet(); - final Set extensionNames = Sets.newHashSet(); + final Set loadedExtensionNames = Sets.newHashSet(); - for (String coordinate : config.getCoordinates()) { - log.info("Loading extension[%s] for class[%s]", coordinate, clazz.getName()); - try { - URLClassLoader loader = getClassLoaderForCoordinates(aether, coordinate, config.getDefaultVersion()); + if (config.searchCurrentClassloader()) { + for (T module : ServiceLoader.load(clazz, Thread.currentThread().getContextClassLoader())) { + final String moduleName = module.getClass().getCanonicalName(); + if (moduleName == null) { + log.warn( + "Extension module [%s] was ignored because it doesn't have a canonical name, is it a local or anonymous class?", + module.getClass().getName() + ); + } else if (!loadedExtensionNames.contains(moduleName)) { + log.info("Adding classpath extension module [%s] for class [%s]", moduleName, clazz.getName()); + loadedExtensionNames.add(moduleName); + retVal.add(module); + } + } + } + for (File extension : getExtensionFilesToLoad(config)) { + log.info("Loading extension [%s] for class [%s]", extension.getName(), clazz.getName()); + try { + final URLClassLoader loader = getClassLoaderForExtension(extension); for (T module : ServiceLoader.load(clazz, loader)) { - String moduleName = module.getClass().getCanonicalName(); + final String moduleName = module.getClass().getCanonicalName(); if (moduleName == null) { log.warn( "Extension module [%s] was ignored because it doesn't have a canonical name, is it a local or anonymous class?", module.getClass().getName() ); - } else if (!extensionNames.contains(moduleName)) { - log.info("Adding remote extension 
module[%s] for class[%s]", moduleName, clazz.getName()); - extensionNames.add(moduleName); + } else if (!loadedExtensionNames.contains(moduleName)) { + log.info("Adding local file system extension module [%s] for class [%s]", moduleName, clazz.getName()); + loadedExtensionNames.add(moduleName); retVal.add(module); } } @@ -171,199 +164,111 @@ public synchronized static Collection getFromExtensions(ExtensionsConfig } } - if (config.searchCurrentClassloader()) { - for (T module : ServiceLoader.load(clazz, Initialization.class.getClassLoader())) { - String moduleName = module.getClass().getCanonicalName(); - if (moduleName == null) { - log.warn( - "Extension module [%s] was ignored because it doesn't have a canonical name, is it a local or anonymous class?", - module.getClass().getName() - ); - } else if (!extensionNames.contains(moduleName)) { - log.info("Adding local extension module[%s] for class[%s]", moduleName, clazz.getName()); - extensionNames.add(moduleName); - retVal.add(module); - } - } - } - // update the map with currently loaded modules extensionsMap.put(clazz, retVal); return retVal; } - public static URLClassLoader getClassLoaderForCoordinates( - TeslaAether aether, - String coordinate, - String defaultVersion - ) - throws DependencyResolutionException, MalformedURLException + /** + * Find all the extension files that should be loaded by druid. + *

+ * If user explicitly specifies druid.extensions.loadList, then it will look for those extensions under root + * extensions directory. If one of them is not found, druid will fail loudly. + *

+ * If user doesn't specify druid.extension.toLoad (or its value is empty), druid will load all the extensions + * under the root extensions directory. + * + * @param config ExtensionsConfig configured by druid.extensions.xxx + * + * @return an array of druid extension files that will be loaded by druid process + */ + public static File[] getExtensionFilesToLoad(ExtensionsConfig config) { - URLClassLoader loader = loadersMap.get(coordinate); - if (loader == null) { - final CollectRequest collectRequest = new CollectRequest(); - - DefaultArtifact versionedArtifact; - try { - // this will throw an exception if no version is specified - versionedArtifact = new DefaultArtifact(coordinate); - } - catch (IllegalArgumentException e) { - // try appending the default version so we can specify artifacts without versions - if (defaultVersion != null) { - versionedArtifact = new DefaultArtifact(coordinate + ":" + defaultVersion); - } else { - throw e; - } - } - - collectRequest.setRoot(new Dependency(versionedArtifact, JavaScopes.RUNTIME)); - DependencyRequest dependencyRequest = new DependencyRequest( - collectRequest, - DependencyFilterUtils.andFilter( - DependencyFilterUtils.classpathFilter(JavaScopes.RUNTIME), - new DependencyFilter() - { - @Override - public boolean accept(DependencyNode node, List parents) - { - if (accept(node.getArtifact())) { - return false; - } - - for (DependencyNode parent : parents) { - if (accept(parent.getArtifact())) { - return false; - } - } - - return true; - } - - private boolean accept(final Artifact artifact) - { - return exclusions.contains(artifact.getGroupId()); - } - } - ) - ); - - try { - final List artifacts = aether.resolveArtifacts(dependencyRequest); - - List urls = Lists.newArrayListWithExpectedSize(artifacts.size()); - for (Artifact artifact : artifacts) { - if (!exclusions.contains(artifact.getGroupId())) { - urls.add(artifact.getFile().toURI().toURL()); - } else { - log.debug("Skipped Artifact[%s]", artifact); - } - } - - for 
(URL url : urls) { - log.info("Added URL[%s]", url); + final File rootExtensionsDir = new File(config.getDirectory()); + if (rootExtensionsDir.exists() && !rootExtensionsDir.isDirectory()) { + throw new ISE("Root extensions directory [%s] is not a directory!?", rootExtensionsDir); + } + File[] extensionsToLoad; + final List toLoad = config.getLoadList(); + if (toLoad == null) { + extensionsToLoad = rootExtensionsDir.listFiles(); + } else { + int i = 0; + extensionsToLoad = new File[toLoad.size()]; + for (final String extensionName : toLoad) { + final File extensionDir = new File(rootExtensionsDir, extensionName); + if (!extensionDir.isDirectory()) { + throw new ISE( + String.format( + "Extension [%s] specified in \"druid.extensions.loadList\" didn't exist!?", + extensionDir.getAbsolutePath() + ) + ); } - - loader = new URLClassLoader(urls.toArray(new URL[urls.size()]), Initialization.class.getClassLoader()); - loadersMap.put(coordinate, loader); - } - catch (Exception e) { - log.error(e, "Unable to resolve artifacts for [%s].", dependencyRequest); - throw Throwables.propagate(e); + extensionsToLoad[i++] = extensionDir; } } - return loader; + return extensionsToLoad == null ? new File[]{} : extensionsToLoad; } - public static DefaultTeslaAether getAetherClient(ExtensionsConfig config) + /** + * Find all the hadoop dependencies that should be loaded by druid + * + * @param hadoopDependencyCoordinates e.g.["org.apache.hadoop:hadoop-client:2.3.0"] + * @param extensionsConfig ExtensionsConfig configured by druid.extensions.xxx + * + * @return an array of hadoop dependency files that will be loaded by druid process + */ + public static File[] getHadoopDependencyFilesToLoad( + List hadoopDependencyCoordinates, + ExtensionsConfig extensionsConfig + ) { - /* - DefaultTeslaAether logs a bunch of stuff to System.out, which is annoying. We choose to disable that - unless debug logging is turned on. "Disabling" it, however, is kinda bass-ackwards. 
We copy out a reference - to the current System.out, and set System.out to a noop output stream. Then after DefaultTeslaAether has pulled - The reference we swap things back. - - This has implications for other things that are running in parallel to this. Namely, if anything else also grabs - a reference to System.out or tries to log to it while we have things adjusted like this, then they will also log - to nothingness. Fortunately, the code that calls this is single-threaded and shouldn't hopefully be running - alongside anything else that's grabbing System.out. But who knows. - */ - - List remoteUriList = config.getRemoteRepositories(); - - List remoteRepositories = Lists.newArrayList(); - for (String uri : remoteUriList) { - try { - URI u = new URI(uri); - Repository r = new Repository(uri); - - if (u.getUserInfo() != null) { - String[] auth = u.getUserInfo().split(":", 2); - if (auth.length == 2) { - r.setUsername(auth[0]); - r.setPassword(auth[1]); - } else { - log.warn( - "Invalid credentials in repository URI, expecting [:], got [%s] for [%s]", - u.getUserInfo(), - uri - ); - } - } - remoteRepositories.add(r); - } - catch (URISyntaxException e) { - throw Throwables.propagate(e); - } + final File rootHadoopDependenciesDir = new File(extensionsConfig.getHadoopDependenciesDir()); + if (rootHadoopDependenciesDir.exists() && !rootHadoopDependenciesDir.isDirectory()) { + throw new ISE("Root Hadoop dependencies directory [%s] is not a directory!?", rootHadoopDependenciesDir); } - - if (log.isTraceEnabled() || log.isDebugEnabled()) { - return new DefaultTeslaAether( - config.getLocalRepository(), - remoteRepositories.toArray(new Repository[remoteRepositories.size()]) - ); + final File[] hadoopDependenciesToLoad = new File[hadoopDependencyCoordinates.size()]; + int i = 0; + for (final String coordinate : hadoopDependencyCoordinates) { + final DefaultArtifact artifact = new DefaultArtifact(coordinate); + final File hadoopDependencyDir = new 
File(rootHadoopDependenciesDir, artifact.getArtifactId()); + final File versionDir = new File(hadoopDependencyDir, artifact.getVersion()); + // find the hadoop dependency with the version specified in coordinate + if (!hadoopDependencyDir.isDirectory() || !versionDir.isDirectory()) { + throw new ISE( + String.format("Hadoop dependency [%s] didn't exist!?", versionDir.getAbsolutePath()) + ); + } + hadoopDependenciesToLoad[i++] = versionDir; } + return hadoopDependenciesToLoad; + } - PrintStream oldOut = System.out; - try { - System.setOut( - new PrintStream( - new OutputStream() - { - @Override - public void write(int b) throws IOException - { - - } - - @Override - public void write(byte[] b) throws IOException - { - - } - - @Override - public void write(byte[] b, int off, int len) throws IOException - { - - } - } - , false, StringUtils.UTF8_STRING - ) - ); - return new DefaultTeslaAether( - config.getLocalRepository(), - remoteRepositories.toArray(new Repository[remoteRepositories.size()]) - ); - } - catch (UnsupportedEncodingException e) { - // should never happen - throw new IllegalStateException(e); - } - finally { - System.setOut(oldOut); + /** + * @param extension The File instance of the extension we want to load + * + * @return a URLClassLoader that loads all the jars on which the extension is dependent + * + * @throws MalformedURLException + */ + public static URLClassLoader getClassLoaderForExtension(File extension) throws MalformedURLException + { + URLClassLoader loader = loadersMap.get(extension.getName()); + if (loader == null) { + final Collection jars = FileUtils.listFiles(extension, new String[]{"jar"}, false); + final URL[] urls = new URL[jars.size()]; + int i = 0; + for (File jar : jars) { + final URL url = jar.toURI().toURL(); + log.info("added URL[%s]", url); + urls[i++] = url; + } + loader = new URLClassLoader(urls, Initialization.class.getClassLoader()); + loadersMap.put(extension.getName(), loader); } + return loader; } public static Injector 
makeInjectorWithModules(final Injector baseInjector, Iterable modules) diff --git a/server/src/test/java/io/druid/initialization/InitializationTest.java b/server/src/test/java/io/druid/initialization/InitializationTest.java index 7ff8c033d51a..a939266f98ef 100644 --- a/server/src/test/java/io/druid/initialization/InitializationTest.java +++ b/server/src/test/java/io/druid/initialization/InitializationTest.java @@ -26,6 +26,7 @@ import com.google.inject.Binder; import com.google.inject.Injector; import com.google.inject.Key; +import com.metamx.common.ISE; import io.druid.guice.ExtensionsConfig; import io.druid.guice.GuiceInjectors; import io.druid.guice.JsonConfigProvider; @@ -33,18 +34,28 @@ import io.druid.server.DruidNode; import org.junit.Assert; import org.junit.FixMethodOrder; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; import org.junit.runners.MethodSorters; import javax.annotation.Nullable; +import java.io.File; +import java.io.IOException; +import java.net.URL; import java.net.URLClassLoader; +import java.util.Arrays; import java.util.Collection; +import java.util.Comparator; import java.util.List; import java.util.Set; @FixMethodOrder(MethodSorters.NAME_ASCENDING) public class InitializationTest { + @Rule + public final TemporaryFolder temporaryFolder = new TemporaryFolder(); + @Test public void test01InitialModulesEmpty() throws Exception { @@ -102,22 +113,7 @@ public void test04DuplicateClassLoaderExtensions() throws Exception { Initialization.getLoadersMap().put("xyz", (URLClassLoader) Initialization.class.getClassLoader()); - Collection modules = Initialization.getFromExtensions( - new ExtensionsConfig() - { - @Override - public List getCoordinates() - { - return ImmutableList.of("xyz"); - } - - @Override - public List getRemoteRepositories() - { - return ImmutableList.of(); - } - }, DruidModule.class - ); + Collection modules = Initialization.getFromExtensions(new ExtensionsConfig(), DruidModule.class); Set 
loadedModuleNames = Sets.newHashSet(); for (DruidModule module : modules) { @@ -149,6 +145,32 @@ public void configure(Binder binder) Assert.assertNotNull(injector); } + @Test + public void test06GetClassLoaderForExtension() throws IOException + { + final File some_extension_dir = temporaryFolder.newFolder(); + final File a_jar = new File(some_extension_dir, "a.jar"); + final File b_jar = new File(some_extension_dir, "b.jar"); + final File c_jar = new File(some_extension_dir, "c.jar"); + a_jar.createNewFile(); + b_jar.createNewFile(); + c_jar.createNewFile(); + final URLClassLoader loader = Initialization.getClassLoaderForExtension(some_extension_dir); + final URL[] expectedURLs = new URL[]{a_jar.toURI().toURL(), b_jar.toURI().toURL(), c_jar.toURI().toURL()}; + final URL[] actualURLs = loader.getURLs(); + Arrays.sort( + actualURLs, new Comparator() + { + @Override + public int compare(URL o1, URL o2) + { + return o1.getPath().compareTo(o2.getPath()); + } + } + ); + Assert.assertArrayEquals(expectedURLs, actualURLs); + } + @Test public void testGetLoadedModules() { @@ -162,6 +184,199 @@ public void testGetLoadedModules() Assert.assertEquals("Set from loaded modules #2 should be same!", modules, loadedModules2); } + @Test + public void testGetExtensionFilesToLoad_non_exist_extensions_dir() + { + Assert.assertArrayEquals( + "Non-exist root extensionsDir should return emply array of File", + new File[]{}, + Initialization.getExtensionFilesToLoad(new ExtensionsConfig()) + ); + } + + @Test(expected = ISE.class) + public void testGetExtensionFilesToLoad_wrong_type_extensions_dir() throws IOException + { + final File extensionsDir = temporaryFolder.newFile(); + final ExtensionsConfig config = new ExtensionsConfig() + { + @Override + public String getDirectory() + { + return extensionsDir.getAbsolutePath(); + } + }; + Initialization.getExtensionFilesToLoad(config); + } + + @Test + public void testGetExtensionFilesToLoad_empty_extensions_dir() throws IOException + { + final 
File extensionsDir = temporaryFolder.newFolder(); + final ExtensionsConfig config = new ExtensionsConfig() + { + @Override + public String getDirectory() + { + return extensionsDir.getAbsolutePath(); + } + }; + + Assert.assertArrayEquals( + "Empty root extensionsDir should return emply array of File", + new File[]{}, + Initialization.getExtensionFilesToLoad(new ExtensionsConfig()) + ); + } + + /** + * If druid.extension.load is not specified, Initialization.getExtensionFilesToLoad is supposed to return all the + * extension folders under root extensions directory. + */ + @Test + public void testGetExtensionFilesToLoad_null_load_list() throws IOException + { + final File extensionsDir = temporaryFolder.newFolder(); + final ExtensionsConfig config = new ExtensionsConfig() + { + @Override + public String getDirectory() + { + return extensionsDir.getAbsolutePath(); + } + }; + final File mysql_metadata_storage = new File(extensionsDir, "mysql-metadata-storage"); + final File druid_kafka_eight = new File(extensionsDir, "druid-kafka-eight"); + mysql_metadata_storage.mkdir(); + druid_kafka_eight.mkdir(); + + final File[] expectedFileList = new File[]{druid_kafka_eight, mysql_metadata_storage}; + final File[] actualFileList = Initialization.getExtensionFilesToLoad(config); + Arrays.sort(actualFileList); + Assert.assertArrayEquals(expectedFileList, actualFileList); + } + + /** + * druid.extension.load is specified, Initialization.getExtensionFilesToLoad is supposed to return all the extension + * folders appeared in the load list. 
+ */ + @Test + public void testGetExtensionFilesToLoad_with_load_list() throws IOException + { + final File extensionsDir = temporaryFolder.newFolder(); + final ExtensionsConfig config = new ExtensionsConfig() + { + @Override + public List getLoadList() + { + return Arrays.asList("mysql-metadata-storage", "druid-kafka-eight"); + } + + @Override + public String getDirectory() + { + return extensionsDir.getAbsolutePath(); + } + }; + final File mysql_metadata_storage = new File(extensionsDir, "mysql-metadata-storage"); + final File druid_kafka_eight = new File(extensionsDir, "druid-kafka-eight"); + final File random_extension = new File(extensionsDir, "random-extensions"); + mysql_metadata_storage.mkdir(); + druid_kafka_eight.mkdir(); + random_extension.mkdir(); + + final File[] expectedFileList = new File[]{druid_kafka_eight, mysql_metadata_storage}; + final File[] actualFileList = Initialization.getExtensionFilesToLoad(config); + Arrays.sort(actualFileList); + Assert.assertArrayEquals(expectedFileList, actualFileList); + } + + /** + * druid.extension.load is specified, but contains an extension that is not prepared under root extension directory. + * Initialization.getExtensionFilesToLoad is supposed to throw ISE. 
+ */ + @Test(expected = ISE.class) + public void testGetExtensionFilesToLoad_with_non_exist_item_in_load_list() throws IOException + { + final File extensionsDir = temporaryFolder.newFolder(); + final ExtensionsConfig config = new ExtensionsConfig() + { + @Override + public List getLoadList() + { + return Arrays.asList("mysql-metadata-storage", "druid-kafka-eight"); + } + + @Override + public String getDirectory() + { + return extensionsDir.getAbsolutePath(); + } + }; + final File druid_kafka_eight = new File(extensionsDir, "druid-kafka-eight"); + final File random_extension = new File(extensionsDir, "random-extensions"); + druid_kafka_eight.mkdir(); + random_extension.mkdir(); + Initialization.getExtensionFilesToLoad(config); + } + + @Test(expected = ISE.class) + public void testGetHadoopDependencyFilesToLoad_wrong_type_root_hadoop_depenencies_dir() throws IOException + { + final File rootHadoopDependenciesDir = temporaryFolder.newFile(); + final ExtensionsConfig config = new ExtensionsConfig() + { + @Override + public String getHadoopDependenciesDir() + { + return rootHadoopDependenciesDir.getAbsolutePath(); + } + }; + Initialization.getHadoopDependencyFilesToLoad(ImmutableList.of(), config); + } + + @Test(expected = ISE.class) + public void testGetHadoopDependencyFilesToLoad_non_exist_version_dir() throws IOException + { + final File rootHadoopDependenciesDir = temporaryFolder.newFolder(); + final ExtensionsConfig config = new ExtensionsConfig() + { + @Override + public String getHadoopDependenciesDir() + { + return rootHadoopDependenciesDir.getAbsolutePath(); + } + }; + final File hadoopClient = new File(rootHadoopDependenciesDir, "hadoop-client"); + hadoopClient.mkdir(); + Initialization.getHadoopDependencyFilesToLoad(ImmutableList.of("org.apache.hadoop:hadoop-client:2.3.0"), config); + } + + @Test + public void testGetHadoopDependencyFilesToLoad_with_hadoop_coordinates() throws IOException + { + final File rootHadoopDependenciesDir = 
temporaryFolder.newFolder(); + final ExtensionsConfig config = new ExtensionsConfig() + { + @Override + public String getHadoopDependenciesDir() + { + return rootHadoopDependenciesDir.getAbsolutePath(); + } + }; + final File hadoopClient = new File(rootHadoopDependenciesDir, "hadoop-client"); + final File versionDir = new File(hadoopClient, "2.3.0"); + hadoopClient.mkdir(); + versionDir.mkdir(); + final File[] expectedFileList = new File[]{versionDir}; + final File[] actualFileList = Initialization.getHadoopDependencyFilesToLoad( + ImmutableList.of( + "org.apache.hadoop:hadoop-client:2.3.0" + ), config + ); + Assert.assertArrayEquals(expectedFileList, actualFileList); + } + public static class TestDruidModule implements DruidModule { @Override diff --git a/services/src/main/java/io/druid/cli/CliHadoopIndexer.java b/services/src/main/java/io/druid/cli/CliHadoopIndexer.java index e76a5b500bd3..4712beba986c 100644 --- a/services/src/main/java/io/druid/cli/CliHadoopIndexer.java +++ b/services/src/main/java/io/druid/cli/CliHadoopIndexer.java @@ -26,7 +26,6 @@ import io.airlift.airline.Option; import io.druid.guice.ExtensionsConfig; import io.druid.initialization.Initialization; -import io.tesla.aether.internal.DefaultTeslaAether; import java.io.File; import java.lang.reflect.Method; @@ -76,14 +75,10 @@ public void run() allCoordinates.add(DEFAULT_HADOOP_COORDINATES); } - final DefaultTeslaAether aetherClient = Initialization.getAetherClient(extensionsConfig); - final List extensionURLs = Lists.newArrayList(); - for (String coordinate : extensionsConfig.getCoordinates()) { - final ClassLoader coordinateLoader = Initialization.getClassLoaderForCoordinates( - aetherClient, coordinate, extensionsConfig.getDefaultVersion() - ); - extensionURLs.addAll(Arrays.asList(((URLClassLoader) coordinateLoader).getURLs())); + for (final File extension : Initialization.getExtensionFilesToLoad(extensionsConfig)) { + final ClassLoader extensionLoader = 
Initialization.getClassLoaderForExtension(extension); + extensionURLs.addAll(Arrays.asList(((URLClassLoader) extensionLoader).getURLs())); } final List nonHadoopURLs = Lists.newArrayList(); @@ -92,10 +87,8 @@ public void run() final List driverURLs = Lists.newArrayList(); driverURLs.addAll(nonHadoopURLs); // put hadoop dependencies last to avoid jets3t & apache.httpcore version conflicts - for (String coordinate : allCoordinates) { - final ClassLoader hadoopLoader = Initialization.getClassLoaderForCoordinates( - aetherClient, coordinate, extensionsConfig.getDefaultVersion() - ); + for (File hadoopDependency : Initialization.getHadoopDependencyFilesToLoad(allCoordinates, extensionsConfig)) { + final ClassLoader hadoopLoader = Initialization.getClassLoaderForExtension(hadoopDependency); driverURLs.addAll(Arrays.asList(((URLClassLoader) hadoopLoader).getURLs())); } diff --git a/services/src/main/java/io/druid/cli/PullDependencies.java b/services/src/main/java/io/druid/cli/PullDependencies.java index e0cd5f56d9d8..319cae7ceef4 100644 --- a/services/src/main/java/io/druid/cli/PullDependencies.java +++ b/services/src/main/java/io/druid/cli/PullDependencies.java @@ -18,58 +18,380 @@ package io.druid.cli; import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import com.google.inject.Inject; +import com.metamx.common.ISE; +import com.metamx.common.StringUtils; +import com.metamx.common.logger.Logger; import io.airlift.airline.Command; import io.airlift.airline.Option; import io.druid.guice.ExtensionsConfig; import io.druid.indexing.common.config.TaskConfig; -import io.druid.initialization.Initialization; +import io.tesla.aether.Repository; +import io.tesla.aether.TeslaAether; import io.tesla.aether.internal.DefaultTeslaAether; +import org.apache.commons.io.FileUtils; +import org.eclipse.aether.artifact.Artifact; +import 
org.eclipse.aether.artifact.DefaultArtifact; +import org.eclipse.aether.collection.CollectRequest; +import org.eclipse.aether.graph.Dependency; +import org.eclipse.aether.graph.DependencyFilter; +import org.eclipse.aether.graph.DependencyNode; +import org.eclipse.aether.resolution.DependencyRequest; +import org.eclipse.aether.util.artifact.JavaScopes; +import org.eclipse.aether.util.filter.DependencyFilterUtils; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; +import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URISyntaxException; import java.util.List; +import java.util.Set; @Command( name = "pull-deps", - description = "Pull down dependencies to the local repository specified by druid.extensions.localRepository" + description = "Pull down dependencies to the local repository specified by druid.extensions.localRepository, extensions directory specified by druid.extensions.extensionsDir and hadoop depenencies directory specified by druid.extensions.hadoopDependenciesDir" ) public class PullDependencies implements Runnable { - @Option(name = {"-c", "--coordinate"}, - title = "coordinate", - description = "extra dependencies to pull down (e.g. hadoop coordinates)", - required = false) - public List coordinates; + private static final Logger log = new Logger(PullDependencies.class); - @Option(name = "--no-default-hadoop", - description = "don't pull down the default HadoopIndexTask dependencies", - required = false) - public boolean noDefaultHadoop; + private static final Set exclusions = Sets.newHashSet( + "io.druid", + "com.metamx.druid" + ); + + private TeslaAether aether; @Inject - public ExtensionsConfig extensionsConfig = null; + public ExtensionsConfig extensionsConfig; + + @Option( + name = {"-c", "--coordinate"}, + title = "coordinate", + description = "Extension coordinate to pull down, followed by a maven coordinate, e.g. 
io.druid.extensions:mysql-metadata-storage", + required = false) + public List coordinates = Lists.newArrayList(); + + @Option( + name = {"-h", "--hadoop-coordinate"}, + title = "hadoop coordinate", + description = "Hadoop dependency to pull down, followed by a maven coordinate, e.g. org.apache.hadoop:hadoop-client:2.4.0", + required = false) + public List hadoopCoordinates = Lists.newArrayList(); + + @Option( + name = "--no-default-hadoop", + description = "Don't pull down the default hadoop coordinate, i.e., org.apache.hadoop:hadoop-client:2.3.0. If `-h` option is supplied, then default hadoop coordinate will not be downloaded.", + required = false) + public boolean noDefaultHadoop = false; + + @Option( + name = "--clean", + title = "Remove exisiting extension and hadoop dependencies directories before pulling down dependencies.", + required = false) + public boolean clean = false; + + @Option( + name = {"-l", "--localRepository"}, + title = "A local repostiry that Maven will use to put downloaded files. 
Then pull-deps will lay these files out into the extensions directory as needed.", + required = false + ) + public String localRepository = String.format("%s/%s", System.getProperty("user.home"), ".m2/repository"); + + @Option( + name = {"-r", "--remoteRepositories"}, + title = "A JSON Array list of remote repositories to load dependencies from.", + required = false + ) + List remoteRepositories = ImmutableList.of( + "https://repo1.maven.org/maven2/", + "https://metamx.artifactoryonline.com/metamx/pub-libs-releases-local" + ); + + @Option( + name = {"-d", "--defaultVersion"}, + title = "Version to use for extension artifacts without version information.", + required = false + ) + public String defaultVersion = PullDependencies.class.getPackage().getImplementationVersion(); + + public PullDependencies() + { + } + + // Used for testing only + PullDependencies(TeslaAether aether, ExtensionsConfig extensionsConfig) + { + this.aether = aether; + this.extensionsConfig = extensionsConfig; + } @Override public void run() { - // Druid dependencies are pulled down as a side-effect of Guice injection. Extra dependencies are pulled down as - // a side-effect of getting class loaders. 
- final List allCoordinates = Lists.newArrayList(); - if (coordinates != null) { - allCoordinates.addAll(coordinates); + if (aether == null) { + aether = getAetherClient(); + } + + final File extensionsDir = new File(extensionsConfig.getDirectory()); + final File hadoopDependenciesDir = new File(extensionsConfig.getHadoopDependenciesDir()); + + if (clean) { + try { + FileUtils.deleteDirectory(extensionsDir); + FileUtils.deleteDirectory(hadoopDependenciesDir); + } + catch (IOException e) { + log.error("Unable to clear extension directory at [%s]", extensionsConfig.getDirectory()); + throw Throwables.propagate(e); + } + } + + createRootExtensionsDirectory(extensionsDir); + createRootExtensionsDirectory(hadoopDependenciesDir); + + try { + log.info("Start downloading dependencies for extension coordinates: [%s]", coordinates); + for (final String coordinate : coordinates) { + final Artifact versionedArtifact = getArtifact(coordinate); + + File currExtensionDir = new File(extensionsDir, versionedArtifact.getArtifactId()); + createExtensionDirectory(coordinate, currExtensionDir); + + downloadExtension(versionedArtifact, currExtensionDir); + } + log.info("Finish downloading dependencies for extension coordinates: [%s]", coordinates); + + if (!noDefaultHadoop && hadoopCoordinates.isEmpty()) { + hadoopCoordinates.addAll(TaskConfig.DEFAULT_DEFAULT_HADOOP_COORDINATES); + } + + log.info("Start downloading dependencies for hadoop extension coordinates: [%s]", hadoopCoordinates); + for (final String hadoopCoordinate : hadoopCoordinates) { + final Artifact versionedArtifact = getArtifact(hadoopCoordinate); + + File currExtensionDir = new File(hadoopDependenciesDir, versionedArtifact.getArtifactId()); + createExtensionDirectory(hadoopCoordinate, currExtensionDir); + + // add a version folder for hadoop dependency directory + currExtensionDir = new File(currExtensionDir, versionedArtifact.getVersion()); + createExtensionDirectory(hadoopCoordinate, currExtensionDir); + + 
downloadExtension(versionedArtifact, currExtensionDir); + } + log.info("Finish downloading dependencies for hadoop extension coordinates: [%s]", hadoopCoordinates); + } + catch (Exception e) { + throw Throwables.propagate(e); + } + } + + private Artifact getArtifact(String coordinate) + { + DefaultArtifact versionedArtifact; + try { + // this will throw an exception if no version is specified + versionedArtifact = new DefaultArtifact(coordinate); } - if (!noDefaultHadoop) { - allCoordinates.addAll(TaskConfig.DEFAULT_DEFAULT_HADOOP_COORDINATES); + catch (IllegalArgumentException e) { + // try appending the default version so we can specify artifacts without versions + if (defaultVersion != null) { + versionedArtifact = new DefaultArtifact(coordinate + ":" + defaultVersion); + } else { + throw e; + } } + return versionedArtifact; + } + + /** + * Download the extension given its maven coordinate + * + * @param versionedArtifact The maven artifact of the extension + * @param toLocation The location where this extension will be downloaded to + */ + private void downloadExtension(Artifact versionedArtifact, File toLocation) + { + final CollectRequest collectRequest = new CollectRequest(); + collectRequest.setRoot(new Dependency(versionedArtifact, JavaScopes.RUNTIME)); + final DependencyRequest dependencyRequest = new DependencyRequest( + collectRequest, + DependencyFilterUtils.andFilter( + DependencyFilterUtils.classpathFilter(JavaScopes.RUNTIME), + new DependencyFilter() + { + @Override + public boolean accept(DependencyNode node, List parents) + { + if (accept(node.getArtifact())) { + return false; + } + + for (DependencyNode parent : parents) { + if (accept(parent.getArtifact())) { + return false; + } + } + + return true; + } + + private boolean accept(final Artifact artifact) + { + return exclusions.contains(artifact.getGroupId()); + } + } + ) + ); + try { - final DefaultTeslaAether aetherClient = Initialization.getAetherClient(extensionsConfig); - for (final String 
coordinate : allCoordinates) { - Initialization.getClassLoaderForCoordinates(aetherClient, coordinate, extensionsConfig.getDefaultVersion()); + log.info("Start downloading extension [%s]", versionedArtifact); + final List artifacts = aether.resolveArtifacts(dependencyRequest); + + for (Artifact artifact : artifacts) { + if (!exclusions.contains(artifact.getGroupId())) { + log.info("Adding file [%s] at [%s]", artifact.getFile().getName(), toLocation.getAbsolutePath()); + FileUtils.copyFileToDirectory(artifact.getFile(), toLocation); + } else { + log.debug("Skipped Artifact[%s]", artifact); + } } } catch (Exception e) { + log.error(e, "Unable to resolve artifacts for [%s].", dependencyRequest); throw Throwables.propagate(e); } + log.info("Finish downloading extension [%s]", versionedArtifact); + } + + private DefaultTeslaAether getAetherClient() + { + /* + DefaultTeslaAether logs a bunch of stuff to System.out, which is annoying. We choose to disable that + unless debug logging is turned on. "Disabling" it, however, is kinda bass-ackwards. We copy out a reference + to the current System.out, and set System.out to a noop output stream. Then after DefaultTeslaAether has pulled + The reference we swap things back. + + This has implications for other things that are running in parallel to this. Namely, if anything else also grabs + a reference to System.out or tries to log to it while we have things adjusted like this, then they will also log + to nothingness. Fortunately, the code that calls this is single-threaded and shouldn't hopefully be running + alongside anything else that's grabbing System.out. But who knows. 
+ */ + + List remoteUriList = remoteRepositories; + + List remoteRepositories = Lists.newArrayList(); + for (String uri : remoteUriList) { + try { + URI u = new URI(uri); + Repository r = new Repository(uri); + + if (u.getUserInfo() != null) { + String[] auth = u.getUserInfo().split(":", 2); + if (auth.length == 2) { + r.setUsername(auth[0]); + r.setPassword(auth[1]); + } else { + log.warn( + "Invalid credentials in repository URI, expecting [:], got [%s] for [%s]", + u.getUserInfo(), + uri + ); + } + } + remoteRepositories.add(r); + } + catch (URISyntaxException e) { + throw Throwables.propagate(e); + } + } + + if (log.isTraceEnabled() || log.isDebugEnabled()) { + return new DefaultTeslaAether( + localRepository, + remoteRepositories.toArray(new Repository[remoteRepositories.size()]) + ); + } + + PrintStream oldOut = System.out; + try { + System.setOut( + new PrintStream( + new OutputStream() + { + @Override + public void write(int b) throws IOException + { + + } + + @Override + public void write(byte[] b) throws IOException + { + + } + + @Override + public void write(byte[] b, int off, int len) throws IOException + { + + } + } + , false, StringUtils.UTF8_STRING + ) + ); + return new DefaultTeslaAether( + localRepository, + remoteRepositories.toArray(new Repository[remoteRepositories.size()]) + ); + } + catch (UnsupportedEncodingException e) { + // should never happen + throw new IllegalStateException(e); + } + finally { + System.setOut(oldOut); + } + } + + private void createRootExtensionsDirectory(File atLocation) + { + if (!atLocation.mkdirs()) { + throw new ISE( + String.format( + "Unable to create extensions directory at [%s]", + atLocation.getAbsolutePath() + ) + ); + } + } + + /** + * Create the extension directory for a specific maven coordinate. 
+ * The name of this directory should be the artifactId in the coordinate + */ + private void createExtensionDirectory(String coordinate, File atLocation) + { + if (atLocation.isDirectory()) { + log.info("Directory [%s] already exists, skipping creating a directory", atLocation.getAbsolutePath()); + return; + } + + if (!atLocation.mkdir()) { + throw new ISE( + String.format( + "Unable to create directory at [%s] for coordinate [%s]", + atLocation.getAbsolutePath(), + coordinate + ) + ); + } } } diff --git a/services/src/test/java/io/druid/cli/PullDependenciesTest.java b/services/src/test/java/io/druid/cli/PullDependenciesTest.java new file mode 100644 index 000000000000..6c96a8ae76bf --- /dev/null +++ b/services/src/test/java/io/druid/cli/PullDependenciesTest.java @@ -0,0 +1,222 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.cli; + +import com.google.api.client.repackaged.com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.metamx.common.ISE; +import io.druid.guice.ExtensionsConfig; +import io.tesla.aether.internal.DefaultTeslaAether; +import org.eclipse.aether.artifact.Artifact; +import org.eclipse.aether.artifact.DefaultArtifact; +import org.eclipse.aether.resolution.DependencyRequest; +import org.eclipse.aether.resolution.DependencyResolutionException; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +/** + */ +public class PullDependenciesTest +{ + + private static final String EXTENSION_A_COORDINATE = "groupX:extension_A:123"; + private static final String EXTENSION_B_COORDINATE = "groupY:extension_B:456"; + private static final String HADOOP_CLIENT_2_3_0_COORDINATE = "org.apache.hadoop:hadoop-client:2.3.0"; + private static final String HADOOP_CLIENT_2_4_0_COORDINATE = "org.apache.hadoop:hadoop-client:2.4.0"; + + @Rule + public final TemporaryFolder temporaryFolder = new TemporaryFolder(); + + private File localRepo; // a mock local repository that stores jars + + private final Artifact extension_A = new DefaultArtifact(EXTENSION_A_COORDINATE); + private final Artifact extension_B = new DefaultArtifact(EXTENSION_B_COORDINATE); + private final Artifact hadoop_client_2_3_0 = new DefaultArtifact(HADOOP_CLIENT_2_3_0_COORDINATE); + private final Artifact hadoop_client_2_4_0 = new DefaultArtifact(HADOOP_CLIENT_2_4_0_COORDINATE); + + private PullDependencies pullDependencies; + private File rootExtensionsDir; + private File rootHadoopDependenciesDir; + + private HashMap> extensionToJars; // map Artifact to its associated jars' names + + @Before + public void 
setUp() throws Exception + { + localRepo = temporaryFolder.newFolder(); + extensionToJars = new HashMap<>(); + + extensionToJars.put(extension_A, ImmutableList.of("a.jar", "b.jar", "c.jar")); + extensionToJars.put(extension_B, ImmutableList.of("d.jar", "e.jar")); + extensionToJars.put(hadoop_client_2_3_0, ImmutableList.of("f.jar", "g.jar")); + extensionToJars.put(hadoop_client_2_4_0, ImmutableList.of("h.jar", "i.jar")); + + rootExtensionsDir = new File(temporaryFolder.getRoot(), "druid_extensions"); + rootHadoopDependenciesDir = new File(temporaryFolder.getRoot(), "druid_hadoop_dependencies"); + + pullDependencies = new PullDependencies( + new DefaultTeslaAether() + { + @Override + public List resolveArtifacts(DependencyRequest request) throws DependencyResolutionException + { + return getArtifactsForExtension(request.getCollectRequest().getRoot().getArtifact()); + } + }, + new ExtensionsConfig() + { + @Override + public String getDirectory() + { + return rootExtensionsDir.getAbsolutePath(); + } + + @Override + public String getHadoopDependenciesDir() + { + return rootHadoopDependenciesDir.getAbsolutePath(); + } + } + ); + + pullDependencies.coordinates = ImmutableList.of(EXTENSION_A_COORDINATE, EXTENSION_B_COORDINATE); + pullDependencies.hadoopCoordinates = ImmutableList.of( + HADOOP_CLIENT_2_3_0_COORDINATE, + HADOOP_CLIENT_2_4_0_COORDINATE + ); + } + + private List getArtifactsForExtension(Artifact artifact) + { + final List jarNames = extensionToJars.get(artifact); + final List artifacts = Lists.newArrayList(); + for (String jarName : jarNames) { + final File jarFile = new File(localRepo, jarName); + try { + jarFile.createNewFile(); + } + catch (IOException e) { + Throwables.propagate(e); + } + artifacts.add(new DefaultArtifact(null, jarName, null, "jar", "1.0", null, jarFile)); + } + return artifacts; + } + + private File[] getExpectedJarFiles(Artifact artifact) + { + final String artifactId = artifact.getArtifactId(); + final List jarNames = 
extensionToJars.get(artifact); + final File[] expectedJars = new File[jarNames.size()]; + if (artifactId.equals("hadoop-client")) { + final String version = artifact.getVersion(); + for (int i = 0; i < jarNames.size(); ++i) { + expectedJars[i] = new File( + String.format( + "%s/%s/%s/%s", + rootHadoopDependenciesDir, + artifactId, + version, + jarNames.get(i) + ) + ); + } + } else { + for (int i = 0; i < jarNames.size(); ++i) { + expectedJars[i] = new File(String.format("%s/%s/%s", rootExtensionsDir, artifactId, jarNames.get(i))); + } + } + return expectedJars; + } + + /** + * If --clean is not specified and something already exists at druid.extensions.directory, ISE should be thrown + */ + @Test(expected = ISE.class) + public void testPullDependencies_root_extension_dir_exists() + { + rootExtensionsDir.mkdir(); + pullDependencies.run(); + } + + /** + * If --clean is not specified and something already exists at druid.extensions.hadoopDependenciesDir, + * ISE should be thrown + */ + @Test(expected = ISE.class) + public void testPullDependencies_root_hadoop_dependencies_dir_exists() + { + rootHadoopDependenciesDir.mkdir(); + pullDependencies.run(); + } + + @Test + public void testPullDependencies() + { + rootExtensionsDir.mkdir(); + rootHadoopDependenciesDir.mkdir(); + // Because --clean is specified, pull-deps will first remove existing root extensions and hadoop dependencies + pullDependencies.clean = true; + + pullDependencies.run(); + final File[] actualExtensions = rootExtensionsDir.listFiles(); + Arrays.sort(actualExtensions); + Assert.assertEquals(2, actualExtensions.length); + Assert.assertEquals(extension_A.getArtifactId(), actualExtensions[0].getName()); + Assert.assertEquals(extension_B.getArtifactId(), actualExtensions[1].getName()); + + final File[] jarsUnderExtensionA = actualExtensions[0].listFiles(); + Arrays.sort(jarsUnderExtensionA); + Assert.assertArrayEquals(getExpectedJarFiles(extension_A), jarsUnderExtensionA); + + final File[] 
jarsUnderExtensionB = actualExtensions[1].listFiles(); + Arrays.sort(jarsUnderExtensionB); + Assert.assertArrayEquals(getExpectedJarFiles(extension_B), jarsUnderExtensionB); + + final File[] actualHadoopDependencies = rootHadoopDependenciesDir.listFiles(); + Arrays.sort(actualHadoopDependencies); + Assert.assertEquals(1, actualHadoopDependencies.length); + Assert.assertEquals(hadoop_client_2_3_0.getArtifactId(), actualHadoopDependencies[0].getName()); + + final File[] versionDirsUnderHadoopClient = actualHadoopDependencies[0].listFiles(); + Assert.assertEquals(2, versionDirsUnderHadoopClient.length); + Arrays.sort(versionDirsUnderHadoopClient); + Assert.assertEquals(hadoop_client_2_3_0.getVersion(), versionDirsUnderHadoopClient[0].getName()); + Assert.assertEquals(hadoop_client_2_4_0.getVersion(), versionDirsUnderHadoopClient[1].getName()); + + final File[] jarsUnder2_3_0 = versionDirsUnderHadoopClient[0].listFiles(); + Arrays.sort(jarsUnder2_3_0); + Assert.assertArrayEquals(getExpectedJarFiles(hadoop_client_2_3_0), jarsUnder2_3_0); + + final File[] jarsUnder2_4_0 = versionDirsUnderHadoopClient[1].listFiles(); + Arrays.sort(jarsUnder2_4_0); + Assert.assertArrayEquals(getExpectedJarFiles(hadoop_client_2_4_0), jarsUnder2_4_0); + } +}