From a755ff99b56717f8a135044d790ea9a891280ed0 Mon Sep 17 00:00:00 2001 From: Karthick Narendran Date: Tue, 7 Apr 2020 16:17:53 +0000 Subject: [PATCH 1/6] Update Accumulo version in Dockerfile & README.md --- Dockerfile | 12 ++++++------ README.md | 10 ++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0350ae0..2b8fcad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,9 +18,9 @@ FROM centos:7 RUN yum install -y java-1.8.0-openjdk-devel make gcc-c++ wget ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk -ARG ACCUMULO_VERSION=2.0.0-alpha-1 -ARG HADOOP_VERSION=3.1.1 -ARG ZOOKEEPER_VERSION=3.4.13 +ARG ACCUMULO_VERSION=2.0.0 +ARG HADOOP_VERSION=3.2.1 +ARG ZOOKEEPER_VERSION=3.6.0 ARG HADOOP_USER_NAME=accumulo ARG ACCUMULO_FILE= ARG HADOOP_FILE= @@ -58,9 +58,9 @@ RUN set -eux; \ cp "/tmp/$HADOOP_FILE" "hadoop.tar.gz"; \ fi; \ if [ -z "$ZOOKEEPER_FILE" ]; then \ - download "zookeeper.tar.gz" "zookeeper/zookeeper-$ZOOKEEPER_VERSION/zookeeper-$ZOOKEEPER_VERSION.tar.gz"; \ + download "zookeeper.tar.gz" "zookeeper/zookeeper-$ZOOKEEPER_VERSION/apache-zookeeper-$ZOOKEEPER_VERSION.tar.gz"; \ else \ - cp "/tmp/$ZOOKEEPER_FILE" "zookeeper.tar.gz"; \ + cp "/tmp/$ZOOKEEPER_FILE" "apache-zookeeper.tar.gz"; \ fi; \ if [ -z "$ACCUMULO_FILE" ]; then \ download "accumulo.tar.gz" "accumulo/$ACCUMULO_VERSION/accumulo-$ACCUMULO_VERSION-bin.tar.gz"; \ @@ -73,7 +73,7 @@ RUN tar xzf hadoop.tar.gz -C /tmp/ RUN tar xzf zookeeper.tar.gz -C /tmp/ RUN mv /tmp/hadoop-$HADOOP_VERSION /opt/hadoop -RUN mv /tmp/zookeeper-$ZOOKEEPER_VERSION /opt/zookeeper +RUN mv /tmp/apache-zookeeper-$ZOOKEEPER_VERSION /opt/zookeeper RUN mv /tmp/accumulo-$ACCUMULO_VERSION /opt/accumulo RUN /opt/accumulo/bin/accumulo-util build-native diff --git a/README.md b/README.md index 2a5a117..2dd5453 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # Apache Accumulo Docker Image -**This is currently a work in progress that depends on unreleased features of Accumulo and will not 
be ready -for use until after Accumulo 2.0.0 is released.** Sometime after Accumulo 2.0.0 is released this project -will make its first release. Eventually, this will project will create a `apache/accumulo` image at DockerHub. +This is the first release of this project. Eventually, this project will create an `apache/accumulo` image at DockerHub. Until then, you will need to build your own image. The master branch of this repo creates a Docker image for Accumulo 2.0+. If you want to create a Docker image for Accumulo 1.9, there is a [1.9 branch](https://github.com/apache/accumulo-docker/tree/1.9) for that. @@ -18,9 +16,9 @@ While it is easier to pull from DockerHub, the image will default to the softwar | Software | Version | |-------------|---------------| -| [Accumulo] | 2.0.0-alpha-1 | -| [Hadoop] | 3.1.1 | -| [Zookeeper] | 3.4.13 | +| [Accumulo] | 2.0.0 | +| [Hadoop] | 3.2.1 | +| [Zookeeper] | 3.6.0 | If these versions do not match what is running on your cluster, you should consider building your own image with matching versions. However, Accumulo must be 2.0.0+. Below are instructions for From ae160eefbdfd72d1ad28c53d36b605a497764f79 Mon Sep 17 00:00:00 2001 From: Karthick Narendran Date: Tue, 7 Apr 2020 21:05:13 +0000 Subject: [PATCH 2/6] Update README.md as suggested by Christopher --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2dd5453..811ee38 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ While it is easier to pull from DockerHub, the image will default to the softwar |-------------|---------------| | [Accumulo] | 2.0.0 | | [Hadoop] | 3.2.1 | -| [Zookeeper] | 3.6.0 | +| [ZooKeeper] | 3.6.0 | If these versions do not match what is running on your cluster, you should consider building your own image with matching versions. However, Accumulo must be 2.0.0+. Below are instructions for @@ -138,4 +138,4 @@ of tablet servers that you want to run. 
[Accumulo]: https://accumulo.apache.org/ [Hadoop]: https://hadoop.apache.org/ -[Zookeeper]: https://zookeeper.apache.org/ +[ZooKeeper]: https://zookeeper.apache.org/ From 2bdfbe2bdc84c01a1b8f79dd729a3a89583e9a68 Mon Sep 17 00:00:00 2001 From: Karthick Narendran Date: Wed, 8 Apr 2020 13:52:57 +0000 Subject: [PATCH 3/6] Corrected ZK tarball filename Also copy a user-supplied ZOOKEEPER_FILE to zookeeper.tar.gz, the name the download branch writes and the later "tar xzf zookeeper.tar.gz" step extracts. --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2b8fcad..78d2a6c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -58,7 +58,7 @@ RUN set -eux; \ cp "/tmp/$HADOOP_FILE" "hadoop.tar.gz"; \ fi; \ if [ -z "$ZOOKEEPER_FILE" ]; then \ - download "zookeeper.tar.gz" "zookeeper/zookeeper-$ZOOKEEPER_VERSION/apache-zookeeper-$ZOOKEEPER_VERSION.tar.gz"; \ + download "zookeeper.tar.gz" "zookeeper/zookeeper-$ZOOKEEPER_VERSION/apache-zookeeper-$ZOOKEEPER_VERSION-bin.tar.gz"; \ else \ - cp "/tmp/$ZOOKEEPER_FILE" "apache-zookeeper.tar.gz"; \ + cp "/tmp/$ZOOKEEPER_FILE" "zookeeper.tar.gz"; \ fi; \ @@ -73,7 +73,7 @@ RUN tar xzf hadoop.tar.gz -C /tmp/ RUN tar xzf zookeeper.tar.gz -C /tmp/ RUN mv /tmp/hadoop-$HADOOP_VERSION /opt/hadoop -RUN mv /tmp/apache-zookeeper-$ZOOKEEPER_VERSION /opt/zookeeper +RUN mv /tmp/apache-zookeeper-$ZOOKEEPER_VERSION-bin /opt/zookeeper RUN mv /tmp/accumulo-$ACCUMULO_VERSION /opt/accumulo RUN /opt/accumulo/bin/accumulo-util build-native From f8d2ae6c65f627a1301bec9fb662ab80cd8d3748 Mon Sep 17 00:00:00 2001 From: Karthick Narendran Date: Wed, 8 Apr 2020 17:29:54 +0000 Subject: [PATCH 4/6] Added accumulo-env.sh as a post-script to the repo --- Dockerfile | 1 + accumulo-env.sh | 124 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 accumulo-env.sh diff --git a/Dockerfile b/Dockerfile index 78d2a6c..9b6b87d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -79,6 +79,7 @@ RUN mv /tmp/accumulo-$ACCUMULO_VERSION /opt/accumulo RUN /opt/accumulo/bin/accumulo-util build-native ADD ./accumulo.properties /opt/accumulo/conf +ADD ./accumulo-env.sh /opt/accumulo/conf ADD 
./log4j-service.properties /opt/accumulo/conf ADD ./log4j-monitor.properties /opt/accumulo/conf diff --git a/accumulo-env.sh b/accumulo-env.sh new file mode 100644 index 0000000..a2529e0 --- /dev/null +++ b/accumulo-env.sh @@ -0,0 +1,124 @@ +#! /usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## Before accumulo-env.sh is loaded, these environment variables are set and can be used in this file: + +# cmd - Command that is being called such as tserver, master, etc. +# basedir - Root of Accumulo installation +# bin - Directory containing Accumulo scripts +# conf - Directory containing Accumulo configuration +# lib - Directory containing Accumulo libraries + +############################ +# Variables that must be set +############################ + +## Accumulo logs directory. Referenced by logger config. 
+export ACCUMULO_LOG_DIR="${ACCUMULO_LOG_DIR:-${basedir}/logs}" +## Hadoop installation +export HADOOP_HOME="${HADOOP_HOME:-/path/to/hadoop}" +## Hadoop configuration +export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${HADOOP_HOME}/etc/hadoop}" +## Zookeeper installation +export ZOOKEEPER_HOME="${ZOOKEEPER_HOME:-/path/to/zookeeper}" + +########################## +# Build CLASSPATH variable +########################## + +## Verify that Hadoop & Zookeeper installation directories exist +if [[ ! -d "$ZOOKEEPER_HOME" ]]; then + echo "ZOOKEEPER_HOME=$ZOOKEEPER_HOME is not set to a valid directory in accumulo-env.sh" + exit 1 +fi +if [[ ! -d "$HADOOP_HOME" ]]; then + echo "HADOOP_HOME=$HADOOP_HOME is not set to a valid directory in accumulo-env.sh" + exit 1 +fi + +## Build using existing CLASSPATH, conf/ directory, dependencies in lib/, and external Hadoop & Zookeeper dependencies +if [[ -n "$CLASSPATH" ]]; then + CLASSPATH="${CLASSPATH}:${conf}" +else + CLASSPATH="${conf}" +fi +CLASSPATH="${CLASSPATH}:${lib}/*:${HADOOP_CONF_DIR}:${ZOOKEEPER_HOME}/*:${HADOOP_HOME}/share/hadoop/client/*:${ZOOKEEPER_HOME}/lib/*" +export CLASSPATH + +################################################################## +# Build JAVA_OPTS variable. Defaults below work but can be edited. +################################################################## + +## JVM options set for all processes. Extra options can be passed in by setting ACCUMULO_JAVA_OPTS to an array of options. 
+JAVA_OPTS=("${ACCUMULO_JAVA_OPTS[@]}" + '-XX:+UseConcMarkSweepGC' + '-XX:CMSInitiatingOccupancyFraction=75' + '-XX:+CMSClassUnloadingEnabled' + '-XX:OnOutOfMemoryError=kill -9 %p' + '-XX:-OmitStackTraceInFastThrow' + '-Djava.net.preferIPv4Stack=true' + "-Daccumulo.native.lib.path=${lib}/native") + +## Make sure Accumulo native libraries are built since they are enabled by default +"${bin}"/accumulo-util build-native &> /dev/null + +## JVM options set for individual applications +case "$cmd" in + master) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx512m' '-Xms512m') ;; + monitor) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms256m') ;; + gc) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms256m') ;; + tserver) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx768m' '-Xms768m') ;; + *) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms64m') ;; +esac + +## JVM options set for logging. Review logj4 properties files to see how they are used. +JAVA_OPTS=("${JAVA_OPTS[@]}" + "-Daccumulo.log.dir=${ACCUMULO_LOG_DIR}" + "-Daccumulo.application=${cmd}${ACCUMULO_SERVICE_INSTANCE}_$(hostname)") + +case "$cmd" in + monitor) + JAVA_OPTS=("${JAVA_OPTS[@]}" "-Dlog4j.configuration=log4j-monitor.properties") + ;; + gc|master|tserver|tracer) + JAVA_OPTS=("${JAVA_OPTS[@]}" "-Dlog4j.configuration=log4j-service.properties") + ;; + *) + # let log4j use its default behavior (log4j.xml, log4j.properties) + true + ;; +esac + +export JAVA_OPTS + +############################ +# Variables set to a default +############################ + +export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1} +## Add Hadoop native libraries to shared library paths given operating system +case "$(uname)" in + Darwin) export DYLD_LIBRARY_PATH="${HADOOP_HOME}/lib/native:${DYLD_LIBRARY_PATH}" ;; + *) export LD_LIBRARY_PATH="${HADOOP_HOME}/lib/native:${LD_LIBRARY_PATH}" ;; +esac + +############################################### +# Variables that are optional. 
Uncomment to set +############################################### + +## Specifies command that will be placed before calls to Java in accumulo script +# export ACCUMULO_JAVA_PREFIX="" From ff8cedf711e6283251d1aaafd67c17386917f6ef Mon Sep 17 00:00:00 2001 From: Karthick Narendran Date: Thu, 9 Apr 2020 12:55:03 +0000 Subject: [PATCH 5/6] Added 'sed' to modify the classpath & removed the post-script --- Dockerfile | 4 +- accumulo-env.sh | 124 ------------------------------------------------ 2 files changed, 3 insertions(+), 125 deletions(-) delete mode 100644 accumulo-env.sh diff --git a/Dockerfile b/Dockerfile index 9b6b87d..2dc1a20 100644 --- a/Dockerfile +++ b/Dockerfile @@ -78,8 +78,10 @@ RUN mv /tmp/accumulo-$ACCUMULO_VERSION /opt/accumulo RUN /opt/accumulo/bin/accumulo-util build-native +# The below line is required for Accumulo 2.0 to work with ZK 3.5 & above. +RUN sed -i 's/\${ZOOKEEPER_HOME}\/\*/\${ZOOKEEPER_HOME}\/\*\:\${ZOOKEEPER_HOME}\/lib\/\*/g' /opt/accumulo/conf/accumulo-env.sh + ADD ./accumulo.properties /opt/accumulo/conf -ADD ./accumulo-env.sh /opt/accumulo/conf ADD ./log4j-service.properties /opt/accumulo/conf ADD ./log4j-monitor.properties /opt/accumulo/conf diff --git a/accumulo-env.sh b/accumulo-env.sh deleted file mode 100644 index a2529e0..0000000 --- a/accumulo-env.sh +++ /dev/null @@ -1,124 +0,0 @@ -#! /usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -## Before accumulo-env.sh is loaded, these environment variables are set and can be used in this file: - -# cmd - Command that is being called such as tserver, master, etc. -# basedir - Root of Accumulo installation -# bin - Directory containing Accumulo scripts -# conf - Directory containing Accumulo configuration -# lib - Directory containing Accumulo libraries - -############################ -# Variables that must be set -############################ - -## Accumulo logs directory. Referenced by logger config. -export ACCUMULO_LOG_DIR="${ACCUMULO_LOG_DIR:-${basedir}/logs}" -## Hadoop installation -export HADOOP_HOME="${HADOOP_HOME:-/path/to/hadoop}" -## Hadoop configuration -export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${HADOOP_HOME}/etc/hadoop}" -## Zookeeper installation -export ZOOKEEPER_HOME="${ZOOKEEPER_HOME:-/path/to/zookeeper}" - -########################## -# Build CLASSPATH variable -########################## - -## Verify that Hadoop & Zookeeper installation directories exist -if [[ ! -d "$ZOOKEEPER_HOME" ]]; then - echo "ZOOKEEPER_HOME=$ZOOKEEPER_HOME is not set to a valid directory in accumulo-env.sh" - exit 1 -fi -if [[ ! 
-d "$HADOOP_HOME" ]]; then - echo "HADOOP_HOME=$HADOOP_HOME is not set to a valid directory in accumulo-env.sh" - exit 1 -fi - -## Build using existing CLASSPATH, conf/ directory, dependencies in lib/, and external Hadoop & Zookeeper dependencies -if [[ -n "$CLASSPATH" ]]; then - CLASSPATH="${CLASSPATH}:${conf}" -else - CLASSPATH="${conf}" -fi -CLASSPATH="${CLASSPATH}:${lib}/*:${HADOOP_CONF_DIR}:${ZOOKEEPER_HOME}/*:${HADOOP_HOME}/share/hadoop/client/*:${ZOOKEEPER_HOME}/lib/*" -export CLASSPATH - -################################################################## -# Build JAVA_OPTS variable. Defaults below work but can be edited. -################################################################## - -## JVM options set for all processes. Extra options can be passed in by setting ACCUMULO_JAVA_OPTS to an array of options. -JAVA_OPTS=("${ACCUMULO_JAVA_OPTS[@]}" - '-XX:+UseConcMarkSweepGC' - '-XX:CMSInitiatingOccupancyFraction=75' - '-XX:+CMSClassUnloadingEnabled' - '-XX:OnOutOfMemoryError=kill -9 %p' - '-XX:-OmitStackTraceInFastThrow' - '-Djava.net.preferIPv4Stack=true' - "-Daccumulo.native.lib.path=${lib}/native") - -## Make sure Accumulo native libraries are built since they are enabled by default -"${bin}"/accumulo-util build-native &> /dev/null - -## JVM options set for individual applications -case "$cmd" in - master) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx512m' '-Xms512m') ;; - monitor) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms256m') ;; - gc) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms256m') ;; - tserver) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx768m' '-Xms768m') ;; - *) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms64m') ;; -esac - -## JVM options set for logging. Review logj4 properties files to see how they are used. 
-JAVA_OPTS=("${JAVA_OPTS[@]}" - "-Daccumulo.log.dir=${ACCUMULO_LOG_DIR}" - "-Daccumulo.application=${cmd}${ACCUMULO_SERVICE_INSTANCE}_$(hostname)") - -case "$cmd" in - monitor) - JAVA_OPTS=("${JAVA_OPTS[@]}" "-Dlog4j.configuration=log4j-monitor.properties") - ;; - gc|master|tserver|tracer) - JAVA_OPTS=("${JAVA_OPTS[@]}" "-Dlog4j.configuration=log4j-service.properties") - ;; - *) - # let log4j use its default behavior (log4j.xml, log4j.properties) - true - ;; -esac - -export JAVA_OPTS - -############################ -# Variables set to a default -############################ - -export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1} -## Add Hadoop native libraries to shared library paths given operating system -case "$(uname)" in - Darwin) export DYLD_LIBRARY_PATH="${HADOOP_HOME}/lib/native:${DYLD_LIBRARY_PATH}" ;; - *) export LD_LIBRARY_PATH="${HADOOP_HOME}/lib/native:${LD_LIBRARY_PATH}" ;; -esac - -############################################### -# Variables that are optional. Uncomment to set -############################################### - -## Specifies command that will be placed before calls to Java in accumulo script -# export ACCUMULO_JAVA_PREFIX="" From 6afccc07dad40635bec4562f5d2bdf139bfef038 Mon Sep 17 00:00:00 2001 From: Karthick Narendran Date: Thu, 9 Apr 2020 17:15:02 +0100 Subject: [PATCH 6/6] Update comments as suggested by Keith Co-Authored-By: Keith Turner --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 2dc1a20..6102757 100644 --- a/Dockerfile +++ b/Dockerfile @@ -78,7 +78,7 @@ RUN mv /tmp/accumulo-$ACCUMULO_VERSION /opt/accumulo RUN /opt/accumulo/bin/accumulo-util build-native -# The below line is required for Accumulo 2.0 to work with ZK 3.5 & above. +# The below line is required for Accumulo 2.0 to work with ZK 3.5 & above. 
This will not be needed for Accumulo 2.1 RUN sed -i 's/\${ZOOKEEPER_HOME}\/\*/\${ZOOKEEPER_HOME}\/\*\:\${ZOOKEEPER_HOME}\/lib\/\*/g' /opt/accumulo/conf/accumulo-env.sh ADD ./accumulo.properties /opt/accumulo/conf