From eafe9774eb84e266d2007428cbcfeacaa94c0995 Mon Sep 17 00:00:00 2001 From: frank chen Date: Tue, 25 May 2021 17:24:26 +0800 Subject: [PATCH 1/7] Create /opt/data to fix permission problem --- distribution/docker/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/distribution/docker/Dockerfile b/distribution/docker/Dockerfile index 06d55dee6933..ea4bdd31aa32 100644 --- a/distribution/docker/Dockerfile +++ b/distribution/docker/Dockerfile @@ -58,7 +58,10 @@ COPY --chown=druid:druid --from=builder /opt /opt COPY distribution/docker/druid.sh /druid.sh RUN mkdir /opt/druid/var \ && chown druid:druid /opt/druid/var \ - && chmod 775 /opt/druid/var + && chmod 775 /opt/druid/var \ + && mkdir /opt/data \ + && chown druid:druid /opt/data \ + && chmod 775 /opt/data USER druid VOLUME /opt/druid/var From 4c1556adec67038b278442652b3c4b699452e507 Mon Sep 17 00:00:00 2001 From: frank chen Date: Tue, 25 May 2021 17:26:44 +0800 Subject: [PATCH 2/7] eliminate symlink to avoid compatibility problem on AWS Fargate --- distribution/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distribution/docker/Dockerfile b/distribution/docker/Dockerfile index ea4bdd31aa32..11ef4e015f15 100644 --- a/distribution/docker/Dockerfile +++ b/distribution/docker/Dockerfile @@ -41,7 +41,7 @@ RUN VERSION=$(mvn -B -q org.apache.maven.plugins:maven-help-plugin:3.2.0:evaluat -Dexpression=project.version -DforceStdout=true \ ) \ && tar -zxf ./distribution/target/apache-druid-${VERSION}-bin.tar.gz -C /opt \ - && ln -s /opt/apache-druid-${VERSION} /opt/druid + && mv /opt/apache-druid-${VERSION} /opt/druid FROM amd64/busybox:1.30.0-glibc as busybox From 60038adca2aeb066a7f9ea4cb8b0a10f08d46191 Mon Sep 17 00:00:00 2001 From: frank chen Date: Tue, 25 May 2021 17:55:07 +0800 Subject: [PATCH 3/7] Add a workaround section --- docs/tutorials/docker.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/tutorials/docker.md 
b/docs/tutorials/docker.md index 33dc8e735856..b0ac7be0e6e0 100644 --- a/docs/tutorials/docker.md +++ b/docs/tutorials/docker.md @@ -79,5 +79,24 @@ It takes a few seconds for all the Druid processes to fully start up. If you ope From here you can follow along with the [Quickstart](./index.md#step-4-load-data), or elaborate on your `docker-compose.yml` to add any additional external service dependencies as necessary. +> Note: The example docker-compose.yml mounts a local directory `distribution/docker/storage` for docker processes to keep Druid task logs and segments. +> +> On some Linux platforms, ingestion tasks won't success due to a permission problem of this directory. +> +> You could execute following command to set up permission correctly after launching the cluster +> +> ``` +> sudo docker exec --user root middlemanager chown druid:druid /opt/data +> ``` +> +> or use a named volume in docker-compose.yml by replacing all lines +> ``` +> ./storage:/opt/data +> ``` +> in the example docker-compose.yml to +> ``` +> druid_data_storage:/opt/data +> ``` + ## Docker Memory Requirements If you experience any processes crashing with a 137 error code you likely don't have enough memory allocated to Docker. 6 GB may be a good place to start. 
From be1336ac49c9d6202cee4d930c6621be83834fbd Mon Sep 17 00:00:00 2001 From: frank chen Date: Wed, 26 May 2021 15:43:02 +0800 Subject: [PATCH 4/7] Update instruction for named volume --- docs/tutorials/docker.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/tutorials/docker.md b/docs/tutorials/docker.md index b0ac7be0e6e0..29f55bf1bbd8 100644 --- a/docs/tutorials/docker.md +++ b/docs/tutorials/docker.md @@ -89,14 +89,21 @@ From here you can follow along with the [Quickstart](./index.md#step-4-load-data > sudo docker exec --user root middlemanager chown druid:druid /opt/data > ``` > -> or use a named volume in docker-compose.yml by replacing all lines +> or use a named volume in the docker-compose.yml by replacing all lines > ``` > ./storage:/opt/data > ``` -> in the example docker-compose.yml to +> to > ``` > druid_data_storage:/opt/data > ``` +> and declare the named volume under the 'volumes' property as +> ``` +> volumes: +> ... +> druid_data_storage: {} +> ``` +> ## Docker Memory Requirements If you experience any processes crashing with a 137 error code you likely don't have enough memory allocated to Docker. 6 GB may be a good place to start. 
From 28e4b80b48460204849b4a3d1ee32a3f25f8969b Mon Sep 17 00:00:00 2001 From: frank chen Date: Fri, 28 May 2021 09:20:20 +0800 Subject: [PATCH 5/7] Use named volume in docker-compose --- distribution/docker/docker-compose.yml | 7 +++--- docs/tutorials/docker.md | 33 +++++--------------------- 2 files changed, 10 insertions(+), 30 deletions(-) diff --git a/distribution/docker/docker-compose.yml b/distribution/docker/docker-compose.yml index a940f4af66ea..0ea513f02cae 100644 --- a/distribution/docker/docker-compose.yml +++ b/distribution/docker/docker-compose.yml @@ -25,6 +25,7 @@ volumes: broker_var: {} coordinator_var: {} router_var: {} + druid_data: {} services: @@ -51,7 +52,7 @@ services: image: apache/druid:0.22.0 container_name: coordinator volumes: - - ./storage:/opt/data + - druid_data:/opt/data - coordinator_var:/opt/druid/var depends_on: - zookeeper @@ -83,7 +84,7 @@ services: image: apache/druid:0.22.0 container_name: historical volumes: - - ./storage:/opt/data + - druid_data:/opt/data - historical_var:/opt/druid/var depends_on: - zookeeper @@ -100,7 +101,7 @@ services: image: apache/druid:0.22.0 container_name: middlemanager volumes: - - ./storage:/opt/data + - druid_data:/opt/data - middle_var:/opt/druid/var depends_on: - zookeeper diff --git a/docs/tutorials/docker.md b/docs/tutorials/docker.md index 29f55bf1bbd8..4cb76ec07dc5 100644 --- a/docs/tutorials/docker.md +++ b/docs/tutorials/docker.md @@ -36,7 +36,11 @@ The Druid source code contains [an example `docker-compose.yml`](https://github. ### Compose file -The example `docker-compose.yml` will create a container for each Druid service, as well as Zookeeper and a PostgreSQL container as the metadata store. Deep storage will be a local directory, by default configured as `./storage` relative to your `docker-compose.yml` file, and will be mounted as `/opt/data` and shared between Druid containers which require access to deep storage. 
The Druid containers are configured via an [environment file](https://github.com/apache/druid/blob/{{DRUIDVERSION}}/distribution/docker/environment). +The example `docker-compose.yml` will create a container for each Druid service, as well as Zookeeper and a PostgreSQL container as the metadata store. + +It will also create a named volumes `druid-data`, which is mounted as `opt/data` in container, as deep storage to keep and share segments and task logs among Druid services. + +The Druid containers are configured via an [environment file](https://github.com/apache/druid/blob/{{DRUIDVERSION}}/distribution/docker/environment). ### Configuration @@ -79,31 +83,6 @@ It takes a few seconds for all the Druid processes to fully start up. If you ope From here you can follow along with the [Quickstart](./index.md#step-4-load-data), or elaborate on your `docker-compose.yml` to add any additional external service dependencies as necessary. -> Note: The example docker-compose.yml mounts a local directory `distribution/docker/storage` for docker processes to keep Druid task logs and segments. -> -> On some Linux platforms, ingestion tasks won't success due to a permission problem of this directory. -> -> You could execute following command to set up permission correctly after launching the cluster -> -> ``` -> sudo docker exec --user root middlemanager chown druid:druid /opt/data -> ``` -> -> or use a named volume in the docker-compose.yml by replacing all lines -> ``` -> ./storage:/opt/data -> ``` -> to -> ``` -> druid_data_storage:/opt/data -> ``` -> and declare the named volume under the 'volumes' property as -> ``` -> volumes: -> ... -> druid_data_storage: {} -> ``` -> ## Docker Memory Requirements -If you experience any processes crashing with a 137 error code you likely don't have enough memory allocated to Docker. 6 GB may be a good place to start. +If you experience any processes crashing with a 137 error code you likely don't have enough memory allocated to Docker. 
6 GB may be a good place to start. From 51ba67d5f034e70d59042894f3ad434358175080 Mon Sep 17 00:00:00 2001 From: frank chen Date: Fri, 28 May 2021 09:55:29 +0800 Subject: [PATCH 6/7] Revert some doc change --- docs/tutorials/docker.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/tutorials/docker.md b/docs/tutorials/docker.md index 4cb76ec07dc5..128fd066bc31 100644 --- a/docs/tutorials/docker.md +++ b/docs/tutorials/docker.md @@ -83,6 +83,5 @@ It takes a few seconds for all the Druid processes to fully start up. If you ope From here you can follow along with the [Quickstart](./index.md#step-4-load-data), or elaborate on your `docker-compose.yml` to add any additional external service dependencies as necessary. - ## Docker Memory Requirements -If you experience any processes crashing with a 137 error code you likely don't have enough memory allocated to Docker. 6 GB may be a good place to start. +If you experience any processes crashing with a 137 error code you likely don't have enough memory allocated to Docker. 6 GB may be a good place to start. 
From c8a1229efefe837df9c2bbfc758090d2c8930a7b Mon Sep 17 00:00:00 2001 From: frank chen Date: Fri, 28 May 2021 19:14:03 +0800 Subject: [PATCH 7/7] Resolve review comments --- distribution/docker/Dockerfile | 13 +++++++------ distribution/docker/docker-compose.yml | 8 ++++---- distribution/docker/environment | 6 +++--- docs/tutorials/docker.md | 4 ++-- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/distribution/docker/Dockerfile b/distribution/docker/Dockerfile index 11ef4e015f15..7e7cdcb632c3 100644 --- a/distribution/docker/Dockerfile +++ b/distribution/docker/Dockerfile @@ -56,12 +56,13 @@ RUN addgroup -S -g 1000 druid \ COPY --chown=druid:druid --from=builder /opt /opt COPY distribution/docker/druid.sh /druid.sh -RUN mkdir /opt/druid/var \ - && chown druid:druid /opt/druid/var \ - && chmod 775 /opt/druid/var \ - && mkdir /opt/data \ - && chown druid:druid /opt/data \ - && chmod 775 /opt/data + +# create necessary directories which could be mounted as volume +# /opt/druid/var is used to keep individual files(e.g. 
log) of each Druid service +# /opt/shared is used to keep segments and task logs shared among Druid services +RUN mkdir /opt/druid/var /opt/shared \ + && chown druid:druid /opt/druid/var /opt/shared \ + && chmod 775 /opt/druid/var /opt/shared USER druid VOLUME /opt/druid/var diff --git a/distribution/docker/docker-compose.yml b/distribution/docker/docker-compose.yml index 0ea513f02cae..ed4dbef4c068 100644 --- a/distribution/docker/docker-compose.yml +++ b/distribution/docker/docker-compose.yml @@ -25,7 +25,7 @@ volumes: broker_var: {} coordinator_var: {} router_var: {} - druid_data: {} + druid_shared: {} services: @@ -52,7 +52,7 @@ services: image: apache/druid:0.22.0 container_name: coordinator volumes: - - druid_data:/opt/data + - druid_shared:/opt/shared - coordinator_var:/opt/druid/var depends_on: - zookeeper @@ -84,7 +84,7 @@ services: image: apache/druid:0.22.0 container_name: historical volumes: - - druid_data:/opt/data + - druid_shared:/opt/shared - historical_var:/opt/druid/var depends_on: - zookeeper @@ -101,7 +101,7 @@ services: image: apache/druid:0.22.0 container_name: middlemanager volumes: - - druid_data:/opt/data + - druid_shared:/opt/shared - middle_var:/opt/druid/var depends_on: - zookeeper diff --git a/distribution/docker/environment b/distribution/docker/environment index 2023138b20ed..7bb9214a7011 100644 --- a/distribution/docker/environment +++ b/distribution/docker/environment @@ -39,12 +39,12 @@ druid_metadata_storage_connector_password=FoolishPassword druid_coordinator_balancer_strategy=cachingCost druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"] -druid_indexer_fork_property_druid_processing_buffer_sizeBytes=268435456 +druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB druid_storage_type=local -druid_storage_storageDirectory=/opt/data/segments 
+druid_storage_storageDirectory=/opt/shared/segments druid_indexer_logs_type=file -druid_indexer_logs_directory=/opt/data/indexing-logs +druid_indexer_logs_directory=/opt/shared/indexing-logs druid_processing_numThreads=2 druid_processing_numMergeBuffers=2 diff --git a/docs/tutorials/docker.md b/docs/tutorials/docker.md index 128fd066bc31..b64b636b9956 100644 --- a/docs/tutorials/docker.md +++ b/docs/tutorials/docker.md @@ -36,9 +36,9 @@ The Druid source code contains [an example `docker-compose.yml`](https://github. ### Compose file -The example `docker-compose.yml` will create a container for each Druid service, as well as Zookeeper and a PostgreSQL container as the metadata store. +The example `docker-compose.yml` will create a container for each Druid service, as well as ZooKeeper and a PostgreSQL container as the metadata store. -It will also create a named volumes `druid-data`, which is mounted as `opt/data` in container, as deep storage to keep and share segments and task logs among Druid services. +It will also create a named volume `druid_shared`, which is mounted as `/opt/shared` in the container, as deep storage to keep and share segments and task logs among Druid services. The Druid containers are configured via an [environment file](https://github.com/apache/druid/blob/{{DRUIDVERSION}}/distribution/docker/environment).