diff --git a/Dockerfile b/Dockerfile index e8b4840..ba60fc9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,16 +13,86 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG OZONE_RUNNER_IMAGE=apache/ozone-runner -ARG OZONE_RUNNER_VERSION=20260106-1-jdk21 +ARG OZONE_VERSION=2.1.0 +ARG OZONE_RUNNER_IMAGE=apache/ozone +ARG OZONE_RUNNER_VERSION=${OZONE_VERSION}-slim + FROM ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} -ARG OZONE_VERSION=2.1.0 -ARG OZONE_URL="https://www.apache.org/dyn/closer.lua?action=download&filename=ozone/${OZONE_VERSION}/ozone-${OZONE_VERSION}.tar.gz" +# Default Ozone settings as <FILE>_<property>=<value> variables; each can be overridden with docker run -e +ENV CORE-SITE.XML_fs.defaultFS=ofs://localhost \ + CORE-SITE.XML_fs.trash.interval=1 \ + CORE-SITE.XML_hadoop.proxyuser.hadoop.hosts=* \ + CORE-SITE.XML_hadoop.proxyuser.hadoop.groups=* \ + OZONE-SITE.XML_ozone.om.address=localhost \ + OZONE-SITE.XML_ozone.om.http-address=localhost:9874 \ + OZONE-SITE.XML_ozone.scm.http-address=localhost:9876 \ + OZONE-SITE.XML_ozone.scm.container.size=1GB \ + OZONE-SITE.XML_ozone.scm.block.size=1MB \ + OZONE-SITE.XML_ozone.scm.datanode.ratis.volume.free-space.min=10MB \ + OZONE-SITE.XML_ozone.scm.pipeline.creation.interval=5s \ + OZONE-SITE.XML_ozone.scm.pipeline.owner.container.count=1 \ + OZONE-SITE.XML_ozone.scm.ec.pipeline.minimum=1 \ + OZONE-SITE.XML_ozone.scm.names=localhost \ + OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data/metadata \ + OZONE-SITE.XML_ozone.scm.block.client.address=localhost \ + OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata \ + OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon \ + OZONE-SITE.XML_ozone.scm.client.address=localhost \ + OZONE-SITE.XML_hdds.datanode.dir=/data/hdds \ + OZONE-SITE.XML_hdds.datanode.volume.min.free.space=100MB \ + OZONE-SITE.XML_hdds.datanode.volume.min.free.space.percent=0 \ + OZONE-SITE.XML_ozone.recon.address=localhost:9891 \ + OZONE-SITE.XML_ozone.recon.http-address=0.0.0.0:9888 \ + 
OZONE-SITE.XML_ozone.recon.https-address=0.0.0.0:9889 \ + OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m \ + OZONE-SITE.XML_ozone.datanode.pipeline.limit=1 \ + OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s \ + OZONE-SITE.XML_hdds.container.report.interval=60s \ + OZONE-SITE.XML_ozone.scm.stale.node.interval=30s \ + OZONE-SITE.XML_ozone.scm.dead.node.interval=45s \ + OZONE-SITE.XML_hdds.heartbeat.interval=5s \ + OZONE-SITE.XML_ozone.scm.close.container.wait.duration=5s \ + OZONE-SITE.XML_hdds.scm.replication.thread.interval=15s \ + OZONE-SITE.XML_hdds.scm.replication.under.replicated.interval=5s \ + OZONE-SITE.XML_hdds.scm.replication.over.replicated.interval=5s \ + OZONE-SITE.XML_hdds.scm.wait.time.after.safemode.exit=5s \ + OZONE-SITE.XML_ozone.http.basedir=/tmp/ozone_http \ + OZONE-SITE.XML_hdds.container.ratis.datastream.enabled=true \ + OZONE-SITE.XML_ozone.fs.hsync.enabled=true \ + OZONE-SITE.XML_ozone.recon.dn.metrics.collection.minimum.api.delay=5s \ + OZONE-SITE.XML_ozone.filesystem.snapshot.enabled=true \ + OZONE-SITE.XML_ozone.server.default.replication=1 \ + OZONE-SITE.XML_hdds.scm.safemode.min.datanode=1 \ + OZONE-SITE.XML_dfs.container.ratis.datanode.storage.dir=/data/metadata/dn \ + OZONE_CONF_DIR=/etc/hadoop \ + OZONE_LOG_DIR=/var/log/hadoop \ + no_proxy=localhost,127.0.0.1 + +# Expose all service ports +# SCM ports +EXPOSE 9876 9860 +# OM ports +EXPOSE 9874 9862 +# DataNode ports +EXPOSE 19864 9882 +# S3 Gateway ports +EXPOSE 9878 19878 +# Recon ports +EXPOSE 9888 +# HttpFS ports +EXPOSE 14000 + +# Expose volumes for data persistence +VOLUME ["/data/metadata"] +VOLUME ["/data/hdds"] +VOLUME ["/var/log/hadoop"] -WORKDIR /opt -RUN sudo rm -rf /opt/hadoop && curl -LSs -o ozone.tar.gz $OZONE_URL && tar zxf ozone.tar.gz && rm ozone.tar.gz && mv ozone* hadoop +# Install the startup script with executable (755) permissions +COPY --chmod=755 start-all-services.sh /usr/local/bin/start-all-services.sh -WORKDIR /opt/hadoop +# Run as root so the startup script can prepare the volume 
directories before it switches to the hadoop user +USER root -CMD ["echo","Please check https://github.com/apache/ozone-docker for information."] +# Run the startup script as the container's default command (CMD, not ENTRYPOINT) +CMD ["/usr/local/bin/start-all-services.sh"] diff --git a/QUICKSTART.md b/QUICKSTART.md new file mode 100644 index 0000000..a61f0d9 --- /dev/null +++ b/QUICKSTART.md @@ -0,0 +1,276 @@ + + +# Apache Ozone Quick Start + +Get started with Apache Ozone in 5 minutes using a single-container setup with the AWS S3 CLI. + +## What is the Quickstart Image? + +The quickstart image is a single-container version of Apache Ozone that runs all services (SCM, OM, DataNode, S3 Gateway, Recon, HttpFS) in one container. It's designed for: + +- Quick development and testing +- Learning Ozone functionality +- Demos and POCs +- Local S3-compatible storage + +> **Note**: For production deployments, use the [multi-container setup](docker-compose.yaml) with separate containers for each service. + +## Prerequisites + +- Docker installed +- AWS CLI installed (optional, for S3 testing) + - macOS: `brew install awscli` + - pip: `pip install awscli` + +## Quick Start + +### Step 1: Start Ozone + +```bash +docker run -d \ + --name ozone \ + -p 9876:9876 \ + -p 9874:9874 \ + -p 19864:19864 \ + -p 9878:9878 \ + -p 9888:9888 \ + -p 14000:14000 \ + -v ozone-metadata:/data/metadata \ + -v ozone-hdds:/data/hdds \ + -v ozone-logs:/var/log/hadoop \ + apache/ozone-quickstart:latest + +# Wait 10-30 seconds for services to start +docker logs -f ozone +``` + +### Step 2: Configure AWS CLI + +Set up AWS CLI to point to Ozone's S3 Gateway: + +```bash +# Configure credentials (use any value for local testing) +aws configure set aws_access_key_id ozone +aws configure set aws_secret_access_key ozone +aws configure set default.s3.signature_version s3v4 +``` + +**Note**: For this single-node cluster, authentication is minimal. Use any credentials. 
+ +### Step 3: Create a Bucket + +```bash +# Create bucket +aws s3api create-bucket \ + --bucket mybucket \ + --endpoint-url http://localhost:9878 + +# List all buckets +aws s3 ls --endpoint-url http://localhost:9878 +``` + +### Step 4: Upload and Download Files + +```bash +# Create a test file +echo "Hello, Ozone!" > test.txt + +# Upload file +aws s3 cp test.txt s3://mybucket/mykey.txt \ + --endpoint-url http://localhost:9878 + +# List keys in bucket +aws s3 ls s3://mybucket/ --endpoint-url http://localhost:9878 + +# Download file +aws s3 cp s3://mybucket/mykey.txt downloaded.txt \ + --endpoint-url http://localhost:9878 + +# Verify content +cat downloaded.txt +``` + +## Common Operations + +### Upload Directory +```bash +aws s3 sync ./local-folder/ s3://mybucket/remote-folder/ \ + --endpoint-url http://localhost:9878 +``` + +### Download Directory +```bash +aws s3 sync s3://mybucket/remote-folder/ ./local-folder/ \ + --endpoint-url http://localhost:9878 +``` + +### Delete Objects +```bash +aws s3 rm s3://mybucket/ --recursive --endpoint-url http://localhost:9878 +``` + +### Copy Between Buckets +```bash +aws s3 cp s3://source-bucket/file.txt s3://dest-bucket/file.txt \ + --endpoint-url http://localhost:9878 +``` + +## Using Ozone Native CLI + +If you prefer Ozone's native CLI instead of S3: + +```bash +# Enter the container +docker exec -it ozone bash + +# Create a volume +ozone sh volume create /vol1 + +# Create a bucket +ozone sh bucket create /vol1/bucket1 + +# Upload key +echo "Hello Ozone" > /tmp/test.txt +ozone sh key put /vol1/bucket1/key1 /tmp/test.txt + +# Download key +ozone sh key get /vol1/bucket1/key1 /tmp/test-out.txt + +# List keys +ozone sh key list /vol1/bucket1/ +``` + +## Web UIs + +Access Ozone's web interfaces: + +- **Recon Dashboard**: http://localhost:9888 - Visual overview of volumes, buckets, and keys +- **S3 Gateway**: http://localhost:9878 +- **SCM Web UI**: http://localhost:9876 +- **OM Web UI**: http://localhost:9874 +- **DataNode Web 
UI**: http://localhost:19864 +- **HttpFS API**: http://localhost:14000 + +## Managing the Container + +### Stop the Container +```bash +docker stop ozone +``` + +### Restart the Container +```bash +docker start ozone +``` + +### Remove the Container +```bash +docker rm ozone +``` + +### Remove Volumes (Start Fresh) +```bash +docker volume rm ozone-metadata ozone-hdds ozone-logs +``` + +### Check Logs +```bash +# All services +docker logs ozone + +# Specific service +docker exec ozone tail -f /var/log/hadoop/scm.log +docker exec ozone tail -f /var/log/hadoop/om.log +docker exec ozone tail -f /var/log/hadoop/datanode.log +docker exec ozone tail -f /var/log/hadoop/s3g.log +docker exec ozone tail -f /var/log/hadoop/recon.log +docker exec ozone tail -f /var/log/hadoop/httpfs.log +``` + +## Troubleshooting + +### Container Exits Immediately + +Check the logs: +```bash +docker logs ozone +``` + +Ensure you have allocated enough resources to Docker (at least 4GB RAM recommended). + +### SCM Not Exiting Safe Mode + +Wait 10-30 seconds after startup. 
Check status: +```bash +docker exec ozone ozone admin safemode status --verbose +``` + +### Permission Errors + +If using host-mounted directories instead of Docker volumes, ensure proper permissions: +```bash +mkdir -p ./ozone-metadata ./ozone-hdds ./ozone-logs +chmod -R 777 ./ozone-metadata ./ozone-hdds ./ozone-logs +``` + +### Reset and Start Fresh + +```bash +# Stop and remove container +docker stop ozone +docker rm ozone + +# Remove all volumes +docker volume rm ozone-metadata ozone-hdds ozone-logs + +# Start fresh +docker run -d \ + --name ozone \ + -p 9876:9876 -p 9874:9874 -p 19864:19864 \ + -p 9878:9878 -p 9888:9888 -p 14000:14000 \ + -v ozone-metadata:/data/metadata \ + -v ozone-hdds:/data/hdds \ + -v ozone-logs:/var/log/hadoop \ + apache/ozone-quickstart:latest +``` + +## Runtime Configuration + +Override configuration at runtime using environment variables: + +```bash +docker run -d \ + --name ozone \ + -p 9876:9876 -p 9874:9874 -p 19864:19864 \ + -p 9878:9878 -p 9888:9888 -p 14000:14000 \ + -e OZONE-SITE.XML_ozone.server.default.replication=3 \ + -e OZONE-SITE.XML_hdds.scm.safemode.min.datanode=1 \ + -v ozone-metadata:/data/metadata \ + -v ozone-hdds:/data/hdds \ + -v ozone-logs:/var/log/hadoop \ + apache/ozone-quickstart:latest +``` + +## Next Steps + +- Visit [Apache Ozone documentation](https://ozone.apache.org/docs/) + +--- + +**That's it!** You now have a running Ozone cluster with S3-compatible storage. 🎉 + diff --git a/build.sh b/build.sh index a093cdf..c4a3877 100755 --- a/build.sh +++ b/build.sh @@ -21,5 +21,5 @@ docker build \ --build-arg OZONE_RUNNER_VERSION \ --build-arg OZONE_URL \ --build-arg OZONE_VERSION \ - -t apache/ozone:dev \ - $@ - < Dockerfile + -t apache/ozone-quickstart:latest \ + "$@" . 
diff --git a/start-all-services.sh b/start-all-services.sh
new file mode 100755
index 0000000..9bdc8b2
--- /dev/null
+++ b/start-all-services.sh
@@ -0,0 +1,191 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Starts every Ozone service (SCM, OM, DataNode, S3 Gateway, Recon, HttpFS)
+# inside one container and supervises them:
+#   1. As root: prepare the data and log directories, then re-execute this
+#      script as the hadoop user.
+#   2. As hadoop: initialize SCM and OM on first start and launch every
+#      service in the background, logging to /var/log/hadoop.
+#   3. Poll SCM until it reports that it has left safe mode (bounded wait).
+#   4. Block until one service exits or SIGTERM/SIGINT arrives, then stop
+#      the remaining services.
+
+set -e
+
+# Safe mode polling: maximum number of attempts and pause between them.
+SAFEMODE_MAX_ATTEMPTS=90
+SAFEMODE_POLL_SECONDS=2
+
+# PIDs of the services started so far; shutdown() signals exactly these.
+SERVICE_PIDS=()
+
+# Stop every service started so far, wait for them, and exit.
+# Arguments: $1 - exit status of the script (default: 0)
+shutdown() {
+  local exit_code="${1:-0}"
+  echo "Shutting down Ozone services..."
+  if [ "${#SERVICE_PIDS[@]}" -gt 0 ]; then
+    # Best effort: a service may already have exited.
+    kill "${SERVICE_PIDS[@]}" 2>/dev/null || true
+    wait "${SERVICE_PIDS[@]}" 2>/dev/null || true
+  fi
+  echo "All services stopped."
+  exit "$exit_code"
+}
+
+echo "Starting Ozone Container..."
+
+# Only do initial setup if running as root
+if [ "$(id -u)" = "0" ]; then
+  echo "Setting up directories with proper permissions..."
+  mkdir -p /data/metadata /data/hdds /var/log/hadoop
+  chown -R hadoop:hadoop /data/metadata /data/hdds /var/log/hadoop
+  chmod -R 755 /data
+
+  # Switch to hadoop user and re-execute this script
+  exec su -s /bin/bash hadoop -c "$0"
+fi
+
+# From here on, we're running as the hadoop user
+echo "Running as user: $(whoami)"
+
+# Install the handler before any service is started so that a stop request
+# received during startup still stops the services launched so far.
+trap shutdown SIGTERM SIGINT
+
+# Initialize SCM if not already initialized
+if [ ! -f "/data/metadata/scm/current/VERSION" ]; then
+  echo "Initializing SCM..."
+  ozone scm --init
+fi
+
+# Start SCM in background
+echo "Starting Storage Container Manager (SCM)..."
+ozone scm > /var/log/hadoop/scm.log 2>&1 &
+SCM_PID=$!
+SERVICE_PIDS+=("$SCM_PID")
+echo "SCM started with PID: $SCM_PID"
+
+# Initialize OM if not already initialized
+if [ ! -f "/data/metadata/om/current/VERSION" ]; then
+  echo "Initializing OM..."
+  ozone om --init
+fi
+
+# Start OM in background
+echo "Starting Ozone Manager (OM)..."
+ozone om > /var/log/hadoop/om.log 2>&1 &
+OM_PID=$!
+SERVICE_PIDS+=("$OM_PID")
+echo "OM started with PID: $OM_PID"
+
+# Wait a bit for OM to initialize
+sleep 5
+
+# Start DataNode in background
+echo "Starting DataNode..."
+ozone datanode > /var/log/hadoop/datanode.log 2>&1 &
+DN_PID=$!
+SERVICE_PIDS+=("$DN_PID")
+echo "DataNode started with PID: $DN_PID"
+
+# Start S3 Gateway in background
+echo "Starting S3 Gateway..."
+ozone s3g > /var/log/hadoop/s3g.log 2>&1 &
+S3G_PID=$!
+SERVICE_PIDS+=("$S3G_PID")
+echo "S3 Gateway started with PID: $S3G_PID"
+
+# Start Recon in background
+echo "Starting Recon..."
+ozone recon > /var/log/hadoop/recon.log 2>&1 &
+RECON_PID=$!
+SERVICE_PIDS+=("$RECON_PID")
+echo "Recon started with PID: $RECON_PID"
+
+# Start HttpFS in background
+echo "Starting HttpFS..."
+ozone httpfs > /var/log/hadoop/httpfs.log 2>&1 &
+HTTPFS_PID=$!
+SERVICE_PIDS+=("$HTTPFS_PID")
+echo "HttpFS started with PID: $HTTPFS_PID"
+
+# Wait for SCM to exit safe mode
+echo "Waiting for ozone to be ready"
+echo "Note: This can take 60-90 seconds"
+ozone_ready=false
+for ((attempt = 1; attempt <= SAFEMODE_MAX_ATTEMPTS; attempt++)); do
+  # The query may fail while SCM is still starting up; tolerate that instead
+  # of letting set -e abort the whole container.
+  safemode_output=$(ozone admin safemode status 2>&1) || true
+  if echo "$safemode_output" | grep -q "SCM is out of safe mode"; then
+    echo "Ozone is ready"
+    ozone_ready=true
+    break
+  fi
+
+  # Show progress on every 5th attempt
+  if [ $((attempt % 5)) -eq 0 ]; then
+    if echo "$safemode_output" | grep -q "SCM is in safe mode"; then
+      echo " Status: Waiting for Ozone to be ready"
+    fi
+  fi
+
+  # No pause after the final attempt
+  if [ "$attempt" -lt "$SAFEMODE_MAX_ATTEMPTS" ]; then
+    sleep "$SAFEMODE_POLL_SECONDS"
+  fi
+done
+
+if [ "$ozone_ready" != "true" ]; then
+  echo "Ozone did not exit safe mode within $((SAFEMODE_MAX_ATTEMPTS * SAFEMODE_POLL_SECONDS)) seconds"
+  echo "Current safe mode status:"
+  # Diagnostics only: a failure here must not stop the container.
+  ozone admin safemode status --verbose || true
+  echo ""
+  tail -n 50 /var/log/hadoop/scm.log || true
+  echo ""
+  echo "Container will continue running, but you may need to manually check safe mode status"
+fi
+
+echo ""
+echo "=========================================="
+if [ "$ozone_ready" = "true" ]; then
+  echo "All Ozone services started successfully!"
+else
+  echo "Ozone services started, but SCM has not left safe mode yet"
+fi
+echo "=========================================="
+echo ""
+echo " - S3 Gateway: PID $S3G_PID"
+echo " Endpoint: http://localhost:9878"
+echo ""
+echo " - Recon: PID $RECON_PID"
+echo " Web UI: http://localhost:9888"
+echo "=========================================="
+echo ""
+
+# Block until the first service exits. Capture its status explicitly: under
+# set -e a non-zero status would otherwise end the script before the
+# remaining services are stopped.
+exit_code=0
+wait -n "${SERVICE_PIDS[@]}" || exit_code=$?
+
+# If any process exits, shutdown all
+echo "Ozone exited unexpectedly. Shutting down..."
+shutdown "$exit_code"