diff --git a/4.0.0-preview1/scala2.13-java17-ubuntu/Dockerfile b/4.0.0-preview1/scala2.13-java17-ubuntu/Dockerfile index 913f2ad..1102caf 100644 --- a/4.0.0-preview1/scala2.13-java17-ubuntu/Dockerfile +++ b/4.0.0-preview1/scala2.13-java17-ubuntu/Dockerfile @@ -46,8 +46,6 @@ RUN set -ex; \ wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ export GNUPGHOME="$(mktemp -d)"; \ - wget -nv -O KEYS https://downloads.apache.org/spark/KEYS; \ - gpg --import KEYS; \ gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ gpg --batch --verify spark.tgz.asc spark.tgz; \ diff --git a/README.md b/README.md index 87286dc..f34328b 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,55 @@ and Structured Streaming for stream processing. https://spark.apache.org/ +## Create a new version + +### Step 1 Add dockerfiles for a new version. + +You can see [3.4.0 PR](https://github.com/apache/spark-docker/pull/33) as reference. + +- 1.1 Add gpg key to [tools/template.py](https://github.com/apache/spark-docker/blob/master/tools/template.py#L24) + + This gpg key will be used by Dockerfiles (such as [3.4.0](https://github.com/apache/spark-docker/blob/04e85239a8fcc9b3dcfe146bc144ee2b981f8f42/3.4.0/scala2.12-java11-ubuntu/Dockerfile#L41)) to verify the signature of the Apache Spark tarball. + +- 1.2 Add image build workflow (such as [3.4.0 yaml](https://github.com/apache/spark-docker/blob/04e85239a8fcc9b3dcfe146bc144ee2b981f8f42/.github/workflows/build_3.4.0.yaml)) + + This file will be used by GitHub Actions to build the Docker image when you submit the PR to make sure dockerfiles are correct and pass all tests (build/standalone/kubernetes). + +- 1.3 Using `./add-dockerfiles.sh [version]` to add Dockerfiles. + + You will get a new directory with the Dockerfiles for the specified version. 
+ +- 1.4 Add version and tag info to versions.json, publish.yml and test.yml. + + This version file will be used by image build workflow (such as [3.4.0](https://github.com/apache/spark-docker/commit/47c357a52625f482b8b0cb831ccb8c9df523affd) reference) and docker official image. + +### Step 2. Publish apache/spark Images. + +Click [Publish (Java 17 only)](https://github.com/apache/spark-docker/actions/workflows/publish-java17.yaml) (such as 4.x) or [Publish](https://github.com/apache/spark-docker/actions/workflows/publish.yml) (such as 3.x) to publish images. + +After this, the [apache/spark](https://hub.docker.com/r/apache/spark) docker images will be published. + + +### Step 3. Publish spark Docker Official Images. + +Submit the PR to [docker-library/official-images](https://github.com/docker-library/official-images/), see [link](https://github.com/docker-library/official-images/pull/15363) as reference. + +You can type `tools/manifest.py manifest` to generate the content. + +After this, the [spark](https://hub.docker.com/_/spark) docker images will be published. 
+ +## About images + +| | Apache Spark Image | Spark Docker Official Image | +|---------------|--------------------------------------------------------|--------------------------------------------------------| +| Name | apache/spark | spark | +| Maintenance | Reviewed, published by Apache Spark community | Reviewed, published and maintained by Docker community | +| Update policy | Only build and push once when specific version release | Actively rebuild for updates and security fixes | +| Link | https://hub.docker.com/r/apache/spark | https://hub.docker.com/_/spark | +| source | [apache/spark-docker](https://github.com/apache/spark-docker) | [apache/spark-docker](https://github.com/apache/spark-docker) and [docker-library/official-images](https://github.com/docker-library/official-images/blob/master/library/spark) | + +We recommend using the [Spark Docker Official Image](https://hub.docker.com/_/spark); the [Apache Spark Image](https://hub.docker.com/r/apache/spark) is provided in case of delays in the review process by the Docker community. + ## About this repository This repository contains the Dockerfiles used to build the Apache Spark Docker Image. diff --git a/add-dockerfiles.sh b/add-dockerfiles.sh index 5161c0c..63f610c 100755 --- a/add-dockerfiles.sh +++ b/add-dockerfiles.sh @@ -28,21 +28,31 @@ VERSION=${1:-"3.5.0"} -TAGS=" -scala2.12-java11-python3-r-ubuntu -scala2.12-java11-python3-ubuntu -scala2.12-java11-r-ubuntu -scala2.12-java11-ubuntu -" - -# java17 images were added in 3.5.0. We need to skip java17 for 3.3.x and 3.4.x -if ! 
echo $VERSION | grep -Eq "^3.3|^3.4"; then - TAGS+=" - scala2.12-java17-python3-r-ubuntu - scala2.12-java17-python3-ubuntu - scala2.12-java17-r-ubuntu - scala2.12-java17-ubuntu - " +if echo $VERSION | grep -Eq "^4."; then + # 4.x default + TAGS=" + scala2.13-java17-python3-r-ubuntu + scala2.13-java17-python3-ubuntu + scala2.13-java17-r-ubuntu + scala2.13-java17-ubuntu + " +elif echo $VERSION | grep -Eq "^3."; then + # 3.x default + TAGS=" + scala2.12-java11-python3-r-ubuntu + scala2.12-java11-python3-ubuntu + scala2.12-java11-r-ubuntu + scala2.12-java11-ubuntu + " + # java17 images were added in 3.5.0. We need to skip java17 for 3.3.x and 3.4.x + if ! echo $VERSION | grep -Eq "^3.3|^3.4"; then + TAGS+=" + scala2.12-java17-python3-r-ubuntu + scala2.12-java17-python3-ubuntu + scala2.12-java17-r-ubuntu + scala2.12-java17-ubuntu + " + fi fi for TAG in $TAGS; do @@ -55,17 +65,23 @@ for TAG in $TAGS; do OPTS+=" --sparkr" fi + if echo $TAG | grep -q "scala2.12"; then + OPTS+=" --scala-version 2.12" + elif echo $TAG | grep -q "scala2.13"; then + OPTS+=" --scala-version 2.13" + fi + if echo $TAG | grep -q "java17"; then OPTS+=" --java-version 17 --image eclipse-temurin:17-jre-jammy" elif echo $TAG | grep -q "java11"; then OPTS+=" --java-version 11 --image eclipse-temurin:11-jre-focal" - fi + fi OPTS+=" --spark-version $VERSION" mkdir -p $VERSION/$TAG - if [ "$TAG" == "scala2.12-java11-ubuntu" ] || [ "$TAG" == "scala2.12-java17-ubuntu" ]; then + if [ "$TAG" == "scala2.12-java11-ubuntu" ] || [ "$TAG" == "scala2.12-java17-ubuntu" ] || [ "$TAG" == "scala2.13-java17-ubuntu" ]; then python3 tools/template.py $OPTS > $VERSION/$TAG/Dockerfile python3 tools/template.py $OPTS -f entrypoint.sh.template > $VERSION/$TAG/entrypoint.sh chmod a+x $VERSION/$TAG/entrypoint.sh diff --git a/versions.json b/versions.json index 1586def..31d94cd 100644 --- a/versions.json +++ b/versions.json @@ -4,23 +4,22 @@ "path": "4.0.0-preview1/scala2.13-java17-python3-ubuntu", "tags": [ 
"4.0.0-preview1-scala2.13-java17-python3-ubuntu", - "4.0.0-preview1-java17-python3", - "4.0.0-preview1-java17", - "python3-java17" + "4.0.0-preview1-python3", + "4.0.0-preview1" ] }, { "path": "4.0.0-preview1/scala2.13-java17-r-ubuntu", "tags": [ "4.0.0-preview1-scala2.13-java17-r-ubuntu", - "4.0.0-preview1-java17-r" + "4.0.0-preview1-r" ] }, { "path": "4.0.0-preview1/scala2.13-java17-ubuntu", "tags": [ "4.0.0-preview1-scala2.13-java17-ubuntu", - "4.0.0-preview1-java17-scala" + "4.0.0-preview1-scala" ] }, {