From 2e4609b3776f3d5d9b72cf305bfbc453b62ba086 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Tue, 17 May 2022 16:04:07 -0600 Subject: [PATCH 01/27] initial take at docker-compose build --- conf/docker-compose/.dockerignore | 5 + conf/docker-compose/.env | 42 ++++ conf/docker-compose/.gitignore | 10 + conf/docker-compose/README.md | 96 +++++++++ conf/docker-compose/dataverse/Dockerfile | 74 +++++++ conf/docker-compose/dataverse/config | 4 + conf/docker-compose/dataverse/credentials | 3 + conf/docker-compose/dataverse/startup.sh | 80 ++++++++ conf/docker-compose/docker-compose.yml | 185 ++++++++++++++++++ conf/docker-compose/postgres/Dockerfile | 2 + conf/docker-compose/prepbuild.sh | 7 + conf/docker-compose/rserve/Dockerfile | 22 +++ conf/docker-compose/seaweedfs/Dockerfile | 15 ++ conf/docker-compose/seaweedfs/config.json | 19 ++ conf/docker-compose/solr/Dockerfile | 25 +++ conf/docker-compose/solr/startup.sh | 10 + conf/docker-compose/traefik/Dockerfile | 18 ++ conf/docker-compose/traefik/certificates.yaml | 6 + scripts/installer/as-setup.sh | 13 +- 19 files changed, 634 insertions(+), 2 deletions(-) create mode 100644 conf/docker-compose/.dockerignore create mode 100644 conf/docker-compose/.env create mode 100644 conf/docker-compose/.gitignore create mode 100644 conf/docker-compose/README.md create mode 100644 conf/docker-compose/dataverse/Dockerfile create mode 100644 conf/docker-compose/dataverse/config create mode 100644 conf/docker-compose/dataverse/credentials create mode 100644 conf/docker-compose/dataverse/startup.sh create mode 100644 conf/docker-compose/docker-compose.yml create mode 100644 conf/docker-compose/postgres/Dockerfile create mode 100755 conf/docker-compose/prepbuild.sh create mode 100644 conf/docker-compose/rserve/Dockerfile create mode 100644 conf/docker-compose/seaweedfs/Dockerfile create mode 100644 conf/docker-compose/seaweedfs/config.json create mode 100644 conf/docker-compose/solr/Dockerfile create mode 100644 
conf/docker-compose/solr/startup.sh create mode 100644 conf/docker-compose/traefik/Dockerfile create mode 100644 conf/docker-compose/traefik/certificates.yaml diff --git a/conf/docker-compose/.dockerignore b/conf/docker-compose/.dockerignore new file mode 100644 index 00000000000..860ae84eb75 --- /dev/null +++ b/conf/docker-compose/.dockerignore @@ -0,0 +1,5 @@ +/postgres-bind/ +/solr-bind/ +/seaweedfs-bind/ +/dataverse-docroot-bind/ +/dataverse-logos-bind/ \ No newline at end of file diff --git a/conf/docker-compose/.env b/conf/docker-compose/.env new file mode 100644 index 00000000000..b9aee36f6ed --- /dev/null +++ b/conf/docker-compose/.env @@ -0,0 +1,42 @@ +# timezone +# https://en.wikipedia.org/wiki/List_of_tz_database_time_zones +TZ="America/Denver" + +# dataverse service +HOST_DNS_ADDRESS=dataverse +GLASSFISH_USER=dataverse +GLASSFISH_PASSWORD=secret +GLASSFISH_ADMIN_USER=admin +GLASSFISH_ADMIN_PASSWORD=secret +ADMIN_EMAIL=noreply@mydomain.com +MAIL_SERVER=localhost +POSTGRES_ADMIN_PASSWORD=secret +POSTGRES_SERVER=postgres +POSTGRES_PORT=5432 +POSTGRES_DATABASE=dataverse +POSTGRES_PASSWORD=secret +POSTGRES_USER=dataverse +SOLR_LOCATION=solr:8983 +RSERVE_HOST=rserve +RSERVE_PORT=6311 +# the rserve credentials are hardcoded in the Dockerfile, edit both if you want to change them +RSERVE_USER=rserve +RSERVE_PASSWORD=rserve + +# disable DOI validation checks, true or false, set this to true for your development environment +DISABLE_DOI=true + +# exclude emails from exports +# https://guides.dataverse.org/en/latest/installation/config.html#excludeemailfromexport +EXCLUDE_EMAIL_EXPORTS=true + +# s3 keys +S3_ACCESS_KEY=secret +S3_SECRET_KEY=secret + +# fully qualified domain name (FQDN) and site URL +# recommend keeping this as dataverse because it's used internally for routing within Docker +# if you change this, s3 storage will break +DATAVERSE_FQDN=dataverse +# make sure to escape characters like : +DATAVERSE_SITE_URL=http\://localhost diff --git 
a/conf/docker-compose/.gitignore b/conf/docker-compose/.gitignore new file mode 100644 index 00000000000..6de52ae063e --- /dev/null +++ b/conf/docker-compose/.gitignore @@ -0,0 +1,10 @@ +/dataverse/dataverse/ +/solr/*.xml +/dataverse/.m2/ +/postgres-bind/ +/solr-bind/ +/seaweedfs-bind/ +/dataverse-docroot-bind/ +/dataverse-logos-bind/ +/traefik/traefik.key +/traefik/traefik.crt \ No newline at end of file diff --git a/conf/docker-compose/README.md b/conf/docker-compose/README.md new file mode 100644 index 00000000000..489581da9a6 --- /dev/null +++ b/conf/docker-compose/README.md @@ -0,0 +1,96 @@ +# docker-compose version of Dataverse + +## Requirements + +* [docker-compose](https://docs.docker.com/compose/) +* [Docker](https://docker.com) (or some other supported container engine) + +## Setup + +Edit the `.env` file as needed. Make sure to properly secure it in terms of file permissions. + +Edit `./seaweedfs/config.json` and enter your [credential keys](https://github.com/chrislusf/seaweedfs/wiki/Amazon-S3-API#static-configuration) for s3 storage. + +If you're running locally and don't have a key, you'll +need to generate it yourself with something like Git Bash. Make sure +[Posix to Windows path conversion](https://github.com/git-for-windows/git/issues/577#issuecomment-166118846) doesn't +take place with the forward slashes using `MSYS_NO_PATHCONV=1` if you're on Windows. + +```shell +MSYS_NO_PATHCONV=1 openssl req -x509 -nodes -days 4096 -newkey rsa:4096 -out traefik.crt -keyout traefik.key -subj "/C=US/ST=New Mexico/L=ABQ/O=Local/CN=127.0.0.1" -addext "subjectAltName = IP:127.0.0.1" +``` + +Or grab your public/private keys from your sysadmin or provider and renamed them to `traefik.key` and `traefik.crt`. + +Then copy the `traefik.key` and `traefik.crt` files into the `traefik` folder. 
+ +## Building + +Run `prepbuild.sh` once + +Pull and build the Docker containers + +```shell +# this uses Compose v2, if you're on an older version you may +# need to change this call to docker-compose +docker compose pull +docker compose build +``` + +## Deploying + +```shell +docker-compose up -d +``` + +Note that this can take a couple minutes to start up. Wait until it shows `healthy` as the status. + +For the bind mounts (see `docker-compose.yml`) you may need to set the permissions +on those folders `*-bind` so they can be written from within the containers. Alternatively, +you can create local users or do UID/GID mappings. + +```shell +docker ps +``` + +Then go to the following URL in your browser: + +[https://localhost](https://localhost) + +Default credentials for login are: + +* username: `dataverseAdmin` +* password: `admin` + +Make sure to change this password right away. + +## How It Works + +* Builds a copy of the `.war` deployable code from source +* Stands up various services and pieces needed: + * seaweedfs - for s3 storage + * traefik - reverse proxy, HTTP is re-routed automatically to HTTPS + * postgres - database backend + * solr - text indexing database + * rserve - R server for running R commands + * dataverse - the main Dataverse web application +* sets up two storage options, one is the default `=files` for local storage +and the other is `=s3`for s3 storage + +## Uninstall / Teardown + +```shell +docker-compose down -v +``` + +## Development References + +There are many community led efforts to utilize containers, Kubernetes, and more to help automate +and setup Dataverse. 
+ +* [https://github.com/fzappa/rocky-dataverse/blob/main/rocky-dataverse.sh](https://github.com/fzappa/rocky-dataverse/blob/main/rocky-dataverse.sh) +* [https://github.com/IQSS/dataverse/tree/develop/conf/docker-aio](https://github.com/IQSS/dataverse/tree/develop/conf/docker-aio) +* [https://github.com/gdcc/dataverse-kubernetes/blob/develop/docker-compose.yaml](https://github.com/gdcc/dataverse-kubernetes/blob/develop/docker-compose.yaml) +* [https://github.com/gdcc/dataverse-kubernetes](https://github.com/gdcc/dataverse-kubernetes) +* [https://github.com/EOSC-synergy/dataverse-kubernetes](https://github.com/EOSC-synergy/dataverse-kubernetes) +* [https://github.com/IQSS/dataverse-docker](https://github.com/IQSS/dataverse-docker) diff --git a/conf/docker-compose/dataverse/Dockerfile b/conf/docker-compose/dataverse/Dockerfile new file mode 100644 index 00000000000..41991e6551e --- /dev/null +++ b/conf/docker-compose/dataverse/Dockerfile @@ -0,0 +1,74 @@ +# https://hub.docker.com/_/ubuntu +FROM ubuntu:20.04 + +ENV DEBIAN_FRONTEND noninteractive + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + tzdata nano dos2unix curl wget openjdk-11-jdk maven unzip jq imagemagick python3 python3-pip python3-psycopg2 wait-for-it ca-certificates && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR / + +RUN useradd --create-home --shell /bin/bash dataverse + +# https://guides.dataverse.org/en/5.8/installation/prerequisites.html +RUN wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.1/payara-5.2022.1.zip + +RUN unzip payara-5.2022.1.zip && \ + mv payara5 /usr/local && \ + rm payara-5.2022.1.zip + +RUN chown -R root:root /usr/local/payara5 && \ + chown dataverse /usr/local/payara5/glassfish/lib && \ + chown -R dataverse:dataverse /usr/local/payara5/glassfish/domains/domain1 + +ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64 + +# install Counter Processor +# 
https://guides.dataverse.org/en/latest/installation/prerequisites.html#counter-processor +RUN cd /usr/local && \ + wget https://github.com/CDLUC3/counter-processor/archive/refs/tags/v0.1.04.tar.gz && \ + tar xvfz v0.1.04.tar.gz && \ + rm v0.1.04.tar.gz && \ + cd counter-processor-0.1.04 && \ + pip3 install -r requirements.txt + +RUN useradd --create-home --shell /bin/bash counter && \ + chown -R counter:counter /usr/local/counter-processor-0.1.04 + +# install awscli +RUN pip3 install --no-cache-dir awscli + +# switch to non-root user as this is more secure +USER dataverse +WORKDIR / + +RUN mkdir -p /home/dataverse/.aws/ +COPY --chown=dataverse:dataverse config /home/dataverse/.aws/config +COPY --chown=dataverse:dataverse credentials /home/dataverse/.aws/credentials + +RUN cp -R /home/dataverse/.aws/ /usr/local/payara5/glassfish/domains/domain1/ + +# if you want to speed up the Maven build you can copy over +# cached packages here +COPY --chown=dataverse:dataverse ./.m2/ /home/dataverse/.m2/ + +COPY --chown=dataverse:dataverse ./dataverse/ /dataverse/ + +# this likely isn't needed on Linux but was needed on a Windows build +RUN find /dataverse -type f -print0 | xargs -0 -n 1 -P 4 dos2unix + +# this can take some time to download all the dependencies +RUN cd /dataverse/ && mvn package -DskipTests + +USER root +COPY --chown=dataverse:dataverse startup.sh /startup.sh +RUN chmod +x /startup.sh && dos2unix /startup.sh + +USER dataverse +CMD ["wait-for-it", "postgres:5432", "--", "/startup.sh"] + +# helpful for debugging purposes to just start up the container +# CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/conf/docker-compose/dataverse/config b/conf/docker-compose/dataverse/config new file mode 100644 index 00000000000..d1e10d78ee3 --- /dev/null +++ b/conf/docker-compose/dataverse/config @@ -0,0 +1,4 @@ +[default] +region = us-east-1 +s3 = + signature_version = s3v4 \ No newline at end of file diff --git 
a/conf/docker-compose/dataverse/credentials b/conf/docker-compose/dataverse/credentials new file mode 100644 index 00000000000..64af5712826 --- /dev/null +++ b/conf/docker-compose/dataverse/credentials @@ -0,0 +1,3 @@ +[default] +aws_access_key_id = secret +aws_secret_access_key = secret \ No newline at end of file diff --git a/conf/docker-compose/dataverse/startup.sh b/conf/docker-compose/dataverse/startup.sh new file mode 100644 index 00000000000..81973a97172 --- /dev/null +++ b/conf/docker-compose/dataverse/startup.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +# create the config file that has all our environment settings + +echo -e " +[glassfish] +HOST_DNS_ADDRESS=${HOST_DNS_ADDRESS} +GLASSFISH_USER = ${GLASSFISH_USER} +GLASSFISH_DIRECTORY = /usr/local/payara5/ +GLASSFISH_ADMIN_USER = ${GLASSFISH_ADMIN_USER} +GLASSFISH_ADMIN_PASSWORD = ${GLASSFISH_ADMIN_PASSWORD} +GLASSFISH_HEAP = 2048 +GLASSFISH_REQUEST_TIMEOUT = 1800 + +[database] +POSTGRES_ADMIN_PASSWORD=${POSTGRES_ADMIN_PASSWORD} +POSTGRES_SERVER=${POSTGRES_SERVER} +POSTGRES_PORT=${POSTGRES_PORT} +POSTGRES_DATABASE=${POSTGRES_DATABASE} +POSTGRES_PASSWORD=${POSTGRES_PASSWORD} +POSTGRES_USER=${POSTGRES_USER} + +[system] +ADMIN_EMAIL=${ADMIN_EMAIL} +MAIL_SERVER=${MAIL_SERVER} +SOLR_LOCATION=${SOLR_LOCATION} + +[rserve] +RSERVE_HOST=${RSERVE_HOST} +RSERVE_PORT=${RSERVE_PORT} +RSERVE_USER=${RSERVE_USER} +RSERVE_PASSWORD=${RSERVE_PASSWORD} + +[doi] +DOI_USERNAME = dataciteuser +DOI_PASSWORD = datacitepassword +DOI_BASEURL = https://mds.test.datacite.org +DOI_DATACITERESTAPIURL = https://api.test.datacite.org +" > /dataverse/scripts/installer/default.config + +# https://github.com/poikilotherm/dataverse/blob/ct-mvn-mod/modules/container-base/src/main/docker/Dockerfile +# https://guides.dataverse.org/en/latest/installation/config.html#amazon-s3-storage-or-compatible +# set s3 storage settings +if ! 
grep -q "Ddataverse.files.s3.type=s3" "/usr/local/payara5/glassfish/domains/domain1/config/domain.xml"; then + # use : as delimiter + sed -i "s::-Ddataverse.files.s3.type=s3\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml + sed -i "s::-Ddataverse.files.s3.label=s3\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml + sed -i "s::-Ddataverse.files.s3.access-key=${S3_ACCESS_KEY}\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml + sed -i "s::-Ddataverse.files.s3.secret-key=${S3_SECRET_KEY}\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml + sed -i "s::-Ddataverse.files.s3.custom-endpoint-url=http\:\/\/seaweedfs\:8333\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml + # keep this as dataverse as it's hardcoded elsewhere + sed -i "s::-Ddataverse.files.s3.bucket-name=dataverse\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml + sed -i "s::-Ddataverse.files.s3.custom-endpoint-region=us-east-1\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml + # # Use path style buckets instead of subdomains + sed -i "s::-Ddataverse.files.s3.path-style-access=true\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml +fi + +cd /dataverse/scripts/installer/ + +# the installer needs to run from within the directory, it cannot be run from / for example +# this can take some time to run, be patient +python3 install.py --noninteractive --force + +# check if we should disable DOI validation +if [[ ! -z "${DISABLE_DOI}" ]] && [[ "true" = "${DISABLE_DOI}" ]]; then + echo "Disabling DOI validation" + curl -X PUT -d FAKE http://localhost:8080/api/admin/settings/:DoiProvider +fi + +# check if we should exclude emails from exports +if [[ ! 
-z "${EXCLUDE_EMAIL_EXPORTS}" ]] && [[ "true" = "${EXCLUDE_EMAIL_EXPORTS}" ]]; then + echo "Excluding emails in exports" + curl -X PUT -d true http://localhost:8080/api/admin/settings/:ExcludeEmailFromExport +fi + +# create an empty s3 bucket in seaweedfs if it doesn't already exist +curl -X POST "http://seaweedfs:8888/buckets/" +curl -X POST "http://seaweedfs:8888/buckets/dataverse/" + +wait-for-it localhost:8080 -- tail -f /usr/local/payara5/glassfish/domains/domain1/logs/server.log \ No newline at end of file diff --git a/conf/docker-compose/docker-compose.yml b/conf/docker-compose/docker-compose.yml new file mode 100644 index 00000000000..a5e5578ad11 --- /dev/null +++ b/conf/docker-compose/docker-compose.yml @@ -0,0 +1,185 @@ +version: '3.5' +services: + + postgres: + build: + context: ./postgres/ + #ports: + # - 5432:5432 + restart: unless-stopped + container_name: postgres + hostname: postgres + volumes: + - ./postgres-bind:/var/lib/postgresql/data:rw + environment: + # this user and password will have superuser privileges + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=${POSTGRES_ADMIN_PASSWORD} + - TZ=${TZ} + - LC_ALL=C.UTF-8 + healthcheck: + test: ["CMD", "pg_isready", "-U", "postgres"] + interval: 5s + retries: 5 + + solr: + build: + context: ./solr/ + restart: unless-stopped + container_name: solr + hostname: solr + volumes: + - ./solr-bind:/var/solr:rw + environment: + - TZ=${TZ} + - "SOLR_JAVA_MEM=-Xms1g -Xmx1g" + - "SOLR_OPTS=-Dlog4j2.formatMsgNoLookups=true" + # ports: + # - 8983:8983 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8983"] + interval: 10s + timeout: 3s + retries: 50 + start_period: 30s + + rserve: + build: + context: ./rserve/ + restart: unless-stopped + container_name: rserve + hostname: rserve + environment: + - TZ=${TZ} + - RSERVE_USER=${RSERVE_USER} + - RSERVE_PASSWORD=${RSERVE_PASSWORD} + # ports: + # - 6311:6311 + healthcheck: + test: ["CMD", "wait-for-it", "localhost:6311"] + interval: 10s + timeout: 3s + 
retries: 10 + start_period: 5s + + dataverse: + build: + context: ./dataverse/ + restart: unless-stopped + container_name: dataverse + hostname: dataverse + environment: + - TZ=${TZ} + - HOST_DNS_ADDRESS=${HOST_DNS_ADDRESS} + - GLASSFISH_USER=${GLASSFISH_USER} + - GLASSFISH_PASSWORD=${GLASSFISH_PASSWORD} + - ADMIN_EMAIL=${ADMIN_EMAIL} + - MAIL_SERVER=${MAIL_SERVER} + - POSTGRES_ADMIN_PASSWORD=${POSTGRES_ADMIN_PASSWORD} + - POSTGRES_SERVER=${POSTGRES_SERVER} + - POSTGRES_PORT=${POSTGRES_PORT} + - POSTGRES_DATABASE=${POSTGRES_DATABASE} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + - POSTGRES_USER=${POSTGRES_USER} + - SOLR_LOCATION=${SOLR_LOCATION} + - RSERVE_HOST=${RSERVE_HOST} + - RSERVE_PORT=${RSERVE_PORT} + - RSERVE_USER=${RSERVE_USER} + - RSERVE_PASSWORD=${RSERVE_PASSWORD} + - DISABLE_DOI=${DISABLE_DOI} + - EXCLUDE_EMAIL_EXPORTS=${EXCLUDE_EMAIL_EXPORTS} + - S3_ACCESS_KEY=${S3_ACCESS_KEY} + - S3_SECRET_KEY=${S3_SECRET_KEY} + - DATAVERSE_FQDN=${DATAVERSE_FQDN} + - DATAVERSE_SITE_URL=${DATAVERSE_SITE_URL} + depends_on: + - postgres + - solr + - rserve + - seaweedfs + # ports: + # - 8080:8080 # the Dataverse web-UI + # - 4848:4848 # the Payara admin web-UI + volumes: + # https://guides.dataverse.org/en/latest/installation/advanced.html + # logos and sitemap are initially empty on the very first startup + - ./dataverse-logos-bind:/usr/local/payara5/glassfish/domains/domain1/docroot/logos/ + - ./dataverse-sitemap-bind:/usr/local/payara5/glassfish/domains/domain1/docroot/sitemap/ + labels: + - "traefik.enable=true" + - "traefik.http.routers.dataverse.rule=Host(`localhost`)" + - "traefik.http.routers.dataverse.tls=true" + - "traefik.http.routers.dataverse.entrypoints=web-secure" + - "traefik.http.services.dataverse.loadbalancer.server.port=8080" + - "traefik.port=8080" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/api/info/version"] + interval: 5s + timeout: 3s + retries: 50 + start_period: 30s + + # Traefik reverse proxy + traefik: + build: + 
context: ./traefik/ + container_name: traefik + hostname: traefik + environment: + - TZ=${TZ} + depends_on: + - dataverse + command: + # - "--log.level=DEBUG" + # - "--api.insecure=true" + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + - "--entrypoints.web.http.redirections.entryPoint.to=web-secure" + - "--entrypoints.web.http.redirections.entryPoint.scheme=https" + - "--entrypoints.web.http.redirections.entrypoint.permanent=true" + # location of certs + - "--providers.file.directory=/configuration/" + - "--entrypoints.web.address=:80" + - "--entrypoints.web-secure.address=:443" + - "--ping" + ports: + - 80:80 # HTTP port, this gets re-routed to 443 TLS + - 443:443 # TLS port, needs certificate generated to use + # The Web UI (enabled by --api.insecure=true) + # - 8080:8080 + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/ping"] + interval: 10s + timeout: 3s + retries: 50 + start_period: 30s + + # S3 storage + # alternatives include minio and others + # https://github.com/chrislusf/seaweedfs/blob/master/docker/seaweedfs-compose.yml + seaweedfs: + build: + context: ./seaweedfs/ + restart: unless-stopped + container_name: seaweedfs + hostname: seaweedfs + environment: + - TZ=${TZ} + volumes: + - ./seaweedfs-bind:/data:rw + # ports: + # - 8333:8333 # s3 API + # - 9327:9327 # metrics API + # - 9333:9333 # master API + # - 8060:8080 # volume server API + # - 8888:8888 # filer API, nice web-ui to look at folders and files + # https://github.com/chrislusf/seaweedfs/wiki/Amazon-S3-API#static-configuration + command: 'server -s3 -metricsPort=9327 -dir=/data -s3.config=/config.json' + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9333"] + interval: 10s + timeout: 3s + retries: 10 + start_period: 5s diff --git a/conf/docker-compose/postgres/Dockerfile b/conf/docker-compose/postgres/Dockerfile new file mode 100644 index 00000000000..89b59a5d5ea --- 
/dev/null +++ b/conf/docker-compose/postgres/Dockerfile @@ -0,0 +1,2 @@ +# https://hub.docker.com/_/postgres +FROM postgres:14 diff --git a/conf/docker-compose/prepbuild.sh b/conf/docker-compose/prepbuild.sh new file mode 100755 index 00000000000..1e569ea661f --- /dev/null +++ b/conf/docker-compose/prepbuild.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +cp ../solr/8.11.1/*.xml ./solr/ + +mkdir -p ./dataverse/dataverse/ + +rsync -a --exclude '.m2*' --exclude '*-bind' ../../../dataverse/ ./dataverse/dataverse/ diff --git a/conf/docker-compose/rserve/Dockerfile b/conf/docker-compose/rserve/Dockerfile new file mode 100644 index 00000000000..345b4212d8f --- /dev/null +++ b/conf/docker-compose/rserve/Dockerfile @@ -0,0 +1,22 @@ +# https://hub.docker.com/_/ubuntu +FROM ubuntu:20.04 + +ENV DEBIAN_FRONTEND noninteractive + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + tzdata nano r-base r-base-dev libssl-dev libnlopt-dev libcurl4-openssl-dev wait-for-it ca-certificates && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* + +RUN R -e "install.packages(c('R2HTML', 'rjson', 'DescTools', 'Rserve', 'haven'))" + +# this is the user and password that Dataverse will connect to the Rserve server +RUN echo "rserve rserve" > /rserve.pwd + +WORKDIR / + +EXPOSE 6311/tcp + +# https://github.com/ubc/r-docker/blob/master/Dockerfile +CMD ["R", "-e", "Rserve::run.Rserve(remote=TRUE, auth=TRUE, pwdfile='/rserve.pwd', daemon=FALSE, port=6311, fileio=TRUE, maxinbuf=262144)"] \ No newline at end of file diff --git a/conf/docker-compose/seaweedfs/Dockerfile b/conf/docker-compose/seaweedfs/Dockerfile new file mode 100644 index 00000000000..c000090ba6e --- /dev/null +++ b/conf/docker-compose/seaweedfs/Dockerfile @@ -0,0 +1,15 @@ +# https://hub.docker.com/r/chrislusf/seaweedfs +FROM chrislusf/seaweedfs:latest + +USER root + +# use HTTP connection for adding the needed ca-certificates package +# install it and then swap back to HTTPS +RUN sed -i 's,https,http,g' 
/etc/apk/repositories && \ + apk add --no-cache ca-certificates && \ + update-ca-certificates && \ + sed -i 's,http,https,g' /etc/apk/repositories && \ + apk update && apk upgrade && apk add --no-cache curl bash + +# https://github.com/chrislusf/seaweedfs/wiki/Amazon-S3-API#static-configuration +COPY config.json /config.json diff --git a/conf/docker-compose/seaweedfs/config.json b/conf/docker-compose/seaweedfs/config.json new file mode 100644 index 00000000000..ad18955f042 --- /dev/null +++ b/conf/docker-compose/seaweedfs/config.json @@ -0,0 +1,19 @@ +{ + "identities": [ + { + "name": "anonymous", + "credentials": [ + { + "accessKey": "secret", + "secretKey": "secret" + } + ], + "actions": [ + "Read:dataverse", + "List:dataverse", + "Tagging:dataverse", + "Write:dataverse" + ] + } + ] +} \ No newline at end of file diff --git a/conf/docker-compose/solr/Dockerfile b/conf/docker-compose/solr/Dockerfile new file mode 100644 index 00000000000..8a3cccfb1ac --- /dev/null +++ b/conf/docker-compose/solr/Dockerfile @@ -0,0 +1,25 @@ +# https://hub.docker.com/_/solr +FROM solr:8.11.1 + +# https://guides.dataverse.org/en/latest/installation/prerequisites.html#solr + +USER root + +COPY *.xml / + +# increase the number of file descriptors and max processes +RUN echo "solr soft nproc 65000" >> /etc/security/limits.conf && \ + echo "solr hard nproc 65000" >> /etc/security/limits.conf && \ + echo "solr soft nofile 65000" >> /etc/security/limits.conf && \ + echo "solr hard nofile 65000" >> /etc/security/limits.conf + +# increase Header size +RUN sed -i "s/name=\"solr.jetty.request.header.size\" default=\"8192\"/name=\"solr.jetty.request.header.size\" default=\"102400\"/g" /opt/solr/server/etc/jetty.xml + +COPY --chown=solr:solr startup.sh /startup.sh +RUN chmod +x /startup.sh + +# switch back to normal runtime user for security purposes +USER solr + +CMD ["/startup.sh"] \ No newline at end of file diff --git a/conf/docker-compose/solr/startup.sh b/conf/docker-compose/solr/startup.sh 
new file mode 100644 index 00000000000..aced1a47ed4 --- /dev/null +++ b/conf/docker-compose/solr/startup.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +mkdir -p /var/solr/data/collection1/ +cp -R /opt/solr/server/solr/configsets/_default/conf /var/solr/data/collection1 +cp /*.xml /var/solr/data/collection1/conf/ + +# create collection on startup +echo "name=collection1" > /var/solr/data/collection1/core.properties + +solr-foreground diff --git a/conf/docker-compose/traefik/Dockerfile b/conf/docker-compose/traefik/Dockerfile new file mode 100644 index 00000000000..3735935e714 --- /dev/null +++ b/conf/docker-compose/traefik/Dockerfile @@ -0,0 +1,18 @@ +# https://hub.docker.com/_/traefik +FROM traefik:v2.6 + +# Add certs to OS truststore +COPY *.crt /usr/local/share/ca-certificates/ +# private key +COPY *.key /usr/local/share/ca-certificates/ + +# use HTTP connection for adding the needed ca-certificates package +# install it and then swap back to HTTPS +RUN sed -i 's,https,http,g' /etc/apk/repositories && \ + apk add --no-cache ca-certificates && \ + update-ca-certificates && \ + sed -i 's,http,https,g' /etc/apk/repositories && \ + apk update && apk upgrade && apk add --no-cache bash curl + +RUN mkdir -p /configuration/ +COPY certificates.yaml /configuration/certificates.yaml diff --git a/conf/docker-compose/traefik/certificates.yaml b/conf/docker-compose/traefik/certificates.yaml new file mode 100644 index 00000000000..27fc217abf6 --- /dev/null +++ b/conf/docker-compose/traefik/certificates.yaml @@ -0,0 +1,6 @@ +tls: + stores: + default: + defaultCertificate: + certFile: /usr/local/share/ca-certificates/traefik.crt + keyFile: /usr/local/share/ca-certificates/traefik.key \ No newline at end of file diff --git a/scripts/installer/as-setup.sh b/scripts/installer/as-setup.sh index 853db77f471..d3a7d20c9ae 100755 --- a/scripts/installer/as-setup.sh +++ b/scripts/installer/as-setup.sh @@ -97,8 +97,17 @@ function preliminary_setup() ./asadmin $ASADMIN_OPTS create-jvm-options 
"\-Ddataverse.rserve.user=${RSERVE_USER}" ./asadmin $ASADMIN_OPTS create-jvm-options '\-Ddataverse.rserve.password=${ALIAS=rserve_password_alias}' # The host and url addresses this Dataverse will be using: - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.fqdn=${HOST_ADDRESS}" - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.siteUrl=http\://\${dataverse.fqdn}\:8080" + # https://guides.dataverse.org/en/latest/installation/config.html#dataverse-fqdn + if [[ ! -z "${DATAVERSE_FQDN}" ]]; then + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.fqdn=${DATAVERSE_FQDN}" + else + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.fqdn=${HOST_ADDRESS}" + fi + if [[ ! -z "${DATAVERSE_SITE_URL}" ]]; then + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.siteUrl=${DATAVERSE_SITE_URL}" + else + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.siteUrl=http\://\${dataverse.fqdn}\:8080" + fi # password reset token timeout in minutes ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.auth.password-reset-timeout-in-minutes=60" From 5b297f76a59c31868f10e6425468ad67330ddf19 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Wed, 18 May 2022 07:35:22 -0600 Subject: [PATCH 02/27] minor documentation --- conf/docker-compose/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/docker-compose/README.md b/conf/docker-compose/README.md index 489581da9a6..d0f1069a1f4 100644 --- a/conf/docker-compose/README.md +++ b/conf/docker-compose/README.md @@ -20,7 +20,7 @@ take place with the forward slashes using `MSYS_NO_PATHCONV=1` if you're on Wind MSYS_NO_PATHCONV=1 openssl req -x509 -nodes -days 4096 -newkey rsa:4096 -out traefik.crt -keyout traefik.key -subj "/C=US/ST=New Mexico/L=ABQ/O=Local/CN=127.0.0.1" -addext "subjectAltName = IP:127.0.0.1" ``` -Or grab your public/private keys from your sysadmin or provider and renamed them to `traefik.key` and `traefik.crt`. 
+Or grab your public/private keys from your sysadmin or provider and rename them to `traefik.key` and `traefik.crt`. Then copy the `traefik.key` and `traefik.crt` files into the `traefik` folder. From 422bb7e41567715f57af2efc8b45765b480e4c8e Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Wed, 18 May 2022 07:35:55 -0600 Subject: [PATCH 03/27] .m2 folder isn't setup by default so this would break --- conf/docker-compose/dataverse/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/docker-compose/dataverse/Dockerfile b/conf/docker-compose/dataverse/Dockerfile index 41991e6551e..e202ba8bcef 100644 --- a/conf/docker-compose/dataverse/Dockerfile +++ b/conf/docker-compose/dataverse/Dockerfile @@ -53,7 +53,7 @@ RUN cp -R /home/dataverse/.aws/ /usr/local/payara5/glassfish/domains/domain1/ # if you want to speed up the Maven build you can copy over # cached packages here -COPY --chown=dataverse:dataverse ./.m2/ /home/dataverse/.m2/ +#COPY --chown=dataverse:dataverse ./.m2/ /home/dataverse/.m2/ COPY --chown=dataverse:dataverse ./dataverse/ /dataverse/ From 393fdf0d661c7937a0f928d32627bbfdbfad7b12 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Thu, 19 May 2022 08:38:43 -0600 Subject: [PATCH 04/27] switch to Ubuntu 22.04 LTS --- conf/docker-compose/dataverse/Dockerfile | 4 ++-- conf/docker-compose/rserve/Dockerfile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/docker-compose/dataverse/Dockerfile b/conf/docker-compose/dataverse/Dockerfile index e202ba8bcef..be707bbb6aa 100644 --- a/conf/docker-compose/dataverse/Dockerfile +++ b/conf/docker-compose/dataverse/Dockerfile @@ -1,11 +1,11 @@ # https://hub.docker.com/_/ubuntu -FROM ubuntu:20.04 +FROM ubuntu:22.04 ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && \ apt-get install -y --no-install-recommends \ - tzdata nano dos2unix curl wget openjdk-11-jdk maven unzip jq imagemagick python3 python3-pip python3-psycopg2 wait-for-it ca-certificates && \ + gcc 
python3-dev tzdata nano dos2unix curl wget openjdk-11-jdk maven unzip jq imagemagick python3 python3-pip python3-psycopg2 wait-for-it ca-certificates && \ apt-get -y upgrade && \ rm -rf /var/lib/apt/lists/* diff --git a/conf/docker-compose/rserve/Dockerfile b/conf/docker-compose/rserve/Dockerfile index 345b4212d8f..20e62dc443a 100644 --- a/conf/docker-compose/rserve/Dockerfile +++ b/conf/docker-compose/rserve/Dockerfile @@ -1,5 +1,5 @@ # https://hub.docker.com/_/ubuntu -FROM ubuntu:20.04 +FROM ubuntu:22.04 ENV DEBIAN_FRONTEND noninteractive From cb88c10863585b1f2075178f47e87b794d31b88e Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Thu, 19 May 2022 09:36:44 -0600 Subject: [PATCH 05/27] remove healthcheck from Docker which was breaking Rserve --- conf/docker-compose/docker-compose.yml | 6 ------ conf/docker-compose/rserve/Dockerfile | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/conf/docker-compose/docker-compose.yml b/conf/docker-compose/docker-compose.yml index a5e5578ad11..3596af05f04 100644 --- a/conf/docker-compose/docker-compose.yml +++ b/conf/docker-compose/docker-compose.yml @@ -55,12 +55,6 @@ services: - RSERVE_PASSWORD=${RSERVE_PASSWORD} # ports: # - 6311:6311 - healthcheck: - test: ["CMD", "wait-for-it", "localhost:6311"] - interval: 10s - timeout: 3s - retries: 10 - start_period: 5s dataverse: build: diff --git a/conf/docker-compose/rserve/Dockerfile b/conf/docker-compose/rserve/Dockerfile index 20e62dc443a..bf0554439e5 100644 --- a/conf/docker-compose/rserve/Dockerfile +++ b/conf/docker-compose/rserve/Dockerfile @@ -5,7 +5,7 @@ ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && \ apt-get install -y --no-install-recommends \ - tzdata nano r-base r-base-dev libssl-dev libnlopt-dev libcurl4-openssl-dev wait-for-it ca-certificates && \ + tzdata nano r-base r-base-dev libssl-dev libnlopt-dev libcurl4-openssl-dev ca-certificates && \ apt-get -y upgrade && \ rm -rf /var/lib/apt/lists/* From 
73c00514d7794fce066bb245e826ea38fe54d270 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Thu, 19 May 2022 09:37:35 -0600 Subject: [PATCH 06/27] add architecture flag to swap between amd64 and arm64 CPU architectures --- conf/docker-compose/.env | 3 +++ conf/docker-compose/dataverse/Dockerfile | 5 ++++- conf/docker-compose/docker-compose.yml | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/conf/docker-compose/.env b/conf/docker-compose/.env index b9aee36f6ed..ec4d4909045 100644 --- a/conf/docker-compose/.env +++ b/conf/docker-compose/.env @@ -40,3 +40,6 @@ S3_SECRET_KEY=secret DATAVERSE_FQDN=dataverse # make sure to escape characters like : DATAVERSE_SITE_URL=http\://localhost + +# CPU architecture, amd64 (Intel, AMD, etc.) or arm64 (Apple M1, Raspberry Pi, etc.) +ARCHITECTURE=amd64 \ No newline at end of file diff --git a/conf/docker-compose/dataverse/Dockerfile b/conf/docker-compose/dataverse/Dockerfile index be707bbb6aa..98dbe4455a9 100644 --- a/conf/docker-compose/dataverse/Dockerfile +++ b/conf/docker-compose/dataverse/Dockerfile @@ -1,6 +1,9 @@ # https://hub.docker.com/_/ubuntu FROM ubuntu:22.04 +ARG ARCHITECTURE=amd64 +ENV ARCHITECTURE $ARCHITECTURE + ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && \ @@ -24,7 +27,7 @@ RUN chown -R root:root /usr/local/payara5 && \ chown dataverse /usr/local/payara5/glassfish/lib && \ chown -R dataverse:dataverse /usr/local/payara5/glassfish/domains/domain1 -ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64 +ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-${ARCHITECTURE} # install Counter Processor # https://guides.dataverse.org/en/latest/installation/prerequisites.html#counter-processor diff --git a/conf/docker-compose/docker-compose.yml b/conf/docker-compose/docker-compose.yml index 3596af05f04..5aa1b0f00c3 100644 --- a/conf/docker-compose/docker-compose.yml +++ b/conf/docker-compose/docker-compose.yml @@ -86,6 +86,7 @@ services: - S3_SECRET_KEY=${S3_SECRET_KEY} - DATAVERSE_FQDN=${DATAVERSE_FQDN} - 
DATAVERSE_SITE_URL=${DATAVERSE_SITE_URL} + - ARCHITECTURE=${ARCHITECTURE} depends_on: - postgres - solr From 8497eb555ff57e682c968e5ddc8dc3e977b803bc Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Thu, 19 May 2022 13:31:47 -0600 Subject: [PATCH 07/27] refactor code to root dir for build --- .dockerignore | 7 ++ conf/docker-compose/.env => .env | 3 - Dockerfile | 85 ++++++++++++++++++- conf/docker-compose/.dockerignore | 5 -- conf/docker-compose/dataverse/Dockerfile | 77 ----------------- conf/docker-compose/dataverse/startup.sh | 3 + conf/docker-compose/prepbuild.sh | 3 - .../docker-compose.yml => docker-compose.yml | 23 +++-- 8 files changed, 105 insertions(+), 101 deletions(-) create mode 100644 .dockerignore rename conf/docker-compose/.env => .env (92%) delete mode 100644 conf/docker-compose/.dockerignore delete mode 100644 conf/docker-compose/dataverse/Dockerfile rename conf/docker-compose/docker-compose.yml => docker-compose.yml (88%) diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000000..d26c7363ca0 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,7 @@ +/.git/ +/.github/ +/conf/docker-compose/postgres-bind/ +/conf/docker-compose/solr-bind/ +/conf/docker-compose/seaweedfs-bind/ +/conf/docker-compose/dataverse-docroot-bind/ +/conf/docker-compose/dataverse-logos-bind/ \ No newline at end of file diff --git a/conf/docker-compose/.env b/.env similarity index 92% rename from conf/docker-compose/.env rename to .env index ec4d4909045..b9aee36f6ed 100644 --- a/conf/docker-compose/.env +++ b/.env @@ -40,6 +40,3 @@ S3_SECRET_KEY=secret DATAVERSE_FQDN=dataverse # make sure to escape characters like : DATAVERSE_SITE_URL=http\://localhost - -# CPU architecture, amd64 (Intel, AMD, etc.) or arm64 (Apple M1, Raspberry Pi, etc.) 
-ARCHITECTURE=amd64 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index b0864a0c55f..f813d522a29 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1 +1,84 @@ -# See http://guides.dataverse.org/en/latest/developers/containers.html +# https://hub.docker.com/_/ubuntu +FROM ubuntu:22.04 + +ENV DEBIAN_FRONTEND noninteractive + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc python3-dev tzdata nano dos2unix curl wget openjdk-11-jdk maven unzip jq imagemagick python3 python3-pip python3-psycopg2 wait-for-it ca-certificates && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR / + +RUN useradd --create-home --shell /bin/bash dataverse + +# https://guides.dataverse.org/en/5.8/installation/prerequisites.html +RUN wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.1/payara-5.2022.1.zip + +RUN unzip payara-5.2022.1.zip && \ + mv payara5 /usr/local && \ + rm payara-5.2022.1.zip + +RUN chown -R root:root /usr/local/payara5 && \ + chown dataverse /usr/local/payara5/glassfish/lib && \ + chown -R dataverse:dataverse /usr/local/payara5/glassfish/domains/domain1 + +# ENV JAVA_HOME "/usr/lib/jvm/java-11-openjdk-${ARCHITECTURE}" +# RUN export JAVA_HOME="$(dirname $(dirname $(readlink -f $(which java))))" + +# install Counter Processor +# https://guides.dataverse.org/en/latest/installation/prerequisites.html#counter-processor +RUN cd /usr/local && \ + wget https://github.com/CDLUC3/counter-processor/archive/refs/tags/v0.1.04.tar.gz && \ + tar xvfz v0.1.04.tar.gz && \ + rm v0.1.04.tar.gz && \ + cd counter-processor-0.1.04 && \ + pip3 install -r requirements.txt + +RUN useradd --create-home --shell /bin/bash counter && \ + chown -R counter:counter /usr/local/counter-processor-0.1.04 + +# install awscli +RUN pip3 install --no-cache-dir awscli + +# switch to non-root user as this is more secure +USER dataverse +WORKDIR / + +RUN mkdir -p /home/dataverse/.aws/ +COPY --chown=dataverse:dataverse 
./conf/docker-compose/dataverse/config /home/dataverse/.aws/config +COPY --chown=dataverse:dataverse ./conf/docker-compose/dataverse/credentials /home/dataverse/.aws/credentials + +RUN cp -R /home/dataverse/.aws/ /usr/local/payara5/glassfish/domains/domain1/ + +# if you want to speed up the Maven build you can copy over +# cached packages here +COPY --chown=dataverse:dataverse ./conf/docker-compose/dataverse/.m2/ /home/dataverse/.m2/ + +# copy over sourcecode and build files needed to compile the .war +# as well as installer files +COPY --chown=dataverse:dataverse pom.xml /dataverse/ +COPY --chown=dataverse:dataverse src /dataverse/src/ +COPY --chown=dataverse:dataverse modules /dataverse/modules/ +COPY --chown=dataverse:dataverse scripts /dataverse/scripts/ +COPY --chown=dataverse:dataverse conf/jhove/ /dataverse/conf/jhove/ + +# this likely isn't needed on Linux but was needed on a Windows build +RUN find /dataverse -type f -print0 | xargs -0 -n 1 -P 4 dos2unix + +# this can take some time to download all the dependencies +RUN cd /dataverse/ && \ + export dpkgArch="$(dpkg --print-architecture)" && \ + export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-${dpkgArch}" && \ + mvn package -DskipTests + +USER root +COPY --chown=dataverse:dataverse ./conf/docker-compose/dataverse/startup.sh /startup.sh +RUN chmod +x /startup.sh && dos2unix /startup.sh + +USER dataverse +CMD ["wait-for-it", "postgres:5432", "--", "/startup.sh"] + +# helpful for debugging purposes to just start up the container +# CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/conf/docker-compose/.dockerignore b/conf/docker-compose/.dockerignore deleted file mode 100644 index 860ae84eb75..00000000000 --- a/conf/docker-compose/.dockerignore +++ /dev/null @@ -1,5 +0,0 @@ -/postgres-bind/ -/solr-bind/ -/seaweedfs-bind/ -/dataverse-docroot-bind/ -/dataverse-logos-bind/ \ No newline at end of file diff --git a/conf/docker-compose/dataverse/Dockerfile 
b/conf/docker-compose/dataverse/Dockerfile deleted file mode 100644 index 98dbe4455a9..00000000000 --- a/conf/docker-compose/dataverse/Dockerfile +++ /dev/null @@ -1,77 +0,0 @@ -# https://hub.docker.com/_/ubuntu -FROM ubuntu:22.04 - -ARG ARCHITECTURE=amd64 -ENV ARCHITECTURE $ARCHITECTURE - -ENV DEBIAN_FRONTEND noninteractive - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc python3-dev tzdata nano dos2unix curl wget openjdk-11-jdk maven unzip jq imagemagick python3 python3-pip python3-psycopg2 wait-for-it ca-certificates && \ - apt-get -y upgrade && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR / - -RUN useradd --create-home --shell /bin/bash dataverse - -# https://guides.dataverse.org/en/5.8/installation/prerequisites.html -RUN wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.1/payara-5.2022.1.zip - -RUN unzip payara-5.2022.1.zip && \ - mv payara5 /usr/local && \ - rm payara-5.2022.1.zip - -RUN chown -R root:root /usr/local/payara5 && \ - chown dataverse /usr/local/payara5/glassfish/lib && \ - chown -R dataverse:dataverse /usr/local/payara5/glassfish/domains/domain1 - -ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-${ARCHITECTURE} - -# install Counter Processor -# https://guides.dataverse.org/en/latest/installation/prerequisites.html#counter-processor -RUN cd /usr/local && \ - wget https://github.com/CDLUC3/counter-processor/archive/refs/tags/v0.1.04.tar.gz && \ - tar xvfz v0.1.04.tar.gz && \ - rm v0.1.04.tar.gz && \ - cd counter-processor-0.1.04 && \ - pip3 install -r requirements.txt - -RUN useradd --create-home --shell /bin/bash counter && \ - chown -R counter:counter /usr/local/counter-processor-0.1.04 - -# install awscli -RUN pip3 install --no-cache-dir awscli - -# switch to non-root user as this is more secure -USER dataverse -WORKDIR / - -RUN mkdir -p /home/dataverse/.aws/ -COPY --chown=dataverse:dataverse config /home/dataverse/.aws/config -COPY --chown=dataverse:dataverse credentials 
/home/dataverse/.aws/credentials - -RUN cp -R /home/dataverse/.aws/ /usr/local/payara5/glassfish/domains/domain1/ - -# if you want to speed up the Maven build you can copy over -# cached packages here -#COPY --chown=dataverse:dataverse ./.m2/ /home/dataverse/.m2/ - -COPY --chown=dataverse:dataverse ./dataverse/ /dataverse/ - -# this likely isn't needed on Linux but was needed on a Windows build -RUN find /dataverse -type f -print0 | xargs -0 -n 1 -P 4 dos2unix - -# this can take some time to download all the dependencies -RUN cd /dataverse/ && mvn package -DskipTests - -USER root -COPY --chown=dataverse:dataverse startup.sh /startup.sh -RUN chmod +x /startup.sh && dos2unix /startup.sh - -USER dataverse -CMD ["wait-for-it", "postgres:5432", "--", "/startup.sh"] - -# helpful for debugging purposes to just start up the container -# CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/conf/docker-compose/dataverse/startup.sh b/conf/docker-compose/dataverse/startup.sh index 81973a97172..a6daf18d143 100644 --- a/conf/docker-compose/dataverse/startup.sh +++ b/conf/docker-compose/dataverse/startup.sh @@ -1,5 +1,8 @@ #!/bin/bash +export dpkgArch="$(dpkg --print-architecture)" +export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-${dpkgArch}" + # create the config file that has all our environment settings echo -e " diff --git a/conf/docker-compose/prepbuild.sh b/conf/docker-compose/prepbuild.sh index 1e569ea661f..a7bdefd979f 100755 --- a/conf/docker-compose/prepbuild.sh +++ b/conf/docker-compose/prepbuild.sh @@ -2,6 +2,3 @@ cp ../solr/8.11.1/*.xml ./solr/ -mkdir -p ./dataverse/dataverse/ - -rsync -a --exclude '.m2*' --exclude '*-bind' ../../../dataverse/ ./dataverse/dataverse/ diff --git a/conf/docker-compose/docker-compose.yml b/docker-compose.yml similarity index 88% rename from conf/docker-compose/docker-compose.yml rename to docker-compose.yml index 5aa1b0f00c3..a3cfad8b0c3 100644 --- a/conf/docker-compose/docker-compose.yml +++ b/docker-compose.yml @@ 
-3,14 +3,14 @@ services: postgres: build: - context: ./postgres/ + context: ./conf/docker-compose/postgres/ #ports: # - 5432:5432 restart: unless-stopped container_name: postgres hostname: postgres volumes: - - ./postgres-bind:/var/lib/postgresql/data:rw + - ./conf/docker-compose/postgres-bind:/var/lib/postgresql/data:rw environment: # this user and password will have superuser privileges - POSTGRES_USER=postgres @@ -24,12 +24,12 @@ services: solr: build: - context: ./solr/ + context: ./conf/docker-compose/solr/ restart: unless-stopped container_name: solr hostname: solr volumes: - - ./solr-bind:/var/solr:rw + - ./conf/docker-compose/solr-bind:/var/solr:rw environment: - TZ=${TZ} - "SOLR_JAVA_MEM=-Xms1g -Xmx1g" @@ -45,7 +45,7 @@ services: rserve: build: - context: ./rserve/ + context: ./conf/docker-compose/rserve/ restart: unless-stopped container_name: rserve hostname: rserve @@ -58,7 +58,7 @@ services: dataverse: build: - context: ./dataverse/ + context: . restart: unless-stopped container_name: dataverse hostname: dataverse @@ -86,7 +86,6 @@ services: - S3_SECRET_KEY=${S3_SECRET_KEY} - DATAVERSE_FQDN=${DATAVERSE_FQDN} - DATAVERSE_SITE_URL=${DATAVERSE_SITE_URL} - - ARCHITECTURE=${ARCHITECTURE} depends_on: - postgres - solr @@ -98,8 +97,8 @@ services: volumes: # https://guides.dataverse.org/en/latest/installation/advanced.html # logos and sitemap are initially empty on the very first startup - - ./dataverse-logos-bind:/usr/local/payara5/glassfish/domains/domain1/docroot/logos/ - - ./dataverse-sitemap-bind:/usr/local/payara5/glassfish/domains/domain1/docroot/sitemap/ + - ./conf/docker-compose/dataverse-logos-bind:/usr/local/payara5/glassfish/domains/domain1/docroot/logos/ + - ./conf/docker-compose/dataverse-sitemap-bind:/usr/local/payara5/glassfish/domains/domain1/docroot/sitemap/ labels: - "traefik.enable=true" - "traefik.http.routers.dataverse.rule=Host(`localhost`)" @@ -117,7 +116,7 @@ services: # Traefik reverse proxy traefik: build: - context: ./traefik/ + 
context: ./conf/docker-compose/traefik/ container_name: traefik hostname: traefik environment: @@ -156,14 +155,14 @@ services: # https://github.com/chrislusf/seaweedfs/blob/master/docker/seaweedfs-compose.yml seaweedfs: build: - context: ./seaweedfs/ + context: ./conf/docker-compose/seaweedfs/ restart: unless-stopped container_name: seaweedfs hostname: seaweedfs environment: - TZ=${TZ} volumes: - - ./seaweedfs-bind:/data:rw + - ./conf/docker-compose/seaweedfs-bind:/data:rw # ports: # - 8333:8333 # s3 API # - 9327:9327 # metrics API From c5edd007863232f91a31a92aff5b6829fcbcf674 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Thu, 19 May 2022 15:43:02 -0600 Subject: [PATCH 08/27] disable cache testing again --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index f813d522a29..c3cb9ee89fa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,7 +54,7 @@ RUN cp -R /home/dataverse/.aws/ /usr/local/payara5/glassfish/domains/domain1/ # if you want to speed up the Maven build you can copy over # cached packages here -COPY --chown=dataverse:dataverse ./conf/docker-compose/dataverse/.m2/ /home/dataverse/.m2/ +# COPY --chown=dataverse:dataverse ./conf/docker-compose/dataverse/.m2/ /home/dataverse/.m2/ # copy over sourcecode and build files needed to compile the .war # as well as installer files From 61d7d9e9ebe825f1f70ef02cdb4439ab9f83997f Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Thu, 19 May 2022 15:48:59 -0600 Subject: [PATCH 09/27] cleanup gitignore with refactor --- conf/docker-compose/.gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/docker-compose/.gitignore b/conf/docker-compose/.gitignore index 6de52ae063e..69fdcfd2255 100644 --- a/conf/docker-compose/.gitignore +++ b/conf/docker-compose/.gitignore @@ -1,4 +1,3 @@ -/dataverse/dataverse/ /solr/*.xml /dataverse/.m2/ /postgres-bind/ From 8067fc90777f09aa40a076ffb25362da509e1d6d Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Mon, 23 May 
2022 10:53:40 -0600 Subject: [PATCH 10/27] multi-thread building of R libraries --- conf/docker-compose/rserve/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/docker-compose/rserve/Dockerfile b/conf/docker-compose/rserve/Dockerfile index bf0554439e5..8817882a69b 100644 --- a/conf/docker-compose/rserve/Dockerfile +++ b/conf/docker-compose/rserve/Dockerfile @@ -9,7 +9,8 @@ RUN apt-get update && \ apt-get -y upgrade && \ rm -rf /var/lib/apt/lists/* -RUN R -e "install.packages(c('R2HTML', 'rjson', 'DescTools', 'Rserve', 'haven'))" +# multi-thread the building of the libraries from source via NCPUS=X +RUN R -e "install.packages(c('R2HTML', 'rjson', 'DescTools', 'Rserve', 'haven'), NCPUS=4)" # this is the user and password that Dataverse will connect to the Rserve server RUN echo "rserve rserve" > /rserve.pwd From 324c13b7b9712d1b2bf1b7ad3d4287fa1e5971d4 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Tue, 14 Jun 2022 14:34:00 -0600 Subject: [PATCH 11/27] fix maven build --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c3cb9ee89fa..44ce144e5da 100644 --- a/Dockerfile +++ b/Dockerfile @@ -63,6 +63,7 @@ COPY --chown=dataverse:dataverse src /dataverse/src/ COPY --chown=dataverse:dataverse modules /dataverse/modules/ COPY --chown=dataverse:dataverse scripts /dataverse/scripts/ COPY --chown=dataverse:dataverse conf/jhove/ /dataverse/conf/jhove/ +COPY --chown=dataverse:dataverse local_lib /dataverse/local_lib/ # this likely isn't needed on Linux but was needed on a Windows build RUN find /dataverse -type f -print0 | xargs -0 -n 1 -P 4 dos2unix @@ -71,7 +72,7 @@ RUN find /dataverse -type f -print0 | xargs -0 -n 1 -P 4 dos2unix RUN cd /dataverse/ && \ export dpkgArch="$(dpkg --print-architecture)" && \ export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-${dpkgArch}" && \ - mvn package -DskipTests + mvn package -DskipTests --no-transfer-progress USER root COPY 
--chown=dataverse:dataverse ./conf/docker-compose/dataverse/startup.sh /startup.sh From 80771b835a664a561648fac2c9324c2eab02f784 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Mon, 29 Aug 2022 11:22:56 -0600 Subject: [PATCH 12/27] rev upstream Solr version --- conf/docker-compose/solr/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/docker-compose/solr/Dockerfile b/conf/docker-compose/solr/Dockerfile index 8a3cccfb1ac..4cb7909cda5 100644 --- a/conf/docker-compose/solr/Dockerfile +++ b/conf/docker-compose/solr/Dockerfile @@ -1,5 +1,5 @@ # https://hub.docker.com/_/solr -FROM solr:8.11.1 +FROM solr:8.11.2 # https://guides.dataverse.org/en/latest/installation/prerequisites.html#solr From 3a399eb6259a828d7f47526b0af801a96286c729 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Tue, 30 Aug 2022 07:12:22 -0600 Subject: [PATCH 13/27] rev traefik version --- conf/docker-compose/traefik/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/docker-compose/traefik/Dockerfile b/conf/docker-compose/traefik/Dockerfile index 3735935e714..58cbf466a50 100644 --- a/conf/docker-compose/traefik/Dockerfile +++ b/conf/docker-compose/traefik/Dockerfile @@ -1,5 +1,5 @@ # https://hub.docker.com/_/traefik -FROM traefik:v2.6 +FROM traefik:v2.8 # Add certs to OS truststore COPY *.crt /usr/local/share/ca-certificates/ From b4deddca191f16418f1ea91518f093c1ea1bc34e Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Tue, 30 Aug 2022 07:39:30 -0600 Subject: [PATCH 14/27] look for Debian package updates in Solr --- conf/docker-compose/solr/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/conf/docker-compose/solr/Dockerfile b/conf/docker-compose/solr/Dockerfile index 4cb7909cda5..c64aa52927f 100644 --- a/conf/docker-compose/solr/Dockerfile +++ b/conf/docker-compose/solr/Dockerfile @@ -5,6 +5,10 @@ FROM solr:8.11.2 USER root +RUN apt-get update && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* + COPY 
*.xml / # increase the number of file descriptors and max processes From 0ecc128f19f89f01b1a8386572deda69377f8a01 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Tue, 30 Aug 2022 08:13:12 -0600 Subject: [PATCH 15/27] rev Payara version --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 44ce144e5da..bb6f371c53b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ WORKDIR / RUN useradd --create-home --shell /bin/bash dataverse # https://guides.dataverse.org/en/5.8/installation/prerequisites.html -RUN wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.1/payara-5.2022.1.zip +RUN wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip RUN unzip payara-5.2022.1.zip && \ mv payara5 /usr/local && \ From 403c037a9acef56ae4399ee76d347b69ec08f918 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Tue, 30 Aug 2022 08:20:12 -0600 Subject: [PATCH 16/27] swap to latest Payara version --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index bb6f371c53b..6143b880827 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,9 +16,9 @@ RUN useradd --create-home --shell /bin/bash dataverse # https://guides.dataverse.org/en/5.8/installation/prerequisites.html RUN wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip -RUN unzip payara-5.2022.1.zip && \ +RUN unzip payara-5.2022.3.zip && \ mv payara5 /usr/local && \ - rm payara-5.2022.1.zip + rm payara-5.2022.3.zip RUN chown -R root:root /usr/local/payara5 && \ chown dataverse /usr/local/payara5/glassfish/lib && \ From 3605baf1358ef6a590b210de82808608ab796e48 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Tue, 30 Aug 2022 14:38:42 -0600 Subject: [PATCH 17/27] delete cached Maven dependencies --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index 
6143b880827..7f06fb9c83c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -74,6 +74,9 @@ RUN cd /dataverse/ && \ export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-${dpkgArch}" && \ mvn package -DskipTests --no-transfer-progress +# delete the cached dependencies so we don't get any inaccurate "false flags" on container scanning for security issues +RUN rm -rf ~/.m2/ + USER root COPY --chown=dataverse:dataverse ./conf/docker-compose/dataverse/startup.sh /startup.sh RUN chmod +x /startup.sh && dos2unix /startup.sh From 6cc5adcc29a5bc8525809355a428703e7cb71a13 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Tue, 30 Aug 2022 16:24:02 -0600 Subject: [PATCH 18/27] look for and apply Debian updates --- conf/docker-compose/postgres/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/conf/docker-compose/postgres/Dockerfile b/conf/docker-compose/postgres/Dockerfile index 89b59a5d5ea..81d896cc87d 100644 --- a/conf/docker-compose/postgres/Dockerfile +++ b/conf/docker-compose/postgres/Dockerfile @@ -1,2 +1,6 @@ # https://hub.docker.com/_/postgres FROM postgres:14 + +RUN apt-get update && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* From c22719f16497e02949dca2b929aac97f3a399ce7 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Wed, 11 Jan 2023 19:37:50 -0600 Subject: [PATCH 19/27] add traefik container in maven --- modules/container-traefik/.gitignore | 3 + modules/container-traefik/README.md | 52 +++++++ modules/container-traefik/pom.xml | 143 ++++++++++++++++++ .../src/main/docker/Dockerfile | 18 +++ .../src/main/docker/assembly.xml | 10 ++ .../src/main/docker/certificates.yaml | 6 + 6 files changed, 232 insertions(+) create mode 100644 modules/container-traefik/.gitignore create mode 100644 modules/container-traefik/README.md create mode 100644 modules/container-traefik/pom.xml create mode 100644 modules/container-traefik/src/main/docker/Dockerfile create mode 100644 modules/container-traefik/src/main/docker/assembly.xml create mode 100644 
modules/container-traefik/src/main/docker/certificates.yaml diff --git a/modules/container-traefik/.gitignore b/modules/container-traefik/.gitignore new file mode 100644 index 00000000000..e803b2f0ad1 --- /dev/null +++ b/modules/container-traefik/.gitignore @@ -0,0 +1,3 @@ +.flattened-pom.xml +/src/main/docker/traefik.crt +/src/main/docker/traefik.key \ No newline at end of file diff --git a/modules/container-traefik/README.md b/modules/container-traefik/README.md new file mode 100644 index 00000000000..3914c7defb5 --- /dev/null +++ b/modules/container-traefik/README.md @@ -0,0 +1,52 @@ +# Dataverse Traefik Image + +The Dataverse Traefik Image provides a reverse proxy and TLS certificate encryption +for HTTPS access into the Dataverse application. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-traefik) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-traefik/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. 
+ ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-traefik/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image files at `/modules/container-traefik`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Traefik Proxy](https://github.com/traefik/traefik/blob/master/LICENSE.md), Bash, etc., from the base +distribution, along with any direct or indirect dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. 
diff --git a/modules/container-traefik/pom.xml b/modules/container-traefik/pom.xml new file mode 100644 index 00000000000..cade7f68907 --- /dev/null +++ b/modules/container-traefik/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-traefik + ${packaging.type} + Container Dataverse Traefik + This module provides the Traefik reverse proxy via a container. + + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/traefik:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + diff --git a/modules/container-traefik/src/main/docker/Dockerfile b/modules/container-traefik/src/main/docker/Dockerfile new file mode 100644 index 00000000000..58cbf466a50 --- /dev/null +++ b/modules/container-traefik/src/main/docker/Dockerfile @@ -0,0 +1,18 @@ +# https://hub.docker.com/_/traefik +FROM traefik:v2.8 + +# Add certs to OS truststore +COPY *.crt /usr/local/share/ca-certificates/ +# private key +COPY *.key /usr/local/share/ca-certificates/ + +# use HTTP connection for adding the needed ca-certificates package +# install it and then swap back to HTTPS +RUN sed -i 's,https,http,g' /etc/apk/repositories && \ + apk add --no-cache ca-certificates && \ + update-ca-certificates && \ + sed -i 's,http,https,g' /etc/apk/repositories && \ + apk update && apk upgrade && apk add --no-cache 
bash curl + +RUN mkdir -p /configuration/ +COPY certificates.yaml /configuration/certificates.yaml diff --git a/modules/container-traefik/src/main/docker/assembly.xml b/modules/container-traefik/src/main/docker/assembly.xml new file mode 100644 index 00000000000..25a5a578d83 --- /dev/null +++ b/modules/container-traefik/src/main/docker/assembly.xml @@ -0,0 +1,10 @@ + + + + ${project.basedir}/target/ + traefik + + + diff --git a/modules/container-traefik/src/main/docker/certificates.yaml b/modules/container-traefik/src/main/docker/certificates.yaml new file mode 100644 index 00000000000..27fc217abf6 --- /dev/null +++ b/modules/container-traefik/src/main/docker/certificates.yaml @@ -0,0 +1,6 @@ +tls: + stores: + default: + defaultCertificate: + certFile: /usr/local/share/ca-certificates/traefik.crt + keyFile: /usr/local/share/ca-certificates/traefik.key \ No newline at end of file From ed2d9109e37d367fe737ee05aacb52fae3fee57d Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Wed, 11 Jan 2023 19:42:24 -0600 Subject: [PATCH 20/27] add solr container in maven --- modules/container-solr/.gitignore | 2 + modules/container-solr/README.md | 51 +++++++ modules/container-solr/pom.xml | 143 ++++++++++++++++++ .../container-solr/src/main/docker/Dockerfile | 29 ++++ .../container-solr/src/main/docker/startup.sh | 10 ++ 5 files changed, 235 insertions(+) create mode 100644 modules/container-solr/.gitignore create mode 100644 modules/container-solr/README.md create mode 100644 modules/container-solr/pom.xml create mode 100644 modules/container-solr/src/main/docker/Dockerfile create mode 100644 modules/container-solr/src/main/docker/startup.sh diff --git a/modules/container-solr/.gitignore b/modules/container-solr/.gitignore new file mode 100644 index 00000000000..cea44f1ab64 --- /dev/null +++ b/modules/container-solr/.gitignore @@ -0,0 +1,2 @@ +.flattened-pom.xml +/src/main/docker/*.xml \ No newline at end of file diff --git a/modules/container-solr/README.md 
b/modules/container-solr/README.md new file mode 100644 index 00000000000..bb4fa454f94 --- /dev/null +++ b/modules/container-solr/README.md @@ -0,0 +1,51 @@ +# Dataverse Solr Image + +The Dataverse Solr Image provides the Solr indexing database used by Dataverse. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-solr) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-solr/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-solr/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image files at `/modules/container-solr`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. 
+ +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Apache Solr](https://www.apache.org/licenses/LICENSE-2.0), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-solr/pom.xml b/modules/container-solr/pom.xml new file mode 100644 index 00000000000..f233fa49aa0 --- /dev/null +++ b/modules/container-solr/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-solr + ${packaging.type} + Container Dataverse Solr + This module provides the Solr database via a container. 
+ + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/solr:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + diff --git a/modules/container-solr/src/main/docker/Dockerfile b/modules/container-solr/src/main/docker/Dockerfile new file mode 100644 index 00000000000..bb128fcc776 --- /dev/null +++ b/modules/container-solr/src/main/docker/Dockerfile @@ -0,0 +1,29 @@ +# https://hub.docker.com/_/solr +FROM solr:8.11.2 + +# https://guides.dataverse.org/en/latest/installation/prerequisites.html#solr + +USER root + +RUN apt-get update && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* + +COPY *.xml / + +# increase the number of file descriptors and max processes +RUN echo "solr soft nproc 65000" >> /etc/security/limits.conf && \ + echo "solr hard nproc 65000" >> /etc/security/limits.conf && \ + echo "solr soft nofile 65000" >> /etc/security/limits.conf && \ + echo "solr hard nofile 65000" >> /etc/security/limits.conf + +# increase Header size +RUN sed -i "s/name=\"solr.jetty.request.header.size\" default=\"8192\"/name=\"solr.jetty.request.header.size\" default=\"102400\"/g" /opt/solr/server/etc/jetty.xml + +COPY --chown=solr:solr startup.sh /startup.sh +RUN chmod +x /startup.sh + +# switch back to normal runtime user for security purposes +USER solr + +CMD ["/startup.sh"] diff --git a/modules/container-solr/src/main/docker/startup.sh 
b/modules/container-solr/src/main/docker/startup.sh new file mode 100644 index 00000000000..aced1a47ed4 --- /dev/null +++ b/modules/container-solr/src/main/docker/startup.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# seeds collection1 with the default configset plus the XML files found in / and then starts Solr +mkdir -p /var/solr/data/collection1/ +cp -R /opt/solr/server/solr/configsets/_default/conf /var/solr/data/collection1 +cp /*.xml /var/solr/data/collection1/conf/ + +# create collection on startup +echo "name=collection1" > /var/solr/data/collection1/core.properties +# exec replaces this shell so Solr receives the stop signal from Docker directly +exec solr-foreground From a5f3fcfbab116232d7c28dd3a2df371775646cb9 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Wed, 11 Jan 2023 19:45:56 -0600 Subject: [PATCH 21/27] add seaweedfs container using maven --- modules/container-seaweedfs/.gitignore | 1 + modules/container-seaweedfs/README.md | 51 +++++++ modules/container-seaweedfs/pom.xml | 143 ++++++++++++++++++ .../src/main/docker/Dockerfile | 15 ++ .../src/main/docker/assembly.xml | 10 ++ .../src/main/docker/config.json | 19 +++ 6 files changed, 239 insertions(+) create mode 100644 modules/container-seaweedfs/.gitignore create mode 100644 modules/container-seaweedfs/README.md create mode 100644 modules/container-seaweedfs/pom.xml create mode 100644 modules/container-seaweedfs/src/main/docker/Dockerfile create mode 100644 modules/container-seaweedfs/src/main/docker/assembly.xml create mode 100644 modules/container-seaweedfs/src/main/docker/config.json diff --git a/modules/container-seaweedfs/.gitignore b/modules/container-seaweedfs/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-seaweedfs/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-seaweedfs/README.md b/modules/container-seaweedfs/README.md new file mode 100644 index 00000000000..71b773d191c --- /dev/null +++ b/modules/container-seaweedfs/README.md @@ -0,0 +1,51 @@ +# Dataverse Seaweedfs Image + +The Dataverse Seaweedfs Image provides s3 compatible object storage for data. 
+ +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-seaweedfs) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-seaweedfs/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-seaweedfs/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image files at `/modules/container-seaweedfs`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). 
+ +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[SeaweedFS](https://github.com/seaweedfs/seaweedfs/blob/master/LICENSE), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-seaweedfs/pom.xml b/modules/container-seaweedfs/pom.xml new file mode 100644 index 00000000000..4612cfe821e --- /dev/null +++ b/modules/container-seaweedfs/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-seaweedfs + ${packaging.type} + Container Dataverse Seaweedfs + This module provides the seaweedfs object storage via a container. 
+ + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/seaweedfs:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + diff --git a/modules/container-seaweedfs/src/main/docker/Dockerfile b/modules/container-seaweedfs/src/main/docker/Dockerfile new file mode 100644 index 00000000000..c000090ba6e --- /dev/null +++ b/modules/container-seaweedfs/src/main/docker/Dockerfile @@ -0,0 +1,15 @@ +# https://hub.docker.com/r/chrislusf/seaweedfs +FROM chrislusf/seaweedfs:latest + +USER root + +# use HTTP connection for adding the needed ca-certificates package +# install it and then swap back to HTTPS +RUN sed -i 's,https,http,g' /etc/apk/repositories && \ + apk add --no-cache ca-certificates && \ + update-ca-certificates && \ + sed -i 's,http,https,g' /etc/apk/repositories && \ + apk update && apk upgrade && apk add --no-cache curl bash + +# https://github.com/chrislusf/seaweedfs/wiki/Amazon-S3-API#static-configuration +COPY config.json /config.json diff --git a/modules/container-seaweedfs/src/main/docker/assembly.xml b/modules/container-seaweedfs/src/main/docker/assembly.xml new file mode 100644 index 00000000000..1e1d728be5a --- /dev/null +++ b/modules/container-seaweedfs/src/main/docker/assembly.xml @@ -0,0 +1,10 @@ + + + + ${project.basedir}/target/ + seaweedfs + + + diff --git a/modules/container-seaweedfs/src/main/docker/config.json 
b/modules/container-seaweedfs/src/main/docker/config.json new file mode 100644 index 00000000000..ad18955f042 --- /dev/null +++ b/modules/container-seaweedfs/src/main/docker/config.json @@ -0,0 +1,19 @@ +{ + "identities": [ + { + "name": "anonymous", + "credentials": [ + { + "accessKey": "secret", + "secretKey": "secret" + } + ], + "actions": [ + "Read:dataverse", + "List:dataverse", + "Tagging:dataverse", + "Write:dataverse" + ] + } + ] +} \ No newline at end of file From a87bfca383d224e3a53df80cbbecabfb5f8225a6 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Wed, 11 Jan 2023 19:48:20 -0600 Subject: [PATCH 22/27] add rserve container using maven --- modules/container-rserve/.gitignore | 1 + modules/container-rserve/README.md | 51 +++++++ modules/container-rserve/pom.xml | 143 ++++++++++++++++++ .../src/main/docker/Dockerfile | 23 +++ .../src/main/docker/assembly.xml | 10 ++ 5 files changed, 228 insertions(+) create mode 100644 modules/container-rserve/.gitignore create mode 100644 modules/container-rserve/README.md create mode 100644 modules/container-rserve/pom.xml create mode 100644 modules/container-rserve/src/main/docker/Dockerfile create mode 100644 modules/container-rserve/src/main/docker/assembly.xml diff --git a/modules/container-rserve/.gitignore b/modules/container-rserve/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-rserve/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-rserve/README.md b/modules/container-rserve/README.md new file mode 100644 index 00000000000..525af0f323d --- /dev/null +++ b/modules/container-rserve/README.md @@ -0,0 +1,51 @@ +# Dataverse Rserve Image + +The Dataverse Rserve Image provides the rserve daemon which is a dependency of Dataverse. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. 
+ +**Where to find documentation:** + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-rserve) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-rserve/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-rserve/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image files at `/modules/container-rserve`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). 
+ +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[R and Rserve](https://www.r-project.org/Licenses/), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-rserve/pom.xml b/modules/container-rserve/pom.xml new file mode 100644 index 00000000000..71d9c7fc38e --- /dev/null +++ b/modules/container-rserve/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-rserve + ${packaging.type} + Container Dataverse Rserve + This module provides the rserve daemon via a container. 
+ + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/rserve:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + diff --git a/modules/container-rserve/src/main/docker/Dockerfile b/modules/container-rserve/src/main/docker/Dockerfile new file mode 100644 index 00000000000..8817882a69b --- /dev/null +++ b/modules/container-rserve/src/main/docker/Dockerfile @@ -0,0 +1,23 @@ +# https://hub.docker.com/_/ubuntu +FROM ubuntu:22.04 + +ENV DEBIAN_FRONTEND noninteractive + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + tzdata nano r-base r-base-dev libssl-dev libnlopt-dev libcurl4-openssl-dev ca-certificates && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* + +# multi-thread the building of the libraries from source via NCPUS=X +RUN R -e "install.packages(c('R2HTML', 'rjson', 'DescTools', 'Rserve', 'haven'), NCPUS=4)" + +# this is the user and password that Dataverse will connect to the Rserve server +RUN echo "rserve rserve" > /rserve.pwd + +WORKDIR / + +EXPOSE 6311/tcp + +# https://github.com/ubc/r-docker/blob/master/Dockerfile +CMD ["R", "-e", "Rserve::run.Rserve(remote=TRUE, auth=TRUE, pwdfile='/rserve.pwd', daemon=FALSE, port=6311, fileio=TRUE, maxinbuf=262144)"] \ No newline at end of file diff --git a/modules/container-rserve/src/main/docker/assembly.xml 
b/modules/container-rserve/src/main/docker/assembly.xml new file mode 100644 index 00000000000..d7f1aa80009 --- /dev/null +++ b/modules/container-rserve/src/main/docker/assembly.xml @@ -0,0 +1,10 @@ + + + + ${project.basedir}/target/ + rserve + + + From 169422e0ef32631ca836e4395bb967d2951a8fc0 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Wed, 11 Jan 2023 19:49:57 -0600 Subject: [PATCH 23/27] add postgresql container using maven --- modules/container-postgresql/.gitignore | 1 + modules/container-postgresql/README.md | 52 +++++++ modules/container-postgresql/pom.xml | 143 ++++++++++++++++++ .../src/main/docker/Dockerfile | 6 + .../src/main/docker/assembly.xml | 10 ++ 5 files changed, 212 insertions(+) create mode 100644 modules/container-postgresql/.gitignore create mode 100644 modules/container-postgresql/README.md create mode 100644 modules/container-postgresql/pom.xml create mode 100644 modules/container-postgresql/src/main/docker/Dockerfile create mode 100644 modules/container-postgresql/src/main/docker/assembly.xml diff --git a/modules/container-postgresql/.gitignore b/modules/container-postgresql/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-postgresql/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-postgresql/README.md b/modules/container-postgresql/README.md new file mode 100644 index 00000000000..08a2b87c997 --- /dev/null +++ b/modules/container-postgresql/README.md @@ -0,0 +1,52 @@ +# Dataverse Postgresql Image + +The Dataverse Postgresql Image is for the backend Postgresql relational database which +is a dependency for Dataverse. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. 
+You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-postgresql) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-postgresql/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-postgresql/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image files at `/modules/container-postgresql`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[PostgreSQL](https://www.postgresql.org/about/licence/), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-postgresql/pom.xml b/modules/container-postgresql/pom.xml new file mode 100644 index 00000000000..04c31af9400 --- /dev/null +++ b/modules/container-postgresql/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-postgresql + ${packaging.type} + Container Dataverse Postgresql + This module provides the Postgresql database via a container. 
+ + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/postgresql:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + \ No newline at end of file diff --git a/modules/container-postgresql/src/main/docker/Dockerfile b/modules/container-postgresql/src/main/docker/Dockerfile new file mode 100644 index 00000000000..81d896cc87d --- /dev/null +++ b/modules/container-postgresql/src/main/docker/Dockerfile @@ -0,0 +1,6 @@ +# https://hub.docker.com/_/postgres +FROM postgres:14 + +RUN apt-get update && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* diff --git a/modules/container-postgresql/src/main/docker/assembly.xml b/modules/container-postgresql/src/main/docker/assembly.xml new file mode 100644 index 00000000000..6ed4fcb6df4 --- /dev/null +++ b/modules/container-postgresql/src/main/docker/assembly.xml @@ -0,0 +1,10 @@ + + + + ${project.basedir}/target/ + postgresql + + + From 6e5d3f45c88a67a5a555d3f7a4d9f9137ee8373a Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Wed, 11 Jan 2023 19:50:37 -0600 Subject: [PATCH 24/27] update docker-compose with prebuilt images from maven --- docker-compose.yml | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index a3cfad8b0c3..5c02bbf7e40 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,8 +2,7 @@ 
version: '3.5' services: postgres: - build: - context: ./conf/docker-compose/postgres/ + image: gdcc/postgresql:unstable #ports: # - 5432:5432 restart: unless-stopped @@ -23,8 +22,7 @@ services: retries: 5 solr: - build: - context: ./conf/docker-compose/solr/ + image: gdcc/solr:unstable restart: unless-stopped container_name: solr hostname: solr @@ -44,8 +42,7 @@ services: start_period: 30s rserve: - build: - context: ./conf/docker-compose/rserve/ + image: gdcc/rserve:unstable restart: unless-stopped container_name: rserve hostname: rserve @@ -57,8 +54,7 @@ services: # - 6311:6311 dataverse: - build: - context: . + image: gdcc/dataverse:unstable restart: unless-stopped container_name: dataverse hostname: dataverse @@ -67,6 +63,8 @@ services: - HOST_DNS_ADDRESS=${HOST_DNS_ADDRESS} - GLASSFISH_USER=${GLASSFISH_USER} - GLASSFISH_PASSWORD=${GLASSFISH_PASSWORD} + - GLASSFISH_ADMIN_USER=${GLASSFISH_ADMIN_USER} + - GLASSFISH_ADMIN_PASSWORD=${GLASSFISH_ADMIN_PASSWORD} - ADMIN_EMAIL=${ADMIN_EMAIL} - MAIL_SERVER=${MAIL_SERVER} - POSTGRES_ADMIN_PASSWORD=${POSTGRES_ADMIN_PASSWORD} @@ -97,8 +95,8 @@ services: volumes: # https://guides.dataverse.org/en/latest/installation/advanced.html # logos and sitemap are initially empty on the very first startup - - ./conf/docker-compose/dataverse-logos-bind:/usr/local/payara5/glassfish/domains/domain1/docroot/logos/ - - ./conf/docker-compose/dataverse-sitemap-bind:/usr/local/payara5/glassfish/domains/domain1/docroot/sitemap/ + - ./conf/docker-compose/dataverse-logos-bind:/opt/payara/appserver/glassfish/domains/domain1/docroot/logos/ + - ./conf/docker-compose/dataverse-sitemap-bind:/opt/payara/appserver/glassfish/domains/domain1/docroot/sitemap/ labels: - "traefik.enable=true" - "traefik.http.routers.dataverse.rule=Host(`localhost`)" @@ -115,8 +113,7 @@ services: # Traefik reverse proxy traefik: - build: - context: ./conf/docker-compose/traefik/ + image: gdcc/traefik:unstable container_name: traefik hostname: traefik environment: @@ -154,8 
+151,7 @@ services: # alternatives include minio and others # https://github.com/chrislusf/seaweedfs/blob/master/docker/seaweedfs-compose.yml seaweedfs: - build: - context: ./conf/docker-compose/seaweedfs/ + image: gdcc/seaweedfs:unstable restart: unless-stopped container_name: seaweedfs hostname: seaweedfs From aeae7c6cb60d2ab51fc96e2793aa3c1ea4d95725 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Wed, 11 Jan 2023 19:53:23 -0600 Subject: [PATCH 25/27] update build script --- conf/docker-compose/README.md | 4 +++- conf/docker-compose/build-containers.sh | 29 +++++++++++++++++++++++++ conf/docker-compose/prepbuild.sh | 4 ---- 3 files changed, 32 insertions(+), 5 deletions(-) create mode 100755 conf/docker-compose/build-containers.sh delete mode 100755 conf/docker-compose/prepbuild.sh diff --git a/conf/docker-compose/README.md b/conf/docker-compose/README.md index d0f1069a1f4..3f0b37d7a83 100644 --- a/conf/docker-compose/README.md +++ b/conf/docker-compose/README.md @@ -4,6 +4,7 @@ * [docker-compose](https://docs.docker.com/compose/) * [Docker](https://docker.com) (or some other supported container engine) +* [Maven](https://maven.apache.org/) ## Setup @@ -26,7 +27,8 @@ Then copy the `traefik.key` and `traefik.crt` files into the `traefik` folder. ## Building -Run `prepbuild.sh` once +Run `build-containers.sh`. This will copy a few files and setup the build environment before +running maven builds for each of the container services. 
Pull and build the Docker containers diff --git a/conf/docker-compose/build-containers.sh b/conf/docker-compose/build-containers.sh new file mode 100755 index 00000000000..1d6b86dfd88 --- /dev/null +++ b/conf/docker-compose/build-containers.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# builds all container images, switches to the git root first so it can be started from any directory +cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1 + +# Solr XML schema files +cp conf/solr/8.11.1/*.xml modules/container-solr/src/main/docker/ + +# prep Solr beforehand so it has the appropriate permissions, 8983 is the UID hard-coded in the stock Solr Dockerfile +mkdir -p ./conf/docker-compose/solr-bind/ +sudo chown 8983:8983 ./conf/docker-compose/solr-bind/ + +# copy sourcecode and installer files over, copying DIR/. into an existing target keeps reruns from nesting DIR inside itself +mkdir -p modules/container-dataverse/src/main/docker/{src,modules/dataverse-parent,scripts,conf,local_lib} +cp pom.xml modules/container-dataverse/src/main/docker/ +cp -R ./src/. modules/container-dataverse/src/main/docker/src/ +cp -R ./modules/dataverse-parent/. modules/container-dataverse/src/main/docker/modules/dataverse-parent/ +cp -R ./scripts/. modules/container-dataverse/src/main/docker/scripts/ +cp -R ./conf/. modules/container-dataverse/src/main/docker/conf/ +cp -R ./local_lib/. modules/container-dataverse/src/main/docker/local_lib/ + +# build out each of the images, set -e stops at the first failed build because later images depend on earlier ones +set -e +mvn -Pct -f modules/container-base clean install -Dmaven.test.skip -Ddocker.verbose=true +mvn -Pct -f modules/container-postgresql clean install -Dmaven.test.skip -Ddocker.verbose=true +mvn -Pct -f modules/container-rserve clean install -Dmaven.test.skip -Ddocker.verbose=true +mvn -Pct -f modules/container-seaweedfs clean install -Dmaven.test.skip -Ddocker.verbose=true +mvn -Pct -f modules/container-solr clean install -Dmaven.test.skip -Ddocker.verbose=true +mvn -Pct -f modules/container-traefik clean install -Dmaven.test.skip -Ddocker.verbose=true +mvn -Pct -f modules/container-dataverse clean install -Dmaven.test.skip -Ddocker.verbose=true diff --git a/conf/docker-compose/prepbuild.sh b/conf/docker-compose/prepbuild.sh deleted file mode 100755 index a7bdefd979f..00000000000 --- a/conf/docker-compose/prepbuild.sh +++ /dev/null @@ 
-1,4 +0,0 @@ -#!/bin/bash - -cp ../solr/8.11.1/*.xml ./solr/ - From ae526a96dd11c4e448386856b54f9c7c2839bbd1 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Wed, 11 Jan 2023 19:55:55 -0600 Subject: [PATCH 26/27] add dataverse container using maven --- modules/container-dataverse/.gitignore | 1 + modules/container-dataverse/README.md | 52 ++ modules/container-dataverse/pom.xml | 143 ++++ .../src/main/docker/Dockerfile | 104 +++ .../src/main/docker/assembly.xml | 10 + .../src/main/docker/config | 4 + .../src/main/docker/credentials | 3 + .../src/main/docker/pom.xml | 787 ++++++++++++++++++ .../src/main/docker/postboot.sh | 16 + .../src/main/docker/preboot.sh | 60 ++ 10 files changed, 1180 insertions(+) create mode 100644 modules/container-dataverse/.gitignore create mode 100644 modules/container-dataverse/README.md create mode 100644 modules/container-dataverse/pom.xml create mode 100644 modules/container-dataverse/src/main/docker/Dockerfile create mode 100644 modules/container-dataverse/src/main/docker/assembly.xml create mode 100644 modules/container-dataverse/src/main/docker/config create mode 100644 modules/container-dataverse/src/main/docker/credentials create mode 100644 modules/container-dataverse/src/main/docker/pom.xml create mode 100644 modules/container-dataverse/src/main/docker/postboot.sh create mode 100644 modules/container-dataverse/src/main/docker/preboot.sh diff --git a/modules/container-dataverse/.gitignore b/modules/container-dataverse/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-dataverse/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-dataverse/README.md b/modules/container-dataverse/README.md new file mode 100644 index 00000000000..3c7d648a109 --- /dev/null +++ b/modules/container-dataverse/README.md @@ -0,0 +1,52 @@ +# Dataverse Image + +The Dataverse Image contains the main Dataverse application. It uses the `container-base` image +as a starting point. 
+ +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-dataverse) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-dataverse/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-dataverse/src/main/docker/Dockerfile)) + +Within the main repository, you may find the Dataverse image files at `/modules/container-dataverse`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host it in a different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2).
+ +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-dataverse/pom.xml b/modules/container-dataverse/pom.xml new file mode 100644 index 00000000000..b3a0fa3f365 --- /dev/null +++ b/modules/container-dataverse/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-dataverse + ${packaging.type} + Container Dataverse + This module provides the main Dataverse application via a container. 
+ + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/dataverse:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + diff --git a/modules/container-dataverse/src/main/docker/Dockerfile b/modules/container-dataverse/src/main/docker/Dockerfile new file mode 100644 index 00000000000..d41796a74d3 --- /dev/null +++ b/modules/container-dataverse/src/main/docker/Dockerfile @@ -0,0 +1,104 @@ +# this is built off the base container +FROM gdcc/base:unstable as builder + +ENV DEBIAN_FRONTEND noninteractive + +USER root + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + openjdk-11-jdk maven ca-certificates && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /dataverse + +# copy over sourcecode and build files needed to compile the .war +# as well as installer files +COPY pom.xml /dataverse/ +COPY modules /dataverse/modules/ +COPY local_lib /dataverse/local_lib/ + +# cache pom files and packages for compilation later +RUN cd /dataverse && mvn verify clean --fail-never + +COPY src /dataverse/src/ +COPY scripts /dataverse/scripts/ +COPY conf/jhove/ /dataverse/conf/jhove/ + +WORKDIR /dataverse + +# this can take some time to download all the dependencies +RUN export dpkgArch="$(dpkg --print-architecture)" && \ + export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-${dpkgArch}" && \ + mvn package -DskipTests 
--no-transfer-progress -T 1C + +FROM gdcc/base:unstable + +ENV DEBIAN_FRONTEND noninteractive + +USER root + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + python3-dev tzdata nano curl wget unzip jq imagemagick python3 python3-pip python3-psycopg2 wait-for-it ca-certificates && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* + +RUN useradd --create-home --shell /bin/bash dataverse + +# install awscli +RUN pip3 install --no-cache-dir awscli + +# ENV JAVA_HOME "/usr/lib/jvm/java-11-openjdk-${ARCHITECTURE}" +# RUN export JAVA_HOME="$(dirname $(dirname $(readlink -f $(which java))))" + +# install Counter Processor +# https://guides.dataverse.org/en/latest/installation/prerequisites.html#counter-processor +# RUN cd /usr/local && \ +# wget https://github.com/CDLUC3/counter-processor/archive/refs/tags/v0.1.04.tar.gz && \ +# tar xvfz v0.1.04.tar.gz && \ +# rm v0.1.04.tar.gz && \ +# cd counter-processor-0.1.04 && \ +# pip3 install -r requirements.txt +# RUN useradd --create-home --shell /bin/bash counter && \ +# chown -R counter:counter /usr/local/counter-processor-0.1.04 + +WORKDIR / + +COPY --chown=dataverse:dataverse --from=builder /dataverse /dataverse + +# switch to non-root user as this is more secure +USER dataverse +WORKDIR / + +RUN mkdir -p /home/dataverse/.aws/ +COPY --chown=dataverse:dataverse ./config /home/dataverse/.aws/config +COPY --chown=dataverse:dataverse ./credentials /home/dataverse/.aws/credentials + +# add Payara to PATH +ENV PATH="${PATH}:${PAYARA_DIR}/bin" + +USER root +RUN cp -R /home/dataverse/.aws/ "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" + +# set ownership so later the dataverse user can write to and make changes +RUN chown -R dataverse "${PAYARA_DIR}" + +COPY --chown=dataverse:dataverse ./preboot.sh /preboot.sh +RUN chmod +x /preboot.sh +COPY --chown=dataverse:dataverse ./postboot.sh /postboot.sh +RUN chmod +x /postboot.sh + +# auto-deploy the .war file +ENV DEPLOY_DIR /dataverse/target/ + +# pre and 
post boot commands +ENV PREBOOT_COMMANDS /preboot.sh +ENV POSTBOOT_COMMANDS /postboot.sh + +#USER dataverse +#CMD ["wait-for-it", "postgres:5432", "--", "/startup.sh"] + +# helpful for debugging purposes to just start up the container +# CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/modules/container-dataverse/src/main/docker/assembly.xml b/modules/container-dataverse/src/main/docker/assembly.xml new file mode 100644 index 00000000000..c7bfead1809 --- /dev/null +++ b/modules/container-dataverse/src/main/docker/assembly.xml @@ -0,0 +1,10 @@ + + + + ${project.basedir}/target/ + dataverse + + + diff --git a/modules/container-dataverse/src/main/docker/config b/modules/container-dataverse/src/main/docker/config new file mode 100644 index 00000000000..d1e10d78ee3 --- /dev/null +++ b/modules/container-dataverse/src/main/docker/config @@ -0,0 +1,4 @@ +[default] +region = us-east-1 +s3 = + signature_version = s3v4 \ No newline at end of file diff --git a/modules/container-dataverse/src/main/docker/credentials b/modules/container-dataverse/src/main/docker/credentials new file mode 100644 index 00000000000..64af5712826 --- /dev/null +++ b/modules/container-dataverse/src/main/docker/credentials @@ -0,0 +1,3 @@ +[default] +aws_access_key_id = secret +aws_secret_access_key = secret \ No newline at end of file diff --git a/modules/container-dataverse/src/main/docker/pom.xml b/modules/container-dataverse/src/main/docker/pom.xml new file mode 100644 index 00000000000..8b6f98c5896 --- /dev/null +++ b/modules/container-dataverse/src/main/docker/pom.xml @@ -0,0 +1,787 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + modules/dataverse-parent + + + + dataverse + war + dataverse + + false + 1.2.18.4 + 8.5.10 + 1.20.1 + 0.8.7 + 5.2.1 + 2.4.1 + 5.5.3 + + + + + + + + org.apache.abdera + abdera-core + 1.1.3 + + + org.apache.abdera + abdera-i18n + 1.1.3 + + + + + + + + + + org.slf4j + slf4j-jdk14 + runtime + + + + org.passay + passay + 1.6.0 + 
+ + + + commons-httpclient + commons-httpclient + 3.1 + + + + + io.gdcc + sword2-server + 1.2.1 + + + + org.apache.abdera + abdera-core + + + + + org.apache.abdera + abdera-i18n + + + + + com.amazonaws + aws-java-sdk-s3 + + + + com.apicatalog + titanium-json-ld + 1.3.0-SNAPSHOT + + + com.google.code.gson + gson + 2.8.9 + compile + + + + com.fasterxml.jackson.core + jackson-core + provided + + + + com.fasterxml.jackson.core + jackson-databind + provided + + + + org.everit.json + org.everit.json.schema + 1.5.1 + + + org.mindrot + jbcrypt + 0.4 + + + org.postgresql + postgresql + + + org.flywaydb + flyway-core + ${flyway.version} + + + com.google.guava + guava + 29.0-jre + jar + + + org.eclipse.microprofile.config + microprofile-config-api + provided + + + jakarta.platform + jakarta.jakartaee-api + ${jakartaee-api.version} + provided + + + + org.glassfish + jakarta.json + provided + + + + com.sun.mail + jakarta.mail + provided + + + org.glassfish + jakarta.faces + provided + + + org.primefaces + primefaces + 11.0.0 + + + org.primefaces.themes + all-themes + 1.0.10 + + + org.omnifaces + omnifaces + 3.8 + + + + jakarta.validation + jakarta.validation-api + provided + + + org.hibernate.validator + hibernate-validator + provided + + + org.glassfish + jakarta.el + provided + + + + commons-io + commons-io + + + + org.apache.commons + commons-lang3 + + + + + org.apache.commons + commons-text + 1.10.0 + + + org.apache.commons + commons-math + 2.2 + + + commons-validator + commons-validator + 1.7 + + + + org.apache.solr + solr-solrj + 8.11.1 + + + colt + colt + 1.2.0 + + + + nom.tam.fits + fits + 2012-10-25-generated + + + net.handle + handle + 8.1.1 + + + + edu.harvard.iq.dvn + unf5 + 5.0 + + + + org.dataverse + unf + 6.0 + + + + + org.nuiton.thirdparty + REngine + 0.6-1 + + + org.nuiton.thirdparty + Rserve + 0.6-1 + + + + org.apache.poi + poi + ${poi.version} + + + org.apache.poi + poi-ooxml + ${poi.version} + + + org.apache.poi + poi-scratchpad + ${poi.version} + + + 
org.openpreservation.jhove + jhove-core + ${jhove.version} + + + org.openpreservation.jhove + jhove-modules + ${jhove.version} + + + org.openpreservation.jhove + jhove-ext-modules + ${jhove.version} + + + + com.github.jai-imageio + jai-imageio-core + 1.3.1 + + + org.ocpsoft.rewrite + rewrite-servlet + 3.5.0.Final + + + org.ocpsoft.rewrite + rewrite-config-prettyfaces + 3.5.0.Final + + + edu.ucsb.nceas + ezid + 1.0.0 + jar + + + org.jsoup + jsoup + 1.15.3 + + + io.searchbox + jest + 0.1.7 + + + commons-codec + commons-codec + 1.15 + + + + org.javaswift + joss + 0.10.0 + + + org.apache.commons + commons-csv + 1.2 + + + + com.github.scribejava + scribejava-apis + 6.9.0 + + + + com.nimbusds + oauth2-oidc-sdk + 9.41.1 + + + + io.gdcc + xoai-data-provider + ${gdcc.xoai.version} + + + io.gdcc + xoai-service-provider + ${gdcc.xoai.version} + + + + com.google.auto.service + auto-service + 1.0-rc2 + true + jar + + + + org.glassfish.jersey.containers + jersey-container-servlet + 2.23.2 + + + + org.glassfish.jersey.media + jersey-media-multipart + 2.23.2 + + + com.mashape.unirest + unirest-java + 1.4.9 + + + + org.apache.commons + commons-compress + + + + org.duracloud + common + 7.1.1 + + + org.slf4j + log4j-over-slf4j + + + ch.qos.logback + logback-classic + + + + + org.duracloud + storeclient + 7.1.1 + + + org.slf4j + log4j-over-slf4j + + + com.amazonaws + aws-java-sdk-sqs + + + ch.qos.logback + logback-classic + + + + + + org.apache.tika + tika-core + ${tika.version} + + + org.apache.tika + tika-parsers-standard-package + ${tika.version} + + + + org.apache.opennlp + opennlp-tools + 1.9.1 + + + com.google.cloud + google-cloud-storage + + + + + + com.auth0 + java-jwt + 3.19.1 + + + + io.github.erdtman + java-json-canonicalization + 1.1 + + + edu.ucar + cdm-core + ${netcdf.version} + + + + + org.junit.jupiter + junit-jupiter + ${junit.jupiter.version} + test + + + junit + junit + ${junit.version} + test + + + org.junit.vintage + junit-vintage-engine + ${junit.vintage.version} 
+ test + + + org.hamcrest + hamcrest-library + 2.2 + test + + + org.assertj + assertj-core + 3.20.2 + test + + + org.xmlunit + xmlunit-assertj3 + 2.8.2 + test + + + com.jayway.restassured + rest-assured + 2.4.0 + test + + + org.skyscreamer + jsonassert + 1.5.0 + test + + + com.vaadin.external.google + android-json + + + + + org.testcontainers + testcontainers + test + + + org.testcontainers + junit-jupiter + test + + + org.testcontainers + postgresql + test + + + org.mockito + mockito-core + ${mockito.version} + test + + + org.mockito + mockito-junit-jupiter + ${mockito.version} + test + + + io.smallrye.config + smallrye-config + ${smallrye-mpconfig.version} + test + + + + + + + + src/main/java + + *.properties + **/*.properties + **/mime.types + **/*.R + + + + src/main/resources + + **/*.sql + **/*.xml + **/firstNames/*.* + **/*.xsl + **/services/* + + + + src/main/resources + + true + + **/*.properties + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + ${target.java.version} + + ${compilerArgument} + + + + org.apache.maven.plugins + maven-jar-plugin + + + + true + true + + + + + + org.apache.maven.plugins + maven-war-plugin + + true + false + + + true + true + + + + + + de.qaware.maven + go-offline-maven-plugin + 1.2.1 + + + + + + + org.jacoco + jacoco-maven-plugin + ${jacoco.version} + + ${basedir}/target/coverage-reports/jacoco-unit.exec + ${basedir}/target/coverage-reports/jacoco-unit.exec + + + + jacoco-initialize + + prepare-agent + + + + jacoco-site + package + + report + + + + + + org.eluder.coveralls + coveralls-maven-plugin + 4.3.0 + + + javax.xml.bind + jaxb-api + 2.3.1 + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + ${testsToExclude} + ${skipUnitTests} + + + + org.apache.maven.plugins + maven-checkstyle-plugin + + checkstyle.xml + UTF-8 + true + + + + + + + dev + + + + true + + + edu.harvard.iq.dataverse.NonEssentialTests + + + + all-unit-tests + + + + tc + + true + 9.6 + + + + + org.apache.maven.plugins + 
maven-failsafe-plugin + ${maven-failsafe-plugin.version} + + testcontainers + + ${postgresql.server.version} + + + + + + integration-test + verify + + + + + + + + +
diff --git a/modules/container-dataverse/src/main/docker/postboot.sh b/modules/container-dataverse/src/main/docker/postboot.sh
new file mode 100644
index 00000000000..ac279b99a56
--- /dev/null
+++ b/modules/container-dataverse/src/main/docker/postboot.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Post-boot hook: applies optional Dataverse settings through the admin API.
+# Controlled by the DISABLE_DOI and EXCLUDE_EMAIL_EXPORTS environment variables.
+
+settings_url="http://localhost:8080/api/admin/settings"
+
+# check if we should disable DOI validation
+if [[ "${DISABLE_DOI:-}" == "true" ]]; then
+  echo "Disabling DOI validation"
+  # -f turns an HTTP error status into a non-zero exit so it gets reported
+  curl -fsS -X PUT -d FAKE "${settings_url}/:DoiProvider" \
+    || echo "WARNING: could not set :DoiProvider" >&2
+fi
+
+# check if we should exclude emails from exports
+if [[ "${EXCLUDE_EMAIL_EXPORTS:-}" == "true" ]]; then
+  echo "Excluding emails in exports"
+  curl -fsS -X PUT -d true "${settings_url}/:ExcludeEmailFromExport" \
+    || echo "WARNING: could not set :ExcludeEmailFromExport" >&2
+fi
+
+
+#wait-for-it localhost:8080 -- tail -f ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs/server.log
\ No newline at end of file
diff --git a/modules/container-dataverse/src/main/docker/preboot.sh b/modules/container-dataverse/src/main/docker/preboot.sh
new file mode 100644
index 00000000000..31c9f20f8fd
--- /dev/null
+++ b/modules/container-dataverse/src/main/docker/preboot.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+#
+# Pre-boot hook: writes the installer configuration from the environment,
+# adds the S3 storage JVM options to domain.xml and creates the S3 bucket.
+
+domain_xml="${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/domain.xml"
+
+# create the config file that has all our environment settings
+# a here-document keeps the values verbatim - echo -e would expand backslash
+# sequences inside them, for example in a password
+cat > /dataverse/scripts/installer/default.config <<EOF
+[glassfish]
+HOST_DNS_ADDRESS=${HOST_DNS_ADDRESS}
+GLASSFISH_USER = ${GLASSFISH_USER}
+GLASSFISH_DIRECTORY = ${PAYARA_DIR}
+GLASSFISH_ADMIN_USER = ${GLASSFISH_ADMIN_USER}
+GLASSFISH_ADMIN_PASSWORD = ${GLASSFISH_ADMIN_PASSWORD}
+GLASSFISH_HEAP = 2048
+GLASSFISH_REQUEST_TIMEOUT = 1800
+
+[database]
+POSTGRES_ADMIN_PASSWORD=${POSTGRES_ADMIN_PASSWORD}
+POSTGRES_SERVER=${POSTGRES_SERVER}
+POSTGRES_PORT=${POSTGRES_PORT}
+POSTGRES_DATABASE=${POSTGRES_DATABASE}
+POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
+POSTGRES_USER=${POSTGRES_USER}
+
+[system]
+ADMIN_EMAIL=${ADMIN_EMAIL}
+MAIL_SERVER=${MAIL_SERVER}
+SOLR_LOCATION=${SOLR_LOCATION}
+
+[rserve]
+RSERVE_HOST=${RSERVE_HOST}
+RSERVE_PORT=${RSERVE_PORT}
+RSERVE_USER=${RSERVE_USER}
+RSERVE_PASSWORD=${RSERVE_PASSWORD}
+
+[doi]
+DOI_USERNAME = dataciteuser
+DOI_PASSWORD = datacitepassword
+DOI_BASEURL = https://mds.test.datacite.org
+DOI_DATACITERESTAPIURL = https://api.test.datacite.org
+EOF
+
+# https://github.com/poikilotherm/dataverse/blob/ct-mvn-mod/modules/container-base/src/main/docker/Dockerfile
+# https://guides.dataverse.org/en/latest/installation/config.html#amazon-s3-storage-or-compatible
+# set s3 storage settings
+if ! grep -q "Ddataverse.files.s3.type=s3" "${domain_xml}"; then
+  # use : as delimiter
+  # NOTE(review): the search pattern of these expressions is empty and the
+  # replacement carries no XML element - the operands look truncated, confirm
+  sed -i "s::-Ddataverse.files.s3.type=s3\n:" "${domain_xml}"
+  sed -i "s::-Ddataverse.files.s3.label=s3\n:" "${domain_xml}"
+  sed -i "s::-Ddataverse.files.s3.access-key=${S3_ACCESS_KEY}\n:" "${domain_xml}"
+  sed -i "s::-Ddataverse.files.s3.secret-key=${S3_SECRET_KEY}\n:" "${domain_xml}"
+  sed -i "s::-Ddataverse.files.s3.custom-endpoint-url=http\:\/\/seaweedfs\:8333\n:" "${domain_xml}"
+  # keep this as dataverse as it's hardcoded elsewhere
+  sed -i "s::-Ddataverse.files.s3.bucket-name=dataverse\n:" "${domain_xml}"
+  sed -i "s::-Ddataverse.files.s3.custom-endpoint-region=us-east-1\n:" "${domain_xml}"
+  # Use path style buckets instead of subdomains
+  sed -i "s::-Ddataverse.files.s3.path-style-access=true\n:" "${domain_xml}"
+fi
+
+# create an empty s3 bucket in seaweedfs if
it doesn't already exist +curl -X POST "http://seaweedfs:8888/buckets/" +curl -X POST "http://seaweedfs:8888/buckets/dataverse/" From 134695d3ba2262ac93f66b02bc5a33875a63df31 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Wed, 18 Jan 2023 19:53:17 -0600 Subject: [PATCH 27/27] delete some unnecessary code to simplify --- .../src/main/docker/Dockerfile | 65 +------------------ 1 file changed, 2 insertions(+), 63 deletions(-) diff --git a/modules/container-dataverse/src/main/docker/Dockerfile b/modules/container-dataverse/src/main/docker/Dockerfile index d41796a74d3..ace5f86fce2 100644 --- a/modules/container-dataverse/src/main/docker/Dockerfile +++ b/modules/container-dataverse/src/main/docker/Dockerfile @@ -1,86 +1,26 @@ # this is built off the base container -FROM gdcc/base:unstable as builder - -ENV DEBIAN_FRONTEND noninteractive +FROM gdcc/base:unstable USER root -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - openjdk-11-jdk maven ca-certificates && \ - apt-get -y upgrade && \ - rm -rf /var/lib/apt/lists/* - RUN mkdir -p /dataverse -# copy over sourcecode and build files needed to compile the .war -# as well as installer files COPY pom.xml /dataverse/ COPY modules /dataverse/modules/ COPY local_lib /dataverse/local_lib/ - -# cache pom files and packages for compilation later -RUN cd /dataverse && mvn verify clean --fail-never - -COPY src /dataverse/src/ COPY scripts /dataverse/scripts/ COPY conf/jhove/ /dataverse/conf/jhove/ -WORKDIR /dataverse - -# this can take some time to download all the dependencies -RUN export dpkgArch="$(dpkg --print-architecture)" && \ - export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-${dpkgArch}" && \ - mvn package -DskipTests --no-transfer-progress -T 1C - -FROM gdcc/base:unstable - -ENV DEBIAN_FRONTEND noninteractive - -USER root - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - python3-dev tzdata nano curl wget unzip jq imagemagick python3 python3-pip python3-psycopg2 
wait-for-it ca-certificates && \ - apt-get -y upgrade && \ - rm -rf /var/lib/apt/lists/* - RUN useradd --create-home --shell /bin/bash dataverse -# install awscli -RUN pip3 install --no-cache-dir awscli - -# ENV JAVA_HOME "/usr/lib/jvm/java-11-openjdk-${ARCHITECTURE}" -# RUN export JAVA_HOME="$(dirname $(dirname $(readlink -f $(which java))))" - -# install Counter Processor -# https://guides.dataverse.org/en/latest/installation/prerequisites.html#counter-processor -# RUN cd /usr/local && \ -# wget https://github.com/CDLUC3/counter-processor/archive/refs/tags/v0.1.04.tar.gz && \ -# tar xvfz v0.1.04.tar.gz && \ -# rm v0.1.04.tar.gz && \ -# cd counter-processor-0.1.04 && \ -# pip3 install -r requirements.txt -# RUN useradd --create-home --shell /bin/bash counter && \ -# chown -R counter:counter /usr/local/counter-processor-0.1.04 - -WORKDIR / - -COPY --chown=dataverse:dataverse --from=builder /dataverse /dataverse - # switch to non-root user as this is more secure USER dataverse WORKDIR / -RUN mkdir -p /home/dataverse/.aws/ -COPY --chown=dataverse:dataverse ./config /home/dataverse/.aws/config -COPY --chown=dataverse:dataverse ./credentials /home/dataverse/.aws/credentials - # add Payara to PATH ENV PATH="${PATH}:${PAYARA_DIR}/bin" USER root -RUN cp -R /home/dataverse/.aws/ "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" # set ownership so later the dataverse user can write to and make changes RUN chown -R dataverse "${PAYARA_DIR}" @@ -97,8 +37,7 @@ ENV DEPLOY_DIR /dataverse/target/ ENV PREBOOT_COMMANDS /preboot.sh ENV POSTBOOT_COMMANDS /postboot.sh -#USER dataverse -#CMD ["wait-for-it", "postgres:5432", "--", "/startup.sh"] +USER dataverse # helpful for debugging purposes to just start up the container # CMD ["tail", "-f", "/dev/null"] \ No newline at end of file