diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000000..d26c7363ca0
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,8 @@
+/.git/
+/.github/
+/conf/docker-compose/postgres-bind/
+/conf/docker-compose/solr-bind/
+/conf/docker-compose/seaweedfs-bind/
+/conf/docker-compose/dataverse-docroot-bind/
+/conf/docker-compose/dataverse-sitemap-bind/
+/conf/docker-compose/dataverse-logos-bind/
\ No newline at end of file
diff --git a/.env b/.env
new file mode 100644
index 00000000000..b9aee36f6ed
--- /dev/null
+++ b/.env
@@ -0,0 +1,42 @@
+# timezone
+# https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
+TZ="America/Denver"
+
+# dataverse service
+HOST_DNS_ADDRESS=dataverse
+GLASSFISH_USER=dataverse
+GLASSFISH_PASSWORD=secret
+GLASSFISH_ADMIN_USER=admin
+GLASSFISH_ADMIN_PASSWORD=secret
+ADMIN_EMAIL=noreply@mydomain.com
+MAIL_SERVER=localhost
+POSTGRES_ADMIN_PASSWORD=secret
+POSTGRES_SERVER=postgres
+POSTGRES_PORT=5432
+POSTGRES_DATABASE=dataverse
+POSTGRES_PASSWORD=secret
+POSTGRES_USER=dataverse
+SOLR_LOCATION=solr:8983
+RSERVE_HOST=rserve
+RSERVE_PORT=6311
+# the rserve credentials are hardcoded in the Dockerfile, edit both if you want to change them
+RSERVE_USER=rserve
+RSERVE_PASSWORD=rserve
+
+# disable DOI validation checks, true or false, set this to true for your development environment
+DISABLE_DOI=true
+
+# exclude emails from exports
+# https://guides.dataverse.org/en/latest/installation/config.html#excludeemailfromexport
+EXCLUDE_EMAIL_EXPORTS=true
+
+# s3 keys
+S3_ACCESS_KEY=secret
+S3_SECRET_KEY=secret
+
+# fully qualified domain name (FQDN) and site URL
+# recommend keeping this as dataverse because it's used internally for routing within Docker
+# if you change this, s3 storage will break
+DATAVERSE_FQDN=dataverse
+# make sure to escape characters like :
+DATAVERSE_SITE_URL=http\://localhost
diff --git a/Dockerfile b/Dockerfile
index b0864a0c55f..7f06fb9c83c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1 +1,89 @@
-# See http://guides.dataverse.org/en/latest/developers/containers.html
+# https://hub.docker.com/_/ubuntu
+FROM ubuntu:22.04
+
+# build-time only, so the setting does not leak into the runtime environment
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    gcc python3-dev tzdata nano dos2unix curl wget openjdk-11-jdk maven unzip jq imagemagick python3 python3-pip python3-psycopg2 wait-for-it ca-certificates && \
+    apt-get -y upgrade && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /
+
+RUN useradd --create-home --shell /bin/bash dataverse
+
+# https://guides.dataverse.org/en/5.8/installation/prerequisites.html
+# download, unpack and set ownership in a single layer so neither the archive
+# nor a second copy of the unpacked files is kept in the image
+RUN wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip && \
+    unzip payara-5.2022.3.zip && \
+    mv payara5 /usr/local && \
+    rm payara-5.2022.3.zip && \
+    chown -R root:root /usr/local/payara5 && \
+    chown dataverse /usr/local/payara5/glassfish/lib && \
+    chown -R dataverse:dataverse /usr/local/payara5/glassfish/domains/domain1
+
+# ENV JAVA_HOME "/usr/lib/jvm/java-11-openjdk-${ARCHITECTURE}"
+# RUN export JAVA_HOME="$(dirname $(dirname $(readlink -f $(which java))))"
+
+# install Counter Processor
+# https://guides.dataverse.org/en/latest/installation/prerequisites.html#counter-processor
+RUN cd /usr/local && \
+    wget https://github.com/CDLUC3/counter-processor/archive/refs/tags/v0.1.04.tar.gz && \
+    tar xvfz v0.1.04.tar.gz && \
+    rm v0.1.04.tar.gz && \
+    cd counter-processor-0.1.04 && \
+    pip3 install --no-cache-dir -r requirements.txt
+
+RUN useradd --create-home --shell /bin/bash counter && \
+    chown -R counter:counter /usr/local/counter-processor-0.1.04
+
+# install awscli
+RUN pip3 install --no-cache-dir awscli
+
+# switch to non-root user as this is more secure
+USER dataverse
+WORKDIR /
+
+RUN mkdir -p /home/dataverse/.aws/
+COPY --chown=dataverse:dataverse ./conf/docker-compose/dataverse/config /home/dataverse/.aws/config
+COPY --chown=dataverse:dataverse ./conf/docker-compose/dataverse/credentials /home/dataverse/.aws/credentials
+
+RUN cp -R /home/dataverse/.aws/ /usr/local/payara5/glassfish/domains/domain1/
+
+# if you want to speed up the Maven build you can copy over
+# cached packages here
+# COPY --chown=dataverse:dataverse ./conf/docker-compose/dataverse/.m2/ /home/dataverse/.m2/
+
+# copy over sourcecode and build files needed to compile the .war
+# as well as installer files
+COPY --chown=dataverse:dataverse pom.xml /dataverse/
+COPY --chown=dataverse:dataverse src /dataverse/src/
+COPY --chown=dataverse:dataverse modules /dataverse/modules/
+COPY --chown=dataverse:dataverse scripts /dataverse/scripts/
+COPY --chown=dataverse:dataverse conf/jhove/ /dataverse/conf/jhove/
+COPY --chown=dataverse:dataverse local_lib /dataverse/local_lib/
+
+# this likely isn't needed on Linux but was needed on a Windows build
+RUN find /dataverse -type f -print0 | xargs -0 -n 1 -P 4 dos2unix
+
+# this can take some time to download all the dependencies
+# the cached dependencies are deleted in the same layer so they never end up in the image,
+# which also avoids inaccurate "false flags" on container scanning for security issues
+RUN cd /dataverse/ && \
+    export dpkgArch="$(dpkg --print-architecture)" && \
+    export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-${dpkgArch}" && \
+    mvn package -DskipTests --no-transfer-progress && \
+    rm -rf ~/.m2/
+
+USER root
+COPY --chown=dataverse:dataverse ./conf/docker-compose/dataverse/startup.sh /startup.sh
+RUN chmod +x /startup.sh && dos2unix /startup.sh
+
+USER dataverse
+CMD ["wait-for-it", "postgres:5432", "--", "/startup.sh"]
+
+# helpful for debugging purposes to just start up the container
+# CMD ["tail", "-f", "/dev/null"]
\ No newline at end of file
diff --git a/conf/docker-compose/.gitignore b/conf/docker-compose/.gitignore
new file mode 100644
index 00000000000..69fdcfd2255
--- /dev/null
+++ b/conf/docker-compose/.gitignore
@@ -0,0 +1,10 @@
+/solr/*.xml
+/dataverse/.m2/
+/postgres-bind/
+/solr-bind/
+/seaweedfs-bind/
+/dataverse-docroot-bind/
+/dataverse-logos-bind/
+/dataverse-sitemap-bind/
+/traefik/traefik.key
+/traefik/traefik.crt
\ No newline at
end of file
diff --git a/conf/docker-compose/README.md b/conf/docker-compose/README.md
new file mode 100644
index 00000000000..3f0b37d7a83
--- /dev/null
+++ b/conf/docker-compose/README.md
@@ -0,0 +1,98 @@
+# docker-compose version of Dataverse
+
+## Requirements
+
+* [docker-compose](https://docs.docker.com/compose/)
+* [Docker](https://docker.com) (or some other supported container engine)
+* [Maven](https://maven.apache.org/)
+
+## Setup
+
+Edit the `.env` file as needed. Make sure to properly secure it in terms of file permissions.
+
+Edit `./seaweedfs/config.json` and enter your [credential keys](https://github.com/chrislusf/seaweedfs/wiki/Amazon-S3-API#static-configuration) for s3 storage.
+
+If you're running locally and don't have a TLS key and certificate for Traefik, you'll
+need to generate them yourself with something like Git Bash. Make sure
+[Posix to Windows path conversion](https://github.com/git-for-windows/git/issues/577#issuecomment-166118846) doesn't
+take place with the forward slashes using `MSYS_NO_PATHCONV=1` if you're on Windows.
+
+```shell
+MSYS_NO_PATHCONV=1 openssl req -x509 -nodes -days 4096 -newkey rsa:4096 -out traefik.crt -keyout traefik.key -subj "/C=US/ST=New Mexico/L=ABQ/O=Local/CN=127.0.0.1" -addext "subjectAltName = IP:127.0.0.1"
+```
+
+Or grab your public/private keys from your sysadmin or provider and rename them to `traefik.key` and `traefik.crt`.
+
+Then copy the `traefik.key` and `traefik.crt` files into the `traefik` folder.
+
+## Building
+
+Run `build-containers.sh`. This will copy a few files and set up the build environment before
+running maven builds for each of the container services.
+
+Pull and build the Docker containers:
+
+```shell
+# this uses Compose v2, if you're on an older version you may
+# need to change this call to docker-compose
+docker compose pull
+docker compose build
+```
+
+## Deploying
+
+```shell
+docker-compose up -d
+```
+
+Note that this can take a couple of minutes to start up. Wait until it shows `healthy` as the status.
+
+For the bind mounts (see `docker-compose.yml`) you may need to set the permissions
+on those folders `*-bind` so they can be written from within the containers. Alternatively,
+you can create local users or do UID/GID mappings.
+
+```shell
+docker ps
+```
+
+Then go to the following URL in your browser:
+
+[https://localhost](https://localhost)
+
+Default credentials for login are:
+
+* username: `dataverseAdmin`
+* password: `admin`
+
+Make sure to change this password right away.
+
+## How It Works
+
+* Builds a copy of the `.war` deployable code from source
+* Stands up various services and pieces needed:
+  * seaweedfs - for s3 storage
+  * traefik - reverse proxy, HTTP is re-routed automatically to HTTPS
+  * postgres - database backend
+  * solr - text indexing database
+  * rserve - R server for running R commands
+  * dataverse - the main Dataverse web application
+* Sets up two storage options, one is the default `=files` for local storage
+and the other is `=s3` for s3 storage
+
+## Uninstall / Teardown
+
+```shell
+docker-compose down -v
+```
+
+## Development References
+
+There are many community-led efforts to utilize containers, Kubernetes, and more to help automate
+and set up Dataverse.
+
+* [https://github.com/fzappa/rocky-dataverse/blob/main/rocky-dataverse.sh](https://github.com/fzappa/rocky-dataverse/blob/main/rocky-dataverse.sh)
+* [https://github.com/IQSS/dataverse/tree/develop/conf/docker-aio](https://github.com/IQSS/dataverse/tree/develop/conf/docker-aio)
+* [https://github.com/gdcc/dataverse-kubernetes/blob/develop/docker-compose.yaml](https://github.com/gdcc/dataverse-kubernetes/blob/develop/docker-compose.yaml)
+* [https://github.com/gdcc/dataverse-kubernetes](https://github.com/gdcc/dataverse-kubernetes)
+* [https://github.com/EOSC-synergy/dataverse-kubernetes](https://github.com/EOSC-synergy/dataverse-kubernetes)
+* [https://github.com/IQSS/dataverse-docker](https://github.com/IQSS/dataverse-docker)
diff --git a/conf/docker-compose/build-containers.sh b/conf/docker-compose/build-containers.sh
new file mode 100755
index 00000000000..1d6b86dfd88
--- /dev/null
+++ b/conf/docker-compose/build-containers.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Stages the files each container module needs, then builds every image with Maven.
+
+# stop at the first failing command so images are never built from missing or partial copies
+set -euo pipefail
+
+# the relative paths below assume conf/docker-compose/ is the working directory
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+# Solr XML schema files
+cp ../solr/8.11.1/*.xml ../../modules/container-solr/src/main/docker/
+
+# go back to git root directory
+cd ../../
+
+# prep Solr beforehand so it has the appropriate permissions
+# 8983 is the UID hard-coded in the stock Solr Dockerfile
+mkdir -p ./conf/docker-compose/solr-bind/
+sudo chown 8983:8983 ./conf/docker-compose/solr-bind/
+
+# copy sourcecode and installer files over
+cp pom.xml modules/container-dataverse/src/main/docker/
+cp -R ./src/ modules/container-dataverse/src/main/docker/src/
+mkdir -p modules/container-dataverse/src/main/docker/modules/
+cp -R ./modules/dataverse-parent/ modules/container-dataverse/src/main/docker/modules/dataverse-parent/
+cp -R ./scripts/ modules/container-dataverse/src/main/docker/scripts/
+# the image's Dockerfile only copies conf/jhove; copying all of conf/ would also drag in the
+# docker-compose *-bind data directories, which may not be readable by the current user
+mkdir -p modules/container-dataverse/src/main/docker/conf/jhove/
+cp -R ./conf/jhove/. modules/container-dataverse/src/main/docker/conf/jhove/
+cp -R ./local_lib/ modules/container-dataverse/src/main/docker/local_lib/
+
+# build out each of the images
+mvn -Pct -f modules/container-base clean install -Dmaven.test.skip -Ddocker.verbose=true
+mvn -Pct -f modules/container-postgresql clean install -Dmaven.test.skip -Ddocker.verbose=true
+mvn -Pct -f modules/container-rserve clean install -Dmaven.test.skip -Ddocker.verbose=true
+mvn -Pct -f modules/container-seaweedfs clean install -Dmaven.test.skip -Ddocker.verbose=true
+mvn -Pct -f modules/container-solr clean install -Dmaven.test.skip -Ddocker.verbose=true
+mvn -Pct -f modules/container-traefik clean install -Dmaven.test.skip -Ddocker.verbose=true
+mvn -Pct -f modules/container-dataverse clean install -Dmaven.test.skip -Ddocker.verbose=true
diff --git a/conf/docker-compose/dataverse/config b/conf/docker-compose/dataverse/config
new file mode 100644
index 00000000000..d1e10d78ee3
--- /dev/null
+++ b/conf/docker-compose/dataverse/config
@@ -0,0 +1,4 @@
+[default]
+region = us-east-1
+s3 =
+    signature_version = s3v4
\ No newline at end of file
diff --git a/conf/docker-compose/dataverse/credentials b/conf/docker-compose/dataverse/credentials
new file mode 100644
index 00000000000..64af5712826
--- /dev/null
+++ b/conf/docker-compose/dataverse/credentials
@@ -0,0 +1,3 @@
+[default]
+aws_access_key_id = secret
+aws_secret_access_key = secret
\ No newline at end of file
diff --git a/conf/docker-compose/dataverse/startup.sh b/conf/docker-compose/dataverse/startup.sh
new file mode 100644
index 00000000000..a6daf18d143
--- /dev/null
+++ b/conf/docker-compose/dataverse/startup.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+export dpkgArch="$(dpkg --print-architecture)"
+export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-${dpkgArch}"
+
+# create the config file that has all our environment settings
+# a heredoc is used rather than `echo -e` so backslashes in passwords are written verbatim
+
+cat > /dataverse/scripts/installer/default.config <<EOF
+
+[glassfish]
+HOST_DNS_ADDRESS=${HOST_DNS_ADDRESS}
+GLASSFISH_USER = ${GLASSFISH_USER}
+GLASSFISH_DIRECTORY = /usr/local/payara5/
+GLASSFISH_ADMIN_USER = ${GLASSFISH_ADMIN_USER}
+GLASSFISH_ADMIN_PASSWORD = ${GLASSFISH_ADMIN_PASSWORD}
+GLASSFISH_HEAP = 2048
+GLASSFISH_REQUEST_TIMEOUT = 1800
+
+[database]
+POSTGRES_ADMIN_PASSWORD=${POSTGRES_ADMIN_PASSWORD}
+POSTGRES_SERVER=${POSTGRES_SERVER}
+POSTGRES_PORT=${POSTGRES_PORT}
+POSTGRES_DATABASE=${POSTGRES_DATABASE}
+POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
+POSTGRES_USER=${POSTGRES_USER}
+
+[system]
+ADMIN_EMAIL=${ADMIN_EMAIL}
+MAIL_SERVER=${MAIL_SERVER}
+SOLR_LOCATION=${SOLR_LOCATION}
+
+[rserve]
+RSERVE_HOST=${RSERVE_HOST}
+RSERVE_PORT=${RSERVE_PORT}
+RSERVE_USER=${RSERVE_USER}
+RSERVE_PASSWORD=${RSERVE_PASSWORD}
+
+[doi]
+DOI_USERNAME = dataciteuser
+DOI_PASSWORD = datacitepassword
+DOI_BASEURL = https://mds.test.datacite.org
+DOI_DATACITERESTAPIURL = https://api.test.datacite.org
+
+EOF
+
+# https://github.com/poikilotherm/dataverse/blob/ct-mvn-mod/modules/container-base/src/main/docker/Dockerfile
+# https://guides.dataverse.org/en/latest/installation/config.html#amazon-s3-storage-or-compatible
+# set s3 storage settings
+# NOTE(review): the search patterns of the sed calls below are empty as written; confirm they match the intended element in domain.xml
+if ! grep -q "Ddataverse.files.s3.type=s3" "/usr/local/payara5/glassfish/domains/domain1/config/domain.xml"; then
+  # use : as delimiter
+  sed -i "s::-Ddataverse.files.s3.type=s3\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml
+  sed -i "s::-Ddataverse.files.s3.label=s3\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml
+  sed -i "s::-Ddataverse.files.s3.access-key=${S3_ACCESS_KEY}\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml
+  sed -i "s::-Ddataverse.files.s3.secret-key=${S3_SECRET_KEY}\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml
+  sed -i "s::-Ddataverse.files.s3.custom-endpoint-url=http\:\/\/seaweedfs\:8333\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml
+  # keep this as dataverse as it's hardcoded elsewhere
+  sed -i "s::-Ddataverse.files.s3.bucket-name=dataverse\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml
+  sed -i "s::-Ddataverse.files.s3.custom-endpoint-region=us-east-1\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml
+  # # Use path style buckets instead of subdomains
+  sed -i "s::-Ddataverse.files.s3.path-style-access=true\n:" /usr/local/payara5/glassfish/domains/domain1/config/domain.xml
+fi
+
+cd /dataverse/scripts/installer/
+
+# the installer needs to run from within the directory, it cannot be run from / for example
+# this can take some time to run, be patient
+python3 install.py --noninteractive --force
+
+# check if we should disable DOI validation
+if [[ ! -z "${DISABLE_DOI}" ]] && [[ "true" = "${DISABLE_DOI}" ]]; then
+  echo "Disabling DOI validation"
+  curl -X PUT -d FAKE http://localhost:8080/api/admin/settings/:DoiProvider
+fi
+
+# check if we should exclude emails from exports
+if [[ ! -z "${EXCLUDE_EMAIL_EXPORTS}" ]] && [[ "true" = "${EXCLUDE_EMAIL_EXPORTS}" ]]; then
+  echo "Excluding emails in exports"
+  curl -X PUT -d true http://localhost:8080/api/admin/settings/:ExcludeEmailFromExport
+fi
+
+# create an empty s3 bucket in seaweedfs if it doesn't already exist
+curl -X POST "http://seaweedfs:8888/buckets/"
+curl -X POST "http://seaweedfs:8888/buckets/dataverse/"
+
+wait-for-it localhost:8080 -- tail -f /usr/local/payara5/glassfish/domains/domain1/logs/server.log
\ No newline at end of file
diff --git a/conf/docker-compose/postgres/Dockerfile b/conf/docker-compose/postgres/Dockerfile
new file mode 100644
index 00000000000..81d896cc87d
--- /dev/null
+++ b/conf/docker-compose/postgres/Dockerfile
@@ -0,0 +1,6 @@
+# https://hub.docker.com/_/postgres
+FROM postgres:14
+
+RUN apt-get update && \
+    apt-get -y upgrade && \
+    rm -rf /var/lib/apt/lists/*
diff --git a/conf/docker-compose/rserve/Dockerfile b/conf/docker-compose/rserve/Dockerfile
new file mode 100644
index 00000000000..8817882a69b
--- /dev/null
+++ b/conf/docker-compose/rserve/Dockerfile
@@ -0,0 +1,24 @@
+# https://hub.docker.com/_/ubuntu
+FROM ubuntu:22.04
+
+# build-time only, so the setting does not leak into the runtime environment
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    tzdata nano r-base r-base-dev libssl-dev libnlopt-dev libcurl4-openssl-dev ca-certificates && \
+
apt-get -y upgrade && \
+    rm -rf /var/lib/apt/lists/*
+
+# multi-thread the building of the libraries from source via NCPUS=X
+RUN R -e "install.packages(c('R2HTML', 'rjson', 'DescTools', 'Rserve', 'haven'), NCPUS=4)"
+
+# this is the user and password that Dataverse will connect to the Rserve server
+RUN echo "rserve rserve" > /rserve.pwd
+
+WORKDIR /
+
+EXPOSE 6311/tcp
+
+# https://github.com/ubc/r-docker/blob/master/Dockerfile
+CMD ["R", "-e", "Rserve::run.Rserve(remote=TRUE, auth=TRUE, pwdfile='/rserve.pwd', daemon=FALSE, port=6311, fileio=TRUE, maxinbuf=262144)"]
\ No newline at end of file
diff --git a/conf/docker-compose/seaweedfs/Dockerfile b/conf/docker-compose/seaweedfs/Dockerfile
new file mode 100644
index 00000000000..c000090ba6e
--- /dev/null
+++ b/conf/docker-compose/seaweedfs/Dockerfile
@@ -0,0 +1,17 @@
+# https://hub.docker.com/r/chrislusf/seaweedfs
+# defaults to the latest image; pass --build-arg SEAWEEDFS_VERSION=<tag> for a reproducible build
+ARG SEAWEEDFS_VERSION=latest
+FROM chrislusf/seaweedfs:${SEAWEEDFS_VERSION}
+
+USER root
+
+# use HTTP connection for adding the needed ca-certificates package
+# install it and then swap back to HTTPS
+RUN sed -i 's,https,http,g' /etc/apk/repositories && \
+    apk add --no-cache ca-certificates && \
+    update-ca-certificates && \
+    sed -i 's,http,https,g' /etc/apk/repositories && \
+    apk update && apk upgrade && apk add --no-cache curl bash
+
+# https://github.com/chrislusf/seaweedfs/wiki/Amazon-S3-API#static-configuration
+COPY config.json /config.json
diff --git a/conf/docker-compose/seaweedfs/config.json b/conf/docker-compose/seaweedfs/config.json
new file mode 100644
index 00000000000..ad18955f042
--- /dev/null
+++ b/conf/docker-compose/seaweedfs/config.json
@@ -0,0 +1,19 @@
+{
+  "identities": [
+    {
+      "name": "anonymous",
+      "credentials": [
+        {
+          "accessKey": "secret",
+          "secretKey": "secret"
+        }
+      ],
+      "actions": [
+        "Read:dataverse",
+        "List:dataverse",
+        "Tagging:dataverse",
+        "Write:dataverse"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/conf/docker-compose/solr/Dockerfile b/conf/docker-compose/solr/Dockerfile
new file mode 100644
index 00000000000..c64aa52927f
--- /dev/null
+++ b/conf/docker-compose/solr/Dockerfile
@@ -0,0 +1,29 @@
+# https://hub.docker.com/_/solr
+FROM solr:8.11.2
+
+# https://guides.dataverse.org/en/latest/installation/prerequisites.html#solr
+
+USER root
+
+RUN apt-get update && \
+    apt-get -y upgrade && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY *.xml /
+
+# increase the number of file descriptors and max processes
+RUN echo "solr soft nproc 65000" >> /etc/security/limits.conf && \
+    echo "solr hard nproc 65000" >> /etc/security/limits.conf && \
+    echo "solr soft nofile 65000" >> /etc/security/limits.conf && \
+    echo "solr hard nofile 65000" >> /etc/security/limits.conf
+
+# increase Header size
+RUN sed -i "s/name=\"solr.jetty.request.header.size\" default=\"8192\"/name=\"solr.jetty.request.header.size\" default=\"102400\"/g" /opt/solr/server/etc/jetty.xml
+
+COPY --chown=solr:solr startup.sh /startup.sh
+RUN chmod +x /startup.sh
+
+# switch back to normal runtime user for security purposes
+USER solr
+
+CMD ["/startup.sh"]
\ No newline at end of file
diff --git a/conf/docker-compose/solr/startup.sh b/conf/docker-compose/solr/startup.sh
new file mode 100644
index 00000000000..aced1a47ed4
--- /dev/null
+++ b/conf/docker-compose/solr/startup.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+mkdir -p /var/solr/data/collection1/
+cp -R /opt/solr/server/solr/configsets/_default/conf /var/solr/data/collection1
+cp /*.xml /var/solr/data/collection1/conf/
+
+# create collection on startup
+echo "name=collection1" > /var/solr/data/collection1/core.properties
+
+# replace this shell with Solr so it receives the stop signal sent by `docker stop`
+exec solr-foreground
diff --git a/conf/docker-compose/traefik/Dockerfile b/conf/docker-compose/traefik/Dockerfile
new file mode 100644
index 00000000000..58cbf466a50
--- /dev/null
+++ b/conf/docker-compose/traefik/Dockerfile
@@ -0,0 +1,18 @@
+# https://hub.docker.com/_/traefik
+FROM traefik:v2.8
+
+# Add certs to OS truststore
+COPY *.crt /usr/local/share/ca-certificates/
+# private key
+COPY *.key /usr/local/share/ca-certificates/
+
+# use HTTP connection for adding the needed ca-certificates package
+# install it and then swap back to HTTPS
+RUN sed -i 's,https,http,g' /etc/apk/repositories && \
+    apk add --no-cache ca-certificates && \
+    update-ca-certificates && \
+    sed -i 's,http,https,g' /etc/apk/repositories && \
+    apk update && apk upgrade && apk add --no-cache bash curl
+
+RUN mkdir -p /configuration/
+COPY certificates.yaml /configuration/certificates.yaml
diff --git a/conf/docker-compose/traefik/certificates.yaml b/conf/docker-compose/traefik/certificates.yaml
new file mode 100644
index 00000000000..27fc217abf6
--- /dev/null
+++ b/conf/docker-compose/traefik/certificates.yaml
@@ -0,0 +1,6 @@
+tls:
+  stores:
+    default:
+      defaultCertificate:
+        certFile: /usr/local/share/ca-certificates/traefik.crt
+        keyFile: /usr/local/share/ca-certificates/traefik.key
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000000..5c02bbf7e40
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,176 @@
+version: '3.5'
+services:
+
+  postgres:
+    image: gdcc/postgresql:unstable
+    #ports:
+    #  - 5432:5432
+    restart: unless-stopped
+    container_name: postgres
+    hostname: postgres
+    volumes:
+      - ./conf/docker-compose/postgres-bind:/var/lib/postgresql/data:rw
+    environment:
+      # this user and password will have superuser privileges
+      - POSTGRES_USER=postgres
+      - POSTGRES_PASSWORD=${POSTGRES_ADMIN_PASSWORD}
+      - TZ=${TZ}
+      - LC_ALL=C.UTF-8
+    healthcheck:
+      test: ["CMD", "pg_isready", "-U", "postgres"]
+      interval: 5s
+      retries: 5
+
+  solr:
+    image: gdcc/solr:unstable
+    restart: unless-stopped
+    container_name: solr
+    hostname: solr
+    volumes:
+      - ./conf/docker-compose/solr-bind:/var/solr:rw
+    environment:
+      - TZ=${TZ}
+      - "SOLR_JAVA_MEM=-Xms1g -Xmx1g"
+      - "SOLR_OPTS=-Dlog4j2.formatMsgNoLookups=true"
+    # ports:
+    #   - 8983:8983
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8983"]
+      interval: 10s
+      timeout: 3s
+      retries: 50
+      start_period: 30s
+
+  rserve:
+    image: gdcc/rserve:unstable
+    restart: unless-stopped
+    container_name: rserve
+    hostname: rserve
+    environment:
+      - TZ=${TZ}
+      - RSERVE_USER=${RSERVE_USER}
+      - RSERVE_PASSWORD=${RSERVE_PASSWORD}
+    # ports:
+    #   - 6311:6311
+
+  dataverse:
+    image: gdcc/dataverse:unstable
+    restart: unless-stopped
+    container_name: dataverse
+    hostname: dataverse
+    environment:
+      - TZ=${TZ}
+      - HOST_DNS_ADDRESS=${HOST_DNS_ADDRESS}
+      - GLASSFISH_USER=${GLASSFISH_USER}
+      - GLASSFISH_PASSWORD=${GLASSFISH_PASSWORD}
+      - GLASSFISH_ADMIN_USER=${GLASSFISH_ADMIN_USER}
+      - GLASSFISH_ADMIN_PASSWORD=${GLASSFISH_ADMIN_PASSWORD}
+      - ADMIN_EMAIL=${ADMIN_EMAIL}
+      - MAIL_SERVER=${MAIL_SERVER}
+      - POSTGRES_ADMIN_PASSWORD=${POSTGRES_ADMIN_PASSWORD}
+      - POSTGRES_SERVER=${POSTGRES_SERVER}
+      - POSTGRES_PORT=${POSTGRES_PORT}
+      - POSTGRES_DATABASE=${POSTGRES_DATABASE}
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
+      - POSTGRES_USER=${POSTGRES_USER}
+      - SOLR_LOCATION=${SOLR_LOCATION}
+      - RSERVE_HOST=${RSERVE_HOST}
+      - RSERVE_PORT=${RSERVE_PORT}
+      - RSERVE_USER=${RSERVE_USER}
+      - RSERVE_PASSWORD=${RSERVE_PASSWORD}
+      - DISABLE_DOI=${DISABLE_DOI}
+      - EXCLUDE_EMAIL_EXPORTS=${EXCLUDE_EMAIL_EXPORTS}
+      - S3_ACCESS_KEY=${S3_ACCESS_KEY}
+      - S3_SECRET_KEY=${S3_SECRET_KEY}
+      - DATAVERSE_FQDN=${DATAVERSE_FQDN}
+      - DATAVERSE_SITE_URL=${DATAVERSE_SITE_URL}
+    depends_on:
+      - postgres
+      - solr
+      - rserve
+      - seaweedfs
+    # ports:
+    #   - 8080:8080 # the Dataverse web-UI
+    #   - 4848:4848 # the Payara admin web-UI
+    volumes:
+      # https://guides.dataverse.org/en/latest/installation/advanced.html
+      # logos and sitemap are initially empty on the very first startup
+      - ./conf/docker-compose/dataverse-logos-bind:/opt/payara/appserver/glassfish/domains/domain1/docroot/logos/
+      - ./conf/docker-compose/dataverse-sitemap-bind:/opt/payara/appserver/glassfish/domains/domain1/docroot/sitemap/
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.dataverse.rule=Host(`localhost`)"
+      - "traefik.http.routers.dataverse.tls=true"
+      - "traefik.http.routers.dataverse.entrypoints=web-secure"
+      - "traefik.http.services.dataverse.loadbalancer.server.port=8080"
+      - "traefik.port=8080"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/api/info/version"]
+      interval: 5s
+      timeout: 3s
+      retries: 50
+      start_period: 30s
+
+  # Traefik reverse proxy
+  traefik:
+    image: gdcc/traefik:unstable
+    restart: unless-stopped
+    container_name: traefik
+    hostname: traefik
+    environment:
+      - TZ=${TZ}
+    depends_on:
+      - dataverse
+    command:
+      # - "--log.level=DEBUG"
+      # - "--api.insecure=true"
+      - "--providers.docker=true"
+      - "--providers.docker.exposedbydefault=false"
+      - "--entrypoints.web.http.redirections.entryPoint.to=web-secure"
+      - "--entrypoints.web.http.redirections.entryPoint.scheme=https"
+      - "--entrypoints.web.http.redirections.entrypoint.permanent=true"
+      # location of certs
+      - "--providers.file.directory=/configuration/"
+      - "--entrypoints.web.address=:80"
+      - "--entrypoints.web-secure.address=:443"
+      - "--ping"
+    ports:
+      - 80:80 # HTTP port, this gets re-routed to 443 TLS
+      - 443:443 # TLS port, needs certificate generated to use
+      # The Web UI (enabled by --api.insecure=true)
+      # - 8080:8080
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/ping"]
+      interval: 10s
+      timeout: 3s
+      retries: 50
+      start_period: 30s
+
+  # S3 storage
+  # alternatives include minio and others
+  # https://github.com/chrislusf/seaweedfs/blob/master/docker/seaweedfs-compose.yml
+  seaweedfs:
+    image: gdcc/seaweedfs:unstable
+    restart: unless-stopped
+    container_name: seaweedfs
+    hostname: seaweedfs
+    environment:
+      - TZ=${TZ}
+    volumes:
+      - ./conf/docker-compose/seaweedfs-bind:/data:rw
+    # ports:
+    #   - 8333:8333 # s3 API
+    #   - 9327:9327 # metrics API
+    #   - 9333:9333 # master API
+    #   - 8060:8080 # volume server API
+    #   - 8888:8888 # filer API, nice web-ui to look at folders and files
+    # https://github.com/chrislusf/seaweedfs/wiki/Amazon-S3-API#static-configuration
+    command: 'server -s3 -metricsPort=9327 -dir=/data -s3.config=/config.json'
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9333"]
+      interval: 10s
+      timeout: 3s
+      retries: 10
+      start_period: 5s
diff --git a/modules/container-dataverse/.gitignore b/modules/container-dataverse/.gitignore
new file mode 100644
index 00000000000..d75620abf70
--- /dev/null
+++ b/modules/container-dataverse/.gitignore
@@ -0,0 +1 @@
+.flattened-pom.xml
diff --git a/modules/container-dataverse/README.md b/modules/container-dataverse/README.md
new file mode 100644
index 00000000000..3c7d648a109
--- /dev/null
+++ b/modules/container-dataverse/README.md
@@ -0,0 +1,52 @@
+# Dataverse Image
+
+The Dataverse Image contains the main Dataverse application. It uses the `container-base` image
+as a starting point.
+
+## Quick Reference
+
+**Maintained by:**
+
+This image is created, maintained and supported by the Dataverse community on a best-effort basis.
+
+**Where to find documentation:**
+
+**Where to get help and ask questions:**
+
+IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it.
+You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at
+https://dataversecommunity.slack.com to ask for help and guidance.
+
+## Supported Image Tags
+
+This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse).
+Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-dataverse)
+happens there (again, by the community). Community-supported image tags are based on the two most important branches:
+
+- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged.
+  ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-dataverse/src/main/docker/Dockerfile))
+- The `stable` tag corresponds to the `master` branch, where releases are cut from.
+  ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-dataverse/src/main/docker/Dockerfile))
+
+Within the main repository, you may find the files for this image at `/modules/container-dataverse`.
+This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image.
+You may use, extend, or alter this image to your liking and/or host in some different registry if you want to.
+
+**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures
+Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2).
+
+## License
+
+Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0),
+like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md).
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and limitations under the License.
+
+As with all Docker images, all images likely also contain other software which may be under other licenses (such as
+[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base
+distribution, along with any direct or indirect (Java) dependencies contained).
+
+As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies
+with any relevant licenses for all software contained within.
diff --git a/modules/container-dataverse/pom.xml b/modules/container-dataverse/pom.xml new file mode 100644 index 00000000000..b3a0fa3f365 --- /dev/null +++ b/modules/container-dataverse/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-dataverse + ${packaging.type} + Container Dataverse + This module provides the main Dataverse application via a container. + + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/dataverse:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + +
diff --git a/modules/container-dataverse/src/main/docker/Dockerfile b/modules/container-dataverse/src/main/docker/Dockerfile
new file mode 100644
index 00000000000..ace5f86fce2
--- /dev/null
+++ b/modules/container-dataverse/src/main/docker/Dockerfile
@@ -0,0 +1,35 @@
+# this is built off the base container
+FROM gdcc/base:unstable
+
+USER root
+
+# dedicated non-root runtime user; it must be able to write to and make changes in Payara
+RUN useradd --create-home --shell /bin/bash dataverse && \
+    chown -R dataverse "${PAYARA_DIR}"
+
+# application sources (COPY creates /dataverse, no separate mkdir needed)
+COPY pom.xml /dataverse/
+COPY modules /dataverse/modules/
+COPY local_lib /dataverse/local_lib/
+COPY conf/jhove/ /dataverse/conf/jhove/
+# preboot.sh writes /dataverse/scripts/installer/default.config as the dataverse user,
+# so the scripts tree has to be owned by that user
+COPY --chown=dataverse:dataverse scripts /dataverse/scripts/
+
+# boot scripts: owned by the runtime user and executable without an extra chmod layer
+COPY --chown=dataverse:dataverse --chmod=0755 ./preboot.sh /preboot.sh
+COPY --chown=dataverse:dataverse --chmod=0755 ./postboot.sh /postboot.sh
+
+# add Payara to PATH, auto-deploy the .war file, register the pre and post boot commands
+ENV PATH="${PATH}:${PAYARA_DIR}/bin" \
+    DEPLOY_DIR=/dataverse/target/ \
+    PREBOOT_COMMANDS=/preboot.sh \
+    POSTBOOT_COMMANDS=/postboot.sh
+
+WORKDIR /
+
+# switch to non-root user as this is more secure
+USER dataverse
+
+# helpful for debugging purposes to just start up the container
+# CMD ["tail", "-f", "/dev/null"]
\ No newline at end of file
diff --git a/modules/container-dataverse/src/main/docker/assembly.xml b/modules/container-dataverse/src/main/docker/assembly.xml new file mode 100644 index 00000000000..c7bfead1809 --- /dev/null +++ b/modules/container-dataverse/src/main/docker/assembly.xml @@ -0,0 +1,10 @@ + + + + ${project.basedir}/target/ + dataverse + + + diff --git a/modules/container-dataverse/src/main/docker/config b/modules/container-dataverse/src/main/docker/config new file mode 100644 index 00000000000..d1e10d78ee3 --- /dev/null +++ b/modules/container-dataverse/src/main/docker/config @@ -0,0 +1,4 @@ +[default] +region = us-east-1 +s3 = + signature_version = s3v4 \ No newline at end of file diff --git a/modules/container-dataverse/src/main/docker/credentials b/modules/container-dataverse/src/main/docker/credentials new file mode 100644 index 00000000000..64af5712826 --- /dev/null +++ b/modules/container-dataverse/src/main/docker/credentials @@ -0,0 +1,3 @@ +[default] +aws_access_key_id = secret +aws_secret_access_key = secret \ No newline at end of file diff --git a/modules/container-dataverse/src/main/docker/pom.xml b/modules/container-dataverse/src/main/docker/pom.xml new file mode 100644 index 00000000000..8b6f98c5896 --- /dev/null +++ b/modules/container-dataverse/src/main/docker/pom.xml @@ -0,0 +1,787 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + modules/dataverse-parent + + + + dataverse + war
+ dataverse + + false + 1.2.18.4 + 8.5.10 + 1.20.1 + 0.8.7 + 5.2.1 + 2.4.1 + 5.5.3 + + + + + + + + org.apache.abdera + abdera-core + 1.1.3 + + + org.apache.abdera + abdera-i18n + 1.1.3 + + + + + + + + + + org.slf4j + slf4j-jdk14 + runtime + + + + org.passay + passay + 1.6.0 + + + + + commons-httpclient + commons-httpclient + 3.1 + + + + + io.gdcc + sword2-server + 1.2.1 + + + + org.apache.abdera + abdera-core + + + + + org.apache.abdera + abdera-i18n + + + + + com.amazonaws + aws-java-sdk-s3 + + + + com.apicatalog + titanium-json-ld + 1.3.0-SNAPSHOT + + + com.google.code.gson + gson + 2.8.9 + compile + + + + com.fasterxml.jackson.core + jackson-core + provided + + + + com.fasterxml.jackson.core + jackson-databind + provided + + + + org.everit.json + org.everit.json.schema + 1.5.1 + + + org.mindrot + jbcrypt + 0.4 + + + org.postgresql + postgresql + + + org.flywaydb + flyway-core + ${flyway.version} + + + com.google.guava + guava + 29.0-jre + jar + + + org.eclipse.microprofile.config + microprofile-config-api + provided + + + jakarta.platform + jakarta.jakartaee-api + ${jakartaee-api.version} + provided + + + + org.glassfish + jakarta.json + provided + + + + com.sun.mail + jakarta.mail + provided + + + org.glassfish + jakarta.faces + provided + + + org.primefaces + primefaces + 11.0.0 + + + org.primefaces.themes + all-themes + 1.0.10 + + + org.omnifaces + omnifaces + 3.8 + + + + jakarta.validation + jakarta.validation-api + provided + + + org.hibernate.validator + hibernate-validator + provided + + + org.glassfish + jakarta.el + provided + + + + commons-io + commons-io + + + + org.apache.commons + commons-lang3 + + + + + org.apache.commons + commons-text + 1.10.0 + + + org.apache.commons + commons-math + 2.2 + + + commons-validator + commons-validator + 1.7 + + + + org.apache.solr + solr-solrj + 8.11.1 + + + colt + colt + 1.2.0 + + + + nom.tam.fits + fits + 2012-10-25-generated + + + net.handle + handle + 8.1.1 + + + + edu.harvard.iq.dvn + unf5 + 5.0 + + + + 
org.dataverse + unf + 6.0 + + + + + org.nuiton.thirdparty + REngine + 0.6-1 + + + org.nuiton.thirdparty + Rserve + 0.6-1 + + + + org.apache.poi + poi + ${poi.version} + + + org.apache.poi + poi-ooxml + ${poi.version} + + + org.apache.poi + poi-scratchpad + ${poi.version} + + + org.openpreservation.jhove + jhove-core + ${jhove.version} + + + org.openpreservation.jhove + jhove-modules + ${jhove.version} + + + org.openpreservation.jhove + jhove-ext-modules + ${jhove.version} + + + + com.github.jai-imageio + jai-imageio-core + 1.3.1 + + + org.ocpsoft.rewrite + rewrite-servlet + 3.5.0.Final + + + org.ocpsoft.rewrite + rewrite-config-prettyfaces + 3.5.0.Final + + + edu.ucsb.nceas + ezid + 1.0.0 + jar + + + org.jsoup + jsoup + 1.15.3 + + + io.searchbox + jest + 0.1.7 + + + commons-codec + commons-codec + 1.15 + + + + org.javaswift + joss + 0.10.0 + + + org.apache.commons + commons-csv + 1.2 + + + + com.github.scribejava + scribejava-apis + 6.9.0 + + + + com.nimbusds + oauth2-oidc-sdk + 9.41.1 + + + + io.gdcc + xoai-data-provider + ${gdcc.xoai.version} + + + io.gdcc + xoai-service-provider + ${gdcc.xoai.version} + + + + com.google.auto.service + auto-service + 1.0-rc2 + true + jar + + + + org.glassfish.jersey.containers + jersey-container-servlet + 2.23.2 + + + + org.glassfish.jersey.media + jersey-media-multipart + 2.23.2 + + + com.mashape.unirest + unirest-java + 1.4.9 + + + + org.apache.commons + commons-compress + + + + org.duracloud + common + 7.1.1 + + + org.slf4j + log4j-over-slf4j + + + ch.qos.logback + logback-classic + + + + + org.duracloud + storeclient + 7.1.1 + + + org.slf4j + log4j-over-slf4j + + + com.amazonaws + aws-java-sdk-sqs + + + ch.qos.logback + logback-classic + + + + + + org.apache.tika + tika-core + ${tika.version} + + + org.apache.tika + tika-parsers-standard-package + ${tika.version} + + + + org.apache.opennlp + opennlp-tools + 1.9.1 + + + com.google.cloud + google-cloud-storage + + + + + + com.auth0 + java-jwt + 3.19.1 + + + + io.github.erdtman 
+ java-json-canonicalization + 1.1 + + + edu.ucar + cdm-core + ${netcdf.version} + + + + + org.junit.jupiter + junit-jupiter + ${junit.jupiter.version} + test + + + junit + junit + ${junit.version} + test + + + org.junit.vintage + junit-vintage-engine + ${junit.vintage.version} + test + + + org.hamcrest + hamcrest-library + 2.2 + test + + + org.assertj + assertj-core + 3.20.2 + test + + + org.xmlunit + xmlunit-assertj3 + 2.8.2 + test + + + com.jayway.restassured + rest-assured + 2.4.0 + test + + + org.skyscreamer + jsonassert + 1.5.0 + test + + + com.vaadin.external.google + android-json + + + + + org.testcontainers + testcontainers + test + + + org.testcontainers + junit-jupiter + test + + + org.testcontainers + postgresql + test + + + org.mockito + mockito-core + ${mockito.version} + test + + + org.mockito + mockito-junit-jupiter + ${mockito.version} + test + + + io.smallrye.config + smallrye-config + ${smallrye-mpconfig.version} + test + + + + + + + + src/main/java + + *.properties + **/*.properties + **/mime.types + **/*.R + + + + src/main/resources + + **/*.sql + **/*.xml + **/firstNames/*.* + **/*.xsl + **/services/* + + + + src/main/resources + + true + + **/*.properties + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + ${target.java.version} + + ${compilerArgument} + + + + org.apache.maven.plugins + maven-jar-plugin + + + + true + true + + + + + + org.apache.maven.plugins + maven-war-plugin + + true + false + + + true + true + + + + + + de.qaware.maven + go-offline-maven-plugin + 1.2.1 + + + + + + + org.jacoco + jacoco-maven-plugin + ${jacoco.version} + + ${basedir}/target/coverage-reports/jacoco-unit.exec + ${basedir}/target/coverage-reports/jacoco-unit.exec + + + + jacoco-initialize + + prepare-agent + + + + jacoco-site + package + + report + + + + + + org.eluder.coveralls + coveralls-maven-plugin + 4.3.0 + + + javax.xml.bind + jaxb-api + 2.3.1 + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + ${testsToExclude} + 
${skipUnitTests} + + + + org.apache.maven.plugins + maven-checkstyle-plugin + + checkstyle.xml + UTF-8 + true + + + + + + + dev + + + + true + + + edu.harvard.iq.dataverse.NonEssentialTests + + + + all-unit-tests + + + + tc + + true + 9.6 + + + + + org.apache.maven.plugins + maven-failsafe-plugin + ${maven-failsafe-plugin.version} + + testcontainers + + ${postgresql.server.version} + + + + + + integration-test + verify + + + + + + + + +
diff --git a/modules/container-dataverse/src/main/docker/postboot.sh b/modules/container-dataverse/src/main/docker/postboot.sh
new file mode 100644
index 00000000000..ac279b99a56
--- /dev/null
+++ b/modules/container-dataverse/src/main/docker/postboot.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Post-boot tweaks applied through the Dataverse admin settings API.
+SETTINGS_URL="http://localhost:8080/api/admin/settings"
+
+# put_setting NAME VALUE: PUT VALUE into the database setting NAME.
+# --fail makes curl report HTTP 4xx/5xx; a failure is logged but does not abort the boot.
+put_setting() {
+  curl --fail --show-error -X PUT -d "$2" "${SETTINGS_URL}/$1" ||
+    echo "WARNING: could not set $1" >&2
+}
+
+# check if we should disable DOI validation
+if [[ "${DISABLE_DOI:-}" == "true" ]]; then
+  echo "Disabling DOI validation"
+  put_setting ":DoiProvider" "FAKE"
+fi
+
+# check if we should exclude emails from exports
+if [[ "${EXCLUDE_EMAIL_EXPORTS:-}" == "true" ]]; then
+  echo "Excluding emails in exports"
+  put_setting ":ExcludeEmailFromExport" "true"
+fi
+
+
+#wait-for-it localhost:8080 -- tail -f ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs/server.log
\ No newline at end of file
diff --git a/modules/container-dataverse/src/main/docker/preboot.sh b/modules/container-dataverse/src/main/docker/preboot.sh new file mode 100644 index 00000000000..31c9f20f8fd --- /dev/null +++ b/modules/container-dataverse/src/main/docker/preboot.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# create the config file that has all our environment settings + +echo -e " +[glassfish] +HOST_DNS_ADDRESS=${HOST_DNS_ADDRESS} +GLASSFISH_USER = ${GLASSFISH_USER} +GLASSFISH_DIRECTORY = ${PAYARA_DIR} +GLASSFISH_ADMIN_USER = ${GLASSFISH_ADMIN_USER} +GLASSFISH_ADMIN_PASSWORD = ${GLASSFISH_ADMIN_PASSWORD} +GLASSFISH_HEAP = 2048 +GLASSFISH_REQUEST_TIMEOUT = 1800 + +[database] +POSTGRES_ADMIN_PASSWORD=${POSTGRES_ADMIN_PASSWORD} +POSTGRES_SERVER=${POSTGRES_SERVER} +POSTGRES_PORT=${POSTGRES_PORT} +POSTGRES_DATABASE=${POSTGRES_DATABASE} +POSTGRES_PASSWORD=${POSTGRES_PASSWORD} +POSTGRES_USER=${POSTGRES_USER} + +[system] +ADMIN_EMAIL=${ADMIN_EMAIL} +MAIL_SERVER=${MAIL_SERVER} +SOLR_LOCATION=${SOLR_LOCATION} + +[rserve] +RSERVE_HOST=${RSERVE_HOST} +RSERVE_PORT=${RSERVE_PORT} +RSERVE_USER=${RSERVE_USER} +RSERVE_PASSWORD=${RSERVE_PASSWORD} + +[doi] +DOI_USERNAME = dataciteuser +DOI_PASSWORD = datacitepassword +DOI_BASEURL = https://mds.test.datacite.org +DOI_DATACITERESTAPIURL = https://api.test.datacite.org +" > /dataverse/scripts/installer/default.config + +# https://github.com/poikilotherm/dataverse/blob/ct-mvn-mod/modules/container-base/src/main/docker/Dockerfile +# https://guides.dataverse.org/en/latest/installation/config.html#amazon-s3-storage-or-compatible +# set s3 storage settings +if !
grep -q "Ddataverse.files.s3.type=s3" "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/domain.xml"; then + # use : as delimiter + sed -i "s::-Ddataverse.files.s3.type=s3\n:" ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/domain.xml + sed -i "s::-Ddataverse.files.s3.label=s3\n:" ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/domain.xml + sed -i "s::-Ddataverse.files.s3.access-key=${S3_ACCESS_KEY}\n:" ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/domain.xml + sed -i "s::-Ddataverse.files.s3.secret-key=${S3_SECRET_KEY}\n:" ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/domain.xml + sed -i "s::-Ddataverse.files.s3.custom-endpoint-url=http\:\/\/seaweedfs\:8333\n:" ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/domain.xml + # keep this as dataverse as it's hardcoded elsewhere + sed -i "s::-Ddataverse.files.s3.bucket-name=dataverse\n:" ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/domain.xml + sed -i "s::-Ddataverse.files.s3.custom-endpoint-region=us-east-1\n:" ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/domain.xml + # # Use path style buckets instead of subdomains + sed -i "s::-Ddataverse.files.s3.path-style-access=true\n:" ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/domain.xml +fi + +# create an empty s3 bucket in seaweedfs if it doesn't already exist +curl -X POST "http://seaweedfs:8888/buckets/" +curl -X POST "http://seaweedfs:8888/buckets/dataverse/" diff --git a/modules/container-postgresql/.gitignore b/modules/container-postgresql/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-postgresql/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-postgresql/README.md b/modules/container-postgresql/README.md new file mode 100644 index 00000000000..08a2b87c997 --- /dev/null +++ b/modules/container-postgresql/README.md @@ -0,0 +1,52 @@ +# Dataverse Postgresql Image + +The Dataverse Postgresql Image is for the backend Postgresql 
relational database, which +is a dependency of Dataverse. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-postgresql) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-postgresql/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-postgresql/src/main/docker/Dockerfile)) + +Within the main repository, you may find the files for this image at `/modules/container-postgresql`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2).
+ +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-postgresql/pom.xml b/modules/container-postgresql/pom.xml new file mode 100644 index 00000000000..04c31af9400 --- /dev/null +++ b/modules/container-postgresql/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-postgresql + ${packaging.type} + Container Dataverse Postgresql + This module provides the Postgresql database via a container. 
+ + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/postgresql:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + \ No newline at end of file diff --git a/modules/container-postgresql/src/main/docker/Dockerfile b/modules/container-postgresql/src/main/docker/Dockerfile new file mode 100644 index 00000000000..81d896cc87d --- /dev/null +++ b/modules/container-postgresql/src/main/docker/Dockerfile @@ -0,0 +1,6 @@ +# https://hub.docker.com/_/postgres +FROM postgres:14 + +RUN apt-get update && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* diff --git a/modules/container-postgresql/src/main/docker/assembly.xml b/modules/container-postgresql/src/main/docker/assembly.xml new file mode 100644 index 00000000000..6ed4fcb6df4 --- /dev/null +++ b/modules/container-postgresql/src/main/docker/assembly.xml @@ -0,0 +1,10 @@ + + + + ${project.basedir}/target/ + postgresql + + + diff --git a/modules/container-rserve/.gitignore b/modules/container-rserve/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-rserve/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-rserve/README.md b/modules/container-rserve/README.md new file mode 100644 index 00000000000..525af0f323d --- /dev/null +++ b/modules/container-rserve/README.md @@ -0,0 +1,51 @@ +# Dataverse Rserve Image + +The 
Dataverse Rserve Image provides the Rserve daemon, which is a dependency of Dataverse. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-rserve) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-rserve/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-rserve/src/main/docker/Dockerfile)) + +Within the main repository, you may find the files for this image at `/modules/container-rserve`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2).
+ +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-rserve/pom.xml b/modules/container-rserve/pom.xml new file mode 100644 index 00000000000..71d9c7fc38e --- /dev/null +++ b/modules/container-rserve/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-rserve + ${packaging.type} + Container Dataverse Rserve + This module provides the rserve daemon via a container. 
+ + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/rserve:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + +
diff --git a/modules/container-rserve/src/main/docker/Dockerfile b/modules/container-rserve/src/main/docker/Dockerfile
new file mode 100644
index 00000000000..8817882a69b
--- /dev/null
+++ b/modules/container-rserve/src/main/docker/Dockerfile
@@ -0,0 +1,29 @@
+# https://hub.docker.com/_/ubuntu
+FROM ubuntu:22.04
+
+# build-time only, so apt stays non-interactive without leaking into the runtime environment
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    tzdata nano r-base r-base-dev libssl-dev libnlopt-dev libcurl4-openssl-dev ca-certificates && \
+    apt-get -y upgrade && \
+    rm -rf /var/lib/apt/lists/*
+
+# multi-thread the building of the libraries from source via Ncpus=X
+# (R argument names are case-sensitive: "NCPUS" is swallowed by "..." and has no effect)
+RUN R -e "install.packages(c('R2HTML', 'rjson', 'DescTools', 'Rserve', 'haven'), Ncpus=4)"
+
+# this is the user and password that Dataverse will connect to the Rserve server
+RUN echo "rserve rserve" > /rserve.pwd
+
+# Rserve evaluates R code sent by remote clients, so run it as an unprivileged OS user
+RUN useradd --create-home --shell /bin/bash rserve
+USER rserve
+
+WORKDIR /
+
+EXPOSE 6311/tcp
+
+# https://github.com/ubc/r-docker/blob/master/Dockerfile
+CMD ["R", "-e", "Rserve::run.Rserve(remote=TRUE, auth=TRUE, pwdfile='/rserve.pwd', daemon=FALSE, port=6311, fileio=TRUE, maxinbuf=262144)"]
\ No newline at end of file
diff --git a/modules/container-rserve/src/main/docker/assembly.xml
b/modules/container-rserve/src/main/docker/assembly.xml new file mode 100644 index 00000000000..d7f1aa80009 --- /dev/null +++ b/modules/container-rserve/src/main/docker/assembly.xml @@ -0,0 +1,10 @@ + + + + ${project.basedir}/target/ + rserve + + + diff --git a/modules/container-seaweedfs/.gitignore b/modules/container-seaweedfs/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-seaweedfs/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-seaweedfs/README.md b/modules/container-seaweedfs/README.md new file mode 100644 index 00000000000..71b773d191c --- /dev/null +++ b/modules/container-seaweedfs/README.md @@ -0,0 +1,51 @@ +# Dataverse Seaweedfs Image + +The Dataverse Seaweedfs Image provides s3 compatible object storage for data. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-seaweedfs) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. 
+ ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-seaweedfs/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-seaweedfs/src/main/docker/Dockerfile)) + +Within the main repository, you may find the files for this image at `/modules/container-seaweedfs`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within.
diff --git a/modules/container-seaweedfs/pom.xml b/modules/container-seaweedfs/pom.xml new file mode 100644 index 00000000000..4612cfe821e --- /dev/null +++ b/modules/container-seaweedfs/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-seaweedfs + ${packaging.type} + Container Dataverse Seaweedfs + This module provides the seaweedfs object storage via a container. + + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/seaweedfs:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + +
diff --git a/modules/container-seaweedfs/src/main/docker/Dockerfile b/modules/container-seaweedfs/src/main/docker/Dockerfile
new file mode 100644
index 00000000000..c000090ba6e
--- /dev/null
+++ b/modules/container-seaweedfs/src/main/docker/Dockerfile
@@ -0,0 +1,18 @@
+# https://hub.docker.com/r/chrislusf/seaweedfs
+# pass --build-arg SEAWEEDFS_VERSION=<released tag> for a reproducible build; the default keeps the previous behaviour
+ARG SEAWEEDFS_VERSION=latest
+FROM chrislusf/seaweedfs:${SEAWEEDFS_VERSION}
+
+USER root
+
+# use HTTP connection for adding the needed ca-certificates package
+# install it and then swap back to HTTPS
+RUN sed -i 's,https,http,g' /etc/apk/repositories && \
+    apk add --no-cache ca-certificates && \
+    update-ca-certificates && \
+    sed -i 's,http,https,g' /etc/apk/repositories && \
+    apk upgrade --no-cache && \
+    apk add --no-cache curl bash
+
+# https://github.com/chrislusf/seaweedfs/wiki/Amazon-S3-API#static-configuration
+COPY config.json /config.json
diff --git a/modules/container-seaweedfs/src/main/docker/assembly.xml b/modules/container-seaweedfs/src/main/docker/assembly.xml new file mode 100644 index 00000000000..1e1d728be5a --- /dev/null +++ b/modules/container-seaweedfs/src/main/docker/assembly.xml @@ -0,0 +1,10 @@ + + + + ${project.basedir}/target/ + seaweedfs + + + diff --git a/modules/container-seaweedfs/src/main/docker/config.json b/modules/container-seaweedfs/src/main/docker/config.json new file mode 100644 index 00000000000..ad18955f042 --- /dev/null +++ b/modules/container-seaweedfs/src/main/docker/config.json @@ -0,0 +1,19 @@ +{ + "identities": [ + { + "name": "anonymous", + "credentials": [ + { + "accessKey": "secret", + "secretKey": "secret" + } + ], + "actions": [ + "Read:dataverse", + "List:dataverse", + "Tagging:dataverse", + "Write:dataverse" + ] + } + ] +} \ No newline at end of file diff --git a/modules/container-solr/.gitignore b/modules/container-solr/.gitignore new file mode 100644 index 00000000000..cea44f1ab64 --- /dev/null +++ b/modules/container-solr/.gitignore @@ -0,0 +1,2 @@ +.flattened-pom.xml +/src/main/docker/*.xml \ No newline at end of file diff --git a/modules/container-solr/README.md b/modules/container-solr/README.md new file mode 100644 index 00000000000..bb4fa454f94 --- /dev/null +++ b/modules/container-solr/README.md @@ -0,0 +1,51 @@ +# Dataverse Solr Image + +The Dataverse Solr Image provides the Solr indexing database used by Dataverse. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it.
+You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-solr) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-solr/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-solr/src/main/docker/Dockerfile)) + +Within the main repository, you may find the files for this image at `/modules/container-solr`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, this image likely also contains other software which may be under other licenses (such as +[Apache Solr](https://github.com/apache/solr/blob/main/LICENSE.txt), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-solr/pom.xml b/modules/container-solr/pom.xml new file mode 100644 index 00000000000..f233fa49aa0 --- /dev/null +++ b/modules/container-solr/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-solr + ${packaging.type} + Container Dataverse Solr + This module provides the Solr database via a container.
+ + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/solr:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + diff --git a/modules/container-solr/src/main/docker/Dockerfile b/modules/container-solr/src/main/docker/Dockerfile new file mode 100644 index 00000000000..bb128fcc776 --- /dev/null +++ b/modules/container-solr/src/main/docker/Dockerfile @@ -0,0 +1,29 @@ +# https://hub.docker.com/_/solr +FROM solr:8.11.2 + +# https://guides.dataverse.org/en/latest/installation/prerequisites.html#solr + +USER root + +RUN apt-get update && \ + apt-get -y upgrade && \ + rm -rf /var/lib/apt/lists/* + +COPY *.xml / + +# append soft and hard nproc (processes) and nofile (open files) limits of 65000 for the solr user to limits.conf +RUN echo "solr soft nproc 65000" >> /etc/security/limits.conf && \ + echo "solr hard nproc 65000" >> /etc/security/limits.conf && \ + echo "solr soft nofile 65000" >> /etc/security/limits.conf && \ + echo "solr hard nofile 65000" >> /etc/security/limits.conf + +# raise the default of the solr.jetty.request.header.size property in jetty.xml from 8192 to 102400 +RUN sed -i "s/name=\"solr.jetty.request.header.size\" default=\"8192\"/name=\"solr.jetty.request.header.size\" default=\"102400\"/g" /opt/solr/server/etc/jetty.xml + +COPY --chown=solr:solr startup.sh /startup.sh +RUN chmod +x /startup.sh + +# the steps above needed root; switch back to the unprivileged solr user for runtime +USER solr + +CMD ["/startup.sh"] diff --git a/modules/container-solr/src/main/docker/startup.sh
b/modules/container-solr/src/main/docker/startup.sh new file mode 100644 index 00000000000..aced1a47ed4 --- /dev/null +++ b/modules/container-solr/src/main/docker/startup.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# seed the collection1 core directory from Solr's _default configset, then overlay the XML files found at / +mkdir -p /var/solr/data/collection1/ +cp -R /opt/solr/server/solr/configsets/_default/conf /var/solr/data/collection1 +cp /*.xml /var/solr/data/collection1/conf/ + +# create collection on startup +echo "name=collection1" > /var/solr/data/collection1/core.properties + +# exec replaces this shell with Solr so that it receives the container's stop signal directly +exec solr-foreground diff --git a/modules/container-traefik/.gitignore b/modules/container-traefik/.gitignore new file mode 100644 index 00000000000..e803b2f0ad1 --- /dev/null +++ b/modules/container-traefik/.gitignore @@ -0,0 +1,3 @@ +.flattened-pom.xml +/src/main/docker/traefik.crt +/src/main/docker/traefik.key \ No newline at end of file diff --git a/modules/container-traefik/README.md b/modules/container-traefik/README.md new file mode 100644 index 00000000000..3914c7defb5 --- /dev/null +++ b/modules/container-traefik/README.md @@ -0,0 +1,52 @@ +# Dataverse Traefik Image + +The Dataverse Traefik Image provides a reverse proxy and TLS certificate encryption +for HTTPS access into the Dataverse application. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse).
+Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-traefik) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-traefik/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-traefik/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image files at `/modules/container-traefik`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. 
+ +As with all Docker images, this image likely also contains other software which may be under other licenses (such as +[Traefik](https://github.com/traefik/traefik/blob/master/LICENSE.md), Bash, etc., from the base +distribution, along with any direct or indirect dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-traefik/pom.xml b/modules/container-traefik/pom.xml new file mode 100644 index 00000000000..cade7f68907 --- /dev/null +++ b/modules/container-traefik/pom.xml @@ -0,0 +1,143 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-traefik + ${packaging.type} + Container Dataverse Traefik + This module provides the Traefik reverse proxy via a container. + + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/traefik:${base.image.tag} + unstable + 1000 + 1000 + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + diff --git a/modules/container-traefik/src/main/docker/Dockerfile b/modules/container-traefik/src/main/docker/Dockerfile new file mode 100644 index 00000000000..58cbf466a50 --- /dev/null +++ b/modules/container-traefik/src/main/docker/Dockerfile @@ -0,0 +1,18 @@ +# https://hub.docker.com/_/traefik +FROM
traefik:v2.8 + +# Add certs to OS truststore +COPY *.crt /usr/local/share/ca-certificates/ +# NOTE(review): this bakes the TLS private key into an image layer; certificates.yaml reads it from this path +COPY *.key /usr/local/share/ca-certificates/ + +# use HTTP connection for adding the needed ca-certificates package +# install it, swap back to HTTPS (only the URL scheme is rewritten), then upgrade without keeping an apk index cache +RUN sed -i 's,https://,http://,g' /etc/apk/repositories && \ + apk add --no-cache ca-certificates && \ + update-ca-certificates && \ + sed -i 's,http://,https://,g' /etc/apk/repositories && \ + apk upgrade --no-cache && apk add --no-cache bash curl + +# COPY creates the missing /configuration/ directory itself, so no separate mkdir layer is needed +COPY certificates.yaml /configuration/certificates.yaml diff --git a/modules/container-traefik/src/main/docker/assembly.xml b/modules/container-traefik/src/main/docker/assembly.xml new file mode 100644 index 00000000000..25a5a578d83 --- /dev/null +++ b/modules/container-traefik/src/main/docker/assembly.xml @@ -0,0 +1,10 @@ + + + + ${project.basedir}/target/ + traefik + + + diff --git a/modules/container-traefik/src/main/docker/certificates.yaml b/modules/container-traefik/src/main/docker/certificates.yaml new file mode 100644 index 00000000000..27fc217abf6 --- /dev/null +++ b/modules/container-traefik/src/main/docker/certificates.yaml @@ -0,0 +1,6 @@ +tls: + stores: + default: + defaultCertificate: + certFile: /usr/local/share/ca-certificates/traefik.crt + keyFile: /usr/local/share/ca-certificates/traefik.key \ No newline at end of file diff --git a/scripts/installer/as-setup.sh b/scripts/installer/as-setup.sh index 853db77f471..d3a7d20c9ae 100755 --- a/scripts/installer/as-setup.sh +++ b/scripts/installer/as-setup.sh @@ -97,8 +97,17 @@ function preliminary_setup() ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.rserve.user=${RSERVE_USER}" ./asadmin $ASADMIN_OPTS create-jvm-options '\-Ddataverse.rserve.password=${ALIAS=rserve_password_alias}' # The host and url addresses this Dataverse will be using: - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.fqdn=${HOST_ADDRESS}" - ./asadmin $ASADMIN_OPTS create-jvm-options
"\-Ddataverse.siteUrl=http\://\${dataverse.fqdn}\:8080" + # https://guides.dataverse.org/en/latest/installation/config.html#dataverse-fqdn + if [[ ! -z "${DATAVERSE_FQDN}" ]]; then + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.fqdn=${DATAVERSE_FQDN}" + else + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.fqdn=${HOST_ADDRESS}" + fi + if [[ ! -z "${DATAVERSE_SITE_URL}" ]]; then + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.siteUrl=${DATAVERSE_SITE_URL}" + else + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.siteUrl=http\://\${dataverse.fqdn}\:8080" + fi # password reset token timeout in minutes ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.auth.password-reset-timeout-in-minutes=60"