From b023bdbef2ddeab8ccd7ce791b0bcf944eac3c5c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 3 Feb 2022 12:40:29 +0100 Subject: [PATCH 01/29] build: fix wrong XML namespaces in pom.xml --- pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d4b4fb7f2d6..b23f6939438 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,6 @@ - + 4.0.0 + 5.9 + + UTF-8 + -Xdoclint:none + + + UTC + en + US + + -Duser.timezone=${project.timezone} -Dfile.encoding=${project.build.sourceEncoding} -Duser.language=${project.language} -Duser.region=${project.region} + + + 3.8.1 + 3.2.2 + 3.3.2 + 3.2.0 + 3.0.0-M5 + 3.0.0-M5 + + + \ No newline at end of file From f102122175b6d52af845071a9eb132c9639861ec Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 3 Feb 2022 15:08:26 +0100 Subject: [PATCH 03/29] build(dataverse): make main pom.xml inherit from new dataverse-parent #8394 This includes moving/inheriting some project properties from the parent. --- pom.xml | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/pom.xml b/pom.xml index b23f6939438..4e452301c14 100644 --- a/pom.xml +++ b/pom.xml @@ -2,28 +2,22 @@ 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + modules/dataverse-parent + + - edu.harvard.iq dataverse - 5.9 war dataverse - UTF-8 - -Xdoclint:none - - - UTC - en - US - - -Duser.timezone=${project.timezone} -Dfile.encoding=${project.build.sourceEncoding} -Duser.language=${project.language} -Duser.region=${project.region} false 11 From c70e75cddb7a5acac7a14550818294903c08e566 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 3 Feb 2022 15:25:34 +0100 Subject: [PATCH 04/29] build(dataverse): make Maven plugin versions controlled by parent #8394 --- pom.xml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 4e452301c14..2f280ae4454 100644 --- a/pom.xml +++ b/pom.xml @@ -833,7 +833,7 @@ org.apache.maven.plugins maven-jar-plugin - 
2.3 + ${maven-jar-plugin.version} @@ -846,7 +846,7 @@ org.apache.maven.plugins maven-war-plugin - 2.3 + ${maven-war-plugin.version} true false @@ -861,7 +861,7 @@ org.apache.maven.plugins maven-dependency-plugin - 3.2.0 + ${maven-dependency-plugin.version} de.qaware.maven @@ -911,7 +911,7 @@ maven-surefire-plugin - 3.0.0-M5 + ${maven-surefire-plugin.version} ${testsToExclude} @@ -964,7 +964,7 @@ org.apache.maven.plugins maven-failsafe-plugin - 2.22.2 + ${maven-failsafe-plugin.version} testcontainers From 424400b4ca694af2201c4c857f0230c646a5b6c4 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 3 Feb 2022 15:27:18 +0100 Subject: [PATCH 05/29] chore(jacoco): fix wrongly typed config options of JaCoCo Maven plugin #8394 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 2f280ae4454..ac137d84a8e 100644 --- a/pom.xml +++ b/pom.xml @@ -877,8 +877,8 @@ jacoco-maven-plugin ${jacoco.version} - ${basedir}/target/coverage-reports/jacoco-unit.exec - ${basedir}/target/coverage-reports/jacoco-unit.exec + ${basedir}/target/coverage-reports/jacoco-unit.exec + ${basedir}/target/coverage-reports/jacoco-unit.exec From 8fb4b9ab6ecc456ac5753a2d1b65ff0c2c9e74d8 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 3 Feb 2022 15:29:59 +0100 Subject: [PATCH 06/29] build(dataverse): move and to parent #8394 --- modules/dataverse-parent/pom.xml | 82 ++++++++++++++++++++++++++++++ pom.xml | 87 ++------------------------------ 2 files changed, 87 insertions(+), 82 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 1dd72033991..8a0175efbac 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -40,4 +40,86 @@ 3.0.0-M5 + + + central + Central Repository + https://repo.maven.apache.org/maven2 + default + + false + + + never + + + + + + + + payara-nexus-artifacts + Payara Nexus Artifacts + https://nexus.payara.fish/repository/payara-artifacts + + true 
+ + + false + + + + + payara-patched-externals + Payara Patched Externals + https://raw.github.com/payara/Payara_PatchedProjects/master + + true + + + false + + + + central-repo + Central Repository + https://repo1.maven.org/maven2 + default + + + prime-repo + PrimeFaces Maven Repository + https://repository.primefaces.org + default + + + dataone.org + https://maven.dataone.org + + true + + + true + + + + dvn.private + Local repository for hosting jars not available from network repositories. + file://${project.basedir}/local_lib + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index ac137d84a8e..788171232d5 100644 --- a/pom.xml +++ b/pom.xml @@ -43,88 +43,11 @@ 1.20.1 0.8.7 - - - central - Central Repository - https://repo.maven.apache.org/maven2 - default - - false - - - never - - - - - - - payara-nexus-artifacts - Payara Nexus Artifacts - https://nexus.payara.fish/repository/payara-artifacts - - true - - - false - - - - - payara-patched-externals - Payara Patched Externals - https://raw.github.com/payara/Payara_PatchedProjects/master - - true - - - false - - - - central-repo - Central Repository - https://repo1.maven.org/maven2 - default - - - prime-repo - PrimeFaces Maven Repository - https://repository.primefaces.org - default - - - dataone.org - https://maven.dataone.org - - true - - - true - - - - dvn.private - Local repository for hosting jars not available from network repositories. 
- file://${project.basedir}/local_lib - - - - + + From 6089cbb665bf59d83ac4773cabae7f3bbb628305 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 3 Feb 2022 15:54:15 +0100 Subject: [PATCH 07/29] build(dataverse): move common version properties to parent #8394 --- modules/dataverse-parent/pom.xml | 29 ++++++++++++++++++++++++++++- pom.xml | 19 ------------------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 8a0175efbac..569050c49ce 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -17,7 +17,8 @@ 5.9 - + + 11 UTF-8 -Xdoclint:none @@ -31,6 +32,32 @@ --> -Duser.timezone=${project.timezone} -Dfile.encoding=${project.build.sourceEncoding} -Duser.language=${project.language} -Duser.region=${project.region} + + 5.2021.5 + 42.2.19 + 8.8.1 + 1.11.762 + 0.157.0 + + + 8.0.0 + 1.7.35 + 2.11.0 + 1.2 + 3.12.0 + 1.21 + 4.5.13 + 4.4.14 + + + 1.15.0 + 0.4.1 + + 4.13.1 + 5.7.0 + ${junit.jupiter.version} + 2.28.2 + 3.8.1 3.2.2 diff --git a/pom.xml b/pom.xml index 788171232d5..7db550694b3 100644 --- a/pom.xml +++ b/pom.xml @@ -19,26 +19,7 @@ dataverse false - - 11 - 8.0.0 - 5.2021.5 - 42.2.19 - 1.11.762 - 1.7.35 1.2.18.4 - 2.11.0 - 1.2 - 3.12.0 - 4.5.13 - 4.4.14 - 0.157.0 - 4.13.1 - 5.7.0 - ${junit.jupiter.version} - 1.15.0 - 0.4.1 - 2.28.2 5.2.4 1.20.1 0.8.7 From 0ad680c38b4b55ee23662dab31b2fea1337c096d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 3 Feb 2022 15:55:10 +0100 Subject: [PATCH 08/29] build(dataverse): make SolrJ dependency use a version from parent #8394 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7db550694b3..5f5f7334bd3 100644 --- a/pom.xml +++ b/pom.xml @@ -333,7 +333,7 @@ org.apache.solr solr-solrj - 8.8.1 + ${solr.version} colt From c56e901d23f8cca3c11cec604d3929b3e236d04a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 3 Feb 2022 15:58:04 +0100 Subject: 
[PATCH 09/29] build(dataverse): move common entries from to parent #8394 --- modules/dataverse-parent/pom.xml | 109 +++++++++++++++++++++++++++++++ pom.xml | 98 +-------------------------- 2 files changed, 112 insertions(+), 95 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 569050c49ce..1a56054a559 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -13,6 +13,115 @@ ../../pom.xml + + + + + + + + + fish.payara.api + payara-bom + ${payara.version} + pom + import + + + com.amazonaws + aws-java-sdk-bom + ${aws.version} + pom + import + + + com.google.cloud + google-cloud-bom + ${google.cloud.version} + pom + import + + + + + + commons-logging + commons-logging + ${commons.logging.version} + + + org.apache.commons + commons-lang3 + ${commons.lang3.version} + + + + org.apache.httpcomponents + httpclient + ${apache.httpcomponents.client.version} + + + + org.apache.httpcomponents + httpmime + ${apache.httpcomponents.client.version} + + + + org.apache.httpcomponents + httpcore + ${apache.httpcomponents.core.version} + + + + + commons-io + commons-io + ${commons.io.version} + + + + org.apache.commons + commons-compress + ${commons.compress.version} + + + + + org.slf4j + slf4j-api + ${slf4j.version} + + + org.slf4j + slf4j-jdk14 + ${slf4j.version} + + + + org.slf4j + slf4j-log4j12 + ${slf4j.version} + + + org.slf4j + jcl-over-slf4j + ${slf4j.version} + + + + + + org.testcontainers + testcontainers-bom + ${testcontainers.version} + import + pom + + + diff --git a/pom.xml b/pom.xml index 5f5f7334bd3..62dfa73a28a 100644 --- a/pom.xml +++ b/pom.xml @@ -31,76 +31,6 @@ --> - - fish.payara.api - payara-bom - ${payara.version} - pom - import - - - com.amazonaws - aws-java-sdk-bom - ${aws.version} - pom - import - - - commons-logging - commons-logging - ${commons.logging.version} - - - org.apache.commons - commons-lang3 - ${commons.lang3.version} - - - - org.apache.httpcomponents - httpclient - 
${apache.httpcomponents.client.version} - - - - org.apache.httpcomponents - httpmime - ${apache.httpcomponents.client.version} - - - - org.apache.httpcomponents - httpcore - ${apache.httpcomponents.core.version} - - - com.google.cloud - google-cloud-bom - ${google.cloud.version} - pom - import - - - org.testcontainers - testcontainers-bom - ${testcontainers.version} - import - pom - - - - - commons-io - commons-io - ${commons.io.version} - - - - org.apache.commons - commons-compress - 1.21 - - org.apache.abdera @@ -112,29 +42,6 @@ abdera-i18n 1.1.3 - - - - org.slf4j - slf4j-api - ${slf4j.version} - - - org.slf4j - slf4j-jdk14 - ${slf4j.version} - - - org.slf4j - jcl-over-slf4j - ${slf4j.version} - - - org.slf4j - jul-to-slf4j - ${slf4j.version} - - + org.apache.commons commons-lang3 - + @@ -592,6 +499,7 @@ com.google.cloud google-cloud-storage + From 477a180465e38a690a5de4c9da227ef9ae445ca6 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 3 Feb 2022 17:20:26 +0100 Subject: [PATCH 10/29] ci(unit): make Github Action for Maven Unit Tests be triggered on POM changes #8394 --- .github/workflows/maven_unit_test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/maven_unit_test.yml b/.github/workflows/maven_unit_test.yml index 70363112050..1e5bba3ce1c 100644 --- a/.github/workflows/maven_unit_test.yml +++ b/.github/workflows/maven_unit_test.yml @@ -4,9 +4,13 @@ on: push: paths: - "**.java" + - "pom.xml" + - "modules/**/pom.xml" pull_request: paths: - "**.java" + - "pom.xml" + - "modules/**/pom.xml" jobs: unittest: From 19b9becabc5905130382ae3a3ec3e9873b6e76d0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 7 Feb 2022 13:49:34 +0100 Subject: [PATCH 11/29] build(dataverse): move postgresql lib versioning to parent #8394 --- modules/dataverse-parent/pom.xml | 8 +++++++- pom.xml | 1 - 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 
1a56054a559..24e602f8033 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -44,7 +44,12 @@ - + + + org.postgresql + postgresql + ${postgresql.version} + commons-logging commons-logging @@ -174,6 +179,7 @@ 3.2.0 3.0.0-M5 3.0.0-M5 + 3.3.0 diff --git a/pom.xml b/pom.xml index 62dfa73a28a..2feba7e9cf5 100644 --- a/pom.xml +++ b/pom.xml @@ -144,7 +144,6 @@ org.postgresql postgresql - ${postgresql.version} org.flywaydb From 66461a9feb850ac34d0c3bc98999260195f3562a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 7 Feb 2022 13:51:02 +0100 Subject: [PATCH 12/29] style(zipdownloader): fix accidental tab char usage --- .../custom/service/download/ZipDownloadService.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java index 4b66ee770d5..db7eb7b8428 100644 --- a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java +++ b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java @@ -148,9 +148,9 @@ public void processFiles() { String zipEntryName = checkZipEntryName(fileName, fileNamesList); // this may not be needed anymore - some extra sanitizing of the file // name we used to have to do - since all the values in a current Dataverse - // database may already be santized enough. - // (Edit: Yes, we still need this - there are still datasets with multiple - // files with duplicate names; this method takes care of that) + // database may already be santized enough. 
+ // (Edit: Yes, we still need this - there are still datasets with multiple + // files with duplicate names; this method takes care of that) if (inputStream != null && this.zipOutputStream != null) { ZipEntry entry = new ZipEntry(zipEntryName); From ea59f8ae766d038d6e529b3abc109fbbb6a7f308 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 7 Feb 2022 13:53:25 +0100 Subject: [PATCH 13/29] build(zipdownloader): make zipdownloader use parent POM #8394 --- modules/dataverse-parent/pom.xml | 1 + scripts/zipdownload/pom.xml | 55 ++++++++------------------------ 2 files changed, 15 insertions(+), 41 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 24e602f8033..b216772a153 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -12,6 +12,7 @@ ../../pom.xml + ../../scripts/zipdownload org.postgresql postgresql - 42.2.2 com.amazonaws @@ -58,10 +32,9 @@ maven-compiler-plugin - 3.1 + ${maven-compiler-plugin.version} - 1.8 - 1.8 + ${target.java.version} From 41e80db1db922935b4efda21e8b0dc54881fa94a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 7 Feb 2022 13:58:42 +0100 Subject: [PATCH 14/29] refactor(zipdownloader): replace Maven Assembly Plugin with Spring Boot #8394 The Spring Boot method to create an executable fat/uber JAR is offering a much better experience than what Maven Assembly does. It provides a custom bootloader to load JARs from the JAR, which means less potential for conflicts etc. It also offers options to be integrated in other workflows like container images etc. The package is now about 8.2 MB in size, which is negligible compared to 8.0 MB via the Assembly plugin. 
--- scripts/zipdownload/pom.xml | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/scripts/zipdownload/pom.xml b/scripts/zipdownload/pom.xml index 3e48b3d273f..16c719e31f5 100644 --- a/scripts/zipdownload/pom.xml +++ b/scripts/zipdownload/pom.xml @@ -38,21 +38,22 @@ - org.apache.maven.plugins - maven-assembly-plugin - 2.4 - - - - edu.harvard.iq.dataverse.custom.service.download.ZipDownloadService - - - - jar-with-dependencies - - ${project.artifactId}-v${project.version} - false - + org.springframework.boot + spring-boot-maven-plugin + 2.6.3 + + + + repackage + + + + + edu.harvard.iq.dataverse.custom.service.download.ZipDownloadService + + + + From febc61db21d3e5c52b9728640fa39d9a1f7049bd Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 9 Feb 2022 11:21:54 +0100 Subject: [PATCH 15/29] build(deps): re-add SLF4J changes from #8377 in parent #8394 --- modules/dataverse-parent/pom.xml | 6 ++---- pom.xml | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index b216772a153..a90245c03fd 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -104,16 +104,14 @@ slf4j-jdk14 ${slf4j.version} - org.slf4j - slf4j-log4j12 + jcl-over-slf4j ${slf4j.version} org.slf4j - jcl-over-slf4j + jul-to-slf4j ${slf4j.version} diff --git a/pom.xml b/pom.xml index 2feba7e9cf5..5289ddc44a3 100644 --- a/pom.xml +++ b/pom.xml @@ -59,7 +59,7 @@ slf4j-jdk14 - + org.passay passay 1.6.0 From fbdf5c1736141e326821d0c9fea75ebcf085def7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 9 Feb 2022 11:40:53 +0100 Subject: [PATCH 16/29] build(modules): move Maven plugin versioning to parent #8394 --- modules/dataverse-parent/pom.xml | 52 ++++++++++++++++++++++++++++++++ pom.xml | 20 ++---------- scripts/zipdownload/pom.xml | 1 - 3 files changed, 54 insertions(+), 19 deletions(-) diff --git a/modules/dataverse-parent/pom.xml 
b/modules/dataverse-parent/pom.xml index a90245c03fd..4b117866eb3 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -171,6 +171,8 @@ ${junit.jupiter.version} 2.28.2 + 9.3 + 3.8.1 3.2.2 @@ -179,6 +181,7 @@ 3.0.0-M5 3.0.0-M5 3.3.0 + 3.1.2 @@ -196,6 +199,55 @@ + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + + org.apache.maven.plugins + maven-jar-plugin + ${maven-jar-plugin.version} + + + org.apache.maven.plugins + maven-war-plugin + ${maven-war-plugin.version} + + + org.apache.maven.plugins + maven-dependency-plugin + ${maven-dependency-plugin.version} + + + org.apache.maven.plugins + maven-surefire-plugin + ${maven-surefire-plugin.version} + + + org.apache.maven.plugins + maven-failsafe-plugin + ${maven-failsafe-plugin.version} + + + org.apache.maven.plugins + maven-checkstyle-plugin + ${maven-checkstyle-plugin.version} + + + com.puppycrawl.tools + checkstyle + ${checkstyle.version} + + + + + + + diff --git a/pom.xml b/pom.xml index 5289ddc44a3..1d59e8cfb35 100644 --- a/pom.xml +++ b/pom.xml @@ -634,7 +634,6 @@ org.apache.maven.plugins maven-compiler-plugin - 3.8.1 ${target.java.version} @@ -644,7 +643,6 @@ org.apache.maven.plugins maven-jar-plugin - ${maven-jar-plugin.version} @@ -657,7 +655,6 @@ org.apache.maven.plugins maven-war-plugin - ${maven-war-plugin.version} true false @@ -669,11 +666,6 @@ - - org.apache.maven.plugins - maven-dependency-plugin - ${maven-dependency-plugin.version} - de.qaware.maven go-offline-maven-plugin @@ -720,10 +712,10 @@ - + org.apache.maven.plugins maven-surefire-plugin - ${maven-surefire-plugin.version} + ${testsToExclude} ${skipUnitTests} @@ -732,19 +724,11 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.2 checkstyle.xml UTF-8 true - - - com.puppycrawl.tools - checkstyle - 8.42 - - diff --git a/scripts/zipdownload/pom.xml b/scripts/zipdownload/pom.xml index 16c719e31f5..13f7a056cc9 100644 --- a/scripts/zipdownload/pom.xml +++ 
b/scripts/zipdownload/pom.xml @@ -32,7 +32,6 @@ maven-compiler-plugin - ${maven-compiler-plugin.version} ${target.java.version} From 452feea85b9d18e56fd5c9f3e9b160fc878b6c62 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Feb 2022 17:45:00 +0100 Subject: [PATCH 17/29] docs(dev): rework dependencies.rst and add parent POM #8394 This commit adds a lot of new terms used to understand how parent POMs work. It also introduces many graphs to visualize the described. The parent POM used within the Dataverse codebase is explained: what it does, how it's connected to other parts etc. --- .../source/developers/dependencies.rst | 238 +++++++++++++++--- 1 file changed, 207 insertions(+), 31 deletions(-) diff --git a/doc/sphinx-guides/source/developers/dependencies.rst b/doc/sphinx-guides/source/developers/dependencies.rst index 564d47c5972..46fe397bc5a 100644 --- a/doc/sphinx-guides/source/developers/dependencies.rst +++ b/doc/sphinx-guides/source/developers/dependencies.rst @@ -3,16 +3,21 @@ Dependency Management ===================== .. contents:: |toctitle| - :local: + :local: -The Dataverse Software is (currently) a Jakarta EE 8 based application, that uses a lot of additional libraries for special purposes. -This includes features like support for SWORD-API, S3 storage and many others. +The Dataverse Software is (currently) a Jakarta EE 8 based application, that uses a lot of additional libraries for +special purposes. This includes features like support for SWORD-API, S3 storage and many others. + +Besides the code that glues together individual pieces, any developer needs to describe used dependencies for the +Maven-based build system. As is familiar to any Maven user, this happens inside the "Project Object Model" (POM). + +Recursive and convergent dependency resolution makes dependency management with Maven quite easy. But sometimes, in +projects with many complex dependencies like the Dataverse software, you have to help Maven make the right choices. 
+ +Maven can foster good development practices by enabling modulithic (modular monolithic) architecture: splitting +functionalities into different Maven submodules while expressing dependencies between them. But there's more: the +parent-child model allows creating consistent dependency versioning (see below) within children. -Besides the code that glues together the single pieces, any developer needs to describe used dependencies for the -Maven-based build system. As is familiar to any Maven user, this happens inside the "Project Object Model" (POM) living in -``pom.xml`` at the root of the project repository. Recursive and convergent dependency resolution makes dependency -management with Maven very easy. But sometimes, in projects with many complex dependencies like the Dataverse Software, you have -to help Maven make the right choices. Terms ----- @@ -23,7 +28,7 @@ As a developer, you should familiarize yourself with the following terms: - **Transitive dependencies**: things *others use* for things you use, pulled in recursively. See also: `Maven docs `_. -.. graphviz:: + .. graphviz:: digraph { rankdir="LR"; @@ -44,6 +49,93 @@ As a developer, you should familiarize yourself with the following terms: yc -> dtz; } +- **Project Object Model** (POM): the basic XML file unit to describe a Maven-based project. +- **Bill Of Materials** (BOM): larger projects like Payara, Amazon SDK etc provide lists of their direct dependencies. + This comes in handy when adding these dependencies (transitive for us) as direct dependencies, see below. + + .. 
graphviz:: + + digraph { + rankdir="TD"; + node [fontsize=10] + edge [fontsize=8] + + msp [label="Maven Super POM"] + sp [label="Your POM"] + bom [label="Some BOM"] + td [label="Direct & Transitive\nDependency"] + + msp -> sp [label="inherit", dir="back"]; + bom -> sp [label="import", dir="back"]; + bom -> td [label="depend on"]; + sp -> td [label="depend on\n(same version)", constraint=false]; + } + +- **Parent POM**, **Super POM**: any project may be a child of a parent. + + Projects silently inherit from a "super POM", which is the global Maven standard parent POM. + Children may also be aggregated by a parent (without them knowing) for convenient builds of larger projects. + + .. graphviz:: + + digraph { + rankdir="TD"; + node [fontsize=10] + edge [fontsize=8] + + msp [label="Maven Super POM"] + ap [label="Any POM"] + msp -> ap [label="inherit", dir="back"]; + + pp [label="Parent 1 POM"] + cp1 [label="Submodule 1 POM"] + cp2 [label="Submodule 2 POM"] + + msp -> pp [label="inherit", dir="back", constraint=false]; + pp -> cp1 [label="aggregate"]; + pp -> cp2 [label="aggregate"]; + } + + Children may inherit dependencies, properties, settings, plugins etc. from the parent (making it possible to share + common grounds). Both approaches may be combined. Children may import as many BOMs as they want, but can have only a + single parent to inherit from at a time. + + .. 
graphviz:: + + digraph { + rankdir="TD"; + node [fontsize=10] + edge [fontsize=8] + + msp [label="Maven Super POM"] + pp [label="Parent POM"] + cp1 [label="Submodule 1 POM"] + cp2 [label="Submodule 2 POM"] + + msp -> pp [label="inherit", dir="back", constraint=false]; + pp -> cp1 [label="aggregate"]; + pp -> cp2 [label="aggregate"]; + cp1 -> pp [label="inherit"]; + cp2 -> pp [label="inherit"]; + + d [label="Dependency"] + pp -> d [label="depends on"] + cp1 -> d [label="inherit:\ndepends on", style=dashed]; + cp2 -> d [label="inherit:\ndepends on", style=dashed]; + } + +- **Modules**: when using parents and children, these are called "modules" officially, each having their own POM. + + Using modules allows bundling different aspects of (Dataverse) software in their own domains, with their own + behaviour, dependencies etc. Parent modules allow for sharing of common settings, properties, dependencies and more. + Submodules may also be used as parent modules for a lower level of submodules. + + Maven modules within the same software project may also depend on each other, allowing to create complex structures + of packages and projects. Each module may be released on their own (e. g. on Maven Central) and other projects may + rely on and reuse them. This is especially useful for parent POMs: they may be reused as BOMs or to share a standard + between independent software projects. + + Direct dependencies ------------------- @@ -62,9 +154,8 @@ Within the POM, any direct dependencies reside within the ```` tag Anytime you add a ````, Maven will try to fetch it from defined/configured repositories and use it -within the build lifecycle. You have to define a ````, but ```` is optional for ``compile``. -(See `Maven docs: Dep. Scope `_) - +within the build lifecycle. You have to define a ```` (note exception below), but ```` is optional for +``compile``. (See `Maven docs: Dep. 
Scope `_) During fetching, Maven will analyse all transitive dependencies (see graph above) and, if necessary, fetch those, too. Everything downloaded once is cached locally by default, so nothing needs to be fetched again and again, as long as the @@ -73,13 +164,24 @@ dependency definition does not change. **Rules to follow:** 1. You should only use direct dependencies for **things you are actually using** in your code. -2. **Clean up** direct dependencies no longer in use. It will bloat the deployment package otherwise! -3. Care about the **scope**. Do not include "testing only" dependencies in the package - it will hurt you in IDEs and bloat things. [#f1]_ -4. Avoid using different dependencies for the **same purpose**, e. g. different JSON parsing libraries. -5. Refactor your code to **use Jakarta EE** standards as much as possible. -6. When you rely on big SDKs or similar big cool stuff, try to **include the smallest portion possible**. Complete SDK +2. When declaring a direct dependency with its **version** managed by ````, a BOM or parent POM, you + may not provide one unless you want to explicitly override! +3. **Clean up** direct dependencies no longer in use. It will bloat the deployment package otherwise! +4. Care about the **scope** [#f1]_: + + * Do not include "testing only" dependencies in the final package - it will hurt you in IDEs and bloat things. + There is scope ``test`` for this! + * Make sure to use the ``runtime`` scope when you need to ensure a library is present on our classpath at runtime. + An example is the SLF4J JUL bridge: we want to route logs from SLF4J into ``java.util.logging``, so it needs to be + present on the classpath, although we aren't using SLF4J unlike some of our dependencies. + * Some dependencies might be ``provided`` by the runtime environment. Good example: everything from Jakarta EE! + We use the Payara BOM to ensure using the same version during development and runtime. + +5. 
Avoid using different dependencies for the **same purpose**, e. g. different JSON parsing libraries. +6. Refactor your code to **use Jakarta EE** standards as much as possible. +7. When you rely on big SDKs or similar big cool stuff, try to **include the smallest portion possible**. Complete SDK bundles are typically heavyweight and most of the time unnecessary. -7. **Don't include transitive dependencies.** [#f2]_ +8. **Don't include transitive dependencies.** [#f2]_ * Exception: if you are relying on it in your code (see *Z* in the graph above), you must declare it. See below for proper handling in these (rare) cases. @@ -92,8 +194,8 @@ Maven is comfortable for developers; it handles recursive resolution, downloadin However, as life is a box of chocolates, you might find yourself in *version conflict hell* sooner than later without even knowing, but experiencing unintended side effects. -When you look at the graph above, imagine *B* and *TB* rely on different *versions* of *TC*. How does Maven decide -which version it will include? Easy: the dependent version of the nearest version wins: +When you look at the topmost graph above, imagine *B* and *TB* rely on different *versions* of *TC*. How does Maven +decide which version it will include? Easy: the dependent version of the nearest version wins: .. graphviz:: @@ -110,8 +212,8 @@ which version it will include? Easy: the dependent version of the nearest versio yc -> dtz2; } -In this case, version "2.0" will be included. If you know something about semantic versioning, a red alert should ring in your mind right now. -How do we know that *B* is compatible with *Z v2.0* when depending on *Z v1.0*? +In this case, version "2.0" will be included. If you know something about semantic versioning, a red alert should ring +in your mind right now. How do we know that *B* is compatible with *Z v2.0* when depending on *Z v1.0*? Another scenario getting us in trouble: indirect use of transitive dependencies. 
Imagine the following: we rely on *Z* in our code, but do not include a direct dependency for it within the POM. Now *B* is updated and removed its dependency @@ -130,15 +232,24 @@ Managing transitive dependencies in ``pom.xml`` Maven can manage versions of transitive dependencies in four ways: -1. Make a transitive-only dependency not used in your code a direct one and add a ```` tag. - Typically a bad idea, don't do that. -2. Use ```` or ```` tags on direct dependencies that request the transitive dependency. - *Last resort*, you really should avoid this. Not explained or used here. - `See Maven docs `_. -3. Explicitly declare the transitive dependency in ```` and add a ```` tag. -4. For more complex transitive dependencies, reuse a "Bill of Materials" (BOM) within ```` - and add a ```` tag. Many bigger and standard use projects provide those, making the POM much less bloated - compared to adding every bit yourself. +.. list-table:: + :align: left + :stub-columns: 1 + :widths: 12 40 40 + + * - Safe Good Practice + - (1) Explicitly declare the transitive dependency in ```` with a ```` tag. + - (2) For more complex transitive dependencies, reuse a "Bill of Materials" (BOM) within ````. + Many bigger and standard use projects provide those, making the POM much less bloated compared to adding every bit yourself. + * - Better Avoid or Don't + - (3) Use ```` or ```` tags on direct dependencies that request the transitive dependency. + *Last resort*, you really should avoid this. Not explained or used here, but sometimes unavoidable. + `See Maven docs `_. + - (4) Make a transitive-only dependency not used in your code a direct one and add a ```` tag. + Typically a bad idea, don't do that. + +**Note:** when the same transitive dependency is used in multiple Maven modules of a software project, it might be added +to a common ```` section of an inherited parent POM instead. (Overrides are still possible.) 
A reduced example, only showing bits relevant to the above cases and usage of an explicit transitive dep directly: @@ -262,6 +373,71 @@ Typically you will skip the addition of the central repository, but adding it to dependencies are first looked up there (which in theory can speed up downloads). You should keep in mind that repositories are used in the order they appear. + +Dataverse Parent POM +-------------------- + +Within ``modules/dataverse-parent`` you may find the parent POM for the Dataverse codebase. It serves several +purposes: + +1. Provide the common version number for a Dataverse release (may be overridden where necessary) +2. Provide common metadata necessary for releasing modules to repositories like Maven Central +3. Declare aggregated submodules via ``<modules>``. +4. Collate common BOMs and transitive dependencies within ``<dependencyManagement>``. + (Remember: a direct dependency declaration may omit the version element when defined in that area!) +5. Collect common ``<properties>`` regarding the Maven project (encoding, ...), dependency versions, target Java version, etc. +6. Gather common ``<repositories>`` and ``<pluginRepositories>`` - no need to repeat those in submodules. +7. Make submodules use current Maven plugin release versions via ``<pluginManagement>``. + +As of writing this 2022-02-10, our parent module looks like this: + +.. 
graphviz:: + + digraph { + rankdir="TD"; + node [fontsize=10] + edge [fontsize=8] + + dvp [label="Dataverse Parent"] + dvw [label="Submodule:\nDataverse WAR"] + zip [label="Submodule:\nZipdownloader JAR"] + + dvw -> dvp [label="inherit"]; + dvp -> dvw [label="aggregate"]; + zip -> dvp [label="inherit"]; + dvp -> zip [label="aggregate"]; + + pay [label="Payara BOM"] + aws [label="AWS SDK BOM"] + ggl [label="Google Cloud BOM"] + tc [label="Testcontainers BOM"] + td [label="Multiple (transitive) dependencies\n(PSQL, Logging, Apache Commons, ...)"] + + dvp -> td [label="manage"]; + + pay -> dvp [label="import", dir="back"]; + aws -> dvp [label="import", dir="back"]; + ggl -> dvp [label="import", dir="back"]; + tc -> dvp [label="import", dir="back"]; + + } + +The codebase is structured like this: + +.. code-block:: + + # Dataverse WAR Module + ├── modules # + │ └── dataverse-parent # Dataverse Parent Module + ├── pom.xml # (POM file of WAR module) + └── scripts # + └── zipdownload # Zipdownloader JAR Module + +- Any developer cloning the project and running ``mvn`` within the project root will interact with the Dataverse WAR + module, which is the same behaviour since Dataverse 4.0 has been released. +- Running ``mvn`` targets within the parent module will execute all aggregated submodules in one go. + + ---- .. 
rubric:: Footnotes From b4166180669007e7fef2db7e36e48c108e401bc7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Feb 2022 18:12:57 +0100 Subject: [PATCH 18/29] build(zipdownloader): change version number to experimental #8394 This is in accordance with @landreev --- scripts/zipdownload/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/zipdownload/pom.xml b/scripts/zipdownload/pom.xml index 13f7a056cc9..efd028c1089 100644 --- a/scripts/zipdownload/pom.xml +++ b/scripts/zipdownload/pom.xml @@ -11,7 +11,7 @@ zipdownloader - 1.0.0 + 0.0.1 jar From c91c0713d5ff0ab3762568ae6da1024b811e6db8 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Feb 2022 18:13:21 +0100 Subject: [PATCH 19/29] refactor(zipdownloader): adapt cgi-bin script to name and version of JAR #8394 --- scripts/zipdownload/cgi-bin/zipdownload | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/zipdownload/cgi-bin/zipdownload b/scripts/zipdownload/cgi-bin/zipdownload index c2a9db8ca9b..ff9d6a53a24 100644 --- a/scripts/zipdownload/cgi-bin/zipdownload +++ b/scripts/zipdownload/cgi-bin/zipdownload @@ -8,4 +8,4 @@ PGUSER="dvnapp"; export PGUSER PGDB="dvndb"; export PGDB PGPW="xxxxx"; export PGPW -java -Ddb.serverName=$PGHOST -Ddb.portNumber=$PGPORT -Ddb.user=$PGUSER -Ddb.databaseName=$PGDB -Ddb.password=$PGPW -jar ZipDownloadService-v1.0.0.jar \ No newline at end of file +java -Ddb.serverName=$PGHOST -Ddb.portNumber=$PGPORT -Ddb.user=$PGUSER -Ddb.databaseName=$PGDB -Ddb.password=$PGPW -jar zipdownloader-0.0.1.jar \ No newline at end of file From 429ec3fbd4c64ac8fccce2c88fb86395d41c5be7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Feb 2022 18:16:44 +0100 Subject: [PATCH 20/29] docs(zipdownloader): refactor slightly for name and version, crosslink #8394 --- .../source/installation/advanced.rst | 64 ++++++++++--------- .../source/installation/config.rst | 7 +- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git 
a/doc/sphinx-guides/source/installation/advanced.rst b/doc/sphinx-guides/source/installation/advanced.rst index 6e6d24b0526..adb59941968 100644 --- a/doc/sphinx-guides/source/installation/advanced.rst +++ b/doc/sphinx-guides/source/installation/advanced.rst @@ -68,43 +68,45 @@ Once a standardized version of you Custom Terms are registered as a license, an Optional Components ------------------- +.. _zipdownloader: + Standalone "Zipper" Service Tool ++++++++++++++++++++++++++++++++ -As of Dataverse Software 5.0 we offer an experimental optimization for the multi-file, download-as-zip functionality. If this option -(``:CustomZipDownloadServiceUrl``) is enabled, instead of enforcing -the size limit on multi-file zipped downloads (as normally specified -by the option ``:ZipDownloadLimit``), we attempt to serve all the -files that the user requested (that they are authorized to download), -but the request is redirected to a standalone zipper service running -as a cgi-bin executable under Apache. Thus moving these potentially -long-running jobs completely outside the Application Server (Payara); -and preventing worker threads from becoming locked serving them. Since -zipping is also a CPU-intensive task, it is possible to have this -service running on a different host system, freeing the cycles on the -main Application Server. (The system running the service needs to have -access to the database as well as to the storage filesystem, and/or S3 -bucket). - -Please consult the scripts/zipdownload/README.md in the Dataverse Software 5.0+ source tree for more information. - -To install: You can follow the instructions in the file above to build -``ZipDownloadService-v1.0.0.jar``. It will also be available, pre-built as part of the Dataverse Software 5.0 release on GitHub. Copy it, together with the shell -script scripts/zipdownload/cgi-bin/zipdownload to the cgi-bin -directory of the chosen Apache server (/var/www/cgi-bin standard). 
- -Make sure the shell script (zipdownload) is executable, and edit it to configure the -database access credentials. Do note that the executable does not need -access to the entire Dataverse installation database. A security-conscious admin -can create a dedicated database user with access to just one table: -``CUSTOMZIPSERVICEREQUEST``. - -You may need to make extra Apache configuration changes to make sure /cgi-bin/zipdownload is accessible from the outside. -For example, if this is the same Apache that's in front of your Dataverse installation Payara instance, you will need to add another pass through statement to your configuration: +As of Dataverse Software 5.0 we offer an **experimental** optimization for the multi-file, download-as-zip functionality. +If this option (``:CustomZipDownloadServiceUrl``) is enabled, instead of enforcing the size limit on multi-file zipped +downloads (as normally specified by the option ``:ZipDownloadLimit``), we attempt to serve all the files that the user +requested (that they are authorized to download), but the request is redirected to a standalone zipper service running +as a cgi-bin executable under Apache. + +Thus moving these potentially long-running jobs completely outside the Application Server (Payara); and preventing +worker threads from becoming locked serving them. Since zipping is also a CPU-intensive task, it is possible to have +this service running on a different host system, freeing the cycles on the main Application Server. (The system running +the service needs to have access to the database as well as to the storage filesystem, and/or S3 bucket). + +Please consult the `README at scripts/zipdownload `_ +in the Dataverse Software 5.0+ source tree for more information. + +To install: + +1. Follow the instructions in the file above to build ``zipdownloader-0.0.1.jar``. (Also available from + `zipper.zip `_ of the + `Dataverse Software 5.0 release on GitHub `_). +2. 
Copy it, together with the shell script :download:`cgi-bin/zipdownload <../../../../scripts/zipdownload/cgi-bin/zipdownload>` + to the ``cgi-bin`` directory of the chosen Apache server (/var/www/cgi-bin standard). +3. Make sure the shell script (``zipdownload``) is executable, and edit it to configure the database access credentials. + Do note that the executable does not need access to the entire Dataverse installation database. A security-conscious + admin can create a dedicated database user with access to just one table: ``CUSTOMZIPSERVICEREQUEST``. + +You may need to make extra Apache configuration changes to make sure ``/cgi-bin/zipdownload`` is accessible from the outside. +For example, if this is the same Apache that's in front of your Dataverse installation Payara instance, you will need to +add another pass through statement to your configuration: ``ProxyPassMatch ^/cgi-bin/zipdownload !`` -Test this by accessing it directly at ``/cgi-bin/download``. You should get a ``404 No such download job!``. If instead you are getting an "internal server error", this may be an SELinux issue; try ``setenforce Permissive``. If you are getting a generic Dataverse collection "not found" page, review the ``ProxyPassMatch`` rule you have added. +Test this by accessing it directly at ``/cgi-bin/download``. You should get a ``404 No such download job!``. +If instead you are getting an "internal server error", this may be an SELinux issue; try ``setenforce Permissive``. +If you are getting a generic Dataverse collection "not found" page, review the ``ProxyPassMatch`` rule you have added. 
To activate in your Dataverse installation:: diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 0a52a8a2fef..7702eb41157 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2321,7 +2321,10 @@ If you don’t want date facets to be sorted chronologically, set: :CustomZipDownloadServiceUrl ++++++++++++++++++++++++++++ -The location of the "Standalone Zipper" service. If this option is specified, the Dataverse installation will be redirecing bulk/mutli-file zip download requests to that location, instead of serving them internally. See the "Advanced" section of the Installation guide for information on how to install the external zipper. (This is still an experimental feature, as of Dataverse Software 5.0). +The location of the "Standalone Zipper" service. If this option is specified, the Dataverse installation will be +redirecing bulk/mutli-file zip download requests to that location, instead of serving them internally. +See :ref:`zipdownloader` of the Advanced Installation guide for information on how to install the external zipper. +(This is still an **experimental** feature, as of Dataverse Software 5.0). 
To enable redirects to the zipper installed on the same server as the main Dataverse Software application: @@ -2329,7 +2332,7 @@ To enable redirects to the zipper installed on the same server as the main Datav To enable redirects to the zipper on a different server: -``curl -X PUT -d 'https://zipper.example.edu/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl`` +``curl -X PUT -d 'https://zipper.example.edu/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl`` :ArchiverClassName ++++++++++++++++++ From 3422d13d20d440a7877c9ed8097573f309ae22b0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 11 Feb 2022 10:33:52 +0100 Subject: [PATCH 21/29] docs(releases): adapt for parent module #8394 As the version element has been moved from the WAR POM to the Parent POM and been made flexible via the 'revision' property, the release guide needed to be adapted accordingly. --- doc/sphinx-guides/source/developers/making-releases.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index cbd88b1a357..dd25574ccbf 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -22,7 +22,7 @@ Make the following changes in the release branch: Increment the version number to the milestone (e.g. 
4.6.2) in the following two files: -- pom.xml +- modules/dataverse-parent/pom.xml -> ```` -> ```` - doc/sphinx-guides/source/conf.py (two places) Add the version being released to the lists in the following two files: @@ -31,6 +31,7 @@ Add the version being released to the lists in the following two files: - scripts/database/releases.txt Here's an example commit where three of the four files above were updated at once: https://github.com/IQSS/dataverse/commit/99e23f96ec362ac2f524cb5cd80ca375fa13f196 +(Note: the version has been moved to a property in parent module since this commit was created) 2. Check in the Changes Above... ================================ From e9a97ebd3fe78d507ead222c8868529fc561f156 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 11 Feb 2022 18:08:16 +0100 Subject: [PATCH 22/29] build(war): make logging libs runtime scoped #8394 --- pom.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pom.xml b/pom.xml index 1d59e8cfb35..b5614f127fe 100644 --- a/pom.xml +++ b/pom.xml @@ -57,6 +57,7 @@ org.slf4j slf4j-jdk14 + runtime @@ -417,6 +418,7 @@ ch.qos.reload4j reload4j ${reload4j.version} + runtime From 488a39258290cc498047fed26c7bfcd424050179 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 14 Feb 2022 16:46:31 -0500 Subject: [PATCH 23/29] doc tweaks for #8394 --- .../source/developers/dependencies.rst | 58 ++++++++++--------- doc/sphinx-guides/source/developers/intro.rst | 2 + .../source/developers/making-releases.rst | 2 +- .../source/installation/config.rst | 2 +- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/doc/sphinx-guides/source/developers/dependencies.rst b/doc/sphinx-guides/source/developers/dependencies.rst index 46fe397bc5a..31492624502 100644 --- a/doc/sphinx-guides/source/developers/dependencies.rst +++ b/doc/sphinx-guides/source/developers/dependencies.rst @@ -5,18 +5,21 @@ Dependency Management .. 
contents:: |toctitle| :local: -The Dataverse Software is (currently) a Jakarta EE 8 based application, that uses a lot of additional libraries for -special purposes. This includes features like support for SWORD-API, S3 storage and many others. +Introduction +------------ + +As explained under :ref:`core-technologies`, the Dataverse Software is a Jakarta EE 8 based application that uses a lot of additional libraries for +special purposes. This includes support for the SWORD API, S3 storage, and many other features. -Besides the code that glues together individual pieces, any developer needs to describe used dependencies for the -Maven-based build system. As is familiar to any Maven user, this happens inside the "Project Object Model" (POM). +Besides the code that glues together individual pieces, any developer needs to describe dependencies used within the +Maven-based build system. As is familiar to any Maven user, this happens inside the "Project Object Model" (POM) file, ``pom.xml``. -Recursive and convergent dependency resolution makes dependency management with Maven quite easy. But sometimes, in -projects with many complex dependencies like the Dataverse software, you have to help Maven make the right choices. +Recursive and convergent dependency resolution makes dependency management with Maven quite easy, but sometimes, in +projects with many complex dependencies like the Dataverse Software, you have to help Maven make the right choices. Maven can foster good development practices by enabling modulithic (modular monolithic) architecture: splitting functionalities into different Maven submodules while expressing dependencies between them. But there's more: the -parent-child model allows to create consistent dependency versioning (see below) within childs. +parent-child model allows you to create consistent dependency versioning (see below) within children. 
Terms @@ -50,7 +53,7 @@ As a developer, you should familiarize yourself with the following terms: } - **Project Object Model** (POM): the basic XML file unit to describe a Maven-based project. -- **Bill Of Materials** (BOM): larger projects like Payara, Amazon SDK etc provide lists of their direct dependencies. +- **Bill Of Materials** (BOM): larger projects like Payara, Amazon SDK etc. provide lists of their direct dependencies. This comes in handy when adding these dependencies (transitive for us) as direct dependencies, see below. .. graphviz:: @@ -97,7 +100,7 @@ As a developer, you should familiarize yourself with the following terms: } Children may inherit dependencies, properties, settings, plugins etc. from the parent (making it possible to share - common grounds). Both approaches may be combined. Children may import as many BOMs as they want, but can have only a + common ground). Both approaches may be combined. Children may import as many BOMs as they want, but can have only a single parent to inherit from at a time. .. graphviz:: @@ -127,7 +130,7 @@ As a developer, you should familiarize yourself with the following terms: - **Modules**: when using parents and children, these are called "modules" officially, each having their own POM. Using modules allows bundling different aspects of (Dataverse) software in their own domains, with their own - behaviour, dependencies etc. Parent modules allow for sharing of common settings, properties, dependencies and more. + behavior, dependencies etc. Parent modules allow for sharing of common settings, properties, dependencies and more. Submodules may also be used as parent modules for a lower level of submodules. Maven modules within the same software project may also depend on each other, allowing to create complex structures @@ -135,6 +138,7 @@ As a developer, you should familiarize yourself with the following terms: rely on and reuse them. 
This is especially useful for parent POMs: they may be reused as BOMs or to share a standard between independent software projects. + Maven modules should not be confused with the `Java Platform Module System (JPMS) `_ introduced in Java 9 under Project Jigsaw. Direct dependencies ------------------- @@ -157,7 +161,7 @@ Anytime you add a ````, Maven will try to fetch it from defined/conf within the build lifecycle. You have to define a ```` (note exception below), but ```` is optional for ``compile``. (See `Maven docs: Dep. Scope `_) -During fetching, Maven will analyse all transitive dependencies (see graph above) and, if necessary, fetch those, too. +During fetching, Maven will analyze all transitive dependencies (see graph above) and, if necessary, fetch those too. Everything downloaded once is cached locally by default, so nothing needs to be fetched again and again, as long as the dependency definition does not change. @@ -173,7 +177,7 @@ dependency definition does not change. There is scope ``test`` for this! * Make sure to use the ``runtime`` scope when you need to ensure a library is present on our classpath at runtime. An example is the SLF4J JUL bridge: we want to route logs from SLF4J into ``java.util.logging``, so it needs to be - present on the classpath, although we aren't using SLF4J unlike some of our dependencies. + present on the classpath, although we aren't using SLF4J, unlike some of our dependencies. * Some dependencies might be ``provided`` by the runtime environment. Good example: everything from Jakarta EE! We use the Payara BOM to ensure using the same version during development and runtime. @@ -221,10 +225,10 @@ on *Z*. You definitely don't want to head down that road. **Follow the rules to be safe:** -1. Do **not use transitive deps implicit**: add a direct dependency for transitive deps you re-use in your code. -2. On every build check that no implicit usage was added by accident. +1. 
Do **not use transitive deps implicitly**: add a direct dependency for transitive deps you re-use in your code. +2. On every build, check that no implicit usage was added by accident. 3. **Explicitly declare versions** of transitive dependencies in use by multiple direct dependencies. -4. On every build check that there are no convergence problems hiding in the shadows. +4. On every build, check that there are no convergence problems hiding in the shadows. 5. **Do special tests** on every build to verify these explicit combinations work. Managing transitive dependencies in ``pom.xml`` @@ -240,13 +244,13 @@ Maven can manage versions of transitive dependencies in four ways: * - Safe Good Practice - (1) Explicitly declare the transitive dependency in ```` with a ```` tag. - (2) For more complex transitive dependencies, reuse a "Bill of Materials" (BOM) within ````. - Many bigger and standard use projects provide those, making the POM much less bloated compared to adding every bit yourself. + Many bigger and standard use projects provide them, making the POM much less bloated compared to adding every bit yourself. * - Better Avoid or Don't - (3) Use ```` or ```` tags on direct dependencies that request the transitive dependency. *Last resort*, you really should avoid this. Not explained or used here, but sometimes unavoidable. `See Maven docs `_. - (4) Make a transitive-only dependency not used in your code a direct one and add a ```` tag. - Typically a bad idea, don't do that. + Typically a bad idea; don't do that. **Note:** when the same transitive dependency is used in multiple Maven modules of a software project, it might be added to a common ```` section of an inherited parent POM instead. (Overrides are still possible.) @@ -325,12 +329,12 @@ Helpful tools Maven provides some plugins that are of great help to detect possible conflicts and implicit usage. -For *implicit usage detection*, use `mvn dependency:analyze`. Examine the output with great care. 
Sometimes you will +For *implicit usage detection*, use ``mvn dependency:analyze``. Examine the output with great care. Sometimes you will see implicit usages that do no harm, especially if you are using bigger SDKs having some kind of `core` package. This will also report on any direct dependency which is not in use and can be removed from the POM. Again, do this with great caution and double check. -If you want to see the dependencies both direct and transitive in a *dependency tree format*, use `mvn dependency:tree`. +If you want to see the dependencies both direct and transitive in a *dependency tree format*, use ``mvn dependency:tree``. This will however not help you with detecting possible version conflicts. For this you need to use the `Enforcer Plugin `_ with its built in `dependency convergence rule @@ -339,7 +343,7 @@ This will however not help you with detecting possible version conflicts. For th Repositories ------------ -Maven receives all dependencies from *repositories*. Those can be public like `Maven Central `_ +Maven receives all dependencies from *repositories*. These can be public like `Maven Central `_ and others, but you can also use a private repository on premises or in the cloud. Last but not least, you can use local repositories, which can live next to your application code (see ``local_lib`` dir within the Dataverse Software codebase). @@ -377,11 +381,11 @@ are used in the order they appear. Dataverse Parent POM -------------------- -Within ``modules/dataverse-parent`` you may find the parent POM for the Dataverse codebase. It serves for different +Within ``modules/dataverse-parent`` you will find the parent POM for the Dataverse codebase. It serves different purposes: -1. Provide the common version number for a Dataverse release (may be overriden where necessary) -2. Provide common metadata necessary for releasing modules to repositories like Maven Central +1. 
Provide the common version number for a Dataverse release (may be overridden where necessary). +2. Provide common metadata necessary for releasing modules to repositories like Maven Central. 3. Declare aggregated submodules via ````. 4. Collate common BOMs and transitive dependencies within ````. (Remember: a direct dependency declaration may omit the version element when defined in that area!) @@ -389,7 +393,7 @@ purposes: 6. Gather common ```` and ```` - no need to repeat those in submodules. 7. Make submodules use current Maven plugin release versions via ````. -As of writing this 2022-02-10, our parent module looks like this: +As of this writing (2022-02-10), our parent module looks like this: .. graphviz:: @@ -427,14 +431,16 @@ The codebase is structured like this: .. code-block:: # Dataverse WAR Module + ├── pom.xml # (POM file of WAR module) ├── modules # │ └── dataverse-parent # Dataverse Parent Module - ├── pom.xml # (POM file of WAR module) + │ └── pom.xml # (POM file of Parent Module) └── scripts # └── zipdownload # Zipdownloader JAR Module + └── pom.xml # (POM file of Zipdownloader Module) - Any developer cloning the project and running ``mvn`` within the project root will interact with the Dataverse WAR - module, which is the same behaviour since Dataverse 4.0 has been released. + module, which is the same behavior since Dataverse 4.0 has been released. - Running ``mvn`` targets within the parent module will execute all aggregated submodules in one go. 
diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst index 8fc0c679a8b..c8da72da8b5 100755 --- a/doc/sphinx-guides/source/developers/intro.rst +++ b/doc/sphinx-guides/source/developers/intro.rst @@ -21,6 +21,8 @@ Getting Help If you have any questions at all, please reach out to other developers via the channels listed in https://github.com/IQSS/dataverse/blob/develop/CONTRIBUTING.md such as http://chat.dataverse.org, the `dataverse-dev `_ mailing list, `community calls `_, or support@dataverse.org. +.. _core-technologies: + Core Technologies ----------------- diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index dd25574ccbf..064ed6f1b78 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -31,7 +31,7 @@ Add the version being released to the lists in the following two files: - scripts/database/releases.txt Here's an example commit where three of the four files above were updated at once: https://github.com/IQSS/dataverse/commit/99e23f96ec362ac2f524cb5cd80ca375fa13f196 -(Note: the version has been moved to a property in parent module since this commit was created) +(Note: the version has been moved to a property in parent module since this commit was created.) 2. Check in the Changes Above... ================================ diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 7702eb41157..d685fd8b4eb 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2322,7 +2322,7 @@ If you don’t want date facets to be sorted chronologically, set: ++++++++++++++++++++++++++++ The location of the "Standalone Zipper" service. 
If this option is specified, the Dataverse installation will be -redirecing bulk/mutli-file zip download requests to that location, instead of serving them internally. +redirecting bulk/multi-file zip download requests to that location, instead of serving them internally. See :ref:`zipdownloader` of the Advanced Installation guide for information on how to install the external zipper. (This is still an **experimental** feature, as of Dataverse Software 5.0). From 73213f4dc8073f6546c294469f864d93f40a7d73 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 15 Feb 2022 10:50:09 +0100 Subject: [PATCH 24/29] docs(dev): clarify nearest dependency wins #8394 --- doc/sphinx-guides/source/developers/dependencies.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/developers/dependencies.rst b/doc/sphinx-guides/source/developers/dependencies.rst index 31492624502..2ba7de03c1a 100644 --- a/doc/sphinx-guides/source/developers/dependencies.rst +++ b/doc/sphinx-guides/source/developers/dependencies.rst @@ -199,7 +199,7 @@ However, as life is a box of chocolates, you might find yourself in *version con knowing, but experiencing unintended side effects. When you look at the topmost graph above, imagine *B* and *TB* rely on different *versions* of *TC*. How does Maven -decide which version it will include? Easy: the dependent version of the nearest version wins: +decide which version it will include? Easy: the "nearest" dependency version wins. The following graph gives an example: .. graphviz:: @@ -220,8 +220,8 @@ In this case, version "2.0" will be included. If you know something about semant in your mind right now. How do we know that *B* is compatible with *Z v2.0* when depending on *Z v1.0*? Another scenario getting us in trouble: indirect use of transitive dependencies. Imagine the following: we rely on *Z* -in our code, but do not include a direct dependency for it within the POM. 
Now *B* is updated and removed its dependency -on *Z*. You definitely don't want to head down that road. +in our code, but do not include a direct dependency for it within the POM. Now assume *B* is updated and removes its +dependency on *Z*. You definitely don't want to head down that road. **Follow the rules to be safe:** From b63a0101bc1764a34dd71019822e52c6ac9c3983 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 15 Feb 2022 10:14:22 -0500 Subject: [PATCH 25/29] simplify wording #8394 --- doc/sphinx-guides/source/developers/dependencies.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/dependencies.rst b/doc/sphinx-guides/source/developers/dependencies.rst index 2ba7de03c1a..e35a07949e1 100644 --- a/doc/sphinx-guides/source/developers/dependencies.rst +++ b/doc/sphinx-guides/source/developers/dependencies.rst @@ -244,7 +244,7 @@ Maven can manage versions of transitive dependencies in four ways: * - Safe Good Practice - (1) Explicitly declare the transitive dependency in ```` with a ```` tag. - (2) For more complex transitive dependencies, reuse a "Bill of Materials" (BOM) within ````. - Many bigger and standard use projects provide them, making the POM much less bloated compared to adding every bit yourself. + Many bigger projects provide them, making the POM much less bloated compared to adding every bit yourself. * - Better Avoid or Don't - (3) Use ```` or ```` tags on direct dependencies that request the transitive dependency. *Last resort*, you really should avoid this. Not explained or used here, but sometimes unavoidable. 
From 8b30406052a1c87d8f227d40d7ad86f7e516f245 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 15 Feb 2022 15:05:38 -0500 Subject: [PATCH 26/29] clarify dep resolution #8394 --- doc/sphinx-guides/source/developers/dependencies.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/dependencies.rst b/doc/sphinx-guides/source/developers/dependencies.rst index e35a07949e1..65edfa3ffac 100644 --- a/doc/sphinx-guides/source/developers/dependencies.rst +++ b/doc/sphinx-guides/source/developers/dependencies.rst @@ -199,7 +199,7 @@ However, as life is a box of chocolates, you might find yourself in *version con knowing, but experiencing unintended side effects. When you look at the topmost graph above, imagine *B* and *TB* rely on different *versions* of *TC*. How does Maven -decide which version it will include? Easy: the "nearest" dependency version wins. The following graph gives an example: +decide which version it will include? Easy: the version of the dependency nearest to our project ("Your Code") wins. The following graph gives an example: .. graphviz:: From 03e45bea05aa8b6007658f471fc8f940325cec98 Mon Sep 17 00:00:00 2001 From: landreev Date: Tue, 22 Feb 2022 19:10:40 -0500 Subject: [PATCH 27/29] Minor changes to the Zipper installation section Mentioned the name change; removed the link to the pre-built binary distribution in the 5.0 release. --- doc/sphinx-guides/source/installation/advanced.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/advanced.rst b/doc/sphinx-guides/source/installation/advanced.rst index adb59941968..8662f0b6a93 100644 --- a/doc/sphinx-guides/source/installation/advanced.rst +++ b/doc/sphinx-guides/source/installation/advanced.rst @@ -89,11 +89,13 @@ in the Dataverse Software 5.0+ source tree for more information. To install: -1. Follow the instructions in the file above to build ``zipdownloader-0.0.1.jar``. 
(Also available from - `zipper.zip `_ of the - `Dataverse Software 5.0 release on GitHub `_). +1. Follow the instructions in the file above to build ``zipdownloader-0.0.1.jar``. Please note that the package name and + the version were changed as of the release 5.10, as part of an overall cleanup and reorganization of the project + tree. In the releases 5.0-5.9 it existed under the name ``ZipDownloadService-v1.0.0``. (A pre-built jar file was + distributed under that name as part of the 5.0 release on GitHub. Aside from the name change, there have been no + changes in the functionality of the tool). 2. Copy it, together with the shell script :download:`cgi-bin/zipdownload <../../../../scripts/zipdownload/cgi-bin/zipdownload>` - to the ``cgi-bin`` directory of the chosen Apache server (/var/www/cgi-bin standard). + to the ``cgi-bin`` directory of the chosen Apache server (``/var/www/cgi-bin`` standard). 3. Make sure the shell script (``zipdownload``) is executable, and edit it to configure the database access credentials. Do note that the executable does not need access to the entire Dataverse installation database. A security-conscious admin can create a dedicated database user with access to just one table: ``CUSTOMZIPSERVICEREQUEST``. From dacdbd26d66c84ec462e569bc73d3f736f1b7321 Mon Sep 17 00:00:00 2001 From: landreev Date: Tue, 22 Feb 2022 19:15:59 -0500 Subject: [PATCH 28/29] another cosmetic fix I'm assuming that was unintentional, splitting the paragraph in two. (Especially with the sentence beginning with "Thus..."). 
--- doc/sphinx-guides/source/installation/advanced.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/advanced.rst b/doc/sphinx-guides/source/installation/advanced.rst index 8662f0b6a93..327b0b3c404 100644 --- a/doc/sphinx-guides/source/installation/advanced.rst +++ b/doc/sphinx-guides/source/installation/advanced.rst @@ -77,10 +77,7 @@ As of Dataverse Software 5.0 we offer an **experimental** optimization for the m If this option (``:CustomZipDownloadServiceUrl``) is enabled, instead of enforcing the size limit on multi-file zipped downloads (as normally specified by the option ``:ZipDownloadLimit``), we attempt to serve all the files that the user requested (that they are authorized to download), but the request is redirected to a standalone zipper service running -as a cgi-bin executable under Apache. - -Thus moving these potentially long-running jobs completely outside the Application Server (Payara); and preventing -worker threads from becoming locked serving them. Since zipping is also a CPU-intensive task, it is possible to have +as a cgi-bin executable under Apache. This moves these potentially long-running jobs completely outside the Application Server (Payara), and prevents worker threads from becoming locked serving them. Since zipping is also a CPU-intensive task, it is possible to have this service running on a different host system, freeing the cycles on the main Application Server. (The system running the service needs to have access to the database as well as to the storage filesystem, and/or S3 bucket). From 09167d5949970fee4e1b75493164f19556bfaf16 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 4 Mar 2022 10:11:05 -0500 Subject: [PATCH 29/29] Revert "chore(jacoco): fix wrongly typed config options of JaCoCo Maven plugin #8394" This reverts commit 424400b4ca694af2201c4c857f0230c646a5b6c4. We believe this is causing our Jenkins job to fail. 
It's showing this: [JaCoCo plugin] Overall coverage: class: 0.0, method: 0.0, line: 0.0, branch: 0.0, instruction: 0.0, complexity: 0.0 When we expect something more like this: [JaCoCo plugin] Overall coverage: class: 73.52321, method: 46.495075, line: 41.614933, branch: 33.08663, instruction: 41.725876, complexity: 31.795692 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index be3600aaa5b..7663bb3e6e6 100644 --- a/pom.xml +++ b/pom.xml @@ -682,8 +682,8 @@ jacoco-maven-plugin ${jacoco.version} - ${basedir}/target/coverage-reports/jacoco-unit.exec - ${basedir}/target/coverage-reports/jacoco-unit.exec + ${basedir}/target/coverage-reports/jacoco-unit.exec + ${basedir}/target/coverage-reports/jacoco-unit.exec