From 9dfda2b5248dca2fb92914d8eba0455730577053 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Oct 2021 13:11:29 +0200 Subject: [PATCH 01/19] refactor(solr): remove external field config file #7903 As of #7865 (merged DV 5.6), the Solr Schema XML file did not include s for metadata schemas from an external file anymore. Instead they had been included. This broke the updateSchemaMDB.sh script, which is #7903 This commit will remove the other include construction for definitions (plus remove the file) and include the content directly in the schema. It also introduced some docs about the whereabouts of these fields plus (most importantly) INCLUDE GUARDS. These are going to be used for new scripting of half-automatic index management. IQSS/dataverse#7903 --- conf/solr/8.8.1/schema.xml | 197 ++++++++++++++++++++++- conf/solr/8.8.1/schema_dv_mdb_fields.xml | 161 ------------------ 2 files changed, 192 insertions(+), 166 deletions(-) delete mode 100644 conf/solr/8.8.1/schema_dv_mdb_fields.xml diff --git a/conf/solr/8.8.1/schema.xml b/conf/solr/8.8.1/schema.xml index c6f6cd37cd6..b4c3a014fc9 100644 --- a/conf/solr/8.8.1/schema.xml +++ b/conf/solr/8.8.1/schema.xml @@ -227,8 +227,181 @@ - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -289,8 +462,22 @@ - - + + + @@ -450,7 +637,7 @@ - + diff --git a/conf/solr/8.8.1/schema_dv_mdb_fields.xml b/conf/solr/8.8.1/schema_dv_mdb_fields.xml deleted file mode 100644 index 3f844c6183c..00000000000 --- a/conf/solr/8.8.1/schema_dv_mdb_fields.xml +++ /dev/null @@ -1,161 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From d74e46347a8e1886866934091f218fa274b9beb6 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Oct 2021 14:11:45 +0200 Subject: [PATCH 02/19] feat(solr): add new schema config script update-field.sh #7903 Removing the old updateSchemaMDB.sh script, as it's broken. It's also of limited use for the container use case it had been invented for. Replacing with a script capable of taking the solr schema bits and stuffing it into a prepared schema.xml file. IQSS/dataverse#7903 --- conf/solr/8.8.1/update-fields.sh | 175 +++++++++++++++++++++++++++++ conf/solr/8.8.1/updateSchemaMDB.sh | 85 -------------- 2 files changed, 175 insertions(+), 85 deletions(-) create mode 100755 conf/solr/8.8.1/update-fields.sh delete mode 100755 conf/solr/8.8.1/updateSchemaMDB.sh diff --git a/conf/solr/8.8.1/update-fields.sh b/conf/solr/8.8.1/update-fields.sh new file mode 100755 index 00000000000..fb349a75e0f --- /dev/null +++ b/conf/solr/8.8.1/update-fields.sh @@ -0,0 +1,175 @@ +#!/usr/bin/env bash + +set -euo pipefail + +#### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### +# This script will +# 1. take a file (or read it from STDIN) with all and definitions +# 2. and replace the sections between the include guards with those in a given +# schema.xml file +# The script validates the presence, uniqueness and order of the include guards. 
+#### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### + + +### Variables +# Internal use only (fork to change) +VERSION="0.1" +INPUT="" +FIELDS="" +COPY_FIELDS="" +TRIGGER_CHAIN=0 + +SOLR_SCHEMA_FIELD_BEGIN_MARK="SCHEMA-FIELDS::BEGIN" +SOLR_SCHEMA_FIELD_END_MARK="SCHEMA-FIELDS::END" +SOLR_SCHEMA_COPYFIELD_BEGIN_MARK="SCHEMA-COPY-FIELDS::BEGIN" +SOLR_SCHEMA_COPYFIELD_END_MARK="SCHEMA-COPY-FIELDS::END" +MARKS_ORDERED="${SOLR_SCHEMA_FIELD_BEGIN_MARK} ${SOLR_SCHEMA_FIELD_END_MARK} ${SOLR_SCHEMA_COPYFIELD_BEGIN_MARK} ${SOLR_SCHEMA_COPYFIELD_END_MARK}" + +### Common functions +function error { + echo "ERROR:" "$@" >&2 + exit 2 +} + +function usage { + cat << EOF +$(basename "$0") ${VERSION} +Usage: $(basename "$0") [-hp] [ schema file ] [ source file ] + +-h Print usage (this text) +-p Chained printing: write all metadata schema related + and present in Solr XML to stdout + +Provide target Solr Schema XML via argument or \$SCHEMA env var. + +Provide source file via argument, \$SOURCE env var or piped input +(wget/curl, chained). Source file = "-" means read STDIN. +EOF + exit 0 +} + +### Options +while getopts ":hp" opt; do + case $opt in + h) usage ;; + p) TRIGGER_CHAIN=1 ;; + \?) echo "Invalid option: -$OPTARG" >&2; exit 1 ;; + :) echo "Option -$OPTARG requires an argument." >&2; exit 1 ;; + esac +done + +# remove all the parsed options +shift $((OPTIND-1)) + +# User overrideable locations +SCHEMA=$(readlink -f "${SCHEMA:-${1:-schema.xml}}") +SOURCE=${SOURCE:-${2:-"-"}} + + +### VERIFY SCHEMA FILE EXISTS AND CONTAINS INCLUDE GUARDS ### +# Check for schema file & writeable +if [ ! 
-w "${SCHEMA}" ]; then + error "Cannot find or write to a XML schema at ${SCHEMA}" +else + # Check schema file for include guards + CHECKS=$( + for MARK in ${MARKS_ORDERED} + do + grep -c "${MARK}" "${SCHEMA}" || error "Missing ${MARK} from ${SCHEMA}" + done + ) + # Check guards are unique (count occurrences and sum calc via bc) + [ "$(echo -n "${CHECKS}" | tr '\n' '+' | sed -e 's#$#\n#' | bc)" -eq 4 ] || \ + error "Some include guards are not unique in ${SCHEMA}" + + # Check guards are in order (line number comparison via bc tricks) + CHECKS=$( + for MARK in ${MARKS_ORDERED} + do + grep -n "${MARK}" "${SCHEMA}" | cut -f 1 -d ":" + done + ) + # Actual comparison of line numbers + [ "$(echo "${CHECKS}" | tr '\n' '<' | sed -e 's#<$#\n#' -e 's#\(<[0-9]\+\)<\([0-9]\+\)#\1 \&\& \2#' | bc)" -eq 1 ] || \ + error "Include guards are not in correct order in ${SCHEMA}" +fi + + +### READ DATA ### +# Switch to standard input if no file present or "-" +if [ -z "${SOURCE}" ] || [ "${SOURCE}" = "-" ]; then + # But ONLY if stdin for this script has not been attached to a terminal, but a pipe + if [ ! -t 0 ]; then + SOURCE="/dev/stdin" + else + error "No data - either provide source file or piped input" + fi +else + # Always make the path absolute + SOURCE=$(readlink -f "${SOURCE}") + # Check the given file for readability and non-zero length + if [ ! -r "${SOURCE}" ] || [ ! 
-s "${SOURCE}" ]; then + error "Cannot read from or empty file ${SOURCE}" + fi +fi +# Read relevant parts only, filter nonsense and avoid huge memory usage +INPUT=$(grep -e "<\(field\|copyField\) .*/>" "${SOURCE}" | sed -e 's#^\s\+##' -e 's#\s\+$##' || true) + + +### DATA HANDLING ### +# Split input into different types +if [ -z "${INPUT}" ]; then + error "No or in input" +else + # Check for definitions (if nomatch, avoid failing pipe) + FIELDS=$(mktemp) + echo "${INPUT}" | grep -e "" | sed -e 's#^# #' > "${FIELDS}" || true + # If file actually contains output, write to schema + if [ -s "${FIELDS}" ]; then + # Use an ed script to replace all + cat << EOF | ed -s -v "${SCHEMA}" +# Mark field begin as 'a' +/${SOLR_SCHEMA_FIELD_BEGIN_MARK}/ka +# Mark field end as 'b' +/${SOLR_SCHEMA_FIELD_END_MARK}/kb +# Delete all between lines a and b +'a+,'b-d +# Read fields file and paste after line a +'ar ${FIELDS} +# Write fields to schema +w +q +EOF + fi + rm "${FIELDS}" + + # Check for definitions (if nomatch, avoid failing pipe) + COPY_FIELDS=$(mktemp) + echo "${INPUT}" | grep -e "" | sed -e 's#^# #' > "${COPY_FIELDS}" || true + # If file actually contains output, write to schema + if [ -s "${COPY_FIELDS}" ]; then + # Use an ed script to replace all + cat << EOF | ed -s "${SCHEMA}" +# Mark copyField begin as 'a' +/${SOLR_SCHEMA_COPYFIELD_BEGIN_MARK}/ka +# Mark copyField end as 'b' +/${SOLR_SCHEMA_COPYFIELD_END_MARK}/kb +# Delete all between lines a and b +'a+,'b-d +# Read fields file and paste after line a +'ar ${COPY_FIELDS} +# Write copyFields to schema +w +q +EOF + fi + rm "${COPY_FIELDS}" +fi + + +### CHAINING OUTPUT +# Scripts following this one might want to use the field definitions now present +if [ "${TRIGGER_CHAIN}" -eq 1 ]; then + grep -A1000 "${SOLR_SCHEMA_FIELD_BEGIN_MARK}" "${SCHEMA}" | grep -B1000 "${SOLR_SCHEMA_FIELD_END_MARK}" + grep -A1000 "${SOLR_SCHEMA_COPYFIELD_BEGIN_MARK}" "${SCHEMA}" | grep -B1000 "${SOLR_SCHEMA_COPYFIELD_END_MARK}" +fi \ No newline at 
end of file diff --git a/conf/solr/8.8.1/updateSchemaMDB.sh b/conf/solr/8.8.1/updateSchemaMDB.sh deleted file mode 100755 index 0044f15c7cd..00000000000 --- a/conf/solr/8.8.1/updateSchemaMDB.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# This script updates the and schema configuration necessary to properly -# index custom metadata fields in Solr. -# 1. Retrieve from Dataverse API endpoint -# 2. Parse and write Solr schema files (which might replace the included files) -# 3. Reload Solr -# -# List of variables: -# ${DATAVERSE_URL}: URL to Dataverse. Defaults to http://localhost:8080 -# ${SOLR_URL}: URL to Solr. Defaults to http://localhost:8983 -# ${UNBLOCK_KEY}: File path to secret or unblock key as string. Only necessary on k8s or when you secured your installation. -# ${TARGET}: Directory where to write the XML files. Defaults to /tmp -# -# Programs used (need to be available on your PATH): -# coreutils: mktemp, csplit -# curl - -usage() { - echo "usage: updateSchemaMDB.sh [options]" - echo "options:" - echo " -d Dataverse URL, defaults to http://localhost:8080" - echo " -h Show this help text" - echo " -s Solr URL, defaults to http://localhost:8983" - echo " -t Directory where to write the XML files. Defaults to /tmp" - echo " -u Dataverse unblock key either as key string or path to keyfile" -} - -### Init (with sane defaults) -DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"} -SOLR_URL=${SOLR_URL:-"http://localhost:8983"} -TARGET=${TARGET:-"/tmp"} -UNBLOCK_KEY=${UNBLOCK_KEY:-""} - -# if cmdline args are given, override any env var setting (or defaults) -while getopts ":d:hs:t:u:" opt -do - case $opt in - d) DATAVERSE_URL=${OPTARG};; - h) usage; exit 0;; - s) SOLR_URL=${OPTARG};; - t) TARGET=${OPTARG};; - u) UNBLOCK_KEY=${OPTARG};; - :) echo "Missing option argument for -${OPTARG}. Use -h for help." >&2; exit 1;; - \?) echo "Unknown option -${OPTARG}." 
>&2; usage; exit 1;; - esac -done - -# Special handling of unblock key depending on referencing a secret file or key in var -if [ ! -z "${UNBLOCK_KEY}" ]; then - if [ -f "${UNBLOCK_KEY}" ]; then - UNBLOCK_KEY="?unblock-key=$(cat ${UNBLOCK_KEY})" - else - UNBLOCK_KEY="?unblock-key=${UNBLOCK_KEY}" - fi -fi - -### Retrieval -echo "Retrieve schema data from ${DATAVERSE_URL}/api/admin/index/solr/schema" -TMPFILE=`mktemp` -curl -f -sS "${DATAVERSE_URL}/api/admin/index/solr/schema${UNBLOCK_KEY}" > $TMPFILE - -### Fail gracefull if Dataverse is not ready yet. -if [[ "`wc -l ${TMPFILE}`" < "3" ]]; then - echo "Dataverse responded with empty file. When running on K8s: did you bootstrap yet?" - exit 123 -fi - -### Processing -echo "Writing ${TARGET}/schema_dv_mdb_fields.xml" -echo "" > ${TARGET}/schema_dv_mdb_fields.xml -cat ${TMPFILE} | grep ".*> ${TARGET}/schema_dv_mdb_fields.xml -echo "" >> ${TARGET}/schema_dv_mdb_fields.xml - -echo "Writing ${TARGET}/schema_dv_mdb_copies.xml" -echo "" > ${TARGET}/schema_dv_mdb_copies.xml -cat ${TMPFILE} | grep ".*> ${TARGET}/schema_dv_mdb_copies.xml -echo "" >> ${TARGET}/schema_dv_mdb_copies.xml - -rm ${TMPFILE}* - -### Reloading -echo "Triggering Solr RELOAD at ${SOLR_URL}/solr/admin/cores?action=RELOAD&core=collection1" -curl -f -sS "${SOLR_URL}/solr/admin/cores?action=RELOAD&core=collection1" From e92eff4a4fd6f1772980659abb16dc7a31d50307 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Oct 2021 14:28:54 +0200 Subject: [PATCH 03/19] docs(metadata): refactor metadata customization with new solr script #7903 --- .../source/admin/metadatacustomization.rst | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 9e62b66febb..c307adb56af 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -639,32 
+639,38 @@ Updating the Solr Schema Once you have enabled a new metadata block you should be able to see the new fields in the GUI but before you can save the dataset, you must add additional fields to your Solr schema. -An API endpoint of your Dataverse installation provides you with a generated set of all fields that need to be added to the Solr schema -configuration, including any enabled metadata schemas: +An API endpoint of your Dataverse installation provides you with a generated set of all fields that need to be added to +the Solr schema configuration, including any enabled metadata schemas: -``curl http://localhost:8080/api/admin/index/solr/schema`` +``curl "http://localhost:8080/api/admin/index/solr/schema"`` + +You can use :download:`update-fields.sh <../../../../conf/solr/8.8.1/update-fields.sh>` to easily add these to the +Solr schema you installed for your Dataverse installation. + +The script needs a target XML file containing your Solr schema. (See the :doc:`/installation/prerequisites/` section of +the Installation Guide for a suggested location on disk for the Solr schema file.) + +You can either pipe the downloaded schema to the script or provide the file as an argument. (We recommend that you take +a look at the usage output of ``update-fields.sh -h``.) + +.. code-block:: + :caption: Example usage of ``update-fields.sh`` + + curl "http://localhost:8080/api/admin/index/solr/schema" | update-fields.sh /usr/local/solr/server/solr/collection1/conf/schema.xml -For convenience and automation you can download and consider running :download:`updateSchemaMDB.sh <../../../../conf/solr/8.8.1/updateSchemaMDB.sh>`. It uses the API endpoint above and writes schema files to the filesystem (so be sure to run it on the Solr server itself as the Unix user who owns the Solr files) and then triggers a Solr reload. -Due to `an issue with schema.xml including the generated schema_dv_mdb_copies.xml file `_ additional steps are currently needed. 
Once schema_dv_mdb_copies.xml has been generated by the script, you'll need to copy/paste the set of elements in the generated file into schema.xml manually and then restart solr (or trigger a Solr reload). +You will need to reload your Solr schema via an HTTP-API call, targeting your Solr instance: -By default, it will download from your Dataverse installation at `http://localhost:8080` and reload Solr at `http://localhost:8983`. -You may use the following environment variables with this script or mix'n'match with options: +``curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"`` -==================== ====== =============================================== ========================================================= -Environment variable Option Description Example -==================== ====== =============================================== ========================================================= -`DATAVERSE_URL` `-d` Provide the URL to your Dataverse installation *http://localhost:8080* -`SOLR_URL` `-s` Provide the URL to your Solr instance *http://localhost:8983* -`UNBLOCK_KEY` `-u` If your installation has a blocked admin API *xyz* or */secrets/unblock.key* - endpoint, you can provide either the key itself - or a path to a keyfile -`TARGET` `-t` Provide the config directory of your Solr core */usr/local/solr/solr-8.8.1/server/solr/collection1/conf* - "collection1" -==================== ====== =============================================== ========================================================= +You can easily roll your own little script to automate the process (which might involve fetching the schema bits +from some place else than your Dataverse installation). -See the :doc:`/installation/prerequisites/` section of the Installation Guide for a suggested location on disk for the Solr schema file. +Please note that reconfigurations of your Solr index might require a re-index. 
Usually release notes indicate +a necessary re-index, but for your custom metadata you will need to keep track on your own. -Please note that if you are going to make a pull request updating ``conf/solr/8.8.1/schema.xml`` with fields you have added, you should first load all the custom metadata blocks in ``scripts/api/data/metadatablocks`` (including ones you don't care about) to create a complete list of fields. +Please note also that if you are going to make a pull request updating ``conf/solr/8.8.1/schema.xml`` with fields you have +added, you should first load all the custom metadata blocks in ``scripts/api/data/metadatablocks`` (including ones you +don't care about) to create a complete list of fields. (This might change in the future.) Reloading a Metadata Block -------------------------- From e6a6bf9c8ad7e21ed2728f6906d6d4b79b6412e7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Oct 2021 14:39:19 +0200 Subject: [PATCH 04/19] refactor(solr): remove references to updateSchemaMDB.sh #7903 --- conf/docker-aio/1prep.sh | 1 - conf/docker-aio/c8.dockerfile | 1 - scripts/api/setup-optional-harvard.sh | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/conf/docker-aio/1prep.sh b/conf/docker-aio/1prep.sh index 98cbc2eec93..2c332414e94 100755 --- a/conf/docker-aio/1prep.sh +++ b/conf/docker-aio/1prep.sh @@ -6,7 +6,6 @@ mkdir -p testdata/doc/sphinx-guides/source/_static/util/ cp ../solr/8.8.1/schema*.xml testdata/ cp ../solr/8.8.1/solrconfig.xml testdata/ -cp ../solr/8.8.1/updateSchemaMDB.sh testdata/ cp ../jhove/jhove.conf testdata/ cp ../jhove/jhoveConfig.xsd testdata/ cd ../../ diff --git a/conf/docker-aio/c8.dockerfile b/conf/docker-aio/c8.dockerfile index 515f69b8c55..e08e1d56749 100644 --- a/conf/docker-aio/c8.dockerfile +++ b/conf/docker-aio/c8.dockerfile @@ -64,7 +64,6 @@ COPY dv/install/ /opt/dv/ COPY install.bash /opt/dv/ COPY entrypoint.bash /opt/dv/ COPY testdata /opt/dv/testdata -COPY testdata/updateSchemaMDB.sh /opt/dv/testdata/ 
COPY testscripts/* /opt/dv/testdata/ COPY setupIT.bash /opt/dv WORKDIR /opt/dv diff --git a/scripts/api/setup-optional-harvard.sh b/scripts/api/setup-optional-harvard.sh index e51846bd9e0..0cf6ea4d393 100755 --- a/scripts/api/setup-optional-harvard.sh +++ b/scripts/api/setup-optional-harvard.sh @@ -52,5 +52,5 @@ curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @da curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/customCHIA.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/customDigaai.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/custom_hbgdki.tsv -H "Content-type: text/tab-separated-values" -echo "Because you have loaded custom metadata blocks, you need to update the include files pulled in by Solr's schema.xml. On the Solr server, you can try running the updateSchemaMDB.sh script mentioned in the Metadata Customization section of the Admin Guide or follow the manual steps listed there." +echo "Because you have loaded custom metadata blocks, you need to update the include files pulled in by Solr's schema.xml. On the Solr server, you can try running the update-fields.sh script mentioned in the Metadata Customization section of the Admin Guide or follow the manual steps listed there." 
echo From 70f60b1daa860276b55ad51bfd870b09bb7679e6 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Oct 2021 14:39:56 +0200 Subject: [PATCH 05/19] refactor(solr): include new update-fields.sh instead of updateSchemaMDB.sh in installer #7903 --- scripts/installer/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/installer/Makefile b/scripts/installer/Makefile index fe26bb5d6c6..006b5c818cd 100644 --- a/scripts/installer/Makefile +++ b/scripts/installer/Makefile @@ -4,7 +4,7 @@ GLASSFISH_SETUP_SCRIPT=${INSTALLER_ZIP_DIR}/as-setup.sh API_SCRIPTS=${INSTALLER_ZIP_DIR}/setup-datasetfields.sh ${INSTALLER_ZIP_DIR}/setup-users.sh ${INSTALLER_ZIP_DIR}/setup-builtin-roles.sh ${INSTALLER_ZIP_DIR}/setup-dvs.sh ${INSTALLER_ZIP_DIR}/data ${INSTALLER_ZIP_DIR}/setup-identity-providers.sh ${INSTALLER_ZIP_DIR}/setup-all.sh ${INSTALLER_ZIP_DIR}/post-install-api-block.sh JHOVE_CONFIG=${INSTALLER_ZIP_DIR}/jhove.conf JHOVE_SCHEMA=${INSTALLER_ZIP_DIR}/jhoveConfig.xsd -SOLR_SCHEMA=${INSTALLER_ZIP_DIR}/schema.xml ${INSTALLER_ZIP_DIR}/schema_dv_mdb_fields.xml ${INSTALLER_ZIP_DIR}/updateSchemaMDB.sh +SOLR_SCHEMA=${INSTALLER_ZIP_DIR}/schema.xml ${INSTALLER_ZIP_DIR}/update-fields.sh SOLR_CONFIG=${INSTALLER_ZIP_DIR}/solrconfig.xml PYTHON_FILES=${INSTALLER_ZIP_DIR}/README_python.txt ${INSTALLER_ZIP_DIR}/installConfig.py ${INSTALLER_ZIP_DIR}/installUtils.py ${INSTALLER_ZIP_DIR}/install.py ${INSTALLER_ZIP_DIR}/installAppServer.py ${INSTALLER_ZIP_DIR}/requirements.txt ${INSTALLER_ZIP_DIR}/default.config ${INSTALLER_ZIP_DIR}/interactive.config INSTALL_SCRIPT=${INSTALLER_ZIP_DIR}/install @@ -56,9 +56,9 @@ ${JHOVE_SCHEMA}: ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR} @echo copying jhove schema file /bin/cp ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR} -${SOLR_SCHEMA}: ../../conf/solr/8.8.1/schema.xml ../../conf/solr/8.8.1/schema_dv_mdb_fields.xml ../../conf/solr/8.8.1/updateSchemaMDB.sh ${INSTALLER_ZIP_DIR} +${SOLR_SCHEMA}: 
../../conf/solr/8.8.1/schema.xml ../../conf/solr/8.8.1/update-fields.sh ${INSTALLER_ZIP_DIR} @echo copying Solr schema file - /bin/cp ../../conf/solr/8.8.1/schema*.xml ../../conf/solr/8.8.1/updateSchemaMDB.sh ${INSTALLER_ZIP_DIR} + /bin/cp ../../conf/solr/8.8.1/schema.xml ../../conf/solr/8.8.1/update-fields.sh ${INSTALLER_ZIP_DIR} ${SOLR_CONFIG}: ../../conf/solr/8.8.1/solrconfig.xml ${INSTALLER_ZIP_DIR} @echo copying Solr config file From 1288bbdae7418f2722642a2426f619bbf9a7084a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Oct 2021 15:07:43 +0200 Subject: [PATCH 06/19] docs(solr): add release note for update-fields.sh #7903 --- doc/release-notes/7903-solr-config.md | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 doc/release-notes/7903-solr-config.md diff --git a/doc/release-notes/7903-solr-config.md b/doc/release-notes/7903-solr-config.md new file mode 100644 index 00000000000..1c4ed47d40a --- /dev/null +++ b/doc/release-notes/7903-solr-config.md @@ -0,0 +1,30 @@ +## Mitigate Solr Schema Management Problems + +With [release 5.5](https://github.com/IQSS/dataverse/releases/tag/v5.5), the `<copyField>` definitions had been +reincluded into `schema.xml` to fix searching for datasets. + +This release includes a final update to `schema.xml` and an updated script `update-fields.sh` to manage your +custom metadata fields in the future. (It might get used for other purposes in the future, too.) The broken script +`updateSchemaMDB.sh` has been removed. + +Please replace your schema.xml with the one provided to make sure the new script can do its magic. +If you do not use any custom metadata blocks, that's it. Else, read on. + +To include your custom metadata fields after updating schema.xml, you can use a simple `curl` command. (Please download +the script beforehand or use it from the extracted installer.) 
+ +``` +curl "https://<your-dataverse-host>/api/admin/index/solr/schema" | update-fields.sh conf/schema.xml +``` + +Please adapt the above to point to your Dataverse installation and to the correct `schema.xml` file in your Solr +installation. (See the [installation guide](https://guides.dataverse.org/en/latest/installation/prerequisites.html#installing-solr) +for some hints about usual places or use `find / -name schema.xml`.) + +After upgrade, you need to restart (downtime!) or reload Solr (OK while running): +``` +curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1" +``` +(Please adapt to your installation's details. Should work as is for most.) + +TODO: if we change any schemas in this release, a hint about reindexing might be necessary. Else delete this TODO. \ No newline at end of file From c24f034593dadcad0b43291984298f336153bffe Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Oct 2021 16:44:34 +0200 Subject: [PATCH 07/19] ci(solr): start adding shellspec tests for update-fields.sh #7903 --- tests/shell/.shellspec | 1 + tests/shell/data/solr/empty.xml | 0 tests/shell/data/solr/invalid-source.md | 4 + .../data/solr/mark-missing-copyfield-end.xml | 9 ++ .../solr/mark-missing-copyfield-start.xml | 9 ++ .../data/solr/mark-missing-field-end.xml | 9 ++ .../data/solr/mark-missing-field-start.xml | 9 ++ .../solr/mark-nonunique-copyfield-end.xml | 11 +++ .../solr/mark-nonunique-copyfield-start.xml | 11 +++ .../data/solr/mark-nonunique-field-end.xml | 11 +++ .../data/solr/mark-nonunique-field-start.xml | 11 +++ tests/shell/data/solr/mark-order-1.xml | 10 ++ tests/shell/data/solr/mark-order-2.xml | 10 ++ tests/shell/data/solr/mark-order-3.xml | 10 ++ tests/shell/data/solr/minimal.xml | 10 ++ tests/shell/spec/spec_helper.sh | 24 +++++ tests/shell/spec/update_fields_spec.sh | 99 +++++++++++++++++++ 17 files changed, 248 insertions(+) create mode 100644 
tests/shell/data/solr/invalid-source.md create mode 100644 tests/shell/data/solr/mark-missing-copyfield-end.xml create mode 100644 tests/shell/data/solr/mark-missing-copyfield-start.xml create mode 100644 tests/shell/data/solr/mark-missing-field-end.xml create mode 100644 tests/shell/data/solr/mark-missing-field-start.xml create mode 100644 tests/shell/data/solr/mark-nonunique-copyfield-end.xml create mode 100644 tests/shell/data/solr/mark-nonunique-copyfield-start.xml create mode 100644 tests/shell/data/solr/mark-nonunique-field-end.xml create mode 100644 tests/shell/data/solr/mark-nonunique-field-start.xml create mode 100644 tests/shell/data/solr/mark-order-1.xml create mode 100644 tests/shell/data/solr/mark-order-2.xml create mode 100644 tests/shell/data/solr/mark-order-3.xml create mode 100644 tests/shell/data/solr/minimal.xml create mode 100644 tests/shell/spec/spec_helper.sh create mode 100644 tests/shell/spec/update_fields_spec.sh diff --git a/tests/shell/.shellspec b/tests/shell/.shellspec new file mode 100644 index 00000000000..c99d2e7396e --- /dev/null +++ b/tests/shell/.shellspec @@ -0,0 +1 @@ +--require spec_helper diff --git a/tests/shell/data/solr/empty.xml b/tests/shell/data/solr/empty.xml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/shell/data/solr/invalid-source.md b/tests/shell/data/solr/invalid-source.md new file mode 100644 index 00000000000..49b7a7c7798 --- /dev/null +++ b/tests/shell/data/solr/invalid-source.md @@ -0,0 +1,4 @@ +Bla bla bla +Anything +Foobar +But no field or copyField \ No newline at end of file diff --git a/tests/shell/data/solr/mark-missing-copyfield-end.xml b/tests/shell/data/solr/mark-missing-copyfield-end.xml new file mode 100644 index 00000000000..3ac2791f10d --- /dev/null +++ b/tests/shell/data/solr/mark-missing-copyfield-end.xml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/mark-missing-copyfield-start.xml 
b/tests/shell/data/solr/mark-missing-copyfield-start.xml new file mode 100644 index 00000000000..547b9c5bbcb --- /dev/null +++ b/tests/shell/data/solr/mark-missing-copyfield-start.xml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/mark-missing-field-end.xml b/tests/shell/data/solr/mark-missing-field-end.xml new file mode 100644 index 00000000000..eea4918440d --- /dev/null +++ b/tests/shell/data/solr/mark-missing-field-end.xml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/mark-missing-field-start.xml b/tests/shell/data/solr/mark-missing-field-start.xml new file mode 100644 index 00000000000..082a11fbddc --- /dev/null +++ b/tests/shell/data/solr/mark-missing-field-start.xml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/mark-nonunique-copyfield-end.xml b/tests/shell/data/solr/mark-nonunique-copyfield-end.xml new file mode 100644 index 00000000000..d8ea99842c3 --- /dev/null +++ b/tests/shell/data/solr/mark-nonunique-copyfield-end.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/mark-nonunique-copyfield-start.xml b/tests/shell/data/solr/mark-nonunique-copyfield-start.xml new file mode 100644 index 00000000000..fd7636ec80f --- /dev/null +++ b/tests/shell/data/solr/mark-nonunique-copyfield-start.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/mark-nonunique-field-end.xml b/tests/shell/data/solr/mark-nonunique-field-end.xml new file mode 100644 index 00000000000..da03ef65e21 --- /dev/null +++ b/tests/shell/data/solr/mark-nonunique-field-end.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/mark-nonunique-field-start.xml b/tests/shell/data/solr/mark-nonunique-field-start.xml new file mode 100644 index 00000000000..1a441cdf521 --- /dev/null +++ 
b/tests/shell/data/solr/mark-nonunique-field-start.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/mark-order-1.xml b/tests/shell/data/solr/mark-order-1.xml new file mode 100644 index 00000000000..635143fe2f0 --- /dev/null +++ b/tests/shell/data/solr/mark-order-1.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/mark-order-2.xml b/tests/shell/data/solr/mark-order-2.xml new file mode 100644 index 00000000000..7b085ca1fea --- /dev/null +++ b/tests/shell/data/solr/mark-order-2.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/mark-order-3.xml b/tests/shell/data/solr/mark-order-3.xml new file mode 100644 index 00000000000..a8b13b144e5 --- /dev/null +++ b/tests/shell/data/solr/mark-order-3.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/minimal.xml b/tests/shell/data/solr/minimal.xml new file mode 100644 index 00000000000..7ec2b9aafe7 --- /dev/null +++ b/tests/shell/data/solr/minimal.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/spec/spec_helper.sh b/tests/shell/spec/spec_helper.sh new file mode 100644 index 00000000000..93f19083cd2 --- /dev/null +++ b/tests/shell/spec/spec_helper.sh @@ -0,0 +1,24 @@ +# shellcheck shell=sh + +# Defining variables and functions here will affect all specfiles. +# Change shell options inside a function may cause different behavior, +# so it is better to set them here. +# set -eu + +# This callback function will be invoked only once before loading specfiles. +spec_helper_precheck() { + # Available functions: info, warn, error, abort, setenv, unsetenv + # Available variables: VERSION, SHELL_TYPE, SHELL_VERSION + : minimum_version "0.28.1" +} + +# This callback function will be invoked after a specfile has been loaded. 
+spec_helper_loaded() { + : +} + +# This callback function will be invoked after core modules has been loaded. +spec_helper_configure() { + # Available functions: import, before_each, after_each, before_all, after_all + : import 'support/custom_matcher' +} diff --git a/tests/shell/spec/update_fields_spec.sh b/tests/shell/spec/update_fields_spec.sh new file mode 100644 index 00000000000..d0d88da7714 --- /dev/null +++ b/tests/shell/spec/update_fields_spec.sh @@ -0,0 +1,99 @@ +#shellcheck shell=sh + +update_fields() { + ../../conf/solr/8.8.1/update-fields.sh "$@" +} + +Describe "Update fields command" + Path schema-xml="../../conf/solr/8.8.1/schema.xml" + It "needs upstream schema.xml" + The path schema-xml should be exist + End + + Describe "schema.xml validation" + It "throws error when no schema.xml target given" + When run update_fields + The status should equal 2 + The error should include "Cannot find or write" + End + + Describe "throws error when missing mark" + Parameters + "#1" "SCHEMA-FIELDS::BEGIN" data/solr/mark-missing-field-start.xml + "#2" "SCHEMA-FIELDS::END" data/solr/mark-missing-field-end.xml + "#3" "SCHEMA-COPY-FIELDS::BEGIN" data/solr/mark-missing-copyfield-start.xml + "#4" "SCHEMA-COPY-FIELDS::END" data/solr/mark-missing-copyfield-end.xml + End + + It "$2" + When run update_fields "$3" + The status should equal 2 + The error should include "$2" + End + End + + Describe "throws error when non-unique mark" + Parameters + "#1" "SCHEMA-FIELDS::BEGIN" data/solr/mark-nonunique-field-start.xml + "#2" "SCHEMA-FIELDS::END" data/solr/mark-nonunique-field-end.xml + "#3" "SCHEMA-COPY-FIELDS::BEGIN" data/solr/mark-nonunique-copyfield-start.xml + "#4" "SCHEMA-COPY-FIELDS::END" data/solr/mark-nonunique-copyfield-end.xml + End + + It "$2" + When run update_fields "$3" + The status should equal 2 + The error should include "guards are not unique" + End + End + + Describe "throws error when marks not in correct order" + Parameters + "#1" 
data/solr/mark-order-1.xml + "#2" data/solr/mark-order-2.xml + "#3" data/solr/mark-order-3.xml + End + + It "$1" + When run update_fields "$2" + The status should equal 2 + The error should include "guards are not in correct order" + End + End + End + + Describe "reading input" + It "throws error when no source given" + When run update_fields data/solr/minimal.xml + The status should equal 2 + The error should include "provide source file or piped input" + End + + It "throws error when given file not found" + When run update_fields data/solr/minimal.xml foobar + The status should equal 2 + The error should include "Cannot read" + The error should include "foobar" + End + + It "throws error when given file is empty" + When run update_fields data/solr/minimal.xml data/solr/empty.xml + The status should equal 2 + The error should include "Cannot read" + The error should include "empty.xml" + End + + It "throws error when given invalid source file" + When run update_fields data/solr/minimal.xml data/solr/invalid-source.md + The status should equal 2 + The error should include "No or " + End + + It "throws error when given invalid stdin source" + Data < data/solr/invalid-source.md + When run update_fields data/solr/minimal.xml + The status should equal 2 + The error should include "No or " + End + End +End \ No newline at end of file From 63f3f80fe02e4caf8d563dfcc38b803142c90cc4 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 5 Oct 2021 09:10:36 +0200 Subject: [PATCH 08/19] ci(solr): finish shellspec tests for update-fields.sh #7903 --- tests/shell/data/solr/chain-output.xml | 6 + .../data/solr/{empty.xml => empty-source.xml} | 0 .../{invalid-source.md => invalid-source.xml} | 0 tests/shell/data/solr/mark-nolinebreak.xml | 12 ++ .../solr/{minimal.xml => minimal-schema.xml} | 2 +- tests/shell/data/solr/minimal-source.xml | 2 + tests/shell/spec/update_fields_spec.sh | 122 ++++++++++++++---- 7 files changed, 115 insertions(+), 29 deletions(-) create mode 100644 
tests/shell/data/solr/chain-output.xml rename tests/shell/data/solr/{empty.xml => empty-source.xml} (100%) rename tests/shell/data/solr/{invalid-source.md => invalid-source.xml} (100%) create mode 100644 tests/shell/data/solr/mark-nolinebreak.xml rename tests/shell/data/solr/{minimal.xml => minimal-schema.xml} (97%) create mode 100644 tests/shell/data/solr/minimal-source.xml diff --git a/tests/shell/data/solr/chain-output.xml b/tests/shell/data/solr/chain-output.xml new file mode 100644 index 00000000000..a54414ad244 --- /dev/null +++ b/tests/shell/data/solr/chain-output.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/tests/shell/data/solr/empty.xml b/tests/shell/data/solr/empty-source.xml similarity index 100% rename from tests/shell/data/solr/empty.xml rename to tests/shell/data/solr/empty-source.xml diff --git a/tests/shell/data/solr/invalid-source.md b/tests/shell/data/solr/invalid-source.xml similarity index 100% rename from tests/shell/data/solr/invalid-source.md rename to tests/shell/data/solr/invalid-source.xml diff --git a/tests/shell/data/solr/mark-nolinebreak.xml b/tests/shell/data/solr/mark-nolinebreak.xml new file mode 100644 index 00000000000..a9aa476008c --- /dev/null +++ b/tests/shell/data/solr/mark-nolinebreak.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/shell/data/solr/minimal.xml b/tests/shell/data/solr/minimal-schema.xml similarity index 97% rename from tests/shell/data/solr/minimal.xml rename to tests/shell/data/solr/minimal-schema.xml index 7ec2b9aafe7..004e96ff604 100644 --- a/tests/shell/data/solr/minimal.xml +++ b/tests/shell/data/solr/minimal-schema.xml @@ -7,4 +7,4 @@ - \ No newline at end of file + diff --git a/tests/shell/data/solr/minimal-source.xml b/tests/shell/data/solr/minimal-source.xml new file mode 100644 index 00000000000..a6f95caa294 --- /dev/null +++ b/tests/shell/data/solr/minimal-source.xml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git 
a/tests/shell/spec/update_fields_spec.sh b/tests/shell/spec/update_fields_spec.sh index d0d88da7714..ff4096c157a 100644 --- a/tests/shell/spec/update_fields_spec.sh +++ b/tests/shell/spec/update_fields_spec.sh @@ -5,9 +5,15 @@ update_fields() { } Describe "Update fields command" - Path schema-xml="../../conf/solr/8.8.1/schema.xml" - It "needs upstream schema.xml" - The path schema-xml should be exist + + Describe "can operate on upstream data" + copyUpstreamSchema() { cp ../../conf/solr/8.8.1/schema.xml data/solr/upstream-schema.xml; } + AfterAll 'copyUpstreamSchema' + + Path schema-xml="../../conf/solr/8.8.1/schema.xml" + It "needs upstream schema.xml" + The path schema-xml should be exist + End End Describe "schema.xml validation" @@ -60,40 +66,100 @@ Describe "Update fields command" The error should include "guards are not in correct order" End End - End - Describe "reading input" - It "throws error when no source given" - When run update_fields data/solr/minimal.xml + It "throws error when marks not in exclusive line" + When run update_fields data/solr/mark-nolinebreak.xml The status should equal 2 - The error should include "provide source file or piped input" + The error should include "is not on an exclusive line" End + End - It "throws error when given file not found" - When run update_fields data/solr/minimal.xml foobar - The status should equal 2 - The error should include "Cannot read" - The error should include "foobar" - End + Describe "reading input" + Describe "fails because" + It "throws error when no source given" + When run update_fields data/solr/minimal-schema.xml + The status should equal 2 + The error should include "provide source file or piped input" + End - It "throws error when given file is empty" - When run update_fields data/solr/minimal.xml data/solr/empty.xml - The status should equal 2 - The error should include "Cannot read" - The error should include "empty.xml" + It "throws error when given file not found" + When run update_fields 
data/solr/minimal-schema.xml foobar + The status should equal 2 + The error should include "Cannot read" + The error should include "foobar" + End + + It "throws error when given file is empty" + When run update_fields data/solr/minimal-schema.xml data/solr/empty-source.xml + The status should equal 2 + The error should include "Cannot read" + The error should include "empty-source.xml" + End + + It "throws error when given invalid source file" + When run update_fields data/solr/minimal-schema.xml data/solr/invalid-source.xml + The status should equal 2 + The error should include "No or " + End + + It "throws error when given invalid stdin source" + Data < data/solr/invalid-source.xml + When run update_fields data/solr/minimal-schema.xml + The status should equal 2 + The error should include "No or " + End End - It "throws error when given invalid source file" - When run update_fields data/solr/minimal.xml data/solr/invalid-source.md - The status should equal 2 - The error should include "No or " + Describe "succeeds because" + setup() { cp data/solr/minimal-schema.xml data/solr/minimal-schema-work.xml; } + cleanup() { rm data/solr/minimal-schema-work.xml; } + BeforeEach 'setup' + AfterEach 'cleanup' + + deleteUpstreamSchema() { rm data/solr/upstream-schema.xml; } + AfterAll 'deleteUpstreamSchema' + + match_content() { + grep -q "$@" "${match_content}" + } + + It "prints nothing when editing minimal schema" + Data < data/solr/minimal-source.xml + When run update_fields data/solr/minimal-schema-work.xml + The status should equal 0 + The output should equal "" + The path data/solr/minimal-schema-work.xml should be file + The path data/solr/minimal-schema-work.xml should satisfy match_content " or " + Describe "chaining data" + setup() { cp data/solr/minimal-schema.xml data/solr/minimal-schema-work.xml; } + cleanup() { rm data/solr/minimal-schema-work.xml; } + BeforeEach 'setup' + AfterEach 'cleanup' + + match_content() { + echo "${match_content}" | diff "$1" - + } + 
+ It "prints after editing" + Data < data/solr/minimal-source.xml + When run update_fields -p data/solr/minimal-schema-work.xml + The status should equal 0 + The output should satisfy match_content data/solr/chain-output.xml End End End \ No newline at end of file From 97d1a555d62013aa732e8ae8d8e8bc58ae64da6e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 5 Oct 2021 09:11:32 +0200 Subject: [PATCH 09/19] feat(solr): add some more validation and make update-fields.sh less noisy #7903 --- conf/solr/8.8.1/update-fields.sh | 34 ++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/conf/solr/8.8.1/update-fields.sh b/conf/solr/8.8.1/update-fields.sh index fb349a75e0f..6012365f680 100755 --- a/conf/solr/8.8.1/update-fields.sh +++ b/conf/solr/8.8.1/update-fields.sh @@ -18,6 +18,8 @@ INPUT="" FIELDS="" COPY_FIELDS="" TRIGGER_CHAIN=0 +ED_DELETE_FIELDS="'a+,'b-d" +ED_DELETE_COPYFIELDS="'a+,'b-d" SOLR_SCHEMA_FIELD_BEGIN_MARK="SCHEMA-FIELDS::BEGIN" SOLR_SCHEMA_FIELD_END_MARK="SCHEMA-FIELDS::END" @@ -78,6 +80,7 @@ else grep -c "${MARK}" "${SCHEMA}" || error "Missing ${MARK} from ${SCHEMA}" done ) + # Check guards are unique (count occurrences and sum calc via bc) [ "$(echo -n "${CHECKS}" | tr '\n' '+' | sed -e 's#$#\n#' | bc)" -eq 4 ] || \ error "Some include guards are not unique in ${SCHEMA}" @@ -92,6 +95,33 @@ else # Actual comparison of line numbers [ "$(echo "${CHECKS}" | tr '\n' '<' | sed -e 's#<$#\n#' -e 's#\(<[0-9]\+\)<\([0-9]\+\)#\1 \&\& \2#' | bc)" -eq 1 ] || \ error "Include guards are not in correct order in ${SCHEMA}" + + # Check guards are exclusively in their lines + # (no field or copyField on same line) + for MARK in ${MARKS_ORDERED} + do + grep "${MARK}" "${SCHEMA}" | grep -q -v -e '\( IF NO ELEMENTS BETWEEN GUARDS, DO NOT DELETE TO AVOID ED ERRORS fi @@ -133,7 +163,7 @@ else # Mark field end as 'b' /${SOLR_SCHEMA_FIELD_END_MARK}/kb # Delete all between lines a and b -'a+,'b-d +${ED_DELETE_FIELDS} # Read fields 
file and paste after line a 'ar ${FIELDS} # Write fields to schema @@ -155,7 +185,7 @@ EOF # Mark copyField end as 'b' /${SOLR_SCHEMA_COPYFIELD_END_MARK}/kb # Delete all between lines a and b -'a+,'b-d +${ED_DELETE_COPYFIELDS} # Read fields file and paste after line a 'ar ${COPY_FIELDS} # Write copyFields to schema From 7cb28a913e6aaf8b45092ed7a1e9a34cd06de9bb Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 5 Oct 2021 09:37:20 +0200 Subject: [PATCH 10/19] ci: add shellspec github workflow #7903 --- .github/workflows/shellspec.yml | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/shellspec.yml diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml new file mode 100644 index 00000000000..e3663b9d192 --- /dev/null +++ b/.github/workflows/shellspec.yml @@ -0,0 +1,35 @@ +name: "Shellspec" +on: + push: + paths: + - tests/shell/** + - conf/solr/** + # add more when more specs are written relying on data + pull_request: + paths: + - tests/shell/** + - conf/solr/** + # add more when more specs are written relying on data +jobs: + shellspec-linux: + name: "Linux" + runs-on: ubuntu-latest + steps: + - name: Install shellspec + run: curl -fsSL https://git.io/shellspec | sh -s 0.28.1 --yes + - uses: actions/checkout@v2 + - name: Run Shellspec + run: | + cd tests/shell + shellspec + shellspec-macos: + name: "MacOS" + runs-on: macos-10.15 + steps: + - name: Install shellspec + run: curl -fsSL https://git.io/shellspec | sh -s 0.28.1 --yes + - uses: actions/checkout@v2 + - name: Run Shellspec + run: | + cd tests/shell + /Users/runner/.local/bin/shellspec \ No newline at end of file From bfb8243bc04dcb6b01ace847a12c44e460b489ed Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 5 Oct 2021 10:46:31 +0200 Subject: [PATCH 11/19] ci(solr): do not test on MacOS due to VERY strange bash problems #7903 --- .github/workflows/shellspec.yml | 13 +------------ 1 file changed, 1 insertion(+), 12 
deletions(-) diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml index e3663b9d192..fc20cbe665c 100644 --- a/.github/workflows/shellspec.yml +++ b/.github/workflows/shellspec.yml @@ -21,15 +21,4 @@ jobs: - name: Run Shellspec run: | cd tests/shell - shellspec - shellspec-macos: - name: "MacOS" - runs-on: macos-10.15 - steps: - - name: Install shellspec - run: curl -fsSL https://git.io/shellspec | sh -s 0.28.1 --yes - - uses: actions/checkout@v2 - - name: Run Shellspec - run: | - cd tests/shell - /Users/runner/.local/bin/shellspec \ No newline at end of file + shellspec \ No newline at end of file From 84aa1937b69a6083806624f59723137f51cdc60c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 5 Oct 2021 10:46:57 +0200 Subject: [PATCH 12/19] refactor(solr): add bash v4+ requirement for update-fields.sh #7903 --- conf/solr/8.8.1/update-fields.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf/solr/8.8.1/update-fields.sh b/conf/solr/8.8.1/update-fields.sh index 6012365f680..8d13b31b3d8 100755 --- a/conf/solr/8.8.1/update-fields.sh +++ b/conf/solr/8.8.1/update-fields.sh @@ -60,6 +60,12 @@ while getopts ":hp" opt; do esac done +# Check for recent Bash version +# shellcheck disable=SC2086 +if [ ${BASH_VERSION%%.*} -lt 4 ]; then + error "Bash v4.x or later required" +fi + # remove all the parsed options shift $((OPTIND-1)) From 44ee9ebc3a45e33d14df6e053385155d84f294c9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 5 Oct 2021 11:23:24 +0200 Subject: [PATCH 13/19] ci(solr): skip update-fields.sh test for no input if on Github #7903 Inside update-fields.sh we detect if a STDIN is given as a pipe via testing if FD 0 (stdin) is not attached to a terminal. This does not work inside a Github Action, so skip this for now till we see a problem or find a better solution. 
--- tests/shell/spec/update_fields_spec.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/shell/spec/update_fields_spec.sh b/tests/shell/spec/update_fields_spec.sh index ff4096c157a..829b67fdcc4 100644 --- a/tests/shell/spec/update_fields_spec.sh +++ b/tests/shell/spec/update_fields_spec.sh @@ -76,7 +76,11 @@ Describe "Update fields command" Describe "reading input" Describe "fails because" + # Test if $CI is set (always true inside Github Workflow) + detect_github_action() { ! test -z ${CI:+x}; } + It "throws error when no source given" + Skip if "running on Github Action" detect_github_action When run update_fields data/solr/minimal-schema.xml The status should equal 2 The error should include "provide source file or piped input" From 9fb2bb85e16cab886183307e9d6af77968973035 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 5 Oct 2021 12:14:27 +0200 Subject: [PATCH 14/19] ci(solr): add Shellcheck as Github Action #7903 --- .github/workflows/shellspec.yml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml index fc20cbe665c..6acaddaefc4 100644 --- a/.github/workflows/shellspec.yml +++ b/.github/workflows/shellspec.yml @@ -11,8 +11,20 @@ on: - conf/solr/** # add more when more specs are written relying on data jobs: - shellspec-linux: - name: "Linux" + shellcheck: + name: Shellcheck + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: shellcheck + uses: reviewdog/action-shellcheck@v1 + with: + github_token: ${{ secrets.github_token }} + reporter: github-pr-review # Change reporter. 
+ fail_on_error: true + exclude: tests/shell/* + shellspec-ubuntu: + name: "Ubuntu" runs-on: ubuntu-latest steps: - name: Install shellspec From baef977b7936cd18f38eb3fdbf469e43a40fd385 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 5 Oct 2021 12:15:36 +0200 Subject: [PATCH 15/19] ci(solr): test adding a CentOS based shellspec run #7903 --- .github/workflows/shellspec.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml index 6acaddaefc4..d2c428d41fe 100644 --- a/.github/workflows/shellspec.yml +++ b/.github/workflows/shellspec.yml @@ -22,7 +22,7 @@ jobs: github_token: ${{ secrets.github_token }} reporter: github-pr-review # Change reporter. fail_on_error: true - exclude: tests/shell/* + exclude: "./tests/shell/*" shellspec-ubuntu: name: "Ubuntu" runs-on: ubuntu-latest @@ -31,6 +31,23 @@ jobs: run: curl -fsSL https://git.io/shellspec | sh -s 0.28.1 --yes - uses: actions/checkout@v2 - name: Run Shellspec + run: | + cd tests/shell + shellspec + shellspec-centos7: + name: "CentOS 7" + runs-on: ubuntu-latest + container: + image: centos:7 + steps: + - uses: actions/checkout@v2 + - name: Install shellspec + run: | + curl -fsSL https://github.com/shellspec/shellspec/releases/download/0.28.1/shellspec-dist.tar.gz | tar -xz -C /usr/share + ln -s /usr/share/shellspec/shellspec /usr/bin/shellspec + - name: Install dependencies + run: yum install -y ed + - name: Run shellspec run: | cd tests/shell shellspec \ No newline at end of file From ea6a1b965d6e2684c94f7abf13a2ad091896a964 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 5 Oct 2021 12:30:34 +0200 Subject: [PATCH 16/19] ci(solr): test adding a RockyLinux based shellspec run #7903 --- .github/workflows/shellspec.yml | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml index d2c428d41fe..7e59fc9c614 
100644 --- a/.github/workflows/shellspec.yml +++ b/.github/workflows/shellspec.yml @@ -10,6 +10,8 @@ on: - tests/shell/** - conf/solr/** # add more when more specs are written relying on data +env: + SHELLSPEC_VERSION: 0.28.1 jobs: shellcheck: name: Shellcheck @@ -28,7 +30,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Install shellspec - run: curl -fsSL https://git.io/shellspec | sh -s 0.28.1 --yes + run: curl -fsSL https://git.io/shellspec | sh -s ${{ env.SHELLSPEC_VERSION }} --yes - uses: actions/checkout@v2 - name: Run Shellspec run: | @@ -43,11 +45,28 @@ jobs: - uses: actions/checkout@v2 - name: Install shellspec run: | - curl -fsSL https://github.com/shellspec/shellspec/releases/download/0.28.1/shellspec-dist.tar.gz | tar -xz -C /usr/share + curl -fsSL https://github.com/shellspec/shellspec/releases/download/${{ env.SHELLSPEC_VERSION }}/shellspec-dist.tar.gz | tar -xz -C /usr/share ln -s /usr/share/shellspec/shellspec /usr/bin/shellspec - name: Install dependencies run: yum install -y ed - name: Run shellspec run: | cd tests/shell - shellspec \ No newline at end of file + shellspec + shellspec-rocky8: + name: "RockyLinux 8" + runs-on: ubuntu-latest + container: + image: rockylinux/rockylinux:8 + steps: + - uses: actions/checkout@v2 + - name: Install shellspec + run: | + curl -fsSL https://github.com/shellspec/shellspec/releases/download/${{ env.SHELLSPEC_VERSION }}/shellspec-dist.tar.gz | tar -xz -C /usr/share + ln -s /usr/share/shellspec/shellspec /usr/bin/shellspec + - name: Install dependencies + run: dnf install -y ed bc diffutils + - name: Run shellspec + run: | + cd tests/shell + shellspec \ No newline at end of file From 194a5e0fb23bf1714bada2deeb206270009bb234 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 5 Oct 2021 12:37:21 +0200 Subject: [PATCH 17/19] feat(solr): let update-fields.sh check for presence of ed and bc #7903 --- conf/solr/8.8.1/update-fields.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git 
a/conf/solr/8.8.1/update-fields.sh b/conf/solr/8.8.1/update-fields.sh index 8d13b31b3d8..0ce5222003e 100755 --- a/conf/solr/8.8.1/update-fields.sh +++ b/conf/solr/8.8.1/update-fields.sh @@ -33,6 +33,11 @@ function error { exit 2 } +function exists { + type "$1" >/dev/null 2>&1 && return 0 + ( IFS=:; for p in $PATH; do [ -x "${p%/}/$1" ] && return 0; done; return 1 ) +} + function usage { cat << EOF $(basename "$0") ${VERSION} @@ -66,6 +71,10 @@ if [ ${BASH_VERSION%%.*} -lt 4 ]; then error "Bash v4.x or later required" fi +# Check for ed and bc being present +exists ed || error "Please ensure ed & bc are installed" +exists bc || error "Please ensure ed & bc are installed" + # remove all the parsed options shift $((OPTIND-1)) From 3e830328851113e447adc1322b4849df7b967b0e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 26 Oct 2021 23:08:05 +0200 Subject: [PATCH 18/19] fix(solr): make update-fields.sh MacOS compatible - Replaced many GNU sed function with escaped chars in replacement with workaround for appending newline via echo - Replace one sed to take guards line numbers apart with awk because not compatible with BSD sed - Remove unnecessary restriction for bash 4 - Add check for presence of awk and sed - Remove unknown -v flag for BSD ed - Add grep filter for ed script comments, as comment commands not supported by BSD ed - Removed readlink -f for the sake of compatibility with MacOS. So be it. 
Bye bye nice error messages --- conf/solr/8.8.1/update-fields.sh | 44 +++++++++++++++----------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/conf/solr/8.8.1/update-fields.sh b/conf/solr/8.8.1/update-fields.sh index 0ce5222003e..49ea8151c77 100755 --- a/conf/solr/8.8.1/update-fields.sh +++ b/conf/solr/8.8.1/update-fields.sh @@ -65,21 +65,17 @@ while getopts ":hp" opt; do esac done -# Check for recent Bash version -# shellcheck disable=SC2086 -if [ ${BASH_VERSION%%.*} -lt 4 ]; then - error "Bash v4.x or later required" -fi - # Check for ed and bc being present -exists ed || error "Please ensure ed & bc are installed" -exists bc || error "Please ensure ed & bc are installed" +exists ed || error "Please ensure ed, bc, sed + awk are installed" +exists bc || error "Please ensure ed, bc, sed + awk are installed" +exists awk || error "Please ensure ed, bc, sed + awk are installed" +exists sed || error "Please ensure ed, bc, sed + awk are installed" # remove all the parsed options shift $((OPTIND-1)) # User overrideable locations -SCHEMA=$(readlink -f "${SCHEMA:-${1:-schema.xml}}") +SCHEMA=${SCHEMA:-${1:-schema.xml}} SOURCE=${SOURCE:-${2:-"-"}} @@ -97,7 +93,8 @@ else ) # Check guards are unique (count occurrences and sum calc via bc) - [ "$(echo -n "${CHECKS}" | tr '\n' '+' | sed -e 's#$#\n#' | bc)" -eq 4 ] || \ + # Note: fancy workaround to re-add closing \n on Linux & MacOS or no calculation + [ "$( (echo -n "${CHECKS}" | tr '\n' '+' ; echo ) | bc)" -eq 4 ] || \ error "Some include guards are not unique in ${SCHEMA}" # Check guards are in order (line number comparison via bc tricks) @@ -108,7 +105,7 @@ else done ) # Actual comparison of line numbers - [ "$(echo "${CHECKS}" | tr '\n' '<' | sed -e 's#<$#\n#' -e 's#\(<[0-9]\+\)<\([0-9]\+\)#\1 \&\& \2#' | bc)" -eq 1 ] || \ + echo "${CHECKS}" | tr '\n' '<' | awk -F'<' '{ if ($1 < $2 && $2 < $3 && $3 < $4) {exit 0} else {exit 1} }' || \ error "Include guards are not in correct order in ${SCHEMA}" # 
Check guards are exclusively in their lines @@ -120,23 +117,24 @@ else done # Check if there are no lines between the field marks (then skip delete in ed) + # Note: fancy workaround to re-add closing \n on Linux & MacOS or no calculation DISTANCE_FIELDS_MARKS=$( \ - grep -n -e "\(${SOLR_SCHEMA_FIELD_BEGIN_MARK}\|${SOLR_SCHEMA_FIELD_END_MARK}\)" "${SCHEMA}" \ - | cut -f 1 -d ":" | tr '\n' '<' | sed -e 's#<$#-1\n#' | bc + (grep -n -e "\(${SOLR_SCHEMA_FIELD_BEGIN_MARK}\|${SOLR_SCHEMA_FIELD_END_MARK}\)" "${SCHEMA}" \ + | cut -f 1 -d ":" | tr '\n' '<' | sed -e 's#<$#-1#' ; echo) \ + | bc ) if [ "${DISTANCE_FIELDS_MARKS}" -eq 0 ]; then ED_DELETE_FIELDS="#" fi # Check if there are no lines between the copyfield marks (then skip delete in ed) DISTANCE_COPYFIELDS_MARKS=$( \ - grep -n -e "\(${SOLR_SCHEMA_COPYFIELD_BEGIN_MARK}\|${SOLR_SCHEMA_COPYFIELD_END_MARK}\)" "${SCHEMA}" \ - | cut -f 1 -d ":" | tr '\n' '<' | sed -e 's#<$#-1\n#' | bc + (grep -n -e "\(${SOLR_SCHEMA_COPYFIELD_BEGIN_MARK}\|${SOLR_SCHEMA_COPYFIELD_END_MARK}\)" "${SCHEMA}" \ + | cut -f 1 -d ":" | tr '\n' '<' | sed -e 's#<$#-1#' ; echo ) \ + | bc ) if [ "${DISTANCE_COPYFIELDS_MARKS}" -eq 0 ]; then ED_DELETE_COPYFIELDS="#" fi - #TODO - #-> IF NO ELEMENTS BETWEEN GUARDS, DO NOT DELETE TO AVOID ED ERRORS fi @@ -150,8 +148,6 @@ if [ -z "${SOURCE}" ] || [ "${SOURCE}" = "-" ]; then error "No data - either provide source file or piped input" fi else - # Always make the path absolute - SOURCE=$(readlink -f "${SOURCE}") # Check the given file for readability and non-zero length if [ ! -r "${SOURCE}" ] || [ ! 
-s "${SOURCE}" ]; then error "Cannot read from or empty file ${SOURCE}" @@ -172,7 +168,8 @@ else # If file actually contains output, write to schema if [ -s "${FIELDS}" ]; then # Use an ed script to replace all - cat << EOF | ed -s -v "${SCHEMA}" + cat << EOF | grep -v -e "^#" | ed -s "${SCHEMA}" +H # Mark field begin as 'a' /${SOLR_SCHEMA_FIELD_BEGIN_MARK}/ka # Mark field end as 'b' @@ -193,8 +190,9 @@ EOF echo "${INPUT}" | grep -e "" | sed -e 's#^# #' > "${COPY_FIELDS}" || true # If file actually contains output, write to schema if [ -s "${COPY_FIELDS}" ]; then - # Use an ed script to replace all - cat << EOF | ed -s "${SCHEMA}" + # Use an ed script to replace all , filter comments (BSD ed does not support comments) + cat << EOF | grep -v -e "^#" | ed -s "${SCHEMA}" +H # Mark copyField begin as 'a' /${SOLR_SCHEMA_COPYFIELD_BEGIN_MARK}/ka # Mark copyField end as 'b' @@ -217,4 +215,4 @@ fi if [ "${TRIGGER_CHAIN}" -eq 1 ]; then grep -A1000 "${SOLR_SCHEMA_FIELD_BEGIN_MARK}" "${SCHEMA}" | grep -B1000 "${SOLR_SCHEMA_FIELD_END_MARK}" grep -A1000 "${SOLR_SCHEMA_COPYFIELD_BEGIN_MARK}" "${SCHEMA}" | grep -B1000 "${SOLR_SCHEMA_COPYFIELD_END_MARK}" -fi \ No newline at end of file +fi From c5b09ffd5cb1210b1a906075a819e4415e9e0cff Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 26 Oct 2021 23:20:37 +0200 Subject: [PATCH 19/19] ci(solr): re-add MacOS shellspec test for update-fields.sh #7903 --- .github/workflows/shellspec.yml | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml index 7e59fc9c614..7a56b8c2f7d 100644 --- a/.github/workflows/shellspec.yml +++ b/.github/workflows/shellspec.yml @@ -59,14 +59,25 @@ jobs: container: image: rockylinux/rockylinux:8 steps: - - uses: actions/checkout@v2 - - name: Install shellspec - run: | - curl -fsSL https://github.com/shellspec/shellspec/releases/download/${{ env.SHELLSPEC_VERSION }}/shellspec-dist.tar.gz | tar 
-xz -C /usr/share - ln -s /usr/share/shellspec/shellspec /usr/bin/shellspec - - name: Install dependencies - run: dnf install -y ed bc diffutils - - name: Run shellspec - run: | - cd tests/shell - shellspec \ No newline at end of file + - uses: actions/checkout@v2 + - name: Install shellspec + run: | + curl -fsSL https://github.com/shellspec/shellspec/releases/download/${{ env.SHELLSPEC_VERSION }}/shellspec-dist.tar.gz | tar -xz -C /usr/share + ln -s /usr/share/shellspec/shellspec /usr/bin/shellspec + - name: Install dependencies + run: dnf install -y ed bc diffutils + - name: Run shellspec + run: | + cd tests/shell + shellspec + shellspec-macos: + name: "MacOS" + runs-on: macos-10.15 + steps: + - name: Install shellspec + run: curl -fsSL https://git.io/shellspec | sh -s 0.28.1 --yes + - uses: actions/checkout@v2 + - name: Run Shellspec + run: | + cd tests/shell + /Users/runner/.local/bin/shellspec