diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst
index 7bc571c36dd..2f9758da9ef 100644
--- a/doc/sphinx-guides/source/admin/metadatacustomization.rst
+++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst
@@ -364,49 +364,51 @@ Each of the three main sections own sets of properties:
#controlledVocabulary (enumerated) properties
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+-----------------------+-----------------------+-----------------------+
-| **Property** | **Purpose** | **Allowed values and |
-| | | restrictions** |
-+-----------------------+-----------------------+-----------------------+
-| DatasetField | Specifies the | Must reference an |
-| | #datasetField to which| existing |
-| | this entry applies. | #datasetField. |
-| | | As a best practice, |
-| | | the value should |
-| | | reference a |
-| | | #datasetField in the |
-| | | current metadata |
-| | | block definition. (It |
-| | | is technically |
-| | | possible to reference |
-| | | an existing |
-| | | #datasetField from |
-| | | another metadata |
-| | | block.) |
-+-----------------------+-----------------------+-----------------------+
-| Value | A short display | Free text |
-| | string, representing | |
-| | an enumerated value | |
-| | for this field. If | |
-| | the identifier | |
-| | property is empty, | |
-| | this value is used as | |
-| | the identifier. | |
-+-----------------------+-----------------------+-----------------------+
-| identifier | A string used to | Free text |
-| | encode the selected | |
-| | enumerated value of a | |
-| | field. If this | |
-| | property is empty, | |
-| | the value of the | |
-| | “Value” field is used | |
-| | as the identifier. | |
-+-----------------------+-----------------------+-----------------------+
-| displayOrder | Control the order in | Non-negative integer. |
-| | which the enumerated | |
-| | values are displayed | |
-| | for selection. | |
-+-----------------------+-----------------------+-----------------------+
+.. list-table::
+ :widths: 10 5 40 40 5
+ :header-rows: 1
+ :align: left
+
+ * - | Property
+ | (Column header)
+ - Column index
+ - Purpose
+ - Allowed values and restrictions
+ - Mandatory
+ * - ``#controlledVocabulary``
+ - 0
+ - Intentionally left blank
+ - (none)
+ - Y
+ * - ``DatasetField``
+ - 1
+ - References the ``#datasetField`` to which this entry applies.
+ - Must reference an existing ``#datasetField``.
+
+ As a best practice, the value should reference a ``#datasetField`` in the current metadata block definition.
+
+ (It is technically possible to reference an existing ``#datasetField`` from another metadata block.)
+ - Y
+ * - ``Value``
+ - 2
+ - A short display string, representing an enumerated value for this field. If the identifier property is empty, this value is used as the identifier.
+ - Free text
+ - Y
+ * - ``identifier``
+ - 3
+ - A string used to encode the selected enumerated value of a field. If this property is empty, the value of the ``Value`` field is used as the ``identifier``.
+ - Either a URL, a URI, or free text matching ASCII characters, digits and ``+``, ``-``, ``_``
+ - N
+ * - ``displayOrder``
+ - 4
+ - Control the order in which the enumerated values are displayed for selection.
+ - Non-negative integer
+ - Y
+ * - ``altValue``
+ - 5..n
+ - Provide alternative values for this entry. Column may be repeated as often as necessary.
+ - Free text
+ - N
FieldType definitions
~~~~~~~~~~~~~~~~~~~~~
diff --git a/pom.xml b/pom.xml
index f1fca03f27b..d4b51cedd41 100644
--- a/pom.xml
+++ b/pom.xml
@@ -404,6 +404,13 @@
provided
+
+
+ com.univocity
+ univocity-parsers
+ 2.9.1
+
+
commons-io
commons-io
diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv
index 375a8c67cec..8bae5e97e32 100644
--- a/scripts/api/data/metadatablocks/citation.tsv
+++ b/scripts/api/data/metadatablocks/citation.tsv
@@ -79,7 +79,7 @@
originOfSources Origin of Sources For historical materials, information about the origin of the sources and the rules followed in establishing the sources should be specified. textbox 75 FALSE FALSE FALSE FALSE FALSE FALSE citation
characteristicOfSources Characteristic of Sources Noted Assessment of characteristics and source material. textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation
accessToSources Documentation and Access to Sources Level of documentation of the original sources. textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation
-#controlledVocabulary DatasetField Value identifier displayOrder
+#controlledVocabulary DatasetField Value identifier displayOrder altValue
subject Agricultural Sciences D01 0
subject Arts and Humanities D0 1
subject Astronomy and Astrophysics D1 2
diff --git a/scripts/api/data/metadatablocks/geospatial.tsv b/scripts/api/data/metadatablocks/geospatial.tsv
index a3a8e7efd58..9f765060767 100644
--- a/scripts/api/data/metadatablocks/geospatial.tsv
+++ b/scripts/api/data/metadatablocks/geospatial.tsv
@@ -12,7 +12,7 @@
eastLongitude East Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial
northLongitude North Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial
southLongitude South Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial
-#controlledVocabulary DatasetField Value identifier displayOrder
+#controlledVocabulary DatasetField Value identifier displayOrder altValue altValue altValue altValue
country Afghanistan 0
country Albania 1
country Algeria 2
diff --git a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java
index 213d648da71..f0d7ff22387 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java
@@ -1,22 +1,21 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
-
package edu.harvard.iq.dataverse;
+import com.univocity.parsers.annotations.Parsed;
+import com.univocity.parsers.annotations.Validate;
import edu.harvard.iq.dataverse.util.BundleUtil;
+import edu.harvard.iq.dataverse.util.metadata.Placeholder;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Locale;
import java.util.Objects;
import java.util.logging.Logger;
import java.util.MissingResourceException;
+import java.util.stream.Collectors;
import javax.persistence.CascadeType;
import javax.persistence.Column;
import javax.persistence.Entity;
@@ -38,11 +37,41 @@ public class ControlledVocabularyValue implements Serializable {
private static final Logger logger = Logger.getLogger(ControlledVocabularyValue.class.getCanonicalName());
- public static final Comparator DisplayOrder = new Comparator() {
- @Override
- public int compare(ControlledVocabularyValue o1, ControlledVocabularyValue o2) {
- return Integer.compare( o1.getDisplayOrder(), o2.getDisplayOrder() );
- }};
+ /**
+ * Identifiers are used to match either URLs (Term), URIs (PID) or strings containing only A-Z, a-z, 0-9, _, + and -
+ * (If no identifier is set, the value will be used, so it may contain spaces in the end. But IF you provide
+ * an identifier, you do it for good reasons. Any real identifiers out there don't contain whitespace for a reason)
+ */
+ public static final String IDENTIFIER_MATCH_REGEX = "^(\\w+:(\\/\\/)?[\\w\\-+&@#/%?=~|!:,.;]*[\\w\\-+&@#/%=~|]|[\\w\\-\\+]+)$";
+ public static final Comparator DisplayOrder = Comparator.comparingInt(ControlledVocabularyValue::getDisplayOrder);
+
+ public enum Headers {
+ DATASET_FIELD(Constants.DATASET_FIELD),
+ VALUE(Constants.VALUE),
+ IDENTIFIER(Constants.IDENTIFIER),
+ DISPLAY_ORDER(Constants.DISPLAY_ORDER),
+ ALT_VALUES(Constants.ALT_VALUES);
+
+ public static final class Constants {
+ public final static String DATASET_FIELD = "DatasetField";
+ public final static String VALUE = "Value";
+ public final static String IDENTIFIER = "identifier";
+ public final static String DISPLAY_ORDER = "displayOrder";
+ public final static String ALT_VALUES = "altValue";
+ }
+
+ private final String key;
+ Headers(String key) {
+ this.key = key;
+ }
+ public String key() {
+ return this.key;
+ }
+
+ public static String[] keys() {
+ return Arrays.stream(values()).map(Headers::key).collect(Collectors.toUnmodifiableList()).toArray(new String[]{});
+ }
+ }
public ControlledVocabularyValue() {
}
@@ -71,9 +100,11 @@ public void setId(Long id) {
public String getStrValue() {
return strValue;
}
+
+ @Parsed(field = Headers.Constants.VALUE)
+ @Validate
public void setStrValue(String strValue) {
this.strValue = strValue;
-
}
private String identifier;
@@ -82,15 +113,29 @@ public String getIdentifier() {
return identifier;
}
+ @Parsed(field = Headers.Constants.IDENTIFIER)
+ @Validate(nullable = true, matches = IDENTIFIER_MATCH_REGEX)
public void setIdentifier(String identifier) {
this.identifier = identifier;
}
-
private int displayOrder;
- public int getDisplayOrder() { return this.displayOrder;}
- public void setDisplayOrder(int displayOrder) {this.displayOrder = displayOrder;}
+ public int getDisplayOrder() {
+ return this.displayOrder;
+ }
+ public void setDisplayOrder(int displayOrder) {
+ this.displayOrder = displayOrder;
+ }
+ /**
+ * Set display order value from String. Allow only non-negative integers (>= 0).
+ * @param displayOrder
+ */
+ @Parsed(field = Headers.Constants.DISPLAY_ORDER)
+ @Validate(matches = "^\\d+$")
+ public void setDisplayOrder(String displayOrder) {
+ this.displayOrder = Integer.parseInt(displayOrder);
+ }
@ManyToOne
@@ -102,6 +147,13 @@ public DatasetFieldType getDatasetFieldType() {
public void setDatasetFieldType(DatasetFieldType datasetFieldType) {
this.datasetFieldType = datasetFieldType;
}
+
+ @Parsed(field = Headers.Constants.DATASET_FIELD)
+ @Validate(matches = DatasetFieldType.FIELD_NAME_REGEX)
+ private void setDatasetFieldType(String datasetFieldType) {
+ this.datasetFieldType = new Placeholder.DatasetFieldType();
+ this.datasetFieldType.setName(datasetFieldType);
+ }
@OneToMany(mappedBy = "controlledVocabularyValue", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}, orphanRemoval=true)
private Collection controlledVocabAlternates = new ArrayList<>();
@@ -113,6 +165,23 @@ public Collection getControlledVocabAlternates() {
public void setControlledVocabAlternates(Collection controlledVocabAlternates) {
this.controlledVocabAlternates = controlledVocabAlternates;
}
+
+ /**
+ * A hacky workaround to allow arbitrary numbers of "altValue" columns in the TSV file, providing
+ * alternative values for the controlled vocabulary value.
+ * @param alternative
+ */
+ @Parsed(field = Headers.Constants.ALT_VALUES)
+ @Validate(nullable = true, allowBlanks = true)
+ private void addControlledVocabAlternates(String alternative) {
+ if (alternative == null || alternative.isBlank()) {
+ return;
+ }
+ ControlledVocabAlternate alt = new Placeholder.ControlledVocabAlternate();
+ alt.setControlledVocabularyValue(this);
+ alt.setStrValue(alternative);
+ this.controlledVocabAlternates.add(alt);
+ }
public String getLocaleStrValue() {
return getLocaleStrValue(null);
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java
index a092cdad784..af668b2e227 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java
@@ -1,8 +1,16 @@
package edu.harvard.iq.dataverse;
+import com.univocity.parsers.annotations.BooleanString;
+import com.univocity.parsers.annotations.EnumOptions;
+import com.univocity.parsers.annotations.Parsed;
+import com.univocity.parsers.annotations.UpperCase;
+import com.univocity.parsers.annotations.Validate;
+import com.univocity.parsers.conversions.EnumSelector;
import edu.harvard.iq.dataverse.search.SolrField;
import edu.harvard.iq.dataverse.util.BundleUtil;
+import edu.harvard.iq.dataverse.util.metadata.Placeholder;
+import java.util.Arrays;
import java.util.Collection;
import java.io.Serializable;
@@ -12,6 +20,7 @@
import java.util.Set;
import java.util.TreeMap;
import java.util.MissingResourceException;
+import java.util.stream.Collectors;
import javax.faces.model.SelectItem;
import javax.persistence.*;
@@ -21,16 +30,81 @@
*/
@NamedQueries({
@NamedQuery(name="DatasetFieldType.findByName",
- query= "SELECT dsfType FROM DatasetFieldType dsfType WHERE dsfType.name=:name"),
- @NamedQuery(name = "DatasetFieldType.findAllFacetable",
- query= "select dsfType from DatasetFieldType dsfType WHERE dsfType.facetable = true and dsfType.title != '' order by dsfType.id"),
+ query= "SELECT dsfType FROM DatasetFieldType dsfType WHERE dsfType.name=:name"),
+ @NamedQuery(name = "DatasetFieldType.findAllFacetable",
+ query= "select dsfType from DatasetFieldType dsfType WHERE dsfType.facetable = true and dsfType.title != '' order by dsfType.id"),
@NamedQuery(name = "DatasetFieldType.findFacetableByMetadaBlock",
- query= "select dsfType from DatasetFieldType dsfType WHERE dsfType.facetable = true and dsfType.title != '' and dsfType.metadataBlock.id = :metadataBlockId order by dsfType.id")
+ query= "select dsfType from DatasetFieldType dsfType WHERE dsfType.facetable = true and dsfType.title != '' and dsfType.metadataBlock.id = :metadataBlockId order by dsfType.id")
})
@Entity
@Table(indexes = {@Index(columnList="metadatablock_id"),@Index(columnList="parentdatasetfieldtype_id")})
public class DatasetFieldType implements Serializable, Comparable {
-
+
+ /**
+ * Match (1) "[A-Za-z][\w\.]+\w" or (2) [A-Za-z_][\w\.]+?[\w&&[^_]]
+ * (1): Start with a letter, do not end with .
+ * (2): Start with a letter or _, do not end with . or _. (Invalidates _xxx_ which is reserved for Solr internal use)
+ *
+ * Try here: https://regex101.com/r/ULlonz/1
+ */
+ public static final String FIELD_NAME_REGEX = "^([A-Za-z][\\w\\.]+\\w|[A-Za-z_][\\w\\.]+?[\\w&&[^_]])$";
+
+ public enum Headers {
+ NAME(Constants.NAME),
+ TITLE(Constants.TITLE),
+ DESCRIPTION(Constants.DESCRIPTION),
+ WATERMARK(Constants.WATERMARK),
+ FIELD_TYPE(Constants.FIELD_TYPE),
+ DISPLAY_ORDER(Constants.DISPLAY_ORDER),
+ DISPLAY_FORMAT(Constants.DISPLAY_FORMAT),
+ ADVANCED_SEARCH_FIELD(Constants.ADVANCED_SEARCH_FIELD),
+ ALLOW_CONTROLLED_VOCABULARY(Constants.ALLOW_CONTROLLED_VOCABULARY),
+ ALLOW_MULTIPLES(Constants.ALLOW_MULTIPLES),
+ FACETABLE(Constants.FACETABLE),
+ DISPLAY_ON_CREATE(Constants.DISPLAY_ON_CREATE),
+ REQUIRED(Constants.REQUIRED),
+ PARENT(Constants.PARENT),
+ METADATA_BLOCK(Constants.METADATA_BLOCK),
+ TERM_URI(Constants.TERM_URI);
+
+ public static final class Constants {
+ public final static String NAME = "name";
+ public final static String TITLE = "title";
+ public final static String DESCRIPTION = "description";
+ public final static String WATERMARK = "watermark";
+ public final static String FIELD_TYPE = "fieldType";
+ public final static String DISPLAY_ORDER = "displayOrder";
+ public final static String DISPLAY_FORMAT = "displayFormat";
+ public final static String ADVANCED_SEARCH_FIELD = "advancedSearchField";
+ public final static String ALLOW_CONTROLLED_VOCABULARY = "allowControlledVocabulary";
+ public final static String ALLOW_MULTIPLES = "allowmultiples";
+ public final static String FACETABLE = "facetable";
+ public final static String DISPLAY_ON_CREATE = "displayoncreate";
+ public final static String DISPLAY_ON_CREATE_V43 = "showabovefold";
+ public final static String REQUIRED = "required";
+ public final static String PARENT = "parent";
+ public final static String METADATA_BLOCK = "metadatablock_id";
+ public final static String TERM_URI = "termURI";
+ }
+
+ private final String key;
+ Headers(String key) {
+ this.key = key;
+ }
+ public String key() {
+ return this.key;
+ }
+
+ public static String[] keys() {
+ return Arrays.stream(values()).map(v -> v.key()).collect(Collectors.toUnmodifiableList()).toArray(new String[]{});
+ }
+
+ public static List booleanKeys() {
+ return List.of(ADVANCED_SEARCH_FIELD, ALLOW_CONTROLLED_VOCABULARY, ALLOW_MULTIPLES,
+ FACETABLE, DISPLAY_ON_CREATE, REQUIRED);
+ }
+ }
+
/**
* The set of possible metatypes of the field. Used for validation and layout.
*/
@@ -160,11 +234,23 @@ public int getDisplayOrder() {
public void setDisplayOrder(int displayOrder) {
this.displayOrder = displayOrder;
}
+
+ /**
+ * Set display order value from String. Allow only non-negative integers (>= 0).
+ * @param displayOrder
+ */
+ @Parsed(field = Headers.Constants.DISPLAY_ORDER)
+ @Validate(matches = "^\\d+$")
+ public void setDisplayOrder(String displayOrder) {
+ this.displayOrder = Integer.parseInt(displayOrder);
+ }
public String getDisplayFormat() {
return displayFormat;
}
+ @Parsed(field = Headers.Constants.DISPLAY_FORMAT)
+ @Validate(nullable = true)
public void setDisplayFormat(String displayFormat) {
this.displayFormat = displayFormat;
}
@@ -189,7 +275,19 @@ public Boolean isEscapeOutputText(){
public String getName() {
return name;
}
-
+
+ /**
+ * Set a fields name. Maps to Solr Field names, thus requires following their naming conventions.
+ * This is a required field!
+ *
+ * 1. Solr: "Field names should consist of alphanumeric or underscore characters only and not start with a digit.
+ * Names with both leading and trailing underscores (e.g. _version_) are reserved."
+ * 2. Names may contain dots (historically grown...), Solr seems to be OK with that
+ *
+ * @param name
+ */
+ @Parsed(field = Headers.Constants.NAME)
+ @Validate(matches = FIELD_NAME_REGEX)
public void setName(String name) {
this.name = name;
}
@@ -198,6 +296,8 @@ public String getTitle() {
return title;
}
+ @Parsed(field = Headers.Constants.TITLE)
+ @Validate
public void setTitle(String title) {
this.title = title;
}
@@ -206,6 +306,8 @@ public String getDescription() {
return description;
}
+ @Parsed(field = Headers.Constants.DESCRIPTION)
+ @Validate(allowBlanks = true, nullable = true)
public void setDescription(String description) {
this.description = description;
}
@@ -213,7 +315,10 @@ public void setDescription(String description) {
public boolean isAllowControlledVocabulary() {
return allowControlledVocabulary;
}
-
+
+ @Parsed(field = Headers.Constants.ALLOW_CONTROLLED_VOCABULARY)
+ @Validate
+ @BooleanString(trueStrings = {"true", "TRUE"}, falseStrings = {"false", "FALSE"})
public void setAllowControlledVocabulary(boolean allowControlledVocabulary) {
this.allowControlledVocabulary = allowControlledVocabulary;
}
@@ -227,7 +332,10 @@ public void setAllowControlledVocabulary(boolean allowControlledVocabulary) {
public boolean isAllowMultiples() {
return this.allowMultiples;
}
-
+
+ @Parsed(field = Headers.Constants.ALLOW_MULTIPLES)
+ @Validate
+ @BooleanString(trueStrings = {"true", "TRUE"}, falseStrings = {"false", "FALSE"})
public void setAllowMultiples(boolean allowMultiples) {
this.allowMultiples = allowMultiples;
}
@@ -236,6 +344,10 @@ public FieldType getFieldType() {
return fieldType;
}
+ @Parsed(field = Headers.Constants.FIELD_TYPE)
+ @Validate
+ @UpperCase
+ @EnumOptions(selectors = EnumSelector.NAME)
public void setFieldType(FieldType fieldType) {
this.fieldType = fieldType;
}
@@ -244,6 +356,8 @@ public String getWatermark() {
return watermark;
}
+ @Parsed(field = Headers.Constants.WATERMARK)
+ @Validate(allowBlanks = true, nullable = true)
public void setWatermark(String watermark) {
this.watermark = watermark;
}
@@ -255,7 +369,10 @@ public void setWatermark(String watermark) {
public boolean isFacetable() {
return facetable;
}
-
+
+ @Parsed(field = Headers.Constants.FACETABLE)
+ @Validate
+ @BooleanString(trueStrings = {"true", "TRUE"}, falseStrings = {"false", "FALSE"})
public void setFacetable(boolean facetable) {
this.facetable = facetable;
}
@@ -278,6 +395,9 @@ public boolean isDisplayOnCreate() {
return displayOnCreate;
}
+ @Parsed(field = { Headers.Constants.DISPLAY_ON_CREATE, Headers.Constants.DISPLAY_ON_CREATE_V43 })
+ @Validate
+ @BooleanString(trueStrings = {"true", "TRUE"}, falseStrings = {"false", "FALSE"})
public void setDisplayOnCreate(boolean displayOnCreate) {
this.displayOnCreate = displayOnCreate;
}
@@ -299,6 +419,13 @@ public MetadataBlock getMetadataBlock() {
public void setMetadataBlock(MetadataBlock metadataBlock) {
this.metadataBlock = metadataBlock;
}
+
+ @Parsed(field = Headers.Constants.METADATA_BLOCK)
+ @Validate(matches = MetadataBlock.BLOCK_NAME_REGEX)
+ private void setMetadataBlock(String metadataBlock) {
+ this.metadataBlock = new Placeholder.MetadataBlock();
+ this.metadataBlock.setName(metadataBlock);
+ }
/**
* A formal URI for the field used in json-ld exports
@@ -307,11 +434,13 @@ public void setMetadataBlock(MetadataBlock metadataBlock) {
private String uri;
public String getUri() {
- return uri;
+ return uri;
}
-
+
+ @Parsed(field = Headers.Constants.TERM_URI)
+ @Validate(nullable = true)
public void setUri(String uri) {
- this.uri=uri;
+ this.uri=uri;
}
/**
@@ -370,6 +499,13 @@ public DatasetFieldType getParentDatasetFieldType() {
public void setParentDatasetFieldType(DatasetFieldType parentDatasetFieldType) {
this.parentDatasetFieldType = parentDatasetFieldType;
}
+
+ @Parsed(field = Headers.Constants.PARENT)
+ @Validate(nullable = true, matches = FIELD_NAME_REGEX)
+ private void setParentDatasetFieldType(String parent) {
+ this.parentDatasetFieldType = new Placeholder.DatasetFieldType();
+ this.parentDatasetFieldType.setName(parent);
+ }
public Set getDataverseFacets() {
@@ -412,7 +548,10 @@ public void setListValues(List listValues) {
public boolean isRequired() {
return this.required;
}
-
+
+ @Parsed(field = Headers.Constants.REQUIRED)
+ @Validate
+ @BooleanString(trueStrings = {"true", "TRUE"}, falseStrings = {"false", "FALSE"})
public void setRequired(boolean required) {
this.required = required;
}
@@ -422,7 +561,10 @@ public void setRequired(boolean required) {
public boolean isAdvancedSearchFieldType() {
return this.advancedSearchFieldType;
}
-
+
+ @Parsed(field = Headers.Constants.ADVANCED_SEARCH_FIELD)
+ @Validate
+ @BooleanString(trueStrings = {"true", "TRUE"}, falseStrings = {"false", "FALSE"})
public void setAdvancedSearchFieldType(boolean advancedSearchFieldType) {
this.advancedSearchFieldType = advancedSearchFieldType;
}
@@ -606,6 +748,25 @@ public String getTmpNullFieldTypeIdentifier() {
@Override
public String toString() {
- return "[DatasetFieldType name:" + getName() + " id:" + getId() + "]";
+ return "DatasetFieldType{" +
+ "id=" + id +
+ ", name='" + name + '\'' +
+ ", title='" + title + '\'' +
+ ", description='" + description + '\'' +
+ ", fieldType=" + fieldType +
+ ", allowControlledVocabulary=" + allowControlledVocabulary +
+ ", watermark='" + watermark + '\'' +
+ ", validationFormat='" + validationFormat + '\'' +
+ ", displayOrder=" + displayOrder +
+ ", displayFormat='" + displayFormat + '\'' +
+ ", allowMultiples=" + allowMultiples +
+ ", facetable=" + facetable +
+ ", displayOnCreate=" + displayOnCreate +
+ ", metadataBlock=" + metadataBlock +
+ ", uri='" + uri + '\'' +
+ ", parentDatasetFieldType=" + parentDatasetFieldType +
+ ", required=" + required +
+ ", advancedSearchFieldType=" + advancedSearchFieldType +
+ '}';
}
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java b/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java
index 1a1a87b1b87..ed2b4104270 100644
--- a/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java
+++ b/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java
@@ -1,11 +1,16 @@
package edu.harvard.iq.dataverse;
+import com.univocity.parsers.annotations.Parsed;
+import com.univocity.parsers.annotations.Validate;
import edu.harvard.iq.dataverse.util.BundleUtil;
+import edu.harvard.iq.dataverse.util.metadata.Placeholder;
import java.io.Serializable;
+import java.util.Arrays;
import java.util.List;
import java.util.MissingResourceException;
import java.util.Objects;
+import java.util.stream.Collectors;
import javax.persistence.CascadeType;
import javax.persistence.Column;
import javax.persistence.Entity;
@@ -26,14 +31,47 @@
*
* @author skraffmiller
*/
-@Table(indexes = {@Index(columnList="name")
- , @Index(columnList="owner_id")})
+@Table(indexes = {@Index(columnList="name"),
+ @Index(columnList="owner_id")})
@NamedQueries({
@NamedQuery( name="MetadataBlock.listAll", query = "SELECT mdb FROM MetadataBlock mdb"),
@NamedQuery( name="MetadataBlock.findByName", query = "SELECT mdb FROM MetadataBlock mdb WHERE mdb.name=:name")
})
@Entity
public class MetadataBlock implements Serializable {
+
+ public static final String BLOCK_NAME_REGEX = "^[a-z][\\w]+$";
+
+ /**
+ * Reusable definition of headers used for parsing this model class from data (TSV, JSON, manual, ...)
+ * Using the Headers.Constants class to work around annotations not able to use enum values (a Java limitation).
+ */
+ public enum Headers {
+ // Order matters: this must be the same order as we define rules for the TSV format!
+ NAME(Constants.NAME),
+ OWNER(Constants.OWNER),
+ DISPLAY_NAME(Constants.DISPLAY_NAME),
+ NAMESPACE_URI(Constants.NAMESPACE_URI);
+
+ public static final class Constants {
+ public final static String NAME = "name";
+ public final static String OWNER = "dataverseAlias";
+ public final static String DISPLAY_NAME = "displayName";
+ public final static String NAMESPACE_URI = "blockURI";
+ }
+
+ private final String key;
+ Headers(String key) {
+ this.key = key;
+ }
+ public String key() {
+ return this.key;
+ }
+
+ public static String[] keys() {
+ return Arrays.stream(values()).map(v -> v.key()).collect(Collectors.toUnmodifiableList()).toArray(new String[]{});
+ }
+ }
private static final long serialVersionUID = 1L;
@@ -59,6 +97,10 @@ public void setId(Long id) {
public String getName() {
return name;
}
+
+ @Parsed(field = Headers.Constants.NAME)
+ // Docs: No spaces or punctuation, except underscore. By convention, should start with a letter, and use lower camel case
+ @Validate(matches = BLOCK_NAME_REGEX)
public void setName(String name) {
this.name = name;
}
@@ -66,6 +108,9 @@ public void setName(String name) {
public String getNamespaceUri() {
return namespaceUri;
}
+
+ @Parsed(field = Headers.Constants.NAMESPACE_URI)
+ @Validate(nullable = true, matches = "^https?://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]")
public void setNamespaceUri(String namespaceUri) {
this.namespaceUri = namespaceUri;
}
@@ -93,6 +138,9 @@ public boolean isDisplayOnCreate() {
public String getDisplayName() {
return displayName;
}
+
+ @Parsed(field = Headers.Constants.DISPLAY_NAME)
+ @Validate(matches = "^\\S.{0,255}$") // docs: match all but not blank strings, at least 1 character needed, not nullable, max 256 chars
public void setDisplayName(String displayName) {
this.displayName = displayName;
}
@@ -109,10 +157,33 @@ public boolean isRequired() {
public Dataverse getOwner() {
return owner;
}
-
+
public void setOwner(Dataverse owner) {
this.owner = owner;
}
+
+ /**
+ * Set the (optional) owning Dataverse collection of this metadata block. This and children of the collection
+ * will be able to use the metadata block.
+ *
+ * When this block is parsed by {@link edu.harvard.iq.dataverse.util.metadata.TsvMetadataBlockParser},
+ * the alias given in the TSV will be validated. For valid values see the docs
+ * ("Special characters (~,`, !, @, #, $, %, ^, &, and *) and spaces are not allowed")
+ * and {@link edu.harvard.iq.dataverse.Dataverse#alias} validation patterns.
+ * (The possessive matcher "+*" below achieves in 1 regex where the other validator needs 2)
+ *
+ * During parsing, a placeholder will be injected here, needing replacement and more validation.
+ *
+ * @param dataverseAlias The alias/identifier of the owning Dataverse collection
+ */
+ @Parsed(field = Headers.Constants.OWNER)
+ @Validate(nullable = true, matches = "^[\\d]*+[\\w\\-]+$")
+ protected void setOwner(String dataverseAlias) {
+ if (dataverseAlias == null)
+ return;
+ this.owner = new Placeholder.Dataverse();
+ this.owner.setAlias(dataverseAlias);
+ }
@Transient
private boolean empty;
@@ -176,15 +247,25 @@ public boolean equals(Object object) {
}
MetadataBlock other = (MetadataBlock) object;
return !(!Objects.equals(this.id, other.id) && (this.id == null || !this.id.equals(other.id)));
- }
+ }
@Override
public String toString() {
- return "edu.harvard.iq.dataverse.MetadataBlock[ id=" + id + " ]";
+ return "MetadataBlock{" +
+ "id=" + id +
+ ", name='" + name + '\'' +
+ ", displayName='" + displayName + '\'' +
+ ", namespaceUri='" + namespaceUri + '\'' +
+ ", datasetFieldTypes=" + datasetFieldTypes +
+ ", owner=" + owner +
+ ", empty=" + empty +
+ ", selected=" + selected +
+ ", hasRequired=" + hasRequired +
+ ", showDatasetFieldTypes=" + showDatasetFieldTypes +
+ '}';
}
-
- public String getLocaleDisplayName()
- {
+
+ public String getLocaleDisplayName() {
try {
return BundleUtil.getStringFromPropertyFile("metadatablock.displayName", getName());
} catch (MissingResourceException e) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/metadata/Placeholder.java b/src/main/java/edu/harvard/iq/dataverse/util/metadata/Placeholder.java
new file mode 100644
index 00000000000..8e841ae4457
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/util/metadata/Placeholder.java
@@ -0,0 +1,14 @@
+package edu.harvard.iq.dataverse.util.metadata;
+
+import edu.harvard.iq.dataverse.ControlledVocabAlternate;
+
+/**
+ * This class provides some simple markers, so we can distinguish if we need to replace a placeholder with
+ * a real object from the database/... when handing over after parsing
+ */
+public class Placeholder {
+ public static final class Dataverse extends edu.harvard.iq.dataverse.Dataverse {}
+ public static final class MetadataBlock extends edu.harvard.iq.dataverse.MetadataBlock {}
+ public static final class DatasetFieldType extends edu.harvard.iq.dataverse.DatasetFieldType {}
+ public static final class ControlledVocabAlternate extends edu.harvard.iq.dataverse.ControlledVocabAlternate {}
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldTypeTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldTypeTest.java
index ed17bd229d9..e96c148246c 100644
--- a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldTypeTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldTypeTest.java
@@ -5,17 +5,36 @@
*/
package edu.harvard.iq.dataverse;
+import com.univocity.parsers.common.DataProcessingException;
+import com.univocity.parsers.common.DataValidationException;
+import com.univocity.parsers.common.processor.BeanListProcessor;
+import com.univocity.parsers.tsv.TsvParser;
+import com.univocity.parsers.tsv.TsvParserSettings;
import edu.harvard.iq.dataverse.search.SolrField;
-import java.util.Collection;
+import edu.harvard.iq.dataverse.util.metadata.Placeholder;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.EmptySource;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
-import java.util.Set;
-import javax.faces.model.SelectItem;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import static org.junit.Assert.*;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
/**
*
@@ -23,33 +42,180 @@
*/
public class DatasetFieldTypeTest {
- public DatasetFieldTypeTest() {
+ static BeanListProcessor<DatasetFieldType> datasetFieldTypeProcessor = new BeanListProcessor<>(DatasetFieldType.class);
+ static TsvParser parser;
+ static TsvParserSettings settings = new TsvParserSettings();
+
+ static Map<DatasetFieldType.Headers, String> subject = new HashMap<>();
+
+ @BeforeAll
+ static void setUpClass() {
+ settings.setProcessor(datasetFieldTypeProcessor);
+ settings.setHeaderExtractionEnabled(true);
+ // TODO: replace this char with a global constant (introduced when creating the parsing bean)
+ settings.getFormat().setComment('\'');
+ parser = new TsvParser(settings);
+ }
+
+ @BeforeEach
+ void setUp() {
+ subject.clear();
+ subject.put(DatasetFieldType.Headers.NAME, "test");
+ subject.put(DatasetFieldType.Headers.TITLE, "Testfield");
+ subject.put(DatasetFieldType.Headers.DESCRIPTION, "A little test");
+ subject.put(DatasetFieldType.Headers.WATERMARK, "Type here...");
+ subject.put(DatasetFieldType.Headers.FIELD_TYPE, "none");
+ subject.put(DatasetFieldType.Headers.DISPLAY_ORDER, "1");
+ subject.put(DatasetFieldType.Headers.DISPLAY_FORMAT, "");
+ subject.put(DatasetFieldType.Headers.ADVANCED_SEARCH_FIELD, "FALSE");
+ subject.put(DatasetFieldType.Headers.ALLOW_CONTROLLED_VOCABULARY, "FALSE");
+ subject.put(DatasetFieldType.Headers.ALLOW_MULTIPLES, "FALSE");
+ subject.put(DatasetFieldType.Headers.FACETABLE, "FALSE");
+ subject.put(DatasetFieldType.Headers.DISPLAY_ON_CREATE, "FALSE");
+ subject.put(DatasetFieldType.Headers.REQUIRED, "FALSE");
+ subject.put(DatasetFieldType.Headers.PARENT, "");
+ subject.put(DatasetFieldType.Headers.METADATA_BLOCK, "test");
+ subject.put(DatasetFieldType.Headers.TERM_URI, "");
}
- @BeforeClass
- public static void setUpClass() {
+ @Test
+ public void parseUnmodifiedSubject() {
+ // given (remember - subject will be RESET before every test!)
+ StringReader subjectUnderTest = new StringReader(generateDatasetFieldTSV(subject));
+ // when
+ parser.parse(subjectUnderTest);
+ // then
+ assertEquals(1, datasetFieldTypeProcessor.getBeans().size());
+ assertNotNull(datasetFieldTypeProcessor.getBeans().get(0));
+ assertNotNull(datasetFieldTypeProcessor.getBeans().get(0).getName());
}
- @AfterClass
- public static void tearDownClass() {
+ private static Stream<Arguments> booleanOptionsMatrix(List<String> testValues) {
+ List<Arguments> args = new ArrayList<>();
+ // create a "matrix" with stream in stream and flattening afterwards.
+ testValues.stream()
+ .map(tv -> DatasetFieldType.Headers.booleanKeys()
+ .stream()
+ .map(h -> Arguments.of(h, tv))
+ .collect(Collectors.toUnmodifiableList()))
+ .forEach(args::addAll);
+ return args.stream();
}
- @Before
- public void setUp() {
+ private static Stream<Arguments> booleanOptionsInvalidMatrix() {
+ return booleanOptionsMatrix(List.of("blubb", "1234", "0", "1"));
}
- @After
- public void tearDown() {
+ private static Stream<Arguments> booleanOptionsValidMatrix() {
+ return booleanOptionsMatrix(List.of("true", "TRUE", "false", "FALSE"));
+ }
+
+ @ParameterizedTest
+ @MethodSource("booleanOptionsInvalidMatrix")
+ public void parseInvalidBooleanOpt(DatasetFieldType.Headers key, String boolOpt) {
+ // given (remember - subject will be RESET before every test!)
+ subject.put(key, boolOpt);
+ StringReader subjectUnderTest = new StringReader(generateDatasetFieldTSV(subject));
+ // when & then
+ assertThrows(DataProcessingException.class, () -> parser.parse(subjectUnderTest));
+ }
+
+ @ParameterizedTest
+ @MethodSource("booleanOptionsValidMatrix")
+ public void parseValidBooleanOpt(DatasetFieldType.Headers key, String boolOpt) {
+ // given (remember - subject will be RESET before every test!)
+ subject.put(key, boolOpt);
+ StringReader subjectUnderTest = new StringReader(generateDatasetFieldTSV(subject));
+ // when
+ parser.parse(subjectUnderTest);
+ // then
+ assertEquals(1, datasetFieldTypeProcessor.getBeans().size());
+ assertNotNull(datasetFieldTypeProcessor.getBeans().get(0));
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"-1", "-100", "0.00", "abc", "_foobar!"})
+ public void parseInvalidDisplayOrder(String displayOrder) {
+ // given (remember - subject will be RESET before every test!)
+ subject.put(DatasetFieldType.Headers.DISPLAY_ORDER, displayOrder);
+ StringReader subjectUnderTest = new StringReader(generateDatasetFieldTSV(subject));
+ // when & then
+ assertThrows(DataValidationException.class, () -> parser.parse(subjectUnderTest));
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {".foobar", "!foo", "foo!", "_foo_", "-foo-foo", "foo.", "_foo.foo_", "1foo", ".bar"})
+ public void parseInvalidName(String name) {
+ // given (remember - subject will be RESET before every test!)
+ subject.put(DatasetFieldType.Headers.NAME, name);
+ StringReader subjectUnderTest = new StringReader(generateDatasetFieldTSV(subject));
+ // when & then
+ assertThrows(DataValidationException.class, () -> parser.parse(subjectUnderTest));
+ }
+
+ @ParameterizedTest
+ @EmptySource
+ @ValueSource(strings = {"hello", "HelloMyName", "hello_my_name_is", "_foo.bar", "foo.bar", "_foo", "foo_"})
+ void parseValidParentName(String parent) {
+ // given
+ subject.put(DatasetFieldType.Headers.PARENT, parent);
+ StringReader reader = new StringReader(generateDatasetFieldTSV(subject));
+
+ // when
+ parser.parse(reader);
+ List<DatasetFieldType> blocks = datasetFieldTypeProcessor.getBeans();
+
+ // then
+ assertEquals(1, blocks.size());
+ if (!parent.isEmpty()) {
+ assertNotNull(blocks.get(0).getParentDatasetFieldType());
+ assertTrue(blocks.get(0).getParentDatasetFieldType() instanceof DatasetFieldType);
+ assertTrue(blocks.get(0).getParentDatasetFieldType() instanceof Placeholder.DatasetFieldType);
+ assertEquals(parent, blocks.get(0).getParentDatasetFieldType().getName());
+ }
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"hello", "helloMyName", "hello_my_name_is", "foobar_", "foo213bar", "foo1234", "foo_"})
+ void parseValidMetadataBlock(String block) {
+ // given
+ subject.put(DatasetFieldType.Headers.METADATA_BLOCK, block);
+ StringReader reader = new StringReader(generateDatasetFieldTSV(subject));
+
+ // when
+ parser.parse(reader);
+ List<DatasetFieldType> blocks = datasetFieldTypeProcessor.getBeans();
+
+ // then
+ assertEquals(1, blocks.size());
+ assertNotNull(blocks.get(0).getMetadataBlock());
+ assertTrue(blocks.get(0).getMetadataBlock() instanceof MetadataBlock);
+ assertTrue(blocks.get(0).getMetadataBlock() instanceof Placeholder.MetadataBlock);
+ assertEquals(block, blocks.get(0).getMetadataBlock().getName());
+ }
+
+ @Test
+ void parseBackwardCompatibleDisplayOnCreate() {
+ // given
+ subject.put(DatasetFieldType.Headers.DISPLAY_ON_CREATE, "true");
+ String tsv = generateDatasetFieldTSV(subject);
+ StringReader reader1 = new StringReader(tsv);
+ StringReader reader2 = new StringReader(tsv.replace(DatasetFieldType.Headers.Constants.DISPLAY_ON_CREATE, DatasetFieldType.Headers.Constants.DISPLAY_ON_CREATE_V43));
+
+ // when
+ parser.parse(reader1);
+ assertEquals(1, datasetFieldTypeProcessor.getBeans().size());
+ DatasetFieldType field1 = datasetFieldTypeProcessor.getBeans().get(0);
+
+ parser.parse(reader2);
+ assertEquals(1, datasetFieldTypeProcessor.getBeans().size());
+ DatasetFieldType field2 = datasetFieldTypeProcessor.getBeans().get(0);
+
+ // then
+ assertEquals(field1, field2);
+ assertTrue(field1.isDisplayOnCreate());
+ assertTrue(field2.isDisplayOnCreate());
}
-
-
-
- /**
- * Test of setInclude method, of class DatasetFieldType.
- */
-
-
-
/**
@@ -127,7 +293,22 @@ public void testGetSolrField(){
assertEquals(true, solrField.isAllowedToBeMultivalued());
}
-
-
+
+ private static final String header = "#datasetField\t" + String.join("\t", DatasetFieldType.Headers.keys());
+
+ /**
+ * This method simply inserts all the values from the map into a line, combined by \t and adds a "header" line before it.
+ * It does this based on the {@link DatasetFieldType.Headers} enum value order, which is the same as in the TSV definition.
+ * Nonpresent values will be inserted as blank strings.
+ *
+ * @param values
+ * @return
+ */
+ public static String generateDatasetFieldTSV(Map<DatasetFieldType.Headers, String> values) {
+ List<String> fieldValues = Arrays.stream(DatasetFieldType.Headers.values())
+ .map(k -> values.getOrDefault(k, ""))
+ .collect(Collectors.toList());
+ return header + settings.getFormat().getLineSeparatorString() + "\t" + String.join("\t", fieldValues);
+ }
}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/metadata/ControlledVocabularyValueParsingTest.java b/src/test/java/edu/harvard/iq/dataverse/util/metadata/ControlledVocabularyValueParsingTest.java
new file mode 100644
index 00000000000..fe73fcd7962
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/metadata/ControlledVocabularyValueParsingTest.java
@@ -0,0 +1,163 @@
+package edu.harvard.iq.dataverse.util.metadata;
+
+import com.univocity.parsers.common.DataValidationException;
+import com.univocity.parsers.common.processor.BeanListProcessor;
+import com.univocity.parsers.tsv.TsvParser;
+import com.univocity.parsers.tsv.TsvParserSettings;
+import edu.harvard.iq.dataverse.ControlledVocabularyValue;
+import edu.harvard.iq.dataverse.DatasetFieldType;
+import edu.harvard.iq.dataverse.MetadataBlock;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.EmptySource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class ControlledVocabularyValueParsingTest {
+
+ static BeanListProcessor<ControlledVocabularyValue> controlledVocabularyValueProcessor = new BeanListProcessor<>(ControlledVocabularyValue.class);
+ static TsvParser parser;
+ static TsvParserSettings settings = new TsvParserSettings();
+
+ @BeforeAll
+ static void setUp() {
+ settings.setProcessor(controlledVocabularyValueProcessor);
+ settings.setHeaderExtractionEnabled(true);
+ // TODO: replace this char with a global constant (introduced when creating the parsing bean)
+ settings.getFormat().setComment('\'');
+ parser = new TsvParser(settings);
+ }
+
+ @ParameterizedTest
+ @EmptySource
+ @ValueSource(strings = {" "})
+ public void parseInvalidValue(String value) {
+ // given
+ StringReader subjectUnderTest = new StringReader(generateCvvTSV(Map.of(
+ ControlledVocabularyValue.Headers.DATASET_FIELD, "test",
+ ControlledVocabularyValue.Headers.VALUE, value,
+ ControlledVocabularyValue.Headers.DISPLAY_ORDER, "0")));
+ // when & then
+ assertThrows(DataValidationException.class, () -> parser.parse(subjectUnderTest));
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"https://www^^", "doi:1234^", "hello my name", "hello!", "hello#"})
+ public void parseInvalidIdentifier(String identifier) {
+ // given
+ StringReader subjectUnderTest = new StringReader(generateCvvTSV(Map.of(
+ ControlledVocabularyValue.Headers.DATASET_FIELD, "test",
+ ControlledVocabularyValue.Headers.VALUE, "test",
+ ControlledVocabularyValue.Headers.DISPLAY_ORDER, "0",
+ ControlledVocabularyValue.Headers.IDENTIFIER, identifier)));
+ // when & then
+ assertThrows(DataValidationException.class, () -> parser.parse(subjectUnderTest));
+ }
+
+ @ParameterizedTest
+ @EmptySource
+ @ValueSource(strings = {"https://skosmos/foo#bar", "doi:1234", "hello_my-name", "foo+bar"})
+ public void parseValidIdentifier(String identifier) {
+ // given
+ StringReader subjectUnderTest = new StringReader(generateCvvTSV(Map.of(
+ ControlledVocabularyValue.Headers.DATASET_FIELD, "test",
+ ControlledVocabularyValue.Headers.VALUE, "test",
+ ControlledVocabularyValue.Headers.DISPLAY_ORDER, "0",
+ ControlledVocabularyValue.Headers.IDENTIFIER, identifier)));
+ // when
+ parser.parse(subjectUnderTest);
+ // then
+ assertEquals(1, controlledVocabularyValueProcessor.getBeans().size());
+ if (!identifier.isEmpty()) {
+ assertEquals(identifier, controlledVocabularyValueProcessor.getBeans().get(0).getIdentifier());
+ } else {
+ assertNull(controlledVocabularyValueProcessor.getBeans().get(0).getIdentifier());
+ }
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"-1", "-100", "0.00", "abc", "_foobar!"})
+ public void parseInvalidDisplayOrder(String displayOrder) {
+ // given
+ StringReader subjectUnderTest = new StringReader(generateCvvTSV(Map.of(
+ ControlledVocabularyValue.Headers.DATASET_FIELD, "test",
+ ControlledVocabularyValue.Headers.VALUE, "test",
+ ControlledVocabularyValue.Headers.DISPLAY_ORDER, displayOrder)));
+ // when & then
+ assertThrows(DataValidationException.class, () -> parser.parse(subjectUnderTest));
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {".foobar", "!foo", "foo!", "_foo_", "-foo-foo", "foo.", "_foo.foo_", "1foo", ".bar"})
+ public void parseInvalidDatasetFieldTypeName(String fieldName) {
+ // given
+ StringReader subjectUnderTest = new StringReader(generateCvvTSV(Map.of(
+ ControlledVocabularyValue.Headers.DATASET_FIELD, fieldName,
+ ControlledVocabularyValue.Headers.VALUE, "test",
+ ControlledVocabularyValue.Headers.DISPLAY_ORDER, "0")));
+ // when & then
+ assertThrows(DataValidationException.class, () -> parser.parse(subjectUnderTest));
+ }
+
+ @Test
+ public void parseAlternateValues() {
+ // given
+ String t1 = "test1";
+ String t2 = "test2";
+ String tsv = generateCvvTSV(Map.of(
+ ControlledVocabularyValue.Headers.DATASET_FIELD, "test",
+ ControlledVocabularyValue.Headers.VALUE, "test",
+ ControlledVocabularyValue.Headers.DISPLAY_ORDER, "0"),
+ List.of(t1, t2));
+ StringReader subjectUnderTest = new StringReader(tsv);
+ // when
+ parser.parse(subjectUnderTest);
+ // then
+ assertEquals(1, controlledVocabularyValueProcessor.getBeans().size());
+
+ ControlledVocabularyValue result = controlledVocabularyValueProcessor.getBeans().get(0);
+
+ assertFalse(result.getControlledVocabAlternates().isEmpty());
+ assertTrue(result.getControlledVocabAlternates().stream().allMatch(a -> a instanceof Placeholder.ControlledVocabAlternate));
+
+ assertEquals(2, result.getControlledVocabAlternates().size());
+ assertTrue(result.getControlledVocabAlternates().stream().anyMatch(a -> t1.equals(a.getStrValue())));
+ assertTrue(result.getControlledVocabAlternates().stream().anyMatch(a -> t2.equals(a.getStrValue())));
+ }
+
+ private static final String header = "#controlledVocabulary\t" + String.join("\t", ControlledVocabularyValue.Headers.keys());
+
+ /**
+ * This method simply inserts all the values from the map into a line, combined by \t and adds a "header" line before it.
+ * It does this based on the {@link ControlledVocabularyValue.Headers} enum value order, which is the same as in the TSV definition.
+ * Nonpresent values will be inserted as blank strings.
+ *
+ * @param values
+ * @return
+ */
+ public static String generateCvvTSV(Map<ControlledVocabularyValue.Headers, String> values) {
+ List<String> fieldValues = Arrays.stream(ControlledVocabularyValue.Headers.values())
+ .map(k -> values.getOrDefault(k, ""))
+ .collect(Collectors.toList());
+ return header + settings.getFormat().getLineSeparatorString() + "\t" + String.join("\t", fieldValues);
+ }
+
+ public static String generateCvvTSV(Map<ControlledVocabularyValue.Headers, String> values, List<String> altValues) {
+ List<String> fieldValues = Arrays.stream(ControlledVocabularyValue.Headers.values())
+ .map(k -> values.getOrDefault(k, ""))
+ .collect(Collectors.toList());
+
+ String headerWithAlts = header + ("\t"+ControlledVocabularyValue.Headers.Constants.ALT_VALUES).repeat(Math.max(0, altValues.size()-1));
+
+ return headerWithAlts + settings.getFormat().getLineSeparatorString() +
+ "\t" + String.join("\t", fieldValues) + "\t" + String.join("\t", altValues);
+ }
+}
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/metadata/MetadataBlockParsingTest.java b/src/test/java/edu/harvard/iq/dataverse/util/metadata/MetadataBlockParsingTest.java
new file mode 100644
index 00000000000..9040f226f96
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/metadata/MetadataBlockParsingTest.java
@@ -0,0 +1,186 @@
+package edu.harvard.iq.dataverse.util.metadata;
+
+import com.univocity.parsers.common.DataValidationException;
+import com.univocity.parsers.common.processor.BeanListProcessor;
+import com.univocity.parsers.tsv.TsvParser;
+import com.univocity.parsers.tsv.TsvParserSettings;
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.MetadataBlock;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.EmptySource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class MetadataBlockParsingTest {
+
+ static BeanListProcessor<MetadataBlock> metadataBlockProcessor = new BeanListProcessor<>(MetadataBlock.class);
+ static TsvParser parser;
+ static TsvParserSettings settings = new TsvParserSettings();
+ static final String LONGER_THAN_256_CHARS = "Jx7Agh8hSs4EwkCHzxwXQHOVYiL0i79n4hxeP1PbVRgkmRyUqB9dlFSoFbqCmoZ0OUCPHLOz" +
+ "JMAZeTDxI3dj7QAQG6UuNBUaFDgyG40TRK6X3FiA0f8p4LZBHQC1HIbpIw7wiNmDoEfbrGHehAgbXWDDEXelGL4TXhSxHXIqfgNaLD9fNnk" +
+ "XXcqNsuWMvkDQNrKhUWFQQybhHWS8jh62AjRWEvqFXvqVAnrgZ8xFnRiSpDkubsGuZWZqRFVN6wSPd9sp0GrpEWa5eCv0oFtQLHx0";
+
+ @BeforeAll
+ static void setUp() {
+ settings.setProcessor(metadataBlockProcessor);
+ settings.setHeaderExtractionEnabled(true);
+ // TODO: replace this char with a global constant (introduced when creating the parsing bean)
+ settings.getFormat().setComment('\'');
+ parser = new TsvParser(settings);
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"hello", "helloMyName", "hello_my_name", "h1234"})
+ void setName_AllValid(String name) {
+ // given
+ StringReader reader = new StringReader(generateMetadataBlockTSV(
+ Map.of(MetadataBlock.Headers.NAME, name,
+ MetadataBlock.Headers.DISPLAY_NAME, "display")));
+
+ // when
+ parser.parse(reader);
+ List<MetadataBlock> blocks = metadataBlockProcessor.getBeans();
+
+ // then
+ assertEquals(1, blocks.size());
+ assertEquals(name, blocks.get(0).getName());
+ }
+
+ @ParameterizedTest
+ @EmptySource
+ @ValueSource(strings = {"1234", "!", "hello+", "Hello", "hello-my_name_is", "what-s-up-5", "1234-foobar"})
+ void setName_AllInvalid(String name) {
+ // given
+ StringReader reader = new StringReader(generateMetadataBlockTSV(
+ Map.of(MetadataBlock.Headers.NAME, name,
+ MetadataBlock.Headers.DISPLAY_NAME, "display")));
+
+ // when & then
+ assertThrows(DataValidationException.class, () -> parser.parse(reader));
+ }
+
+ @ParameterizedTest
+ @EmptySource
+ @ValueSource(strings = {"https://demo.dataverse.org/foobar", "http://demo.dataverse.org/foobar"})
+ void setNamespaceUri_Valid(String uri) {
+ // given
+ StringReader reader = new StringReader(generateMetadataBlockTSV(
+ Map.of(MetadataBlock.Headers.NAME, "test",
+ MetadataBlock.Headers.DISPLAY_NAME, "display",
+ MetadataBlock.Headers.NAMESPACE_URI, uri)));
+
+ // when
+ parser.parse(reader);
+ List<MetadataBlock> blocks = metadataBlockProcessor.getBeans();
+
+ // then
+ assertEquals(1, blocks.size());
+ assertEquals(uri, Optional.ofNullable(blocks.get(0).getNamespaceUri()).orElse(""));
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"//demo.dataverse.org/foobar", "doi://demo.dataverse.org/foobar"})
+ void setNamespaceUri_Invalid(String uri) {
+ // given
+ StringReader reader = new StringReader(generateMetadataBlockTSV(
+ Map.of(MetadataBlock.Headers.NAME, "test",
+ MetadataBlock.Headers.DISPLAY_NAME, "display",
+ MetadataBlock.Headers.NAMESPACE_URI, uri)));
+
+ // when & then
+ assertThrows(DataValidationException.class, () -> parser.parse(reader));
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"hello", "H 1234", "Hello this is my Name", "1234 Foo Bar Town", "DO NOT USE!!!"})
+ void setDisplayName_AllValid(String displayName) {
+ // given
+ StringReader reader = new StringReader(generateMetadataBlockTSV(
+ Map.of(MetadataBlock.Headers.DISPLAY_NAME, displayName,
+ MetadataBlock.Headers.NAME, "test")));
+
+ // when
+ parser.parse(reader);
+ List<MetadataBlock> blocks = metadataBlockProcessor.getBeans();
+
+ // then
+ assertEquals(1, blocks.size());
+ assertEquals(displayName, blocks.get(0).getDisplayName());
+ }
+
+ @ParameterizedTest
+ @EmptySource
+ @ValueSource(strings = {" \t", "\t hello", " Hello Hello", LONGER_THAN_256_CHARS})
+ void setDisplayName_AllInvalid(String displayName) {
+ // given
+ StringReader reader = new StringReader(generateMetadataBlockTSV(
+ Map.of(MetadataBlock.Headers.DISPLAY_NAME, displayName,
+ MetadataBlock.Headers.NAME, "test")));
+
+ // when & then
+ assertThrows(DataValidationException.class, () -> parser.parse(reader));
+ }
+
+ @ParameterizedTest
+ @EmptySource
+ @ValueSource(strings = {"hello", "HelloMyName", "hello_my_name_is", "hello-im-marc", "_foo-bar", "1234-test", "test-1234", "hello123"})
+ void setOwner_AllValid(String owner) {
+ // given
+ StringReader reader = new StringReader(generateMetadataBlockTSV(
+ Map.of(MetadataBlock.Headers.DISPLAY_NAME, "test",
+ MetadataBlock.Headers.NAME, "test",
+ MetadataBlock.Headers.OWNER, owner)));
+
+ // when
+ parser.parse(reader);
+ List<MetadataBlock> blocks = metadataBlockProcessor.getBeans();
+
+ // then
+ assertEquals(1, blocks.size());
+ if (!owner.isEmpty()) {
+ assertNotNull(blocks.get(0).getOwner());
+ assertTrue(blocks.get(0).getOwner() instanceof Dataverse);
+ assertTrue(blocks.get(0).getOwner() instanceof Placeholder.Dataverse);
+ assertEquals(owner, blocks.get(0).getOwner().getAlias());
+ }
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"1234", "hello+", "Hello!"})
+ void setOwner_AllInvalid(String owner) {
+ // given
+ StringReader reader = new StringReader(generateMetadataBlockTSV(
+ Map.of(MetadataBlock.Headers.OWNER, owner,
+ MetadataBlock.Headers.DISPLAY_NAME, "display",
+ MetadataBlock.Headers.NAME, "test")));
+
+ // when & then
+ assertThrows(DataValidationException.class, () -> parser.parse(reader));
+ }
+
+ private static final String header = "#metadatablock\t" + String.join("\t", MetadataBlock.Headers.keys());
+
+ /**
+ * This method simply inserts all the values from the map into a line, combined by \t and adds a "header" line before it.
+ * It does this based on the {@link MetadataBlock.Headers} enum value order, which is the same as in the TSV definition.
+ * Nonpresent values will be inserted as blank strings.
+ *
+ * @param values
+ * @return
+ */
+ public static String generateMetadataBlockTSV(Map<MetadataBlock.Headers, String> values) {
+ List<String> fieldValues = Arrays.stream(MetadataBlock.Headers.values())
+ .map(k -> values.getOrDefault(k, ""))
+ .collect(Collectors.toList());
+ return header + settings.getFormat().getLineSeparatorString() + "\t" + String.join("\t", fieldValues);
+ }
+}
\ No newline at end of file