From a5f9592ebeb5e4805e28ffac68cfa7eb9edc0e5d Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 2 Oct 2019 15:04:01 -0400 Subject: [PATCH 1/2] Fixed the bug in the schema.org export which, under some conditions, was resetting the display format on the related publication url in the metadata block configuration (#6202) --- .../edu/harvard/iq/dataverse/DatasetVersion.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index ea463e21702..b8353e46831 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -49,6 +49,7 @@ import javax.validation.Validator; import javax.validation.ValidatorFactory; import javax.validation.constraints.Size; +import org.apache.commons.lang.StringUtils; /** * @@ -1141,10 +1142,17 @@ public List getRelatedPublications() { relatedPublication.setText(citation); } if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) { - // Prevent href and target=_blank from getting into Schema.org JSON-LD output. - subField.getDatasetFieldType().setDisplayFormat("#VALUE"); - String url = subField.getDisplayValue(); - relatedPublication.setUrl(url); + // We have to avoid using subField.getDisplayValue() here - because the DisplayFormatType + // for this url is likely set up so that the display value is automatically turned into a + // clickable HTML HREF block, which we don't want to end in our Schema.org JSON-LD output. + // So we want to use the raw value of the field instead, with + // minimal HTML sanitation, just in case (this would be done on all URLs in getDisplayValue()). + String url = subField.getValue(); + if (StringUtils.isBlank(url) || DatasetField.NA_VALUE.equals(url)) { + relatedPublication.setUrl(""); + } else { + relatedPublication.setUrl(MarkupChecker.sanitizeBasicHTML(url)); + } } } relatedPublications.add(relatedPublication); From c528524a2640fc60b7df3f5a450f142d039c07e3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 2 Oct 2019 15:49:55 -0400 Subject: [PATCH 2/2] edited a comment (#6202) --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index b8353e46831..48479872c63 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1143,8 +1143,8 @@ public List getRelatedPublications() { } if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) { // We have to avoid using subField.getDisplayValue() here - because the DisplayFormatType - // for this url is likely set up so that the display value is automatically turned into a - // clickable HTML HREF block, which we don't want to end in our Schema.org JSON-LD output. + // for this url metadata field is likely set up so that the display value is automatically + // turned into a clickable HTML HREF block, which we don't want to end in our Schema.org JSON-LD output. // So we want to use the raw value of the field instead, with // minimal HTML sanitation, just in case (this would be done on all URLs in getDisplayValue()). String url = subField.getValue();