From 602245af5b5d92231abc73c6ea91d649806eeb36 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 12 Feb 2025 09:07:31 -0500 Subject: [PATCH 01/16] update with QDR fixes --- .../edu/harvard/iq/dataverse/ExternalIdentifier.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java index cf1dcfb3c15..cd7f8e9eb09 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java @@ -4,7 +4,7 @@ import java.util.regex.Pattern; public enum ExternalIdentifier { - ORCID("ORCID", "https://orcid.org/%s", "^\\d{4}-\\d{4}-\\d{4}-(\\d{4}|\\d{3}X)$"), + ORCID("ORCID", "https://orcid.org/%s", "^(https:\\/\\/orcid.org\\/)?\\d{4}-\\d{4}-\\d{4}-(\\d{4}|\\d{3}X)$"), ISNI("ISNI", "http://www.isni.org/isni/%s", "^\\d*$"), LCNA("LCNA", "http://id.loc.gov/authorities/names/%s", "^[a-z]+\\d+$"), VIAF("VIAF", "https://viaf.org/viaf/%s", "^\\d*$"), @@ -14,9 +14,7 @@ public enum ExternalIdentifier { ResearcherID("ResearcherID", "https://publons.com/researcher/%s/", "^[A-Z\\d][A-Z\\d-]+[A-Z\\d]$"), ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", "^\\d*$"), // ROR regex from https://ror.readme.io/docs/identifier - ROR("ROR", "https://ror.org/%s", "^0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"), - // In some contexts, we check for the full ROR URL. 
- ROR_FULL_URL("ROR", "https://ror.org/%s", "^(https:\\/\\/ror.org\\/)0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"); + ROR("ROR", "https://ror.org/%s", "^(https:\\/\\/ror.org\\/)?0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"); private String name; private String template; @@ -57,6 +55,9 @@ public Pattern getPattern() { } public String format(String idValue) { + if(idValue.startsWith(template.substring(0,template.indexOf("%s")))) { + return idValue; + } return String.format(template, idValue); } } From 794cf8dce3f8e8a43ead6c3595c189acecd46498 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 12 Feb 2025 09:07:42 -0500 Subject: [PATCH 02/16] change to use ROR --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 201a5f5f781..e8412728fbf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -626,7 +626,7 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin attributeMap.clear(); boolean isROR=false; String orgName = affiliation; - ExternalIdentifier externalIdentifier = ExternalIdentifier.ROR_FULL_URL; + ExternalIdentifier externalIdentifier = ExternalIdentifier.ROR; if (externalIdentifier.isValidIdentifier(orgName)) { isROR = true; JsonObject jo = getExternalVocabularyValue(orgName); @@ -1528,7 +1528,7 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); boolean isROR=false; String funderIdentifier = null; - ExternalIdentifier externalIdentifier = ExternalIdentifier.ROR_FULL_URL; + ExternalIdentifier externalIdentifier = 
ExternalIdentifier.ROR; if (externalIdentifier.isValidIdentifier(funder)) { isROR = true; JsonObject jo = getExternalVocabularyValue(funder); From 653a43f75a04240a0ed7b5bf0365fb87a79a85fb Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 12 Feb 2025 09:07:52 -0500 Subject: [PATCH 03/16] add tests --- .../iq/dataverse/ExternalIdentifierTest.java | 46 +++++++++++++++---- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java b/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java index dbd732d2e55..c0f747fe5a1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java @@ -9,16 +9,46 @@ public class ExternalIdentifierTest { @Test public void testIsValidAuthorIdentifierOrcid() { - ExternalIdentifier identifier = ExternalIdentifier.valueOf("ORCID"); - assertTrue(identifier.isValidIdentifier("0000-0002-1825-0097")); - // An "X" at the end of an ORCID is less common but still valid. - assertTrue(identifier.isValidIdentifier("0000-0002-1694-233X")); - assertFalse(identifier.isValidIdentifier("0000 0002 1825 0097")); - assertFalse(identifier.isValidIdentifier(" 0000-0002-1825-0097")); - assertFalse(identifier.isValidIdentifier("0000-0002-1825-0097 ")); - assertFalse(identifier.isValidIdentifier("junk")); + ExternalIdentifier identifier = ExternalIdentifier.valueOf("ORCID"); + assertTrue(identifier.isValidIdentifier("0000-0002-1825-0097")); + // An "X" at the end of an ORCID is less common but still valid. 
+ assertTrue(identifier.isValidIdentifier("0000-0002-1694-233X")); + assertFalse(identifier.isValidIdentifier("0000 0002 1825 0097")); + assertFalse(identifier.isValidIdentifier(" 0000-0002-1825-0097")); + assertFalse(identifier.isValidIdentifier("0000-0002-1825-0097 ")); + assertFalse(identifier.isValidIdentifier("junk")); + + // Test ORCID with https://orcid.org/ prefix + assertTrue(identifier.isValidIdentifier("https://orcid.org/0000-0002-1825-0097")); + assertTrue(identifier.isValidIdentifier("https://orcid.org/0000-0002-1694-233X")); + + // Test format command + assertEquals("https://orcid.org/0000-0002-1825-0097", identifier.format("0000-0002-1825-0097")); + assertEquals("https://orcid.org/0000-0002-1694-233X", identifier.format("0000-0002-1694-233X")); + assertEquals("https://orcid.org/0000-0002-1825-0097", identifier.format("https://orcid.org/0000-0002-1825-0097")); } + @Test + public void testIsValidAuthorIdentifierRor() { + ExternalIdentifier identifier = ExternalIdentifier.valueOf("ROR"); + assertTrue(identifier.isValidIdentifier("01h6qyw18")); + assertTrue(identifier.isValidIdentifier("02mhbdp49")); + assertFalse(identifier.isValidIdentifier("01h6qyw1")); // Too short + assertFalse(identifier.isValidIdentifier("01h6qyw18a")); // Too long + assertFalse(identifier.isValidIdentifier(" 01h6qyw18")); + assertFalse(identifier.isValidIdentifier("01h6qyw18 ")); + assertFalse(identifier.isValidIdentifier("junk")); + + // Test ROR with https://ror.org/ prefix + assertTrue(identifier.isValidIdentifier("https://ror.org/01h6qyw18")); + assertTrue(identifier.isValidIdentifier("https://ror.org/02mhbdp49")); + + // Test format command + assertEquals("https://ror.org/01h6qyw18", identifier.format("01h6qyw18")); + assertEquals("https://ror.org/02mhbdp49", identifier.format("02mhbdp49")); + assertEquals("https://ror.org/01h6qyw18", identifier.format("https://ror.org/01h6qyw18")); + } + @Test public void testIsValidAuthorIdentifierIsni() { ExternalIdentifier identifier = 
ExternalIdentifier.valueOf("ISNI"); From 6d4a1110c06b0d91580450e9730dad66f434216a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 12 Feb 2025 09:10:23 -0500 Subject: [PATCH 04/16] release note --- doc/release-notes/11242-fix-oricid-recognition.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/11242-fix-oricid-recognition.md diff --git a/doc/release-notes/11242-fix-oricid-recognition.md b/doc/release-notes/11242-fix-oricid-recognition.md new file mode 100644 index 00000000000..503b322d352 --- /dev/null +++ b/doc/release-notes/11242-fix-oricid-recognition.md @@ -0,0 +1 @@ +A bug that caused ORCIDs start with https://orcid.org/ entered as author identifier to be ignored when creating the DataCIte metadata has been fixed. This primarily affected users of the ORCID external vocabulary script as the manual entry form recommends not using the URL form. From b84ab772a7b0c25201ae6d0e7eacf32f6a8bfd9f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 12 Feb 2025 09:11:05 -0500 Subject: [PATCH 05/16] oauth update to allow building --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index cb16f16c229..95137dc94cc 100644 --- a/pom.xml +++ b/pom.xml @@ -494,7 +494,7 @@ com.nimbusds oauth2-oidc-sdk - 10.13.2 + 11.22.1 From cf83fb23406ee20c771bfcd37d48ed09d236ad0a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 12 Feb 2025 09:43:52 -0500 Subject: [PATCH 06/16] fix test using ROR, note duplication --- .../iq/dataverse/DatasetFieldValueValidatorTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java index 7320cf7acfe..f328382259b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java @@ -111,6 +111,9 @@ public void 
testIsValid() { assertFalse(result); } + /* The next 7 tests below appear to duplicate tests in the ExternalIdentifierTest class. + * The ones here use the isValidAuthorIdentifier method which is only used in testing (and probabkly could be static). + */ @Test public void testIsValidAuthorIdentifierOrcid() { DatasetFieldValueValidator validator = new DatasetFieldValueValidator(); @@ -157,6 +160,7 @@ public void testIsValidAuthorIdentifierGnd() { assertFalse(validator.isValidAuthorIdentifier("junk", pattern)); } + @Test public void testIsValidAuthorIdentifierRor() { DatasetFieldValueValidator validator = new DatasetFieldValueValidator(); @@ -168,7 +172,7 @@ public void testIsValidAuthorIdentifierRor() { @Test public void testIsValidAuthorIdentifierRorFull() { DatasetFieldValueValidator validator = new DatasetFieldValueValidator(); - Pattern pattern = ExternalIdentifier.valueOf("ROR_FULL_URL").getPattern(); + Pattern pattern = ExternalIdentifier.valueOf("ROR").getPattern(); assertTrue(validator.isValidAuthorIdentifier("https://ror.org/03vek6s52", pattern)); assertFalse(validator.isValidAuthorIdentifier("junk", pattern)); } From d5f76f37beb01a969670eaaf2eb36a432aed8331 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 12 Feb 2025 09:52:18 -0500 Subject: [PATCH 07/16] ROR now included when URL form is used --- .../doi/datacite/XmlMetadataTemplateTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java index dd8a8e2cc21..2696acc7473 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java @@ -195,10 +195,10 @@ public void testDataCiteXMLCreation() throws IOException { assertEquals("ROR", 
XmlPath.from(xml).getString("resource.creators.creator[2].nameIdentifier.@nameIdentifierScheme")); assertEquals("https://ror.org", XmlPath.from(xml).getString("resource.creators.creator[2].nameIdentifier.@schemeURI")); assertEquals("Qualitative Data Repository", XmlPath.from(xml).getString("resource.creators.creator[3].creatorName")); - // The nameIdentifier fields below are not populated because the full ROR URL was entered. - assertEquals("", XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier")); - assertEquals(null, XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier.@nameIdentifierScheme")); - assertEquals(null, XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier.@schemeURI")); + //Test when URL form was used + assertEquals("https://ror.org/014trz974", XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier")); + assertEquals("ROR", XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier.@nameIdentifierScheme")); + assertEquals("https://ror.org", XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier.@schemeURI")); assertEquals("Dataverse", XmlPath.from(xml).getString("resource.publisher")); } From 5e526401a03d4f8eda79529546c49872073fa0fc Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 12 Feb 2025 17:07:40 -0500 Subject: [PATCH 08/16] update other types to handle URLs, tighten matching re \. 
--- .../harvard/iq/dataverse/ExternalIdentifier.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java index cd7f8e9eb09..795ba330b0a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java @@ -4,17 +4,17 @@ import java.util.regex.Pattern; public enum ExternalIdentifier { - ORCID("ORCID", "https://orcid.org/%s", "^(https:\\/\\/orcid.org\\/)?\\d{4}-\\d{4}-\\d{4}-(\\d{4}|\\d{3}X)$"), - ISNI("ISNI", "http://www.isni.org/isni/%s", "^\\d*$"), - LCNA("LCNA", "http://id.loc.gov/authorities/names/%s", "^[a-z]+\\d+$"), - VIAF("VIAF", "https://viaf.org/viaf/%s", "^\\d*$"), + ORCID("ORCID", "https://orcid.org/%s", "^(https:\\/\\/orcid\\.org\\/)?\\d{4}-\\d{4}-\\d{4}-(\\d{4}|\\d{3}X)$"), + ISNI("ISNI", "http://www.isni.org/isni/%s", "^(http:\\/\\/www\\.isni\\.org\\/isni\\/)?(\\d{16}|\\d{15}X)$"), + LCNA("LCNA", "http://id.loc.gov/authorities/names/%s", "^(http:\\/\\/id\\.loc\\.gov\\/authorities\\/names\\/)?[a-z]+\\d+$"), + VIAF("VIAF", "https://viaf.org/viaf/%s", "^(https:\\/\\/viaf\\.org\\/viaf\\/)?\\d*$"), // GND regex from https://www.wikidata.org/wiki/Property:P227 - GND("GND", "https://d-nb.info/gnd/%s", "^1[01]?\\d{7}[0-9X]|[47]\\d{6}-\\d|[1-9]\\d{0,7}-[0-9X]|3\\d{7}[0-9X]$"), + GND("GND", "https://d-nb.info/gnd/%s", "^(https:\\/\\/d-nb\\.info\\/gnd\\/)?(1[01]?\\d{7}[0-9X]|[47]\\d{6}-\\d|[1-9]\\d{0,7}-[0-9X]|3\\d{7}[0-9X])$"), // note: DAI is missing from this list, because it doesn't have resolvable URL - ResearcherID("ResearcherID", "https://publons.com/researcher/%s/", "^[A-Z\\d][A-Z\\d-]+[A-Z\\d]$"), - ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", "^\\d*$"), + ResearcherID("ResearcherID", "https://publons.com/researcher/%s/", 
"^([A-Z\\d][A-Z\\d-]+[A-Z\\d]|(https:\\/\\/publons\\.com\\/researcher\\/)?[A-Z\\d][A-Z\\d-]+[A-Z\\d]\\/)$"), + ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", "^(https:\\/\\/www\\.scopus\\.com\\/authid\\/detail\\.uri\\?authorId=)?\\d*$"), // ROR regex from https://ror.readme.io/docs/identifier - ROR("ROR", "https://ror.org/%s", "^(https:\\/\\/ror.org\\/)?0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"); + ROR("ROR", "https://ror.org/%s", "^(https:\\/\\/ror\\.org\\/)?0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"); private String name; private String template; From e762d42b4d0cfc0d65c49ea49546e5943ddceea7 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 12 Feb 2025 17:07:59 -0500 Subject: [PATCH 09/16] Expanded tests, testing all types, and format() calls --- .../iq/dataverse/ExternalIdentifierTest.java | 95 +++++++++++++++---- 1 file changed, 79 insertions(+), 16 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java b/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java index c0f747fe5a1..1c07c8c531b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java @@ -51,31 +51,94 @@ public void testIsValidAuthorIdentifierRor() { @Test public void testIsValidAuthorIdentifierIsni() { - ExternalIdentifier identifier = ExternalIdentifier.valueOf("ISNI"); - assertTrue(identifier.isValidIdentifier("0000000121032683")); - assertFalse(identifier.isValidIdentifier("junk")); + ExternalIdentifier identifier = ExternalIdentifier.valueOf("ISNI"); + assertTrue(identifier.isValidIdentifier("0000000121032683")); + assertTrue(identifier.isValidIdentifier("000000012150090X")); + assertTrue(identifier.isValidIdentifier("http://www.isni.org/isni/0000000121032683")); + assertTrue(identifier.isValidIdentifier("http://www.isni.org/isni/000000012150090X")); + assertFalse(identifier.isValidIdentifier("junk")); + 
assertFalse(identifier.isValidIdentifier("000000012103268")); // Too short + assertFalse(identifier.isValidIdentifier("00000001210326831")); // Too long + + // Test format command + assertEquals("http://www.isni.org/isni/0000000121032683", identifier.format("0000000121032683")); + assertEquals("http://www.isni.org/isni/0000000121032683", identifier.format("http://www.isni.org/isni/0000000121032683")); } - + @Test public void testIsValidAuthorIdentifierLcna() { - ExternalIdentifier identifier = ExternalIdentifier.valueOf("LCNA"); - assertTrue(identifier.isValidIdentifier("n82058243")); - assertTrue(identifier.isValidIdentifier("foobar123")); - assertFalse(identifier.isValidIdentifier("junk")); + ExternalIdentifier identifier = ExternalIdentifier.valueOf("LCNA"); + assertTrue(identifier.isValidIdentifier("n82058243")); + assertTrue(identifier.isValidIdentifier("foobar123")); + assertTrue(identifier.isValidIdentifier("http://id.loc.gov/authorities/names/n82058243")); + assertFalse(identifier.isValidIdentifier("junk")); + assertFalse(identifier.isValidIdentifier("123")); // No letter prefix (pattern requires [a-z]+ before the digits) + + // Test format command + assertEquals("http://id.loc.gov/authorities/names/n82058243", identifier.format("n82058243")); + assertEquals("http://id.loc.gov/authorities/names/n82058243", identifier.format("http://id.loc.gov/authorities/names/n82058243")); } - + @Test public void testIsValidAuthorIdentifierViaf() { - ExternalIdentifier identifier = ExternalIdentifier.valueOf("VIAF"); - assertTrue(identifier.isValidIdentifier("172389567")); - assertFalse(identifier.isValidIdentifier("junk")); - } + ExternalIdentifier identifier = ExternalIdentifier.valueOf("VIAF"); + assertTrue(identifier.isValidIdentifier("172389567")); + assertTrue(identifier.isValidIdentifier("https://viaf.org/viaf/172389567")); + assertFalse(identifier.isValidIdentifier("junk")); + + assertEquals("https://viaf.org/viaf/172389567", identifier.format("172389567")); +
assertEquals("https://viaf.org/viaf/172389567", identifier.format("https://viaf.org/viaf/172389567")); + } + @Test public void testIsValidAuthorIdentifierGnd() { - ExternalIdentifier identifier = ExternalIdentifier.valueOf("GND"); - assertTrue(identifier.isValidIdentifier("4079154-3")); - assertFalse(identifier.isValidIdentifier("junk")); + ExternalIdentifier identifier = ExternalIdentifier.valueOf("GND"); + assertTrue(identifier.isValidIdentifier("4079154-3")); + assertTrue(identifier.isValidIdentifier("118540238")); + assertTrue(identifier.isValidIdentifier("https://d-nb.info/gnd/4079154-3")); + assertTrue(identifier.isValidIdentifier("https://d-nb.info/gnd/118540238")); + assertFalse(identifier.isValidIdentifier("junk")); + assertFalse(identifier.isValidIdentifier("123")); // Too short + + assertEquals("https://d-nb.info/gnd/4079154-3", identifier.format("4079154-3")); + assertEquals("https://d-nb.info/gnd/4079154-3", identifier.format("https://d-nb.info/gnd/4079154-3")); + + } + + @Test + public void testIsValidAuthorIdentifierResearcherId() { + ExternalIdentifier identifier = ExternalIdentifier.valueOf("ResearcherID"); + assertTrue(identifier.isValidIdentifier("A-1234-5678")); + assertTrue(identifier.isValidIdentifier("J-9876-2018")); + assertTrue(identifier.isValidIdentifier("AAA-1234-2020")); + assertTrue(identifier.isValidIdentifier("Z9999-2021")); + assertTrue(identifier.isValidIdentifier("https://publons.com/researcher/A-1234-5678/")); + assertTrue(identifier.isValidIdentifier("https://publons.com/researcher/J-9876-2018/")); + assertFalse(identifier.isValidIdentifier("a-1234-5678")); // Lowercase start + assertFalse(identifier.isValidIdentifier("A-1234-5678-")); // Ends with hyphen + assertFalse(identifier.isValidIdentifier("-A-1234-5678")); // Starts with hyphen + assertFalse(identifier.isValidIdentifier("junk")); + + assertEquals("https://publons.com/researcher/A-1234-5678/", identifier.format("A-1234-5678")); + 
assertEquals("https://publons.com/researcher/A-1234-5678/", identifier.format("https://publons.com/researcher/A-1234-5678/")); + + } + + @Test + public void testIsValidAuthorIdentifierScopusId() { + ExternalIdentifier identifier = ExternalIdentifier.valueOf("ScopusID"); + assertTrue(identifier.isValidIdentifier("12345678")); + assertTrue(identifier.isValidIdentifier("87654321")); + assertTrue(identifier.isValidIdentifier("00000000")); + assertTrue(identifier.isValidIdentifier("https://www.scopus.com/authid/detail.uri?authorId=12345678")); + assertTrue(identifier.isValidIdentifier("https://www.scopus.com/authid/detail.uri?authorId=87654321")); + assertFalse(identifier.isValidIdentifier("A12345678")); // Contains a letter + assertFalse(identifier.isValidIdentifier("junk")); + + assertEquals("https://www.scopus.com/authid/detail.uri?authorId=12345678", identifier.format("12345678")); + assertEquals("https://www.scopus.com/authid/detail.uri?authorId=12345678", identifier.format("https://www.scopus.com/authid/detail.uri?authorId=12345678")); + } } From 8bc266cad12d8b3478f0acf304e2c23ec447d098 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 13 Feb 2025 14:07:51 -0500 Subject: [PATCH 10/16] DAI type --- src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java index 795ba330b0a..f64eac56db6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java @@ -14,7 +14,8 @@ public enum ExternalIdentifier { ResearcherID("ResearcherID", "https://publons.com/researcher/%s/", "^([A-Z\\d][A-Z\\d-]+[A-Z\\d]|(https:\\/\\/publons\\.com\\/researcher\\/)?[A-Z\\d][A-Z\\d-]+[A-Z\\d]\\/)$"), ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", 
"^(https:\\/\\/www\\.scopus\\.com\\/authid\\/detail\\.uri\\?authorId=)?\\d*$"), // ROR regex from https://ror.readme.io/docs/identifier - ROR("ROR", "https://ror.org/%s", "^(https:\\/\\/ror\\.org\\/)?0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"); + ROR("ROR", "https://ror.org/%s", "^(https:\\/\\/ror\\.org\\/)?0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"), + DAI("DAI", "info:eu-repo/dai/nl/", "^(info:eu-repo\\/dai\\/nl\\/)?[\\d]?\\d{8}[0-9X]$"); private String name; private String template; From 8015c0420ddfeefa362bd59457d667230107ce23 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 13 Feb 2025 14:14:15 -0500 Subject: [PATCH 11/16] DAI test --- .../iq/dataverse/ExternalIdentifierTest.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java b/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java index 1c07c8c531b..2440d4ae240 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ExternalIdentifierTest.java @@ -140,5 +140,21 @@ public void testIsValidAuthorIdentifierScopusId() { assertEquals("https://www.scopus.com/authid/detail.uri?authorId=12345678", identifier.format("https://www.scopus.com/authid/detail.uri?authorId=12345678")); } + + @Test + public void testIsValidAuthorIdentifierDai() { + ExternalIdentifier identifier = ExternalIdentifier.valueOf("DAI"); + assertTrue(identifier.isValidIdentifier("123456789")); + assertTrue(identifier.isValidIdentifier("987654321X")); + assertTrue(identifier.isValidIdentifier("info:eu-repo/dai/nl/123456789")); + assertTrue(identifier.isValidIdentifier("info:eu-repo/dai/nl/987654321X")); + assertFalse(identifier.isValidIdentifier("12345678")); // Too short + assertFalse(identifier.isValidIdentifier("12345678901")); // Too long + assertFalse(identifier.isValidIdentifier("A23456789")); // Contains a letter + assertFalse(identifier.isValidIdentifier("junk")); + + 
assertEquals("info:eu-repo/dai/nl/123456789", identifier.format("123456789")); + assertEquals("info:eu-repo/dai/nl/123456789", identifier.format("info:eu-repo/dai/nl/123456789")); + } } From 6f46e7355e8b1896a4b9d86664b945aeac3d472f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 13 Feb 2025 14:14:25 -0500 Subject: [PATCH 12/16] format fix --- src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java index f64eac56db6..ef4bcb312c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java @@ -15,7 +15,7 @@ public enum ExternalIdentifier { ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", "^(https:\\/\\/www\\.scopus\\.com\\/authid\\/detail\\.uri\\?authorId=)?\\d*$"), // ROR regex from https://ror.readme.io/docs/identifier ROR("ROR", "https://ror.org/%s", "^(https:\\/\\/ror\\.org\\/)?0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"), - DAI("DAI", "info:eu-repo/dai/nl/", "^(info:eu-repo\\/dai\\/nl\\/)?[\\d]?\\d{8}[0-9X]$"); + DAI("DAI", "info:eu-repo/dai/nl/%s", "^(info:eu-repo\\/dai\\/nl\\/)?[\\d]?\\d{8}[0-9X]$"); private String name; private String template; From 07ce6f82b233ebb94d0a0742e71597948d3f0cc8 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 13 Feb 2025 14:25:44 -0500 Subject: [PATCH 13/16] Don't send DAI as it isn't a URL --- .../java/edu/harvard/iq/dataverse/DatasetAuthor.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetAuthor.java b/src/main/java/edu/harvard/iq/dataverse/DatasetAuthor.java index d33d709107f..67078bf399e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetAuthor.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetAuthor.java @@ -97,8 +97,13 @@ public 
static String getIdentifierAsUrl(String idType, String idValue) { if (idType != null && !idType.isEmpty() && idValue != null && !idValue.isEmpty()) { try { ExternalIdentifier externalIdentifier = ExternalIdentifier.valueOf(idType); - if (externalIdentifier.isValidIdentifier(idValue)) - return externalIdentifier.format(idValue); + if (externalIdentifier.isValidIdentifier(idValue)) { + String uri = externalIdentifier.format(idValue); + //The DAI identifier is a URI starting with "info" - we don't want to return it as a URL (we assume non-null URLs should be links in the display) + if(uri.startsWith("http")) { + return uri; + } + } } catch (Exception e) { // non registered identifier } From 55102d2e2af76cd7e0c5106b154f674c9379ec63 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 26 Feb 2025 17:48:03 -0500 Subject: [PATCH 14/16] doc/release note updates --- doc/release-notes/11242-fix-oricid-recognition.md | 4 +++- doc/sphinx-guides/source/user/dataset-management.rst | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/release-notes/11242-fix-oricid-recognition.md b/doc/release-notes/11242-fix-oricid-recognition.md index 503b322d352..6f326b76f56 100644 --- a/doc/release-notes/11242-fix-oricid-recognition.md +++ b/doc/release-notes/11242-fix-oricid-recognition.md @@ -1 +1,3 @@ -A bug that caused ORCIDs start with https://orcid.org/ entered as author identifier to be ignored when creating the DataCIte metadata has been fixed. This primarily affected users of the ORCID external vocabulary script as the manual entry form recommends not using the URL form. +A bug that caused ORCIDs start with https://orcid.org/ entered as author identifier to be ignored when creating the DataCite metadata has been fixed. This primarily affected users of the ORCID external vocabulary script as the manual entry form recommends not using the URL form. 
+ +The display of authorIdentifier, when not using any external vocabulary scripts, has been improved so that either the plain identifier (e.g. "0000-0002-1825-0097") or its URL form (e.g. "https://orcid.org/0000-0002-1825-0097") will result in valid links in the display (for identifier types that have a URL form). The shorter form is still recommended when doing manual entry. diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 1418aa627d5..0984b5f4f62 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -54,7 +54,7 @@ Adding a New Dataset #. Click on the "Add Data" button and select "New Dataset" in the dropdown menu. **Note:** If you are on the root Dataverse collection, your My Data page or click the "Add Data" link in the navbar, the dataset you create will be hosted in the root Dataverse collection. You can change this by selecting another Dataverse collection you have proper permissions to create datasets in, from the Host Dataverse collection dropdown in the create dataset form. This option to choose will not be available after you create the dataset. #. To quickly get started, enter at minimum all the required fields with an asterisk (e.g., the Dataset Title, Author Name, Description Text, Point of Contact Email, and Subject) to get a Data Citation with a DOI. - #. When entering author identifiers, select the type from the dropdown (e.g. "ORCID") and under "Identifier" enter just the unique identifier (e.g. "0000-0002-1825-0097") rather than the full URL (e.g. "https://orcid.org/0000-0002-1825-0097"). + #. When entering author identifiers, select the type from the dropdown (e.g. "ORCID") and under "Identifier" enter just the unique identifier (e.g. "0000-0002-1825-0097"), You can also enter the full URL (e.g. "https://orcid.org/0000-0002-1825-0097"), but the shorter form is preferred for display purposes. #. 
Scroll down to the "Files" section and click on "Select Files to Add" to add all the relevant files to your Dataset. You can also upload your files directly from your Dropbox. **Tip:** You can drag and drop or select multiple files at a time from your desktop From 21e4ed1acdd8d59c0dca0466ad8ef6632273b82d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 26 Feb 2025 17:48:21 -0500 Subject: [PATCH 15/16] remove duplicate tests --- .../dataverse/DatasetFieldValueValidator.java | 4 -- .../DatasetFieldValueValidatorTest.java | 66 ------------------- 2 files changed, 70 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index 610bb70ff49..74d3cbf73f0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -241,10 +241,6 @@ private boolean isValidDate(String dateString, String pattern) { return valid; } - public boolean isValidAuthorIdentifier(String userInput, Pattern pattern) { - return pattern.matcher(userInput).matches(); - } - // Validate child fields against each other and return failure message or Optional.empty() if success public Optional validateChildConstraints(DatasetField dsf) { final String fieldName = dsf.getDatasetFieldType().getName() != null ? dsf.getDatasetFieldType().getName() : ""; diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java index f328382259b..3221949384e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java @@ -111,72 +111,6 @@ public void testIsValid() { assertFalse(result); } - /* The next 7 tests below appear to duplicate tests in the ExternalIdentifierTest class. 
- * The ones here use the isValidAuthorIdentifier method which is only used in testing (and probabkly could be static). - */ - @Test - public void testIsValidAuthorIdentifierOrcid() { - DatasetFieldValueValidator validator = new DatasetFieldValueValidator(); - Pattern pattern = ExternalIdentifier.valueOf("ORCID").getPattern(); - assertTrue(validator.isValidAuthorIdentifier("0000-0002-1825-0097", pattern)); - // An "X" at the end of an ORCID is less common but still valid. - assertTrue(validator.isValidAuthorIdentifier("0000-0002-1694-233X", pattern)); - assertFalse(validator.isValidAuthorIdentifier("0000 0002 1825 0097", pattern)); - assertFalse(validator.isValidAuthorIdentifier(" 0000-0002-1825-0097", pattern)); - assertFalse(validator.isValidAuthorIdentifier("0000-0002-1825-0097 ", pattern)); - assertFalse(validator.isValidAuthorIdentifier("junk", pattern)); - } - - @Test - public void testIsValidAuthorIdentifierIsni() { - DatasetFieldValueValidator validator = new DatasetFieldValueValidator(); - Pattern pattern = ExternalIdentifier.valueOf("ISNI").getPattern(); - assertTrue(validator.isValidAuthorIdentifier("0000000121032683", pattern)); - assertFalse(validator.isValidAuthorIdentifier("junk", pattern)); - } - - @Test - public void testIsValidAuthorIdentifierLcna() { - DatasetFieldValueValidator validator = new DatasetFieldValueValidator(); - Pattern pattern = ExternalIdentifier.valueOf("LCNA").getPattern(); - assertTrue(validator.isValidAuthorIdentifier("n82058243", pattern)); - assertTrue(validator.isValidAuthorIdentifier("foobar123", pattern)); - assertFalse(validator.isValidAuthorIdentifier("junk", pattern)); - } - - @Test - public void testIsValidAuthorIdentifierViaf() { - DatasetFieldValueValidator validator = new DatasetFieldValueValidator(); - Pattern pattern = ExternalIdentifier.valueOf("VIAF").getPattern(); - assertTrue(validator.isValidAuthorIdentifier("172389567", pattern)); - assertFalse(validator.isValidAuthorIdentifier("junk", pattern)); - } - - 
@Test - public void testIsValidAuthorIdentifierGnd() { - DatasetFieldValueValidator validator = new DatasetFieldValueValidator(); - Pattern pattern = ExternalIdentifier.valueOf("GND").getPattern(); - assertTrue(validator.isValidAuthorIdentifier("4079154-3", pattern)); - assertFalse(validator.isValidAuthorIdentifier("junk", pattern)); - } - - - @Test - public void testIsValidAuthorIdentifierRor() { - DatasetFieldValueValidator validator = new DatasetFieldValueValidator(); - Pattern pattern = ExternalIdentifier.valueOf("ROR").getPattern(); - assertTrue(validator.isValidAuthorIdentifier("03vek6s52", pattern)); - assertFalse(validator.isValidAuthorIdentifier("junk", pattern)); - } - - @Test - public void testIsValidAuthorIdentifierRorFull() { - DatasetFieldValueValidator validator = new DatasetFieldValueValidator(); - Pattern pattern = ExternalIdentifier.valueOf("ROR").getPattern(); - assertTrue(validator.isValidAuthorIdentifier("https://ror.org/03vek6s52", pattern)); - assertFalse(validator.isValidAuthorIdentifier("junk", pattern)); - } - final Validator validator = Validation.buildDefaultValidatorFactory().getValidator(); @ParameterizedTest From 98bdf11d4d30f5e4c37679b30f0cf6cd14f2429c Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 3 Mar 2025 14:58:09 -0500 Subject: [PATCH 16/16] update per review --- doc/release-notes/11242-fix-oricid-recognition.md | 2 +- doc/sphinx-guides/source/user/dataset-management.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/release-notes/11242-fix-oricid-recognition.md b/doc/release-notes/11242-fix-oricid-recognition.md index 6f326b76f56..268a63bb05e 100644 --- a/doc/release-notes/11242-fix-oricid-recognition.md +++ b/doc/release-notes/11242-fix-oricid-recognition.md @@ -1,3 +1,3 @@ A bug that caused ORCIDs start with https://orcid.org/ entered as author identifier to be ignored when creating the DataCite metadata has been fixed. 
This primarily affected users of the ORCID external vocabulary script as the manual entry form recommends not using the URL form. -The display of authorIdentifier, when not using any external vocabulary scripts, has been improved so that either the plain identifier (e.g. "0000-0002-1825-0097") or its URL form (e.g. "https://orcid.org/0000-0002-1825-0097") will result in valid links in the display (for identifier types that have a URL form). The shorter form is still recommended when doing manual entry. +The display of authorIdentifier, when not using any external vocabulary scripts, has been improved so that either the plain identifier (e.g. "0000-0002-1825-0097") or its URL form (e.g. "https://orcid.org/0000-0002-1825-0097") will result in valid links in the display (for identifier types that have a URL form). The URL form is now recommended when doing manual entry. diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 0984b5f4f62..33cd011101e 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -54,7 +54,7 @@ Adding a New Dataset #. Click on the "Add Data" button and select "New Dataset" in the dropdown menu. **Note:** If you are on the root Dataverse collection, your My Data page or click the "Add Data" link in the navbar, the dataset you create will be hosted in the root Dataverse collection. You can change this by selecting another Dataverse collection you have proper permissions to create datasets in, from the Host Dataverse collection dropdown in the create dataset form. This option to choose will not be available after you create the dataset. #. To quickly get started, enter at minimum all the required fields with an asterisk (e.g., the Dataset Title, Author Name, Description Text, Point of Contact Email, and Subject) to get a Data Citation with a DOI. - #. 
When entering author identifiers, select the type from the dropdown (e.g. "ORCID") and under "Identifier" enter just the unique identifier (e.g. "0000-0002-1825-0097"), You can also enter the full URL (e.g. "https://orcid.org/0000-0002-1825-0097"), but the shorter form is preferred for display purposes. + #. When entering author identifiers, select the type from the dropdown (e.g. "ORCID") and under "Identifier" enter the full URL (e.g. "https://orcid.org/0000-0002-1825-0097") for identifiers that have a URL form. The shorter form of the unique identifier (e.g. "0000-0002-1825-0097") can also be entered, but the URL form is preferred when available. #. Scroll down to the "Files" section and click on "Select Files to Add" to add all the relevant files to your Dataset. You can also upload your files directly from your Dropbox. **Tip:** You can drag and drop or select multiple files at a time from your desktop