From a1c0a66fba3bbe0c28ae8fbaffd134f8c89c4967 Mon Sep 17 00:00:00 2001 From: Andriy Sheredko Date: Wed, 25 Feb 2026 17:29:15 +0200 Subject: [PATCH] feat(osf): Add DataCite client tests for ROR funder identifier support --- osf/metadata/schemas/datacite.json | 3 + .../datacite/datacite_tree_walker.py | 16 +- .../project_full.datacite.json | 9 +- .../project_full.datacite.xml | 6 +- tests/identifiers/test_datacite.py | 147 +++++++++++++++++- 5 files changed, 167 insertions(+), 14 deletions(-) diff --git a/osf/metadata/schemas/datacite.json b/osf/metadata/schemas/datacite.json index 706c760a1d6..555c9e56e9a 100644 --- a/osf/metadata/schemas/datacite.json +++ b/osf/metadata/schemas/datacite.json @@ -473,6 +473,9 @@ "ROR", "Other" ] + }, + "schemeURI": { + "$ref": "#/definitions/uri" } }, "additionalProperties": false, diff --git a/osf/metadata/serializers/datacite/datacite_tree_walker.py b/osf/metadata/serializers/datacite/datacite_tree_walker.py index 5f0a283d450..950ac3b50dd 100644 --- a/osf/metadata/serializers/datacite/datacite_tree_walker.py +++ b/osf/metadata/serializers/datacite/datacite_tree_walker.py @@ -195,12 +195,12 @@ def _identifier_type_and_value(self, identifier: str): return ('URL', identifier) logger.warning('skipping non-IRI-shaped identifier "%s"', identifier) - def _funder_identifier_type(self, identifier: str): + def _funder_identifier_type_and_scheme(self, identifier: str): if identifier.startswith(DxDOI) or identifier.startswith(DOI): - return 'Crossref Funder ID' + return ('Crossref Funder ID', 'https://www.crossref.org/services/funder-registry/') if identifier.startswith(ROR): - return 'ROR' - return 'Other' + return ('ROR', str(ROR)) + return ('Other', '') def _get_name_type(self, agent_iri): if (agent_iri, RDF.type, FOAF.Person) in self.basket: @@ -312,13 +312,15 @@ def _funding_reference(self, fundrefs_el, funder, funding_award=None): _fundref_el = self.visit(fundrefs_el, 'fundingReference') self.visit(_fundref_el, 'funderName', text=next(self.basket[funder:FOAF.name], '')) _funder_identifier = next(self.basket[funder:DCTERMS.identifier], '') + _funder_id_type, _funder_scheme_uri = self._funder_identifier_type_and_scheme(_funder_identifier) + _funder_id_attrib = {'funderIdentifierType': _funder_id_type} + if _funder_scheme_uri: + _funder_id_attrib['schemeURI'] = _funder_scheme_uri self.visit( _fundref_el, 'funderIdentifier', text=_funder_identifier, - attrib={ - 'funderIdentifierType': self._funder_identifier_type(_funder_identifier), - }, + attrib=_funder_id_attrib, ) if funding_award is not None: self.visit( diff --git a/osf_tests/metadata/expected_metadata_files/project_full.datacite.json b/osf_tests/metadata/expected_metadata_files/project_full.datacite.json index 3b9357c5f5e..3ff394455a1 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.datacite.json +++ b/osf_tests/metadata/expected_metadata_files/project_full.datacite.json @@ -56,7 +56,8 @@ "awardTitle": "because reasons", "funderIdentifier": { "funderIdentifier": "https://doi.org/10.$$$$", - "funderIdentifierType": "Crossref Funder ID" + "funderIdentifierType": "Crossref Funder ID", + "schemeURI": "https://www.crossref.org/services/funder-registry/" }, "funderName": "Mx. Moneypockets" }, @@ -68,14 +69,16 @@ "awardTitle": "because reasons!", "funderIdentifier": { "funderIdentifier": "https://doi.org/10.$$$$", - "funderIdentifierType": "Crossref Funder ID" + "funderIdentifierType": "Crossref Funder ID", + "schemeURI": "https://www.crossref.org/services/funder-registry/" }, "funderName": "Mx. Moneypockets" }, { "funderIdentifier": { "funderIdentifier": "https://ror.org/0example", - "funderIdentifierType": "ROR" + "funderIdentifierType": "ROR", + "schemeURI": "https://ror.org/" }, "funderName": "Caring Fan" } diff --git a/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml b/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml index f563b99e0e8..a161f7cad66 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml +++ b/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml @@ -38,19 +38,19 @@ Mx. Moneypockets - https://doi.org/10.$$$$ + https://doi.org/10.$$$$ 10000000 because reasons Mx. Moneypockets - https://doi.org/10.$$$$ + https://doi.org/10.$$$$ 2000000 because reasons! Caring Fan - https://ror.org/0example + https://ror.org/0example diff --git a/tests/identifiers/test_datacite.py b/tests/identifiers/test_datacite.py index ba432402a88..1a774ceea71 100644 --- a/tests/identifiers/test_datacite.py +++ b/tests/identifiers/test_datacite.py @@ -6,7 +6,7 @@ from django.utils import timezone from framework.auth import Auth -from osf.models import Outcome +from osf.models import GuidMetadataRecord, Outcome from osf.utils.outcomes import ArtifactTypes from osf_tests.factories import AuthUserFactory, IdentifierFactory, RegistrationFactory from tests.base import OsfTestCase @@ -300,6 +300,151 @@ def test_datacite_format_related_resources__ignores_inactive_resources(self, dat ] _assert_unordered_list_of_dicts_equal(metadata_dict['relatedIdentifiers'], expected_relationships) + def _set_funding_info(self, registration, funding_info): + metadata_record = GuidMetadataRecord.objects.for_guid(registration._id) + metadata_record.funding_info = funding_info + metadata_record.save() + + def test_datacite_funding_references_with_ror_identifier_xml(self, registration, datacite_client): + self._set_funding_info(registration, [ + { + 'funder_name': 'National Science Foundation', + 'funder_identifier': 'https://ror.org/021nxhr62', + 'funder_identifier_type': 'ROR', + }, + ]) + metadata_xml = datacite_client.build_metadata(registration) + parser = lxml.etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8') + root = lxml.etree.fromstring(metadata_xml, parser=parser) + ns = schema40.ns[None] + + funding_refs = root.find(f'{{{ns}}}fundingReferences') + refs = funding_refs.findall(f'{{{ns}}}fundingReference') + assert len(refs) == 1 + + funder_name = refs[0].find(f'{{{ns}}}funderName') + assert funder_name.text == 'National Science Foundation' + + funder_id = refs[0].find(f'{{{ns}}}funderIdentifier') + assert funder_id.text == 'https://ror.org/021nxhr62' + assert funder_id.attrib['funderIdentifierType'] == 'ROR' + assert funder_id.attrib['schemeURI'] == 'https://ror.org/' + + def test_datacite_funding_references_with_ror_identifier_json(self, registration, datacite_client): + self._set_funding_info(registration, [ + { + 'funder_name': 'National Science Foundation', + 'funder_identifier': 'https://ror.org/021nxhr62', + 'funder_identifier_type': 'ROR', + }, + ]) + metadata_dict = datacite_client.build_metadata(registration, as_xml=False) + + funding_refs = metadata_dict['fundingReferences'] + assert len(funding_refs) == 1 + assert str(funding_refs[0]['funderName']) == 'National Science Foundation' + assert funding_refs[0]['funderIdentifier']['funderIdentifier'] == 'https://ror.org/021nxhr62' + assert funding_refs[0]['funderIdentifier']['funderIdentifierType'] == 'ROR' + assert funding_refs[0]['funderIdentifier']['schemeURI'] == 'https://ror.org/' + + def test_datacite_funding_references_with_crossref_funder_id(self, registration, datacite_client): + self._set_funding_info(registration, [ + { + 'funder_name': 'Mx. Moneypockets', + 'funder_identifier': 'https://doi.org/10.13039/100000001', + 'funder_identifier_type': 'Crossref Funder ID', + 'award_number': '10000000', + 'award_uri': 'https://moneypockets.example/millions', + 'award_title': 'because reasons', + }, + ]) + metadata_xml = datacite_client.build_metadata(registration) + parser = lxml.etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8') + root = lxml.etree.fromstring(metadata_xml, parser=parser) + ns = schema40.ns[None] + + funding_refs = root.find(f'{{{ns}}}fundingReferences') + refs = funding_refs.findall(f'{{{ns}}}fundingReference') + assert len(refs) == 1 + + funder_id = refs[0].find(f'{{{ns}}}funderIdentifier') + assert funder_id.text == 'https://doi.org/10.13039/100000001' + assert funder_id.attrib['funderIdentifierType'] == 'Crossref Funder ID' + assert funder_id.attrib['schemeURI'] == 'https://www.crossref.org/services/funder-registry/' + + award_number = refs[0].find(f'{{{ns}}}awardNumber') + assert award_number.text == '10000000' + + def test_datacite_funding_references_mixed_ror_and_crossref(self, registration, datacite_client): + self._set_funding_info(registration, [ + { + 'funder_name': 'Mx. Moneypockets', + 'funder_identifier': 'https://doi.org/10.13039/100000001', + 'funder_identifier_type': 'Crossref Funder ID', + 'award_number': '10000000', + 'award_uri': 'https://moneypockets.example/millions', + 'award_title': 'because reasons', + }, + { + 'funder_name': 'National Science Foundation', + 'funder_identifier': 'https://ror.org/021nxhr62', + 'funder_identifier_type': 'ROR', + }, + ]) + metadata_dict = datacite_client.build_metadata(registration, as_xml=False) + funding_refs = metadata_dict['fundingReferences'] + assert len(funding_refs) == 2 + + # Build a lookup by funder name for order-independent assertions + refs_by_name = {str(ref['funderName']): ref for ref in funding_refs} + + crossref_ref = refs_by_name['Mx. Moneypockets'] + assert crossref_ref['funderIdentifier']['funderIdentifier'] == 'https://doi.org/10.13039/100000001' + assert crossref_ref['funderIdentifier']['funderIdentifierType'] == 'Crossref Funder ID' + assert crossref_ref['funderIdentifier']['schemeURI'] == 'https://www.crossref.org/services/funder-registry/' + assert crossref_ref['awardNumber']['awardNumber'] == '10000000' + + ror_ref = refs_by_name['National Science Foundation'] + assert ror_ref['funderIdentifier']['funderIdentifier'] == 'https://ror.org/021nxhr62' + assert ror_ref['funderIdentifier']['funderIdentifierType'] == 'ROR' + assert ror_ref['funderIdentifier']['schemeURI'] == 'https://ror.org/' + + def test_datacite_funding_references_ror_with_award_info(self, registration, datacite_client): + self._set_funding_info(registration, [ + { + 'funder_name': 'National Institutes of Health', + 'funder_identifier': 'https://ror.org/01cwqze88', + 'funder_identifier_type': 'ROR', + 'award_number': 'R01-GM123456', + 'award_uri': 'https://reporter.nih.gov/project-details/123456', + 'award_title': 'Studying important things', + }, + ]) + metadata_xml = datacite_client.build_metadata(registration) + parser = lxml.etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8') + root = lxml.etree.fromstring(metadata_xml, parser=parser) + ns = schema40.ns[None] + + funding_refs = root.find(f'{{{ns}}}fundingReferences') + refs = funding_refs.findall(f'{{{ns}}}fundingReference') + assert len(refs) == 1 + + funder_id = refs[0].find(f'{{{ns}}}funderIdentifier') + assert funder_id.text == 'https://ror.org/01cwqze88' + assert funder_id.attrib['funderIdentifierType'] == 'ROR' + assert funder_id.attrib['schemeURI'] == 'https://ror.org/' + + award_number = refs[0].find(f'{{{ns}}}awardNumber') + assert award_number.text == 'R01-GM123456' + + award_title = refs[0].find(f'{{{ns}}}awardTitle') + assert award_title.text == 'Studying important things' + + def test_datacite_funding_references_no_funding_info(self, registration, datacite_client): + # With no funding info set, fundingReferences should be empty + metadata_dict = datacite_client.build_metadata(registration, as_xml=False) + assert metadata_dict.get('fundingReferences', []) == [] + @pytest.mark.django_db class TestDataCiteViews(OsfTestCase):