From da569f9d9ee4d62f215a68a575a4e19691a39c05 Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Tue, 26 Nov 2024 14:57:36 +0100 Subject: [PATCH 01/21] add file encoding when reading README.md --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4411a0d..127e951 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ import setuptools from dspace_rest_client import __version__ -with open("README.md", "r") as fh: +with open("README.md", "r", encoding="utf_8") as fh: long_description = fh.read() setuptools.setup( From 56b3f5d7c4c93fc48e988a94ff77053fd038dc4d Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Tue, 26 Nov 2024 15:03:39 +0100 Subject: [PATCH 02/21] remove unnecessary to use arguments when calling super for the parent class --- dspace_rest_client/models.py | 46 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index d6a6da5..1b9591b 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -226,9 +226,9 @@ def __init__(self, api_resource=None, dso=None): """ if dso is not None: api_resource = dso.as_dict() - super(Item, self).__init__(dso=dso) + super().__init__(dso=dso) else: - super(Item, self).__init__(api_resource) + super().__init__(api_resource) if api_resource is not None: self.type = 'item' @@ -252,7 +252,7 @@ def as_dict(self): Return a dict representation of this Item, based on super with item-specific attributes added @return: dict of Item for API use """ - dso_dict = super(Item, self).as_dict() + dso_dict = super().as_dict() item_dict = {'inArchive': self.inArchive, 'discoverable': self.discoverable, 'withdrawn': self.withdrawn} return {**dso_dict, **item_dict} @@ -276,7 +276,7 @@ def __init__(self, api_resource=None): Default constructor. 
Call DSpaceObject init then set item-specific attributes @param api_resource: API result object to use as initial data """ - super(Community, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'community' def as_dict(self): @@ -284,7 +284,7 @@ def as_dict(self): Return a dict representation of this Community, based on super with community-specific attributes added @return: dict of Item for API use """ - dso_dict = super(Community, self).as_dict() + dso_dict = super().as_dict() # TODO: More community-specific stuff community_dict = {} return {**dso_dict, **community_dict} @@ -301,11 +301,11 @@ def __init__(self, api_resource=None): Default constructor. Call DSpaceObject init then set collection-specific attributes @param api_resource: API result object to use as initial data """ - super(Collection, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'collection' def as_dict(self): - dso_dict = super(Collection, self).as_dict() + dso_dict = super().as_dict() """ Return a dict representation of this Collection, based on super with collection-specific attributes added @return: dict of Item for API use @@ -325,7 +325,7 @@ def __init__(self, api_resource=None): Default constructor. Call DSpaceObject init then set bundle-specific attributes @param api_resource: API result object to use as initial data """ - super(Bundle, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'bundle' def as_dict(self): @@ -333,7 +333,7 @@ def as_dict(self): Return a dict representation of this Bundle, based on super with bundle-specific attributes added @return: dict of Bundle for API use """ - dso_dict = super(Bundle, self).as_dict() + dso_dict = super().as_dict() bundle_dict = {} return {**dso_dict, **bundle_dict} @@ -357,7 +357,7 @@ def __init__(self, api_resource=None): Default constructor. 
Call DSpaceObject init then set bitstream-specific attributes @param api_resource: API result object to use as initial data """ - super(Bitstream, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'bitstream' if 'bundleName' in api_resource: self.bundleName = api_resource['bundleName'] @@ -373,7 +373,7 @@ def as_dict(self): Return a dict representation of this Bitstream, based on super with bitstream-specific attributes added @return: dict of Bitstream for API use """ - dso_dict = super(Bitstream, self).as_dict() + dso_dict = super().as_dict() bitstream_dict = {'bundleName': self.bundleName, 'sizeBytes': self.sizeBytes, 'checkSum': self.checkSum, 'sequenceId': self.sequenceId} return {**dso_dict, **bitstream_dict} @@ -392,7 +392,7 @@ def __init__(self, api_resource=None): Default constructor. Call DSpaceObject init then set group-specific attributes @param api_resource: API result object to use as initial data """ - super(Group, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'group' if 'name' in api_resource: self.name = api_resource['name'] @@ -404,7 +404,7 @@ def as_dict(self): Return a dict representation of this Group, based on super with group-specific attributes added @return: dict of Group for API use """ - dso_dict = super(Group, self).as_dict() + dso_dict = super().as_dict() group_dict = {'name': self.name, 'permanent': self.permanent} return {**dso_dict, **group_dict} @@ -427,7 +427,7 @@ def __init__(self, api_resource=None): Default constructor. 
Call DSpaceObject init then set user-specific attributes @param api_resource: API result object to use as initial data """ - super(User, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'user' if 'name' in api_resource: self.name = api_resource['name'] @@ -449,7 +449,7 @@ def as_dict(self): Return a dict representation of this User, based on super with user-specific attributes added @return: dict of User for API use """ - dso_dict = super(User, self).as_dict() + dso_dict = super().as_dict() user_dict = {'name': self.name, 'netid': self.netid, 'lastActive': self.lastActive, 'canLogIn': self.canLogIn, 'email': self.email, 'requireCertificate': self.requireCertificate, 'selfRegistered': self.selfRegistered} @@ -462,7 +462,7 @@ class InProgressSubmission(AddressableHALResource): type = None def __init__(self, api_resource): - super(InProgressSubmission, self).__init__(api_resource) + super().__init__(api_resource) if 'lastModified' in api_resource: self.lastModified = api_resource['lastModified'] if 'step' in api_resource: @@ -473,7 +473,7 @@ def __init__(self, api_resource): self.lastModified = api_resource['lastModified'] def as_dict(self): - parent_dict = super(InProgressSubmission, self).as_dict() + parent_dict = super().as_dict() dict = { 'lastModified': self.lastModified, 'step': self.step, @@ -485,10 +485,10 @@ def as_dict(self): class WorkspaceItem(InProgressSubmission): def __init__(self, api_resource): - super(WorkspaceItem, self).__init__(api_resource) + super().__init__(api_resource) def as_dict(self): - return super(WorkspaceItem, self).as_dict() + return super().as_dict() class EntityType(AddressableHALResource): """ @@ -497,7 +497,7 @@ class EntityType(AddressableHALResource): are all common entity types used in DSpace 7+ """ def __init__(self, api_resource): - super(EntityType, self).__init__(api_resource) + super().__init__(api_resource) if 'label' in api_resource: self.label = api_resource['label'] if 'type' in api_resource: 
@@ -508,7 +508,7 @@ class RelationshipType(AddressableHALResource): TODO: RelationshipType """ def __init__(self, api_resource): - super(RelationshipType, self).__init__(api_resource) + super().__init__(api_resource) class SearchResult(HALResource): """ @@ -567,7 +567,7 @@ class SearchResult(HALResource): type = None def __init__(self, api_resource): - super(SearchResult, self).__init__(api_resource) + super().__init__(api_resource) if 'lastModified' in api_resource: self.lastModified = api_resource['lastModified'] if 'step' in api_resource: @@ -578,7 +578,7 @@ def __init__(self, api_resource): self.type = api_resource['type'] def as_dict(self): - parent_dict = super(SearchResult, self).as_dict() + parent_dict = super().as_dict() dict = { 'lastModified': self.lastModified, 'step': self.step, From bf38338f02c4c9f24dc8c136e85f8c75b31b4394 Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Tue, 26 Nov 2024 15:05:26 +0100 Subject: [PATCH 03/21] remove comma after values in order to avoid it turns it into tuple --- dspace_rest_client/models.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index 1b9591b..1dfe42d 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -414,12 +414,12 @@ class User(SimpleDSpaceObject): Extends DSpaceObject to implement specific attributes and methods for users (aka. 
EPersons) """ type = 'user' - name = None, - netid = None, - lastActive = None, - canLogIn = False, - email = None, - requireCertificate = False, + name = None + netid = None + lastActive = None + canLogIn = False + email = None + requireCertificate = False selfRegistered = False def __init__(self, api_resource=None): From fb3ddae0f6ca363eeccb75bfec456533a50f4785 Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Tue, 26 Nov 2024 15:11:26 +0100 Subject: [PATCH 04/21] use literals instead of calling list/set/dict --- dspace_rest_client/client.py | 18 +++++++++--------- dspace_rest_client/models.py | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index cbabfda..0090239 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -606,7 +606,7 @@ def get_bundles(self, parent=None, uuid=None, page=0, size=20, sort=None): """ # TODO: It is probably wise to allow the parent UUID to be simply passed as an alternative to having the full # python object as constructed by this REST client, for more flexible usage. 
- bundles = list() + bundles = [] single_result = False if uuid is not None: url = f'{self.API_ENDPOINT}/core/bundles/{uuid}' @@ -614,7 +614,7 @@ def get_bundles(self, parent=None, uuid=None, page=0, size=20, sort=None): elif parent is not None: url = f'{self.API_ENDPOINT}/core/items/{parent.uuid}/bundles' else: - return list() + return [] params = {} if size is not None: params['size'] = size @@ -676,7 +676,7 @@ def get_bitstreams(self, uuid=None, bundle=None, page=0, size=20, sort=None): """ url = f'{self.API_ENDPOINT}/core/bitstreams/{uuid}' if uuid is None and bundle is None: - return list() + return [] if uuid is None and isinstance(bundle, Bundle): if 'bitstreams' in bundle.links: url = bundle.links['bitstreams']['href'] @@ -694,7 +694,7 @@ def get_bitstreams(self, uuid=None, bundle=None, page=0, size=20, sort=None): r_json = self.fetch_resource(url, params=params) if '_embedded' in r_json: if 'bitstreams' in r_json['_embedded']: - bitstreams = list() + bitstreams = [] for bitstream_resource in r_json['_embedded']['bitstreams']: bitstreams.append(Bitstream(bitstream_resource)) return bitstreams @@ -819,7 +819,7 @@ def get_communities(self, uuid=None, page=0, size=20, sort=None, top=False): # Perform actual get r_json = self.fetch_resource(url, params) # Empty list - communities = list() + communities = [] if '_embedded' in r_json: if 'communities' in r_json['_embedded']: for community_resource in r_json['_embedded']['communities']: @@ -900,7 +900,7 @@ def get_collections(self, uuid=None, community=None, page=0, size=20, sort=None) # Perform the actual request. 
By now, our URL and parameter should be properly set r_json = self.fetch_resource(url, params=params) # Empty list - collections = list() + collections = [] if '_embedded' in r_json: # This is a list of collections if 'collections' in r_json['_embedded']: @@ -967,11 +967,11 @@ def get_items(self): """ url = f'{self.API_ENDPOINT}/core/items' # Empty item list - items = list() + items = [] # Perform the actual request r_json = self.fetch_resource(url) # Empty list - items = list() + items = [] if '_embedded' in r_json: # This is a list of items if 'items' in r_json['_embedded']: @@ -1111,7 +1111,7 @@ def get_users(self, page=0, size=20, sort=None): @return: list of User objects """ url = f'{self.API_ENDPOINT}/eperson/epersons' - users = list() + users = [] params = {} if size is not None: params['size'] = size diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index 1dfe42d..2b29e08 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -63,7 +63,7 @@ def __init__(self, api_resource=None): """ super().__init__(api_resource) - self.metadata = dict() + self.metadata = {} if api_resource is not None: if 'id' in api_resource: @@ -83,7 +83,7 @@ def get_metadata_values(self, field): @param field: DSpace field, eg. dc.creator @return: list of strings """ - values = list() + values = [] if field in self.metadata: values = self.metadata[field] return values @@ -111,7 +111,7 @@ def __init__(self, api_resource=None, dso=None): """ super().__init__(api_resource) self.type = None - self.metadata = dict() + self.metadata = {} if dso is not None: api_resource = dso.as_dict() @@ -217,7 +217,7 @@ class Item(SimpleDSpaceObject): inArchive = False discoverable = False withdrawn = False - metadata = dict() + metadata = {} def __init__(self, api_resource=None, dso=None): """ @@ -242,7 +242,7 @@ def get_metadata_values(self, field): @param field: DSpace field, eg. 
dc.creator @return: list of strings """ - values = list() + values = [] if field in self.metadata: values = self.metadata[field] return values From 033bae301f574a393067b45c29cd9906fb4a9ba0 Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Tue, 26 Nov 2024 15:27:48 +0100 Subject: [PATCH 05/21] re-organize imports --- dspace_rest_client/client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 0090239..e3dba06 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -17,12 +17,13 @@ import json import logging import functools +import os +from uuid import UUID import requests from requests import Request import pysolr -import os -from uuid import UUID + from .models import * from . import __version__ From 24802ca217a63be7c0821bf0f85d0bc6044dc480 Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Tue, 26 Nov 2024 15:32:44 +0100 Subject: [PATCH 06/21] use sys.exit instead of exit --- console.py | 3 ++- example.py | 11 ++++++----- example_gets.py | 4 +++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/console.py b/console.py index 69f26a7..97f12fd 100644 --- a/console.py +++ b/console.py @@ -3,6 +3,7 @@ #from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream import code import os +import sys # The DSpace client will look for the same environment variables, but we can also look for them here explicitly # and as an example @@ -23,6 +24,6 @@ authenticated = d.authenticate() if not authenticated: print(f'Error logging in! 
Giving up.') - exit(1) + sys.exit(1) code.interact(local=locals()) diff --git a/example.py b/example.py index e240a97..ec09681 100644 --- a/example.py +++ b/example.py @@ -10,6 +10,7 @@ from dspace_rest_client.client import DSpaceClient from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream import os +import sys # The DSpace client will look for the same environment variables but we can also look for them here explicitly # and as an example @@ -34,7 +35,7 @@ authenticated = d.authenticate() if not authenticated: print('Error logging in! Giving up.') - exit(1) + sys.exit(1) # Put together some basic Community data. # See https://github.com/DSpace/RestContract/blob/main/communities.md @@ -61,7 +62,7 @@ print(f'New community created! Handle: {new_community.handle}') else: print('Error! Giving up.') - exit(1) + sys.exit(1) # Update the community metadata new_community.name = 'Community created by the Python REST Client - Updated Name' @@ -96,7 +97,7 @@ print(f'New collection created! Handle: {new_collection.handle}') else: print('Error! Giving up.') - exit(1) + sys.exit(1) # Put together some basic Item data. # (See: https://github.com/DSpace/RestContract/blob/main/items.md) @@ -149,7 +150,7 @@ print(f'New item created! Handle: {new_item.handle}') else: print('Error! Giving up.') - exit(1) + sys.exit(1) # Add a single metadata field+value to the item (PATCH operation) updated_item = d.add_metadata(dso=new_item, field='dc.description.abstract', value='Added abstract to an existing item', @@ -184,7 +185,7 @@ print(f'New bitstream created! UUID: {new_bitstream.uuid}') else: print('Error! Giving up.') - exit(1) + sys.exit(1) print('All finished with example data creation. 
Visit your test repository to review created objects') diff --git a/example_gets.py b/example_gets.py index bdf53a8..cd4625e 100644 --- a/example_gets.py +++ b/example_gets.py @@ -6,6 +6,8 @@ Example Python 3 application using the dspace.py API client library to retrieve basic DSOs in a DSpace repository """ +import sys + from dspace_rest_client.client import DSpaceClient # Import models as below if needed #from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream @@ -32,7 +34,7 @@ authenticated = d.authenticate() if not authenticated: print('Error logging in! Giving up.') - exit(1) + sys.exit(1) # Retrieving objects - now that we know there is some data in the repository we can demonstrate # some simple ways of retrieving and iterating DSOs From 4e28bc4c67bb14b2e027af819cf39a98e582fbcd Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Tue, 26 Nov 2024 15:34:55 +0100 Subject: [PATCH 07/21] re-organize imports --- console.py | 7 ++++--- example.py | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/console.py b/console.py index 97f12fd..46c36f0 100644 --- a/console.py +++ b/console.py @@ -1,10 +1,11 @@ -from dspace_rest_client.client import DSpaceClient -# Import models as needed -#from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream import code import os import sys +from dspace_rest_client.client import DSpaceClient +# Import models as needed +#from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream + # The DSpace client will look for the same environment variables, but we can also look for them here explicitly # and as an example url = 'http://localhost:8080/server/api' diff --git a/example.py b/example.py index ec09681..42f2241 100644 --- a/example.py +++ b/example.py @@ -7,11 +7,12 @@ some resources in a DSpace 7 repository. 
""" -from dspace_rest_client.client import DSpaceClient -from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream import os import sys +from dspace_rest_client.client import DSpaceClient +from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream + # The DSpace client will look for the same environment variables but we can also look for them here explicitly # and as an example url = 'http://localhost:8080/server/api' From 4274a76269706ecb43e987501d88ee342abc4c87 Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Wed, 27 Nov 2024 18:01:15 +0100 Subject: [PATCH 08/21] fix C0301: Line too long and W1309: Using an f-string that does not have any interpolated variables in console.log --- console.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/console.py b/console.py index 46c36f0..75c3f75 100644 --- a/console.py +++ b/console.py @@ -6,8 +6,8 @@ # Import models as needed #from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream -# The DSpace client will look for the same environment variables, but we can also look for them here explicitly -# and as an example +# The DSpace client will look for the same environment variables, but we can also look +# for them here explicitly and as an example url = 'http://localhost:8080/server/api' if 'DSPACE_API_ENDPOINT' in os.environ: url = os.environ['DSPACE_API_ENDPOINT'] @@ -24,7 +24,7 @@ # Authenticate against the DSpace client authenticated = d.authenticate() if not authenticated: - print(f'Error logging in! Giving up.') + print('Error logging in! 
Giving up.') sys.exit(1) code.interact(local=locals()) From ea8fdfc4df4705e046625d32b0333268f285ce8d Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Thu, 28 Nov 2024 13:08:07 +0100 Subject: [PATCH 09/21] fix C0103: Constant name doesn't conform to UPPER_CASE naming style --- console.py | 21 +++++++++------------ example.py | 42 +++++++++++++++++++++--------------------- example_gets.py | 8 ++++---- solr_example.py | 9 ++++----- 4 files changed, 38 insertions(+), 42 deletions(-) diff --git a/console.py b/console.py index 75c3f75..9993d3d 100644 --- a/console.py +++ b/console.py @@ -6,20 +6,17 @@ # Import models as needed #from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream -# The DSpace client will look for the same environment variables, but we can also look -# for them here explicitly and as an example -url = 'http://localhost:8080/server/api' -if 'DSPACE_API_ENDPOINT' in os.environ: - url = os.environ['DSPACE_API_ENDPOINT'] -username = 'username@test.system.edu' -if 'DSPACE_API_USERNAME' in os.environ: - username = os.environ['DSPACE_API_USERNAME'] -password = 'password' -if 'DSPACE_API_PASSWORD' in os.environ: - password = os.environ['DSPACE_API_PASSWORD'] +DEFAULT_URL = 'http://localhost:8080/server/api' +DEFAULT_USERNAME = 'username@test.system.edu' +DEFAULT_PASSWORD = 'password' + +# Configuration from environment variables +URL = os.environ.get('DSPACE_API_ENDPOINT', DEFAULT_URL) +USERNAME = os.environ.get('DSPACE_API_USERNAME', DEFAULT_USERNAME) +PASSWORD = os.environ.get('DSPACE_API_PASSWORD', DEFAULT_PASSWORD) # Instantiate DSpace client -d = DSpaceClient(api_endpoint=url, username=username, password=password) +d = DSpaceClient(api_endpoint=URL, username=USERNAME, password=PASSWORD) # Authenticate against the DSpace client authenticated = d.authenticate() diff --git a/example.py b/example.py index 42f2241..400dcd7 100644 --- a/example.py +++ b/example.py @@ -13,24 +13,24 @@ from dspace_rest_client.client import 
DSpaceClient from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream -# The DSpace client will look for the same environment variables but we can also look for them here explicitly -# and as an example -url = 'http://localhost:8080/server/api' -if 'DSPACE_API_ENDPOINT' in os.environ: - url = os.environ['DSPACE_API_ENDPOINT'] -username = 'username@test.system.edu' -if 'DSPACE_API_USERNAME' in os.environ: - username = os.environ['DSPACE_API_USERNAME'] -password = 'password' -if 'DSPACE_API_PASSWORD' in os.environ: - password = os.environ['DSPACE_API_PASSWORD'] +DEFAULT_URL = 'http://localhost:8080/server/api' +DEFAULT_USERNAME = 'username@test.system.edu' +DEFAULT_PASSWORD = 'password' + +# Configuration from environment variables +URL = os.environ.get('DSPACE_API_ENDPOINT', DEFAULT_URL) +USERNAME = os.environ.get('DSPACE_API_USERNAME', DEFAULT_USERNAME) +PASSWORD = os.environ.get('DSPACE_API_PASSWORD', DEFAULT_PASSWORD) # Instantiate DSpace client -# Note the 'fake_user_agent' setting here -- this will set a string like the following, to get by Cloudfront: +# Note the 'fake_user_agent' setting here -- this will set a string like the following, +# to get by Cloudfront: # Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 -# The default is to *not* fake the user agent, and instead use the default of DSpace-Python-REST-Client/x.y.z -# To specify a custom user agent, set the USER_AGENT env variable and leave/set fake_user_agent as False -d = DSpaceClient(api_endpoint=url, username=username, password=password, fake_user_agent=True) +# The default is to *not* fake the user agent, and instead use the default of +# DSpace-Python-REST-Client/x.y.z +# To specify a custom user agent, set the USER_AGENT env variable and leave/set +# fake_user_agent as False +d = DSpaceClient(api_endpoint=URL, username=USERNAME, password=PASSWORD, fake_user_agent=True) # Authenticate against the DSpace client 
authenticated = d.authenticate() @@ -57,8 +57,8 @@ # Create the new community # In this example, we'll just make this a top-level community by # passing None as the parent parameter -community_parent = None -new_community = d.create_community(parent=community_parent, data=community_data) +COMMUNITY_PARENT = None +new_community = d.create_community(parent=COMMUNITY_PARENT, data=community_data) if isinstance(new_community, Community) and new_community.uuid is not None: print(f'New community created! Handle: {new_community.handle}') else: @@ -176,12 +176,12 @@ } # Set the mime type (using mimetypes.guess_type is recommended for real uploads if you don't want to set manually) -file_mime = 'text/plain' +FILE_MIME = 'text/plain' # Set a better file name for our test upload -file_name = 'uploaded_file.txt' +FILE_NAME = 'uploaded_file.txt' # Create the bitstream and upload the file -new_bitstream = d.create_bitstream(bundle=new_bundle, name=file_name, - path='LICENSE.txt', mime=file_mime, metadata=bitstream_metadata) +new_bitstream = d.create_bitstream(bundle=new_bundle, name=FILE_NAME, + path='LICENSE.txt', mime=FILE_MIME, metadata=bitstream_metadata) if isinstance(new_bitstream, Bitstream) and new_bitstream.uuid is not None: print(f'New bitstream created! 
UUID: {new_bitstream.uuid}') else: diff --git a/example_gets.py b/example_gets.py index cd4625e..3c31f63 100644 --- a/example_gets.py +++ b/example_gets.py @@ -19,16 +19,16 @@ # DSPACE_API_USERNAME= # DSPACE_API_PASSWORD= # USER_AGENT= -url = 'http://localhost:8080/server/api' -username = 'username@test.system.edu' -password = 'password' +URL = 'http://localhost:8080/server/api' +USERNAME = 'username@test.system.edu' +PASSWORD = 'password' # Instantiate DSpace client # Note the 'fake_user_agent' setting here -- this will set a string like the following, to get by Cloudfront: # Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 # The default is to *not* fake the user agent, and instead use the default of DSpace-Python-REST-Client/x.y.z. # To specify a custom user agent, set the USER_AGENT env variable and leave/set fake_user_agent as False -d = DSpaceClient(api_endpoint=url, username=username, password=password, fake_user_agent=True) +d = DSpaceClient(api_endpoint=URL, username=USERNAME, password=PASSWORD, fake_user_agent=True) # Authenticate against the DSpace client authenticated = d.authenticate() diff --git a/solr_example.py b/solr_example.py index cd11b23..aaa53bc 100644 --- a/solr_example.py +++ b/solr_example.py @@ -4,15 +4,15 @@ from dspace_rest_client.client import DSpaceClient -url = 'http://localhost:8080/server/api' -username = 'username@domain.com' -password = 'password' +URL = 'http://localhost:8080/server/api' +USERNAME = 'username@domain.com' +PASSWORD = 'password' # To auth solr do like this and pass it as the argument in DSpaceClient solr_auth = HTTPBasicAuth('user', 'pass') # Instantiate DSpace client -d = DSpaceClient(api_endpoint=url, username=username, password=password, +d = DSpaceClient(api_endpoint=URL, username=USERNAME, password=PASSWORD, solr_endpoint='http://localhost:8983/solr/search', solr_auth=None) # Here's an example of a wildcard query with some filters to apply and some 
fields to return @@ -22,4 +22,3 @@ for doc in results.docs: pprint.pprint(doc) - From 98bc54af45fb1a11986691055d56c2fc89c7fd79 Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Thu, 28 Nov 2024 13:24:09 +0100 Subject: [PATCH 10/21] fix C0301: Line too long in scripts --- example.py | 39 +++++++++++++++++++----------- example_gets.py | 64 ++++++++++++++++++++++++++++++------------------- setup.py | 8 +++++-- 3 files changed, 70 insertions(+), 41 deletions(-) diff --git a/example.py b/example.py index 400dcd7..154bd2a 100644 --- a/example.py +++ b/example.py @@ -25,7 +25,8 @@ # Instantiate DSpace client # Note the 'fake_user_agent' setting here -- this will set a string like the following, # to get by Cloudfront: -# Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 +# Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) \ +# Chrome/39.0.2171.95 Safari/537.36 # The default is to *not* fake the user agent, and instead use the default of # DSpace-Python-REST-Client/x.y.z # To specify a custom user agent, set the USER_AGENT env variable and leave/set @@ -154,7 +155,8 @@ sys.exit(1) # Add a single metadata field+value to the item (PATCH operation) -updated_item = d.add_metadata(dso=new_item, field='dc.description.abstract', value='Added abstract to an existing item', +updated_item = d.add_metadata(dso=new_item, field='dc.description.abstract', + value='Added abstract to an existing item', language='en', authority=None, confidence=-1) # Create a new ORIGINAL bundle @@ -175,7 +177,8 @@ 'authority': None, 'confidence': -1, 'place': 0}] } -# Set the mime type (using mimetypes.guess_type is recommended for real uploads if you don't want to set manually) +# Set the mime type (using mimetypes.guess_type is recommended for real uploads if you +# don't want to set manually) FILE_MIME = 'text/plain' # Set a better file name for our test upload FILE_NAME = 'uploaded_file.txt' @@ -188,7 +191,8 @@ 
print('Error! Giving up.') sys.exit(1) -print('All finished with example data creation. Visit your test repository to review created objects') +print('All finished with example data creation. Visit your test repository to review \ + created objects') # Retrieving objects - now that we know there is some data in the repository we can demonstrate # some simple ways of retrieving and iterating DSOs @@ -202,8 +206,10 @@ collections = d.get_collections(community=top_community) for collection in collections: print(f'{collection.name} ({collection.uuid}') - # Get all items in this collection - see that the recommended method is a search, scoped to this collection - # (there is no collection/items endpoint, though there is a /mappedItems endpoint, not yet implemented here) + # Get all items in this collection - see that the recommended method is a search, + # scoped to this collection + # (there is no collection/items endpoint, though there is a /mappedItems endpoint, + # not yet implemented here) items = d.search_objects(query='*:*', scope=collection.uuid, dso_type='item') for item in items: print(f'{item.name} ({item.uuid})') @@ -218,15 +224,20 @@ # Download this bitstream r = d.download_bitstream(bitstream.uuid) if r is not None and r.headers is not None: - print(f'\tHeaders (server info, not calculated locally)\n\tmd5: {r.headers.get("ETag")}\n' - f'\tformat: {r.headers.get("Content-Type")}\n\tlength: {r.headers.get("Content-Length")}\n' - f'\tLOCAL LEN(): {len(r.content)}') - # Uncomment the below to get the binary data in content and then do something with it like - # print, or write to file, etc. 
You want to use the 'content' property of the response object + print( + '\tHeaders (server info, not calculated locally)\n' + f'\tmd5: {r.headers.get("ETag")}\n' + f'\tformat: {r.headers.get("Content-Type")}\n' + f'\tlength: {r.headers.get("Content-Length")}\n' + f'\tLOCAL LEN(): {len(r.content)}' + ) + # Uncomment the below to get the binary data in content and then do + # something with it like print, or write to file, etc. You want to use + # the 'content' property of the response object # # print(r.content) -# Finally, let's show the new _iter methods which will transparently handle pagination and return iterators -# which you can use as normal +# Finally, let's show the new _iter methods which will transparently handle pagination +# and return iterators which you can use as normal for i, search_result in enumerate(d.search_objects_iter('*:*')): - print(f'Result #{i}: {search_result.name} ({search_result.uuid})') \ No newline at end of file + print(f'Result #{i}: {search_result.name} ({search_result.uuid})') diff --git a/example_gets.py b/example_gets.py index 3c31f63..d39e2ac 100644 --- a/example_gets.py +++ b/example_gets.py @@ -3,14 +3,16 @@ # and described in the LICENCE file in the root of this project """ -Example Python 3 application using the dspace.py API client library to retrieve basic DSOs in a DSpace repository +Example Python 3 application using the dspace.py API client library to retrieve basic DSOs in a +DSpace repository """ import sys from dspace_rest_client.client import DSpaceClient + # Import models as below if needed -#from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream +# from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream # Example variables needed for authentication and basic API requests # SET THESE TO MATCH YOUR TEST SYSTEM BEFORE RUNNING THE EXAMPLE SCRIPT @@ -19,54 +21,66 @@ # DSPACE_API_USERNAME= # DSPACE_API_PASSWORD= # USER_AGENT= -URL = 
'http://localhost:8080/server/api' -USERNAME = 'username@test.system.edu' -PASSWORD = 'password' +URL = "http://localhost:8080/server/api" +USERNAME = "username@test.system.edu" +PASSWORD = "password" # Instantiate DSpace client -# Note the 'fake_user_agent' setting here -- this will set a string like the following, to get by Cloudfront: -# Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 -# The default is to *not* fake the user agent, and instead use the default of DSpace-Python-REST-Client/x.y.z. -# To specify a custom user agent, set the USER_AGENT env variable and leave/set fake_user_agent as False -d = DSpaceClient(api_endpoint=URL, username=USERNAME, password=PASSWORD, fake_user_agent=True) +# Note the 'fake_user_agent' setting here -- this will set a string like the following, +# to get by Cloudfront: +# Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) \ +# Chrome/39.0.2171.95 Safari/537.36 +# The default is to *not* fake the user agent, and instead use the default of +# DSpace-Python-REST-Client/x.y.z. +# To specify a custom user agent, set the USER_AGENT env variable and leave/set +# fake_user_agent as False +d = DSpaceClient( + api_endpoint=URL, username=USERNAME, password=PASSWORD, fake_user_agent=True +) # Authenticate against the DSpace client authenticated = d.authenticate() if not authenticated: - print('Error logging in! Giving up.') + print("Error logging in! 
Giving up.") sys.exit(1) # Retrieving objects - now that we know there is some data in the repository we can demonstrate # some simple ways of retrieving and iterating DSOs -print('\nBeginning examples of get, search methods\n') +print("\nBeginning examples of get, search methods\n") # Get top communities top_communities = d.get_communities(top=True) for top_community in top_communities: - print(f'{top_community.name} ({top_community.uuid})') + print(f"{top_community.name} ({top_community.uuid})") # Get all collections in this community collections = d.get_collections(community=top_community) for collection in collections: - print(f'{collection.name} ({collection.uuid}') - # Get all items in this collection - see that the recommended method is a search, scoped to this collection - # (there is no collection/items endpoint, though there is a /mappedItems endpoint, not yet implemented here) - items = d.search_objects(query='*:*', scope=collection.uuid, dso_type='item') + print(f"{collection.name} ({collection.uuid}") + # Get all items in this collection - see that the recommended method is a search, + # scoped to this collection (there is no collection/items endpoint, though there is + # a /mappedItems endpoint, not yet implemented here) + items = d.search_objects(query="*:*", scope=collection.uuid, dso_type="item") for item in items: - print(f'{item.name} ({item.uuid})') + print(f"{item.name} ({item.uuid})") # Get all bundles in this item bundles = d.get_bundles(parent=item) for bundle in bundles: - print(f'{bundle.name} ({bundle.uuid}') + print(f"{bundle.name} ({bundle.uuid}") # Get all bitstreams in this bundle bitstreams = d.get_bitstreams(bundle=bundle) for bitstream in bitstreams: - print(f'{bitstream.name} ({bitstream.uuid}') + print(f"{bitstream.name} ({bitstream.uuid}") # Download this bitstream r = d.download_bitstream(bitstream.uuid) - print(f'\tHeaders (server info, not calculated locally)\n\tmd5: {r.headers.get("ETag")}\n' - f'\tformat: 
{r.headers.get("Content-Type")}\n\tlength: {r.headers.get("Content-Length")}\n' - f'\tLOCAL LEN(): {len(r.content)}') - # Uncomment the below to get the binary data in content and then do something with it like - # print, or write to file, etc. You want to use the 'content' property of the response object + print( + '\tHeaders (server info, not calculated locally)\n' + f'\tmd5: {r.headers.get("ETag")}\n' + f'\tformat: {r.headers.get("Content-Type")}\n' + f'\tlength: {r.headers.get("Content-Length")}\n' + f'\tLOCAL LEN(): {len(r.content)}' + ) + # Uncomment the below to get the binary data in content and then do + # something with it like print, or write to file, etc. You want to use + # the 'content' property of the response object # # print(r.content) diff --git a/setup.py b/setup.py index 127e951..6d71f2a 100644 --- a/setup.py +++ b/setup.py @@ -15,9 +15,13 @@ long_description_content_type="text/markdown", url="https://github.com/the-library-code/dspace-rest-client", project_urls={ - 'Documentation': 'https://github.com/the-library-code/dspace-rest-python/blob/main/README.md', + 'Documentation': ( + 'https://github.com/the-library-code/dspace-rest-python/blob/main/README.md' + ), 'GitHub': 'https://github.com/the-library-code/dspace-rest-python', - 'Changelog': 'https://github.com/the-library-code/dspace-rest-python/blob/main/CHANGELOG.md', + 'Changelog': ( + 'https://github.com/the-library-code/dspace-rest-python/blob/main/CHANGELOG.md' + ), }, classifiers=[ "Programming Language :: Python :: 3.8", From 275dfb3f03ba4932052e8ce0a444094bb8d3b016 Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Thu, 28 Nov 2024 13:31:10 +0100 Subject: [PATCH 11/21] use sys.exit instead of exit --- example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example.py b/example.py index 154bd2a..f9d9fef 100644 --- a/example.py +++ b/example.py @@ -166,7 +166,7 @@ print(f'New bundle created! UUID: {new_bundle.uuid}') else: print('Error! 
Giving up.') - exit(1) + sys.exit(1) # Create and upload a new bitstream using the LICENSE.txt file in this project # Set bitstream metadata From 304836070c22da39dd310e521707da9fb2fa4360 Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Thu, 28 Nov 2024 13:39:23 +0100 Subject: [PATCH 12/21] fix C0301: Line too long --- dspace_rest_client/client.py | 43 +++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index e3dba06..cdacc4e 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -3,14 +3,16 @@ # and described in the LICENSE.txt file in the root of this project """ -DSpace REST API client library. Intended to make interacting with DSpace in Python 3 easier, particularly -when creating, updating, retrieving and deleting DSpace Objects. -This client library is a work in progress and currently only implements the most basic functionality. -It was originally created to assist with a migration of container structure, items and bistreams from a non-DSpace -system to a new DSpace 7 repository. +DSpace REST API client library. Intended to make interacting with DSpace in Python 3 easier, +particularly when creating, updating, retrieving and deleting DSpace Objects. +This client library is a work in progress and currently only implements the most basic +functionality. +It was originally created to assist with a migration of container structure, items and bistreams +from a non-DSpace system to a new DSpace 7 repository. -It needs a lot of expansion: resource policies and permissions, validation of prepared objects and responses, -better abstracting and handling of HAL-like API responses, plus just all the other endpoints and operations implemented. 
+It needs a lot of expansion: resource policies and permissions, validation of prepared objects +and responses, better abstracting and handling of HAL-like API responses, plus just all the other +endpoints and operations implemented. @author Kim Shepherd """ @@ -48,11 +50,12 @@ def parse_json(response): class DSpaceClient: """ - Main class of the API client itself. This client uses request sessions to connect and authenticate to - the REST API, maintain XSRF tokens, and all GET, POST, PUT, PATCH operations. - Low-level api_get, api_post, api_put, api_delete, api_patch functions are defined to handle the requests and do - retries / XSRF refreshes where necessary. - Higher level get, create, update, partial_update (patch) functions are implemented for each DSO type + Main class of the API client itself. This client uses request sessions to connect and + authenticate to the REST API, maintain XSRF tokens, and all GET, POST, PUT, PATCH operations. + Low-level api_get, api_post, api_put, api_delete, api_patch functions are defined to + handle the requests and do retries / XSRF refreshes where necessary. + Higher level get, create, update, partial_update (patch) functions are implemented + for each DSO type """ # Set up basic environment, variables session = None @@ -87,11 +90,13 @@ class PatchOperation: def paginated(embed_name, item_constructor, embedding=lambda x: x): """ - @param embed_name: The key under '_embedded' in the JSON response that contains the resources to be paginated. - (e.g. 'collections', 'objects', 'items', etc.) + @param embed_name: The key under '_embedded' in the JSON response that contains the + resources to be paginated. (e.g. 'collections', 'objects', 'items', etc.) @param item_constructor: A callable that takes a resource dictionary and returns an item. 
- @param embedding: Optional post-fetch processing lambda (default: identity function) for each resource - @return: A decorator that, when applied to a method, follows pagination and yields each resource + @param embedding: Optional post-fetch processing lambda (default: identity function) + for each resource + @return: A decorator that, when applied to a method, follows pagination and yields + each resource """ def decorator(fun): @functools.wraps(fun) @@ -120,9 +125,11 @@ def do_paginate(url, params): def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWORD, solr_endpoint=SOLR_ENDPOINT, solr_auth=SOLR_AUTH, fake_user_agent=False): """ - Accept optional API endpoint, username, password arguments using the OS environment variables as defaults + Accept optional API endpoint, username, password arguments using the OS environment + variables as defaults :param api_endpoint: base path to DSpace REST API, eg. http://localhost:8080/server/api - :param username: username with appropriate privileges to perform operations on REST API + :param username: username with appropriate privileges to perform operations on + REST API :param password: password for the above username """ self.session = requests.Session() From 8c1b18f22aecd19d45a73ef875dc7a8f5b18d66d Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Thu, 28 Nov 2024 13:48:47 +0100 Subject: [PATCH 13/21] fix W1203: Use lazy % formatting in logging functions --- dspace_rest_client/client.py | 84 ++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index cdacc4e..7e33e49 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -44,7 +44,7 @@ def parse_json(response): try: response_json = response.json() except ValueError as err: - logging.error(f'Error parsing response JSON: {err}. Body text: {response.text}') + logging.error('Error parsing response JSON: %s. 
Body text: %s', err, response.text) return response_json @@ -170,7 +170,7 @@ def authenticate(self, retry=False): # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it if retry: - logging.error(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.error('Too many retries updating token: %s: %s', r.status_code, r.text) return False else: logging.debug("Retrying request with updated CSRF token") @@ -179,7 +179,7 @@ def authenticate(self, retry=False): if r.status_code == 401: # 401 Unauthorized # If we get a 401, this means a general authentication failure - logging.error(f'Authentication failure: invalid credentials for user {self.USERNAME}') + logging.error('Authentication failure: invalid credentials for user %s', self.USERNAME) return False # Update headers with new bearer token if present @@ -191,7 +191,7 @@ def authenticate(self, retry=False): if r.status_code == 200: r_json = parse_json(r) if 'authenticated' in r_json and r_json['authenticated'] is True: - logging.info(f'Authenticated successfully as {self.USERNAME}') + logging.info('Authenticated successfully as %s', self.USERNAME) return r_json['authenticated'] # Default, return false @@ -241,7 +241,7 @@ def api_post(self, url, params, json, retry=False): r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.warning('Too many retries updating token: %s: %s', r.status_code, r.text) else: logging.debug("Retrying request with updated CSRF token") return self.api_post(url, params=params, json=json, retry=True) @@ -269,7 +269,7 @@ def api_post_uri(self, url, params, uri_list, retry=False): r_json = r.json() if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') 
+ logging.warning('Too many retries updating token: %s: %s', r.status_code, r.text) else: logging.debug("Retrying request with updated CSRF token") return self.api_post_uri(url, params=params, uri_list=uri_list, retry=True) @@ -299,7 +299,7 @@ def api_put(self, url, params, json, retry=False): r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.warning('Too many retries updating token: %s: %s', r.status_code, r.text) else: logging.debug("Retrying request with updated CSRF token") return self.api_put(url, params=params, json=json, retry=True) @@ -328,7 +328,7 @@ def api_delete(self, url, params, retry=False): r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.warning('Too many retries updating token: %s: %s', r.status_code, r.text) else: logging.debug("Retrying request with updated CSRF token") return self.api_delete(url, params=params, retry=True) @@ -346,15 +346,15 @@ def api_patch(self, url, operation, path, value, retry=False): @see https://github.com/DSpace/RestContract/blob/main/metadata-patch.md """ if url is None: - logging.error(f'Missing required URL argument') + logging.error('Missing required URL argument') return None if path is None: - logging.error(f'Need valid path eg. /withdrawn or /metadata/dc.title/0/language') + logging.error('Need valid path eg. 
/withdrawn or /metadata/dc.title/0/language') return None if (operation == self.PatchOperation.ADD or operation == self.PatchOperation.REPLACE or operation == self.PatchOperation.MOVE) and value is None: # missing value required for add/replace/move operations - logging.error(f'Missing required "value" argument for add/replace/move operations') + logging.error('Missing required "value" argument for add/replace/move operations') return None # compile patch data @@ -382,13 +382,13 @@ def api_patch(self, url, operation, path, value, retry=False): r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.warning('Too many retries updating token: %s: %s', r.status_code, r.text) else: logging.debug("Retrying request with updated CSRF token") return self.api_patch(url, operation, path, value, True) elif r.status_code == 200: # 200 Success - logging.info(f'successful patch update to {r.json()["type"]} {r.json()["id"]}') + logging.info('successful patch update to %s %s', r.json()["type"], r.json()["id"]) # Return the raw API response return r @@ -435,7 +435,7 @@ def search_objects(self, query=None, scope=None, filters=None, page=0, size=20, dso = SimpleDSpaceObject(resource) dsos.append(dso) except (TypeError, ValueError) as err: - logging.error(f'error parsing search result json {err}') + logging.error('error parsing search result json %s', err) return dsos @@ -479,7 +479,7 @@ def fetch_resource(self, url, params=None): """ r = self.api_get(url, params, None) if r.status_code != 200: - logging.error(f'Error encountered fetching resource: {r.text}') + logging.error('Error encountered fetching resource: %s', r.text) return None # ValueError / JSON handling moved to static method return parse_json(r) @@ -498,7 +498,7 @@ def get_dso(self, url, uuid): url = f'{url}/{uuid}' return self.api_get(url, None, None) except ValueError: - 
logging.error(f'Invalid DSO UUID: {uuid}') + logging.error('Invalid DSO UUID: %s', uuid) return None def create_dso(self, url, params, data): @@ -515,9 +515,9 @@ def create_dso(self, url, params, data): if r.status_code == 201: # 201 Created - success! new_dso = parse_json(r) - logging.info(f'{new_dso["type"]} {new_dso["uuid"]} created successfully!') + logging.info('%s %s created successfully!', new_dso["type"], new_dso["uuid"]) else: - logging.error(f'create operation failed: {r.status_code}: {r.text} ({url})') + logging.error('create operation failed: %s: %s (%s)', r.status_code, r.text, url) return r def update_dso(self, dso, params=None): @@ -533,8 +533,8 @@ def update_dso(self, dso, params=None): return None dso_type = type(dso) if not isinstance(dso, SimpleDSpaceObject): - logging.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community) ' - f'are supported by generic update_dso PUT.') + logging.error('Only SimpleDSpaceObject types (eg Item, Collection, Community) ' + 'are supported by generic update_dso PUT.') return dso try: # Get self URI from HAL links @@ -558,13 +558,13 @@ def update_dso(self, dso, params=None): if r.status_code == 200: # 200 OK - success! 
updated_dso = dso_type(parse_json(r)) - logging.info(f'{updated_dso.type} {updated_dso.uuid} updated sucessfully!') + logging.info('%s %s updated successfully!', updated_dso.type, updated_dso.uuid) return updated_dso else: - logging.error(f'update operation failed: {r.status_code}: {r.text} ({url})') + logging.error('update operation failed: %s: %s (%s)', r.status_code, r.text, url) return None - except ValueError as e: + except ValueError: logging.error("Error parsing DSO response", exc_info=True) return None @@ -580,12 +580,12 @@ def delete_dso(self, dso=None, url=None, params=None): """ if dso is None: if url is None: - logging.error(f'Need a DSO or a URL to delete') + logging.error('Need a DSO or a URL to delete') return None else: if not isinstance(dso, SimpleDSpaceObject): - logging.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) ' - f'are supported by generic update_dso PUT.') + logging.error('Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) ' + 'are supported by generic update_dso PUT.') return dso # Get self URI from HAL links url = dso.links['self']['href'] @@ -594,13 +594,13 @@ def delete_dso(self, dso=None, url=None, params=None): r = self.api_delete(url, params=params) if r.status_code == 204: # 204 No Content - success! 
- logging.info(f'{url} was deleted sucessfully!') + logging.info('%s was deleted successfully!', url) return r else: - logging.error(f'update operation failed: {r.status_code}: {r.text} ({url})') + logging.error('update operation failed: %s: %s (%s)', r.status_code, r.text, url) return None except ValueError as e: - logging.error(f'Error deleting DSO {dso.uuid}: {e}') + logging.error('Error deleting DSO %s: %s', dso.uuid, e) return None # PAGINATION @@ -639,7 +639,7 @@ def get_bundles(self, parent=None, uuid=None, page=0, size=20, sort=None): for resource in resources: bundles.append(Bundle(resource)) except ValueError as err: - logging.error(f'error parsing bundle results: {err}') + logging.error('error parsing bundle results: %s', err) return bundles @@ -690,7 +690,7 @@ def get_bitstreams(self, uuid=None, bundle=None, page=0, size=20, sort=None): url = bundle.links['bitstreams']['href'] else: url = f'{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams' - logging.warning(f'Cannot find bundle bitstream links, will try to construct manually: {url}') + logging.warning('Cannot find bundle bitstream links, will try to construct manually: %s', url) # Perform the actual request. 
By now, our URL and parameter should be properly set params = {} if size is not None: @@ -718,7 +718,7 @@ def get_bitstreams_iter(do_paginate, self, bundle, sort=None): url = bundle.links['bitstreams']['href'] else: url = f'{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams' - logging.warning(f'Cannot find bundle bitstream links, will try to construct manually: {url}') + logging.warning('Cannot find bundle bitstream links, will try to construct manually: %s', url) params = {} if sort is not None: params['sort'] = sort @@ -759,7 +759,7 @@ def create_bitstream(self, bundle=None, name=None, path=None, mime=None, metadat r = self.session.send(prepared_req) if 'DSPACE-XSRF-TOKEN' in r.headers: t = r.headers['DSPACE-XSRF-TOKEN'] - logging.debug('Updating token to ' + t) + logging.debug('Updating token to %s', t) self.session.headers.update({'X-XSRF-Token': t}) self.session.cookies.update({'X-XSRF-Token': t}) if r.status_code == 403: @@ -775,7 +775,7 @@ def create_bitstream(self, bundle=None, name=None, path=None, mime=None, metadat # Success return Bitstream(api_resource=parse_json(r)) else: - logging.error(f'Error creating bitstream: {r.status_code}: {r.text}') + logging.error('Error creating bitstream: %s: %s', r.status_code, r.text) return None def download_bitstream(self, uuid=None): @@ -816,14 +816,14 @@ def get_communities(self, uuid=None, page=0, size=20, sort=None, top=False): url = f'{url}/{uuid}' params = None except ValueError: - logging.error(f'Invalid community UUID: {uuid}') + logging.error('Invalid community UUID: %s', uuid) return None if top: # Set new URL url = f'{url}/search/top' - logging.debug(f'Performing get on {url}') + logging.debug('Performing get on %s', url) # Perform actual get r_json = self.fetch_resource(url, params) # Empty list @@ -897,7 +897,7 @@ def get_collections(self, uuid=None, community=None, page=0, size=20, sort=None) url = f'{url}/{uuid}' params = None except ValueError: - logging.error(f'Invalid collection UUID: {uuid}') 
+ logging.error('Invalid collection UUID: %s', uuid) return None if community is not None: @@ -964,7 +964,7 @@ def get_item(self, uuid): url = f'{url}/{uuid}' return self.api_get(url, None, None) except ValueError: - logging.error(f'Invalid item UUID: {uuid}') + logging.error('Invalid item UUID: %s', uuid) return None def get_items(self): @@ -1030,11 +1030,11 @@ def create_item_version(self, item_uuid, summary=None): if response.status_code == 201: # 201 Created - Success new_version = parse_json(response) - logging.info(f"Created new version for item {item_uuid}") + logging.info("Created new version for item %s", item_uuid) return new_version else: logging.error( - f"Error creating item version: {response.status_code} {response.text}" + "Error creating item version: %s %s", response.status_code, response.text ) return None @@ -1106,7 +1106,7 @@ def create_user(self, user, token=None): def delete_user(self, user): if not isinstance(user, User): - logging.error(f'Must be a valid user') + logging.error('Must be a valid user') return None return self.delete_dso(user) @@ -1181,7 +1181,7 @@ def update_token(self, r): self.session = requests.Session() if 'DSPACE-XSRF-TOKEN' in r.headers: t = r.headers['DSPACE-XSRF-TOKEN'] - logging.debug(f'Updating XSRF token to {t}') + logging.debug('Updating XSRF token to %s', t) # Update headers and cookies self.session.headers.update({'X-XSRF-Token': t}) self.session.cookies.update({'X-XSRF-Token': t}) From ef1c8995c407cd9de605aa484439d3eca06cde9d Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Thu, 28 Nov 2024 13:50:50 +0100 Subject: [PATCH 14/21] import models explicitly instead of wildcard import models explicitly instead of wildcard --- dspace_rest_client/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 7e33e49..847f1b3 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -26,7 +26,7 @@ from requests import 
Request import pysolr -from .models import * +from .models import SimpleDSpaceObject, Community, Collection, Item, Bundle, Bitstream, User, Group, DSpaceObject from . import __version__ __all__ = ['DSpaceClient'] From 8074f5ee1ac7c53c9e51351ab43cd42ad6be8447 Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Thu, 28 Nov 2024 14:38:27 +0100 Subject: [PATCH 15/21] add pylint configuration and github workflow --- .github/workflows/pylint.yml | 29 +++++++++++++++++++++++++++++ .pylintrc | 1 + 2 files changed, 30 insertions(+) create mode 100644 .github/workflows/pylint.yml create mode 100644 .pylintrc diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml new file mode 100644 index 0000000..85a3b43 --- /dev/null +++ b/.github/workflows/pylint.yml @@ -0,0 +1,29 @@ +name: Pylint + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pylint + - name: Analysing the code with pylint + run: | + pylint $(git ls-files '*.py') --ignore-paths=^tests/.*$ --output=lint_${{ matrix.python-version }}.txt || true + - name: Upload Artifact + uses: actions/upload-artifact@v4 + with: + name: lint_${{ matrix.python-version }}.txt + path: lint_${{ matrix.python-version }}.txt \ No newline at end of file diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..a59eb81 --- /dev/null +++ b/.pylintrc @@ -0,0 +1 @@ +[MAIN] \ No newline at end of file From bba7c8b611241258c94f481abad99eb09e991a73 Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Thu, 28 Nov 2024 14:57:24 +0100 Subject: [PATCH 16/21] add pylint requirements --- requirements-pylint.txt | 8 ++++++++ 1 file changed, 8 
insertions(+) create mode 100644 requirements-pylint.txt diff --git a/requirements-pylint.txt b/requirements-pylint.txt new file mode 100644 index 0000000..24ca480 --- /dev/null +++ b/requirements-pylint.txt @@ -0,0 +1,8 @@ +astroid==3.3.5 +colorama==0.4.6 +dill==0.3.9 +isort==5.13.2 +mccabe==0.7.0 +platformdirs==4.3.6 +pylint==3.3.1 +tomlkit==0.13.2 \ No newline at end of file From 90f41db02b3c69a8ccbd498a9591a43f5e821d5f Mon Sep 17 00:00:00 2001 From: Stefan Szepe Date: Wed, 4 Dec 2024 09:52:35 +0100 Subject: [PATCH 17/21] handle potential None values --- dspace_rest_client/client.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 7e0a4e6..e29b3d7 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -52,11 +52,15 @@ def parse_json(response): """ response_json = None try: - response_json = response.json() + if response is not None: + response_json = response.json() except ValueError as err: - logging.error( - "Error parsing response JSON: %s. Body text: %s", err, response.text - ) + if response is not None: + logging.error( + "Error parsing response JSON: %s. Body text: %s", err, response.text + ) + else: + logging.error("Error parsing response JSON: %s. 
Response is None", err) return response_json @@ -817,6 +821,12 @@ def get_bitstreams( if "bitstreams" in bundle.links: url = bundle.links["bitstreams"]["href"] else: + if bundle is None: + logging.error("Bundle cannot be None") + return [] + if bundle is None: + logging.error("Bundle cannot be None") + return [] url = f"{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams" logging.warning( "Cannot find bundle bitstream links, will try to construct manually: %s", From 2321e51766f2ef8c49f2bb628d8a2506e0ee06bd Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 6 Nov 2024 14:13:07 +0100 Subject: [PATCH 18/21] Extend search_objects to allow configuration parameter --- example.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/example.py b/example.py index 209ed41..bf7b948 100644 --- a/example.py +++ b/example.py @@ -40,6 +40,12 @@ print('Error logging in! Giving up.') sys.exit(1) +# An example of searching for workflow items (any search configuration from discovery.xml can be used) +# note that the results here depend on the workflow role / access of the logged in user +search_results = d.search_objects(query='*:*', dso_type='item', configuration='workflow') +for result in search_results: + print(f'{result.name} ({result.uuid})') + # Put together some basic Community data. 
# See https://github.com/DSpace/RestContract/blob/main/communities.md community_data = { From 7fcb83d2019b1e5530b07e0ff80ae241a0b8a38d Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 11 Dec 2024 15:08:38 +0100 Subject: [PATCH 19/21] search_objects accepts configuration param reimplemented after resolving pylint conflicts --- dspace_rest_client/client.py | 8 ++++++++ example.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index e29b3d7..50aed30 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -487,6 +487,7 @@ def search_objects( size=20, sort=None, dso_type=None, + configuration='default', embeds=None, ): """ @@ -498,6 +499,7 @@ def search_objects( @param size: size of page (aka. 'rows'), affects the page parameter above @param sort: sort eg. 'title,asc' @param dso_type: DSO type to further filter results + @param configuration: Search (discovery) configuration to apply to the query @param embeds: Optional list of embeds to apply to each search object result @return: list of DspaceObject objects constructed from API resources """ @@ -518,6 +520,8 @@ def search_objects( params["page"] = page if sort is not None: params["sort"] = sort + if configuration is not None: + params['configuration'] = configuration r_json = self.fetch_resource(url=url, params={**params, **filters}) @@ -548,6 +552,7 @@ def search_objects_iter( filters=None, dso_type=None, sort=None, + configuration='default', embeds=None, ): """ @@ -557,6 +562,7 @@ def search_objects_iter( @param filters: discovery filters as dict eg. {'f.entityType': 'Publication,equals', ... } @param sort: sort eg. 
'title,asc' @param dso_type: DSO type to further filter results + @param configuration: Search (discovery) configuration to apply to the query @param embeds: Optional list of embeds to apply to each search object result @return: Iterator of SimpleDSpaceObject """ @@ -572,6 +578,8 @@ def search_objects_iter( params["dsoType"] = dso_type if sort is not None: params["sort"] = sort + if configuration is not None: + params['configuration'] = configuration return do_paginate(url, {**params, **filters}) diff --git a/example.py b/example.py index bf7b948..b58ecc8 100644 --- a/example.py +++ b/example.py @@ -220,7 +220,7 @@ # scoped to this collection # (there is no collection/items endpoint, though there is a /mappedItems endpoint, # not yet implemented here) - items = d.search_objects(query='*:*', scope=collection.uuid, dso_type='item') + items = d.search_objects(query='*:*', scope=collection.uuid, dso_type='item', configuration='default') for item in items: print(f'{item.name} ({item.uuid})') # Get all bundles in this item From 604609159efd0f6e989881fa2993afaf5b442c1c Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 11 Dec 2024 15:16:36 +0100 Subject: [PATCH 20/21] resolve pid to dspaceobject --- dspace_rest_client/client.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 50aed30..44db8c3 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -1443,6 +1443,15 @@ def get_short_lived_token(self): return None def solr_query(self, query, filters=None, fields=None, start=0, rows=999999999): + """ + Perform raw Solr query + @param query: query string + @param filters: list of filter queries + @param fields: list of fields to return in results + @param start: start doc + @param rows: max docs to return + @return: solr search results + """ if fields is None: fields = [] if filters is None: @@ -1450,3 +1459,22 @@ def solr_query(self, query, 
filters=None, fields=None, start=0, rows=999999999): return self.solr.search( query, fq=filters, start=start, rows=rows, **{"fl": ",".join(fields)} ) + + def resolve_identifier_to_dso(self, identifier=None): + """ + Resolve a DSO identifier (uuid, handle, DOI, etc.) to a DSpaceObject + Useful for resolving handles to objects, etc. + @param identifier: a persistent identifier for an object like handle, doi, uuid + @return: resolved DSpaceObject, or None if the identifier could not be resolved + """ + if identifier is not None: + url = f'{self.API_ENDPOINT}/pid/find' + r = self.api_get(url, params={'id': identifier}) + if r.status_code == 200: + r_json = parse_json(r) + if r_json is not None and 'uuid' in r_json: + return DSpaceObject(api_resource=r_json) + elif r.status_code == 404: + logging.error(f"Not found: {identifier}") + else: + logging.error(f"Error resolving identifier {identifier} to DSO: {r.status_code}") From 7e1d7d1c5bf54b9b3bca7149ee6ce8a55b5ce10f Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 11 Dec 2024 15:34:02 +0100 Subject: [PATCH 21/21] Prepare release 0.1.13 --- CHANGELOG.md | 20 ++++++++++++++++++++ dspace_rest_client/__init__.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bac2b5..fed238c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +### 0.1.13 + +Date: 2024-12-11 + +PyPI release page: https://pypi.org/project/dspace-rest-client/0.1.13/ + +**Changes** + +1. Update requests and pysolr dependencies and improve setup.py (thanks @alanorth) https://github.com/the-library-code/dspace-rest-python/pull/24 +2. Add auto-paginating `get_*_iter` methods for most `get_*` methods (thanks @dpk) https://github.com/the-library-code/dspace-rest-python/pull/27 +3. Improve version number maintenance https://github.com/the-library-code/dspace-rest-python/pull/30 +4. New `create_item_version` method (thanks @soaringjupiter) https://github.com/the-library-code/dspace-rest-python/pull/31 +5.
Allow `embeds=['...', '...']` parameter in most methods that return objects, to request embedded HAL resources https://github.com/the-library-code/dspace-rest-python/pull/20 +6. Extend `search_objects[_iter]` to accept a configuration parameter https://github.com/the-library-code/dspace-rest-python/pull/32 +7. Integrate pylint scaffolding (thanks @sszepe and @mdwRepository) https://github.com/the-library-code/dspace-rest-python/pull/37 +8. New `resolve_identifier_to_dso` method https://github.com/the-library-code/dspace-rest-python/pull/39 +9. Small pydoc improvements +10. Added new example usage to `example.py` + ### 0.1.12 Date: 2024-08-06 @@ -10,6 +29,7 @@ PyPI release page: https://pypi.org/project/dspace-rest-client/0.1.12/ 1. Initialise search result objects as `SimpleDSpaceObject` rather than base `DSpaceObject` class (thanks to @JemmaPilcher) 2. Introduce / tidy new `SearchResult` model as work towards https://github.com/the-library-code/dspace-rest-python/issues/17 +3. Fix `get_items` method parameters (thanks @ckubgi) https://github.com/the-library-code/dspace-rest-python/pull/21 ### 0.1.11 diff --git a/dspace_rest_client/__init__.py b/dspace_rest_client/__init__.py index 3d272b5..432c107 100644 --- a/dspace_rest_client/__init__.py +++ b/dspace_rest_client/__init__.py @@ -1,2 +1,2 @@ from . import * -__version__ = '0.1.12' \ No newline at end of file +__version__ = '0.1.13' \ No newline at end of file