diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml new file mode 100644 index 0000000..85a3b43 --- /dev/null +++ b/.github/workflows/pylint.yml @@ -0,0 +1,29 @@ +name: Pylint + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pylint + - name: Analysing the code with pylint + run: | + pylint $(git ls-files '*.py') --ignore-paths=^tests/.*$ --output=lint_${{ matrix.python-version }}.txt || true + - name: Upload Artifact + uses: actions/upload-artifact@v4 + with: + name: lint_${{ matrix.python-version }}.txt + path: lint_${{ matrix.python-version }}.txt \ No newline at end of file diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..a59eb81 --- /dev/null +++ b/.pylintrc @@ -0,0 +1 @@ +[MAIN] \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bac2b5..fed238c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +### 0.1.13 + +Date: 2024-12-11 + +PyPI release page: https://pypi.org/project/dspace-rest-client/0.1.13/ + +**Changes** + +1. Update requests and pysolr dependencies and improve setup.py (thanks @alanorth) https://github.com/the-library-code/dspace-rest-python/pull/24 +2. Add auto-paginating `get_*_iter` methods for most `get_*` methods (thanks @dpk) https://github.com/the-library-code/dspace-rest-python/pull/27 +3. Improve version number maintenance https://github.com/the-library-code/dspace-rest-python/pull/30 +4. New `create_item_version` method (thanks @soaringjupiter) https://github.com/the-library-code/dspace-rest-python/pull/31 +5. 
Allow `embed=['...', '...']` parameter in most methods that return objects, to allow embedded HAL resources https://github.com/the-library-code/dspace-rest-python/pull/20 +6. Extend `search_objects[_iter]` to accept a configuration parameter https://github.com/the-library-code/dspace-rest-python/pull/32 +7. Integrate pylint scaffolding (thanks @sszepe and @mdwRepository) https://github.com/the-library-code/dspace-rest-python/pull/37 +8. New `resolve_identifier_to_dso` method https://github.com/the-library-code/dspace-rest-python/pull/39 +9. Small pydoc improvements +10. Added new example usage to `example.py` + ### 0.1.12 Date: 2024-08-06 @@ -10,6 +29,7 @@ PyPI release page: https://pypi.org/project/dspace-rest-client/0.1.12/ 1. Initialise search result objects as `SimpleDSpaceObject` rather than base `DSpaceObject` class (thanks to @JemmaPilcher) 2. Introduce / tidy new `SearchResult` model as work towards https://github.com/the-library-code/dspace-rest-python/issues/17 +3. Fix `get_items` method parameters (thanks @ckubgi) https://github.com/the-library-code/dspace-rest-python/pull/21 ### 0.1.11 diff --git a/console.py b/console.py index 69f26a7..9993d3d 100644 --- a/console.py +++ b/console.py @@ -1,28 +1,27 @@ +import code +import os +import sys + from dspace_rest_client.client import DSpaceClient # Import models as needed #from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream -import code -import os -# The DSpace client will look for the same environment variables, but we can also look for them here explicitly -# and as an example -url = 'http://localhost:8080/server/api' -if 'DSPACE_API_ENDPOINT' in os.environ: - url = os.environ['DSPACE_API_ENDPOINT'] -username = 'username@test.system.edu' -if 'DSPACE_API_USERNAME' in os.environ: - username = os.environ['DSPACE_API_USERNAME'] -password = 'password' -if 'DSPACE_API_PASSWORD' in os.environ: - password = os.environ['DSPACE_API_PASSWORD'] +DEFAULT_URL = 
'http://localhost:8080/server/api' +DEFAULT_USERNAME = 'username@test.system.edu' +DEFAULT_PASSWORD = 'password' + +# Configuration from environment variables +URL = os.environ.get('DSPACE_API_ENDPOINT', DEFAULT_URL) +USERNAME = os.environ.get('DSPACE_API_USERNAME', DEFAULT_USERNAME) +PASSWORD = os.environ.get('DSPACE_API_PASSWORD', DEFAULT_PASSWORD) # Instantiate DSpace client -d = DSpaceClient(api_endpoint=url, username=username, password=password) +d = DSpaceClient(api_endpoint=URL, username=USERNAME, password=PASSWORD) # Authenticate against the DSpace client authenticated = d.authenticate() if not authenticated: - print(f'Error logging in! Giving up.') - exit(1) + print('Error logging in! Giving up.') + sys.exit(1) code.interact(local=locals()) diff --git a/dspace_rest_client/__init__.py b/dspace_rest_client/__init__.py index 3d272b5..432c107 100644 --- a/dspace_rest_client/__init__.py +++ b/dspace_rest_client/__init__.py @@ -1,2 +1,2 @@ from . import * -__version__ = '0.1.12' \ No newline at end of file +__version__ = '0.1.13' \ No newline at end of file diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index fb88871..44db8c3 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -3,32 +3,45 @@ # and described in the LICENSE.txt file in the root of this project """ -DSpace REST API client library. Intended to make interacting with DSpace in Python 3 easier, particularly -when creating, updating, retrieving and deleting DSpace Objects. -This client library is a work in progress and currently only implements the most basic functionality. -It was originally created to assist with a migration of container structure, items and bistreams from a non-DSpace -system to a new DSpace 7 repository. +DSpace REST API client library. Intended to make interacting with DSpace in Python 3 easier, +particularly when creating, updating, retrieving and deleting DSpace Objects. 
+This client library is a work in progress and currently only implements the most basic +functionality. +It was originally created to assist with a migration of container structure, items and bistreams +from a non-DSpace system to a new DSpace 7 repository. -It needs a lot of expansion: resource policies and permissions, validation of prepared objects and responses, -better abstracting and handling of HAL-like API responses, plus just all the other endpoints and operations implemented. +It needs a lot of expansion: resource policies and permissions, validation of prepared objects +and responses, better abstracting and handling of HAL-like API responses, plus just all the other +endpoints and operations implemented. @author Kim Shepherd """ import json import logging import functools +import os +from uuid import UUID import requests from requests import Request import pysolr -import os -from uuid import UUID -from .models import * + +from .models import ( + SimpleDSpaceObject, + Community, + Collection, + Item, + Bundle, + Bitstream, + User, + Group, + DSpaceObject, +) from . import __version__ -__all__ = ['DSpaceClient'] +__all__ = ["DSpaceClient"] -logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) +logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO) def parse_json(response): @@ -39,82 +52,94 @@ def parse_json(response): """ response_json = None try: - response_json = response.json() + if response is not None: + response_json = response.json() except ValueError as err: - logging.error(f'Error parsing response JSON: {err}. Body text: {response.text}') + if response is not None: + logging.error( + "Error parsing response JSON: %s. Body text: %s", err, response.text + ) + else: + logging.error("Error parsing response JSON: %s. 
Response is None", err) return response_json + def parse_params(params=None, embeds=None): if params is None: params = {} if embeds is None: embeds = [] if len(embeds) > 0: - params['embed'] = ','.join(embeds) + params["embed"] = ",".join(embeds) return params class DSpaceClient: """ - Main class of the API client itself. This client uses request sessions to connect and authenticate to - the REST API, maintain XSRF tokens, and all GET, POST, PUT, PATCH operations. - Low-level api_get, api_post, api_put, api_delete, api_patch functions are defined to handle the requests and do - retries / XSRF refreshes where necessary. - Higher level get, create, update, partial_update (patch) functions are implemented for each DSO type + Main class of the API client itself. This client uses request sessions to connect and + authenticate to the REST API, maintain XSRF tokens, and all GET, POST, PUT, PATCH operations. + Low-level api_get, api_post, api_put, api_delete, api_patch functions are defined to + handle the requests and do retries / XSRF refreshes where necessary. 
+ Higher level get, create, update, partial_update (patch) functions are implemented + for each DSO type """ + # Set up basic environment, variables session = None - API_ENDPOINT = 'http://localhost:8080/server/api' - SOLR_ENDPOINT = 'http://localhost:8983/solr' + API_ENDPOINT = "http://localhost:8080/server/api" + SOLR_ENDPOINT = "http://localhost:8983/solr" SOLR_AUTH = None - USER_AGENT = f'DSpace-Python-REST-Client/{__version__}' - if 'DSPACE_API_ENDPOINT' in os.environ: - API_ENDPOINT = os.environ['DSPACE_API_ENDPOINT'] - LOGIN_URL = f'{API_ENDPOINT}/authn/login' - USERNAME = 'username@test.system.edu' - if 'DSPACE_API_USERNAME' in os.environ: - USERNAME = os.environ['DSPACE_API_USERNAME'] - PASSWORD = 'password' - if 'DSPACE_API_PASSWORD' in os.environ: - PASSWORD = os.environ['DSPACE_API_PASSWORD'] - if 'SOLR_ENDPOINT' in os.environ: - SOLR_ENDPOINT = os.environ['SOLR_ENDPOINT'] - if 'SOLR_AUTH' in os.environ: - SOLR_AUTH = os.environ['SOLR_AUTH'] - if 'USER_AGENT' in os.environ: - USER_AGENT = os.environ['USER_AGENT'] + USER_AGENT = f"DSpace-Python-REST-Client/{__version__}" + if "DSPACE_API_ENDPOINT" in os.environ: + API_ENDPOINT = os.environ["DSPACE_API_ENDPOINT"] + LOGIN_URL = f"{API_ENDPOINT}/authn/login" + USERNAME = "username@test.system.edu" + if "DSPACE_API_USERNAME" in os.environ: + USERNAME = os.environ["DSPACE_API_USERNAME"] + PASSWORD = "password" + if "DSPACE_API_PASSWORD" in os.environ: + PASSWORD = os.environ["DSPACE_API_PASSWORD"] + if "SOLR_ENDPOINT" in os.environ: + SOLR_ENDPOINT = os.environ["SOLR_ENDPOINT"] + if "SOLR_AUTH" in os.environ: + SOLR_AUTH = os.environ["SOLR_AUTH"] + if "USER_AGENT" in os.environ: + USER_AGENT = os.environ["USER_AGENT"] verbose = False ITER_PAGE_SIZE = 20 # Simple enum for patch operation types class PatchOperation: - ADD = 'add' - REMOVE = 'remove' - REPLACE = 'replace' - MOVE = 'move' + ADD = "add" + REMOVE = "remove" + REPLACE = "replace" + MOVE = "move" def paginated(embed_name, item_constructor, 
embedding=lambda x: x): """ - @param embed_name: The key under '_embedded' in the JSON response that contains the resources to be paginated. - (e.g. 'collections', 'objects', 'items', etc.) + @param embed_name: The key under '_embedded' in the JSON response that contains the + resources to be paginated. (e.g. 'collections', 'objects', 'items', etc.) @param item_constructor: A callable that takes a resource dictionary and returns an item. - @param embedding: Optional post-fetch processing lambda (default: identity function) for each resource - @return: A decorator that, when applied to a method, follows pagination and yields each resource + @param embedding: Optional post-fetch processing lambda (default: identity function) + for each resource + @return: A decorator that, when applied to a method, follows pagination and yields + each resource """ + def decorator(fun): @functools.wraps(fun) def decorated(self, *args, **kwargs): def do_paginate(url, params): - params['size'] = self.ITER_PAGE_SIZE + params["size"] = self.ITER_PAGE_SIZE while url is not None: r_json = embedding(self.fetch_resource(url, params)) - for resource in r_json.get('_embedded', {}).get(embed_name, []): + for resource in r_json.get("_embedded", {}).get(embed_name, []): yield item_constructor(resource) - if 'next' in r_json.get('_links', {}): - url = r_json['_links']['next']['href'] + if "next" in r_json.get("_links", {}): + url = r_json["_links"]["next"]["href"] # assume the ‘next’ link contains all the # params needed for the correct next page: params = {} @@ -122,37 +147,57 @@ def do_paginate(url, params): url = None return fun(do_paginate, self, *args, **kwargs) + return decorated return decorator - def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWORD, solr_endpoint=SOLR_ENDPOINT, - solr_auth=SOLR_AUTH, fake_user_agent=False): - """ - Accept optional API endpoint, username, password arguments using the OS environment variables as defaults + def __init__( + self, + 
api_endpoint=API_ENDPOINT, + username=USERNAME, + password=PASSWORD, + solr_endpoint=SOLR_ENDPOINT, + solr_auth=SOLR_AUTH, + fake_user_agent=False, + ): + """ + Accept optional API endpoint, username, password arguments using the OS environment + variables as defaults :param api_endpoint: base path to DSpace REST API, eg. http://localhost:8080/server/api - :param username: username with appropriate privileges to perform operations on REST API + :param username: username with appropriate privileges to perform operations on + REST API :param password: password for the above username """ self.session = requests.Session() self.API_ENDPOINT = api_endpoint - self.LOGIN_URL = f'{self.API_ENDPOINT}/authn/login' + self.LOGIN_URL = f"{self.API_ENDPOINT}/authn/login" self.USERNAME = username self.PASSWORD = password self.SOLR_ENDPOINT = solr_endpoint - self.solr = pysolr.Solr(url=solr_endpoint, always_commit=True, timeout=300, auth=solr_auth) + self.solr = pysolr.Solr( + url=solr_endpoint, always_commit=True, timeout=300, auth=solr_auth + ) # If fake_user_agent was specified, use this string that is known (as of 2023-12-03) to succeed with # requests to Cloudfront-protected API endpoints (tested on demo.dspace.org) # Otherwise, the user agent will be the more helpful and accurate default of 'DSpace Python REST Client' # To override the user agent to your own string, instead set the USER_AGENT environment variable first # eg `export USER_AGENT="My Custom Agent String / 1.0`, and don't specify a value for fake_user_agent if fake_user_agent: - self.USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) ' \ - 'Chrome/39.0.2171.95 Safari/537.36' + self.USER_AGENT = ( + "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/39.0.2171.95 Safari/537.36" + ) # Set headers based on this - self.auth_request_headers = {'User-Agent': self.USER_AGENT} - self.request_headers = {'Content-type': 'application/json', 
'User-Agent': self.USER_AGENT} - self.list_request_headers = {'Content-type': 'text/uri-list', 'User-Agent': self.USER_AGENT} + self.auth_request_headers = {"User-Agent": self.USER_AGENT} + self.request_headers = { + "Content-type": "application/json", + "User-Agent": self.USER_AGENT, + } + self.list_request_headers = { + "Content-type": "text/uri-list", + "User-Agent": self.USER_AGENT, + } def authenticate(self, retry=False): """ @@ -162,8 +207,11 @@ def authenticate(self, retry=False): """ # Set headers for requests made during authentication # Get and update CSRF token - r = self.session.post(self.LOGIN_URL, data={'user': self.USERNAME, 'password': self.PASSWORD}, - headers=self.auth_request_headers) + r = self.session.post( + self.LOGIN_URL, + data={"user": self.USERNAME, "password": self.PASSWORD}, + headers=self.auth_request_headers, + ) self.update_token(r) if r.status_code == 403: @@ -172,7 +220,9 @@ def authenticate(self, retry=False): # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it if retry: - logging.error(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.error( + "Too many retries updating token: %s: %s", r.status_code, r.text + ) return False else: logging.debug("Retrying request with updated CSRF token") @@ -181,20 +231,26 @@ def authenticate(self, retry=False): if r.status_code == 401: # 401 Unauthorized # If we get a 401, this means a general authentication failure - logging.error(f'Authentication failure: invalid credentials for user {self.USERNAME}') + logging.error( + "Authentication failure: invalid credentials for user %s", self.USERNAME + ) return False # Update headers with new bearer token if present - if 'Authorization' in r.headers: - self.session.headers.update({'Authorization': r.headers.get('Authorization')}) + if "Authorization" in r.headers: + self.session.headers.update( + {"Authorization": 
r.headers.get("Authorization")} + ) # Get and check authentication status - r = self.session.get(f'{self.API_ENDPOINT}/authn/status', headers=self.request_headers) + r = self.session.get( + f"{self.API_ENDPOINT}/authn/status", headers=self.request_headers + ) if r.status_code == 200: r_json = parse_json(r) - if 'authenticated' in r_json and r_json['authenticated'] is True: - logging.info(f'Authenticated successfully as {self.USERNAME}') - return r_json['authenticated'] + if "authenticated" in r_json and r_json["authenticated"] is True: + logging.info("Authenticated successfully as %s", self.USERNAME) + return r_json["authenticated"] # Default, return false return False @@ -232,7 +288,9 @@ def api_post(self, url, params, json, retry=False): @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: Response from API """ - r = self.session.post(url, json=json, params=params, headers=self.request_headers) + r = self.session.post( + url, json=json, params=params, headers=self.request_headers + ) self.update_token(r) if r.status_code == 403: @@ -241,9 +299,11 @@ def api_post(self, url, params, json, retry=False): # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it r_json = parse_json(r) - if 'message' in r_json and 'CSRF token' in r_json['message']: + if "message" in r_json and "CSRF token" in r_json["message"]: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.warning( + "Too many retries updating token: %s: %s", r.status_code, r.text + ) else: logging.debug("Retrying request with updated CSRF token") return self.api_post(url, params=params, json=json, retry=True) @@ -260,7 +320,9 @@ def api_post_uri(self, url, params, uri_list, retry=False): @param retry: Has this method already been retried? Used if we need to refresh XSRF. 
@return: Response from API """ - r = self.session.post(url, data=uri_list, params=params, headers=self.list_request_headers) + r = self.session.post( + url, data=uri_list, params=params, headers=self.list_request_headers + ) self.update_token(r) if r.status_code == 403: @@ -269,12 +331,16 @@ def api_post_uri(self, url, params, uri_list, retry=False): # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it r_json = r.json() - if 'message' in r_json and 'CSRF token' in r_json['message']: + if "message" in r_json and "CSRF token" in r_json["message"]: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.warning( + "Too many retries updating token: %s: %s", r.status_code, r.text + ) else: logging.debug("Retrying request with updated CSRF token") - return self.api_post_uri(url, params=params, uri_list=uri_list, retry=True) + return self.api_post_uri( + url, params=params, uri_list=uri_list, retry=True + ) return r @@ -288,7 +354,9 @@ def api_put(self, url, params, json, retry=False): @param retry: Has this method already been retried? Used if we need to refresh XSRF. 
@return: Response from API """ - r = self.session.put(url, params=params, json=json, headers=self.request_headers) + r = self.session.put( + url, params=params, json=json, headers=self.request_headers + ) self.update_token(r) if r.status_code == 403: @@ -299,9 +367,11 @@ def api_put(self, url, params, json, retry=False): logging.debug(r.text) # Parse response r_json = parse_json(r) - if 'message' in r_json and 'CSRF token' in r_json['message']: + if "message" in r_json and "CSRF token" in r_json["message"]: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.warning( + "Too many retries updating token: %s: %s", r.status_code, r.text + ) else: logging.debug("Retrying request with updated CSRF token") return self.api_put(url, params=params, json=json, retry=True) @@ -328,9 +398,11 @@ def api_delete(self, url, params, retry=False): logging.debug(r.text) # Parse response r_json = parse_json(r) - if 'message' in r_json and 'CSRF token' in r_json['message']: + if "message" in r_json and "CSRF token" in r_json["message"]: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.warning( + "Too many retries updating token: %s: %s", r.status_code, r.text + ) else: logging.debug("Retrying request with updated CSRF token") return self.api_delete(url, params=params, retry=True) @@ -348,22 +420,26 @@ def api_patch(self, url, operation, path, value, params=None, retry=False): @see https://github.com/DSpace/RestContract/blob/main/metadata-patch.md """ if url is None: - logging.error(f'Missing required URL argument') + logging.error("Missing required URL argument") return None if path is None: - logging.error(f'Need valid path eg. /withdrawn or /metadata/dc.title/0/language') + logging.error( + "Need valid path eg. 
/withdrawn or /metadata/dc.title/0/language" + ) return None - if (operation == self.PatchOperation.ADD or operation == self.PatchOperation.REPLACE - or operation == self.PatchOperation.MOVE) and value is None: + if ( + operation == self.PatchOperation.ADD + or operation == self.PatchOperation.REPLACE + or operation == self.PatchOperation.MOVE + ) and value is None: # missing value required for add/replace/move operations - logging.error(f'Missing required "value" argument for add/replace/move operations') + logging.error( + 'Missing required "value" argument for add/replace/move operations' + ) return None # compile patch data - data = { - "op": operation, - "path": path - } + data = {"op": operation, "path": path} if value is not None: if operation == self.PatchOperation.MOVE: data["from"] = value @@ -372,7 +448,9 @@ def api_patch(self, url, operation, path, value, params=None, retry=False): # set headers # perform patch request - r = self.session.patch(url, json=[data], headers=self.request_headers, params=params) + r = self.session.patch( + url, json=[data], headers=self.request_headers, params=params + ) self.update_token(r) if r.status_code == 403: @@ -382,21 +460,36 @@ def api_patch(self, url, operation, path, value, params=None, retry=False): # it's happening too often for me, so check for accidentally triggering it logging.debug(r.text) r_json = parse_json(r) - if 'message' in r_json and 'CSRF token' in r_json['message']: + if "message" in r_json and "CSRF token" in r_json["message"]: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.warning( + "Too many retries updating token: %s: %s", r.status_code, r.text + ) else: logging.debug("Retrying request with updated CSRF token") return self.api_patch(url, operation, path, value, params, True) elif r.status_code == 200: # 200 Success - logging.info(f'successful patch update to {r.json()["type"]} {r.json()["id"]}') + logging.info( + "successful patch update to 
%s %s", r.json()["type"], r.json()["id"] + ) # Return the raw API response return r # PAGINATION - def search_objects(self, query=None, scope=None, filters=None, page=0, size=20, sort=None, dso_type=None, embeds=None): + def search_objects( + self, + query=None, + scope=None, + filters=None, + page=0, + size=20, + sort=None, + dso_type=None, + configuration='default', + embeds=None, + ): """ Do a basic search with optional query, filters and dsoType params. @param query: query string @@ -406,47 +499,62 @@ def search_objects(self, query=None, scope=None, filters=None, page=0, size=20, @param size: size of page (aka. 'rows'), affects the page parameter above @param sort: sort eg. 'title,asc' @param dso_type: DSO type to further filter results + @param configuration: Search (discovery) configuration to apply to the query @param embeds: Optional list of embeds to apply to each search object result @return: list of DspaceObject objects constructed from API resources """ dsos = [] if filters is None: filters = {} - url = f'{self.API_ENDPOINT}/discover/search/objects' + url = f"{self.API_ENDPOINT}/discover/search/objects" params = parse_params(embeds=embeds) if query is not None: - params['query'] = query + params["query"] = query if scope is not None: - params['scope'] = scope + params["scope"] = scope if dso_type is not None: - params['dsoType'] = dso_type + params["dsoType"] = dso_type if size is not None: - params['size'] = size + params["size"] = size if page is not None: - params['page'] = page + params["page"] = page if sort is not None: - params['sort'] = sort + params["sort"] = sort + if configuration is not None: + params['configuration'] = configuration r_json = self.fetch_resource(url=url, params={**params, **filters}) # instead lots of 'does this key exist, etc etc' checks, just go for it and wrap in a try? 
try: - results = r_json['_embedded']['searchResult']['_embedded']['objects'] + results = r_json["_embedded"]["searchResult"]["_embedded"]["objects"] for result in results: - resource = result['_embedded']['indexableObject'] + resource = result["_embedded"]["indexableObject"] dso = SimpleDSpaceObject(resource) dsos.append(dso) except (TypeError, ValueError) as err: - logging.error(f'error parsing search result json {err}') + logging.error("error parsing search result json %s", err) return dsos @paginated( - embed_name='objects', - item_constructor=lambda x: SimpleDSpaceObject(x['_embedded']['indexableObject']), - embedding=lambda x: x['_embedded']['searchResult'] + embed_name="objects", + item_constructor=lambda x: SimpleDSpaceObject( + x["_embedded"]["indexableObject"] + ), + embedding=lambda x: x["_embedded"]["searchResult"], ) - def search_objects_iter(do_paginate, self, query=None, scope=None, filters=None, dso_type=None, sort=None, embeds=None): + def search_objects_iter( + do_paginate, + self, + query=None, + scope=None, + filters=None, + dso_type=None, + sort=None, + configuration='default', + embeds=None, + ): """ Do a basic search as in search_objects, automatically handling pagination by requesting the next page when all items from one page have been consumed @param query: query string @@ -454,21 +562,24 @@ def search_objects_iter(do_paginate, self, query=None, scope=None, filters=None, @param filters: discovery filters as dict eg. {'f.entityType': 'Publication,equals', ... } @param sort: sort eg. 
'title,asc' @param dso_type: DSO type to further filter results + @param configuration: Search (discovery) configuration to apply to the query @param embeds: Optional list of embeds to apply to each search object result @return: Iterator of SimpleDSpaceObject """ if filters is None: filters = {} - url = f'{self.API_ENDPOINT}/discover/search/objects' + url = f"{self.API_ENDPOINT}/discover/search/objects" params = parse_params(embeds=embeds) if query is not None: - params['query'] = query + params["query"] = query if scope is not None: - params['scope'] = scope + params["scope"] = scope if dso_type is not None: - params['dsoType'] = dso_type + params["dsoType"] = dso_type if sort is not None: - params['sort'] = sort + params["sort"] = sort + if configuration is not None: + params['configuration'] = configuration return do_paginate(url, {**params, **filters}) @@ -482,7 +593,7 @@ def fetch_resource(self, url, params=None): """ r = self.api_get(url, params, None) if r.status_code != 200: - logging.error(f'Error encountered fetching resource: {r.text}') + logging.error("Error encountered fetching resource: %s", r.text) return None # ValueError / JSON handling moved to static method return parse_json(r) @@ -500,15 +611,13 @@ def get_dso(self, url, uuid, params=None, embeds=None): try: # Try to get UUID version to test validity id = UUID(uuid).version - url = f'{url}/{uuid}' - params=parse_params(params, embeds=embeds) + url = f"{url}/{uuid}" + params = parse_params(params, embeds=embeds) return self.api_get(url, params, None) except ValueError: - logging.error(f'Invalid DSO UUID: {uuid}') + logging.error("Invalid DSO UUID: %s", uuid) return None - - def create_dso(self, url, params, data, embeds=None): """ Base 'create DSpace Object' function. @@ -524,9 +633,13 @@ def create_dso(self, url, params, data, embeds=None): if r.status_code == 201: # 201 Created - success! 
new_dso = parse_json(r) - logging.info(f'{new_dso["type"]} {new_dso["uuid"]} created successfully!') + logging.info( + "%s %s created successfully!", new_dso["type"], new_dso["uuid"] + ) else: - logging.error(f'create operation failed: {r.status_code}: {r.text} ({url})') + logging.error( + "create operation failed: %s: %s (%s)", r.status_code, r.text, url + ) return r def update_dso(self, dso, params=None, embeds=None): @@ -542,16 +655,18 @@ def update_dso(self, dso, params=None, embeds=None): return None dso_type = type(dso) if not isinstance(dso, SimpleDSpaceObject): - logging.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community) ' - f'are supported by generic update_dso PUT.') + logging.error( + "Only SimpleDSpaceObject types (eg Item, Collection, Community) " + "are supported by generic update_dso PUT." + ) return dso try: # Get self URI from HAL links - url = dso.links['self']['href'] + url = dso.links["self"]["href"] # Get and clean data - there are some unalterable fields that could cause errors data = dso.as_dict() - if 'lastModified' in data: - data.pop('lastModified') + if "lastModified" in data: + data.pop("lastModified") # Parse parameters params = parse_params(params, embeds) @@ -559,13 +674,17 @@ def update_dso(self, dso, params=None, embeds=None): if r.status_code == 200: # 200 OK - success! 
updated_dso = dso_type(parse_json(r)) - logging.info(f'{updated_dso.type} {updated_dso.uuid} updated sucessfully!') + logging.info( + "%s %s updated successfully!", updated_dso.type, updated_dso.uuid + ) return updated_dso else: - logging.error(f'update operation failed: {r.status_code}: {r.text} ({url})') + logging.error( + "update operation failed: %s: %s (%s)", r.status_code, r.text, url + ) return None - except ValueError as e: + except ValueError: logging.error("Error parsing DSO response", exc_info=True) return None @@ -581,31 +700,37 @@ def delete_dso(self, dso=None, url=None, params=None): """ if dso is None: if url is None: - logging.error(f'Need a DSO or a URL to delete') + logging.error("Need a DSO or a URL to delete") return None else: if not isinstance(dso, SimpleDSpaceObject): - logging.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) ' - f'are supported by generic update_dso PUT.') + logging.error( + "Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) " + "are supported by generic update_dso PUT." + ) return dso # Get self URI from HAL links - url = dso.links['self']['href'] + url = dso.links["self"]["href"] try: r = self.api_delete(url, params=params) if r.status_code == 204: # 204 No Content - success! 
- logging.info(f'{url} was deleted sucessfully!') + logging.info("%s was deleted successfully!", url) return r else: - logging.error(f'update operation failed: {r.status_code}: {r.text} ({url})') + logging.error( + "update operation failed: %s: %s (%s)", r.status_code, r.text, url + ) return None except ValueError as e: - logging.error(f'Error deleting DSO {dso.uuid}: {e}') + logging.error("Error deleting DSO %s: %s", dso.uuid, e) return None # PAGINATION - def get_bundles(self, parent=None, uuid=None, page=0, size=20, sort=None, embeds=None): + def get_bundles( + self, parent=None, uuid=None, page=0, size=20, sort=None, embeds=None + ): """ Get bundles for an item @param parent: python Item object, from which the UUID will be referenced in the URL. @@ -616,36 +741,36 @@ def get_bundles(self, parent=None, uuid=None, page=0, size=20, sort=None, embeds """ # TODO: It is probably wise to allow the parent UUID to be simply passed as an alternative to having the full # python object as constructed by this REST client, for more flexible usage. 
- bundles = list() + bundles = [] single_result = False if uuid is not None: - url = f'{self.API_ENDPOINT}/core/bundles/{uuid}' + url = f"{self.API_ENDPOINT}/core/bundles/{uuid}" single_result = True elif parent is not None: - url = f'{self.API_ENDPOINT}/core/items/{parent.uuid}/bundles' + url = f"{self.API_ENDPOINT}/core/items/{parent.uuid}/bundles" else: - return list() + return [] params = parse_params(embeds=embeds) if size is not None: - params['size'] = size + params["size"] = size if page is not None: - params['page'] = page + params["page"] = page if sort is not None: - params['sort'] = sort + params["sort"] = sort r_json = self.fetch_resource(url, params=params) try: if single_result: bundles.append(Bundle(r_json)) if not single_result: - resources = r_json['_embedded']['bundles'] + resources = r_json["_embedded"]["bundles"] for resource in resources: bundles.append(Bundle(resource)) except ValueError as err: - logging.error(f'error parsing bundle results: {err}') + logging.error("error parsing bundle results: %s", err) return bundles - @paginated('bundles', Bundle) + @paginated("bundles", Bundle) def get_bundles_iter(do_paginate, self, parent, sort=None, embeds=None): """ Get bundles for an item, automatically handling pagination by requesting the next page when all items from one page have been consumed @@ -653,14 +778,14 @@ def get_bundles_iter(do_paginate, self, parent, sort=None, embeds=None): @param embeds: Optional list of resources to embed in response JSON @return: Iterator of Bundle """ - url = f'{self.API_ENDPOINT}/core/items/{parent.uuid}/bundles' + url = f"{self.API_ENDPOINT}/core/items/{parent.uuid}/bundles" params = parse_params(embeds=embeds) if sort is not None: - params['sort'] = sort + params["sort"] = sort return do_paginate(url, params) - def create_bundle(self, parent=None, name='ORIGINAL', embeds=None): + def create_bundle(self, parent=None, name="ORIGINAL", embeds=None): """ Create new bundle in the specified item @param parent: 
Parent python Item, the UUID of which will be used in the URL path @@ -673,11 +798,21 @@ def create_bundle(self, parent=None, name="ORIGINAL", embeds=None): # python object as constructed by this REST client, for more flexible usage. if parent is None: return None - url = f'{self.API_ENDPOINT}/core/items/{parent.uuid}/bundles' - return Bundle(api_resource=parse_json(self.api_post(url, params=parse_params(embeds=embeds), json={'name': name, 'metadata': {}}))) + url = f"{self.API_ENDPOINT}/core/items/{parent.uuid}/bundles" + return Bundle( + api_resource=parse_json( + self.api_post( + url, + params=parse_params(embeds=embeds), + json={"name": name, "metadata": {}}, + ) + ) + ) # PAGINATION - def get_bitstreams(self, uuid=None, bundle=None, page=0, size=20, sort=None, embeds=None): + def get_bitstreams( + self, uuid=None, bundle=None, page=0, size=20, sort=None, embeds=None + ): """ Get a specific bitstream UUID, or all bitstreams for a specific bundle @param uuid: UUID of a specific bitstream to retrieve @@ -687,34 +822,43 @@ def get_bitstreams(self, uuid=None, bundle=None, page=0, size=20, sort=None, emb @param embeds: Optional list of resources to embed in response JSON @return: list of python Bitstream objects """ - url = f'{self.API_ENDPOINT}/core/bitstreams/{uuid}' + url = f"{self.API_ENDPOINT}/core/bitstreams/{uuid}" if uuid is None and bundle is None: - return list() + return [] if uuid is None and isinstance(bundle, Bundle): - if 'bitstreams' in bundle.links: - url = bundle.links['bitstreams']['href'] + if "bitstreams" in bundle.links: + url = bundle.links["bitstreams"]["href"] else: - url = f'{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams' - logging.warning(f'Cannot find bundle bitstream links, will try to construct manually: {url}') + if bundle is None: + logging.error("Bundle cannot be None") + return [] + # NOTE(review): removed a duplicated copy of the None-guard above; + # the isinstance(bundle, Bundle) check already guarantees bundle + # is not None here, so the remaining guard is defensive only. + url = f"{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams" 
+ logging.warning( + "Cannot find bundle bitstream links, will try to construct manually: %s", + url, + ) # Perform the actual request. By now, our URL and parameter should be properly set params = parse_params(embeds=embeds) if size is not None: - params['size'] = size + params["size"] = size if page is not None: - params['page'] = page + params["page"] = page if sort is not None: - params['sort'] = sort + params["sort"] = sort r_json = self.fetch_resource(url, params=params) - if '_embedded' in r_json: - if 'bitstreams' in r_json['_embedded']: - bitstreams = list() - for bitstream_resource in r_json['_embedded']['bitstreams']: + if "_embedded" in r_json: + if "bitstreams" in r_json["_embedded"]: + bitstreams = [] + for bitstream_resource in r_json["_embedded"]["bitstreams"]: bitstream = Bitstream(bitstream_resource) bitstreams.append(bitstream) return bitstreams - @paginated('bitstreams', Bitstream) + @paginated("bitstreams", Bitstream) def get_bitstreams_iter(do_paginate, self, bundle, sort=None, embeds=None): """ Get all bitstreams for a specific bundle, automatically handling pagination by requesting the next page when all items from one page have been consumed @@ -722,18 +866,30 @@ def get_bitstreams_iter(do_paginate, self, bundle, sort=None, embeds=None): @param embeds: Optional list of resources to embed in response JSON @return: Iterator of Bitstream """ - if 'bitstreams' in bundle.links: - url = bundle.links['bitstreams']['href'] + if "bitstreams" in bundle.links: + url = bundle.links["bitstreams"]["href"] else: - url = f'{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams' - logging.warning(f'Cannot find bundle bitstream links, will try to construct manually: {url}') + url = f"{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams" + logging.warning( + "Cannot find bundle bitstream links, will try to construct manually: %s", + url, + ) params = parse_params(embeds=embeds) if sort is not None: - params['sort'] = sort + params["sort"] = sort 
return do_paginate(url, params) - def create_bitstream(self, bundle=None, name=None, path=None, mime=None, metadata=None, embeds=None, retry=False): + def create_bitstream( + self, + bundle=None, + name=None, + path=None, + mime=None, + metadata=None, + embeds=None, + retry=False, + ): """ Upload a file and create a bitstream for a specified parent bundle, from the uploaded file and the supplied metadata. @@ -755,35 +911,44 @@ def create_bitstream(self, bundle=None, name=None, path=None, mime=None, metadat # TODO: Better error detection and handling for file reading if metadata is None: metadata = {} - url = f'{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams' - file = (name, open(path, 'rb'), mime) - files = {'file': file} - properties = {'name': name, 'metadata': metadata, 'bundleName': bundle.name} - payload = {'properties': json.dumps(properties) + ';application/json'} + url = f"{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams" + file = (name, open(path, "rb"), mime) + files = {"file": file} + properties = {"name": name, "metadata": metadata, "bundleName": bundle.name} + payload = {"properties": json.dumps(properties) + ";application/json"} h = self.session.headers - h.update({'Content-Encoding': 'gzip', 'User-Agent': self.USER_AGENT}) - req = Request('POST', url, data=payload, headers=h, files=files, params=parse_params(embeds=embeds)) + h.update({"Content-Encoding": "gzip", "User-Agent": self.USER_AGENT}) + req = Request( + "POST", + url, + data=payload, + headers=h, + files=files, + params=parse_params(embeds=embeds), + ) prepared_req = self.session.prepare_request(req) r = self.session.send(prepared_req) - if 'DSPACE-XSRF-TOKEN' in r.headers: - t = r.headers['DSPACE-XSRF-TOKEN'] - logging.debug('Updating token to ' + t) - self.session.headers.update({'X-XSRF-Token': t}) - self.session.cookies.update({'X-XSRF-Token': t}) + if "DSPACE-XSRF-TOKEN" in r.headers: + t = r.headers["DSPACE-XSRF-TOKEN"] + logging.debug("Updating token to %s", t) + 
self.session.headers.update({"X-XSRF-Token": t}) + self.session.cookies.update({"X-XSRF-Token": t}) if r.status_code == 403: r_json = parse_json(r) - if 'message' in r_json and 'CSRF token' in r_json['message']: + if "message" in r_json and "CSRF token" in r_json["message"]: if retry: - logging.error('Already retried... something must be wrong') + logging.error("Already retried... something must be wrong") else: logging.debug("Retrying request with updated CSRF token") - return self.create_bitstream(bundle, name, path, mime, metadata, embeds, True) + return self.create_bitstream( + bundle, name, path, mime, metadata, embeds, True + ) if r.status_code == 201 or r.status_code == 200: # Success return Bitstream(api_resource=parse_json(r)) else: - logging.error(f'Error creating bitstream: {r.status_code}: {r.text}') + logging.error("Error creating bitstream: %s: %s", r.status_code, r.text) return None def download_bitstream(self, uuid=None): @@ -792,14 +957,19 @@ def download_bitstream(self, uuid=None): @param uuid: @return: full response object including headers, and content """ - url = f'{self.API_ENDPOINT}/core/bitstreams/{uuid}/content' - h = {'User-Agent': self.USER_AGENT, 'Authorization': self.get_short_lived_token()} + url = f"{self.API_ENDPOINT}/core/bitstreams/{uuid}/content" + h = { + "User-Agent": self.USER_AGENT, + "Authorization": self.get_short_lived_token(), + } r = self.api_get(url, headers=h) if r.status_code == 200: return r # PAGINATION - def get_communities(self, uuid=None, page=0, size=20, sort=None, top=False, embeds=None): + def get_communities( + self, uuid=None, page=0, size=20, sort=None, top=False, embeds=None + ): """ Get communities - either all, for single UUID, or all top-level (ie no sub-communities) @param uuid: string UUID if getting single community @@ -809,45 +979,45 @@ def get_communities(self, uuid=None, page=0, size=20, sort=None, top=False, embe @param embeds: list of resources to embed in response JSON @return: list of 
communities, or None if error """ - url = f'{self.API_ENDPOINT}/core/communities' + url = f"{self.API_ENDPOINT}/core/communities" params = parse_params(embeds=embeds) if size is not None: - params['size'] = size + params["size"] = size if page is not None: - params['page'] = page + params["page"] = page if sort is not None: - params['sort'] = sort + params["sort"] = sort if uuid is not None: try: # This isn't used, but it'll throw a ValueError if not a valid UUID id = UUID(uuid).version # Set URL and parameters - url = f'{url}/{uuid}' + url = f"{url}/{uuid}" params = None except ValueError: - logging.error(f'Invalid community UUID: {uuid}') + logging.error("Invalid community UUID: %s", uuid) return None if top: # Set new URL - url = f'{url}/search/top' + url = f"{url}/search/top" - logging.debug(f'Performing get on {url}') + logging.debug("Performing get on %s", url) # Perform actual get r_json = self.fetch_resource(url, params) # Empty list - communities = list() - if '_embedded' in r_json: - if 'communities' in r_json['_embedded']: - for community_resource in r_json['_embedded']['communities']: + communities = [] + if "_embedded" in r_json: + if "communities" in r_json["_embedded"]: + for community_resource in r_json["_embedded"]["communities"]: communities.append(Community(community_resource)) - elif 'uuid' in r_json: + elif "uuid" in r_json: # This is a single communities communities.append(Community(r_json)) # Return list (populated or empty) return communities - @paginated('communities', Community) + @paginated("communities", Community) def get_communities_iter(do_paginate, self, sort=None, top=False, embeds=None): """ Get communities as an iterator, automatically handling pagination by requesting the next page when all items from one page have been consumed @@ -856,13 +1026,13 @@ def get_communities_iter(do_paginate, self, sort=None, top=False, embeds=None): @return: Iterator of Community """ if top: - url = f'{self.API_ENDPOINT}/core/communities/search/top' 
+ url = f"{self.API_ENDPOINT}/core/communities/search/top" else: - url = f'{self.API_ENDPOINT}/core/communities' + url = f"{self.API_ENDPOINT}/core/communities" params = parse_params(embeds=embeds) if sort is not None: - params['sort'] = sort + params["sort"] = sort return do_paginate(url, params) @@ -876,13 +1046,15 @@ def create_community(self, parent, data, embeds=None): """ # TODO: To be consistent with other create methods, this should probably also allow a Community object # to be passed instead of just the UUID as a string - url = f'{self.API_ENDPOINT}/core/communities' + url = f"{self.API_ENDPOINT}/core/communities" params = parse_params(embeds=embeds) if parent is not None: - params = {'parent': parent} + params = {"parent": parent} return Community(api_resource=parse_json(self.create_dso(url, params, data))) - def get_collections(self, uuid=None, community=None, page=0, size=20, sort=None, embeds=None): + def get_collections( + self, uuid=None, community=None, page=0, size=20, sort=None, embeds=None + ): """ Get collections - all, or single UUID, or for a specific community @param uuid: UUID string. If present, just a single collection is returned (overrides community arg) @@ -893,61 +1065,67 @@ def get_collections(self, uuid=None, community=None, page=0, size=20, sort=None, @return: list of Collection objects, or None if there was an error for consistency of handling results, even the uuid search will be a list of one """ - url = f'{self.API_ENDPOINT}/core/collections' + url = f"{self.API_ENDPOINT}/core/collections" params = parse_params(embeds=embeds) if size is not None: - params['size'] = size + params["size"] = size if page is not None: - params['page'] = page + params["page"] = page if sort is not None: - params['sort'] = sort + params["sort"] = sort # First, handle case of UUID. 
It overrides the other arguments as it is a request for a single collection if uuid is not None: try: id = UUID(uuid).version # Update URL and parameters - url = f'{url}/{uuid}' + url = f"{url}/{uuid}" params = None except ValueError: - logging.error(f'Invalid collection UUID: {uuid}') + logging.error("Invalid collection UUID: %s", uuid) return None if community is not None: - if 'collections' in community.links and 'href' in community.links['collections']: + if ( + "collections" in community.links + and "href" in community.links["collections"] + ): # Update URL - url = community.links['collections']['href'] + url = community.links["collections"]["href"] # Perform the actual request. By now, our URL and parameter should be properly set r_json = self.fetch_resource(url, params=params) # Empty list - collections = list() - if '_embedded' in r_json: + collections = [] + if "_embedded" in r_json: # This is a list of collections - if 'collections' in r_json['_embedded']: - for collection_resource in r_json['_embedded']['collections']: + if "collections" in r_json["_embedded"]: + for collection_resource in r_json["_embedded"]["collections"]: collections.append(Collection(collection_resource)) - elif 'uuid' in r_json: + elif "uuid" in r_json: # This is a single collection collections.append(Collection(r_json)) # Return list (populated or empty) return collections - @paginated('collections', Collection) + @paginated("collections", Collection) def get_collections_iter(do_paginate, self, community=None, sort=None, embeds=None): """ Get collections as an iterator, automatically handling pagination by requesting the next page when all items from one page have been consumed @param community: Community object. 
If present, collections for a community @return: Iterator of Collection """ - url = f'{self.API_ENDPOINT}/core/collections' + url = f"{self.API_ENDPOINT}/core/collections" params = parse_params(embeds=embeds) if sort is not None: - params['sort'] = sort + params["sort"] = sort if community is not None: - if 'collections' in community.links and 'href' in community.links['collections']: - url = community.links['collections']['href'] + if ( + "collections" in community.links + and "href" in community.links["collections"] + ): + url = community.links["collections"]["href"] return do_paginate(url, params) @@ -961,10 +1139,10 @@ def create_collection(self, parent, data, embeds=None): """ # TODO: To be consistent with other create methods, this should probably also allow a Community object # to be passed instead of just the UUID as a string - url = f'{self.API_ENDPOINT}/core/collections' + url = f"{self.API_ENDPOINT}/core/collections" params = parse_params(embeds=embeds) if parent is not None: - params = {'parent': parent} + params = {"parent": parent} return Collection(api_resource=parse_json(self.create_dso(url, params, data))) def get_item(self, uuid, embeds=None): @@ -975,13 +1153,13 @@ def get_item(self, uuid, embeds=None): @return: the raw API response """ # TODO - return constructed Item object instead, handling errors here? 
- url = f'{self.API_ENDPOINT}/core/items' + url = f"{self.API_ENDPOINT}/core/items" try: id = UUID(uuid).version - url = f'{url}/{uuid}' + url = f"{url}/{uuid}" return self.api_get(url, parse_params(embeds=embeds), None) except ValueError: - logging.error(f'Invalid item UUID: {uuid}') + logging.error("Invalid item UUID: %s", uuid) return None def get_items(self, embeds=None): @@ -991,19 +1169,19 @@ def get_items(self, embeds=None): @param embeds: Optional list of resources to embed in response JSON @return: A list of items, or an error """ - url = f'{self.API_ENDPOINT}/core/items' + url = f"{self.API_ENDPOINT}/core/items" # Empty item list - items = list() + items = [] # Perform the actual request r_json = self.fetch_resource(url, params=parse_params(embeds=embeds)) # Empty list - items = list() - if '_embedded' in r_json: + items = [] + if "_embedded" in r_json: # This is a list of items - if 'items' in r_json['_embedded']: - for item_resource in r_json['_embedded']['items']: + if "items" in r_json["_embedded"]: + for item_resource in r_json["_embedded"]["items"]: items.append(Item(item_resource)) - elif 'uuid' in r_json: + elif "uuid" in r_json: # This is a single item items.append(Item(r_json)) @@ -1018,15 +1196,19 @@ def create_item(self, parent, item, embeds=None): @param embeds: Optional list of resources to embed in response JSON @return: Item object constructed from the API response """ - url = f'{self.API_ENDPOINT}/core/items' + url = f"{self.API_ENDPOINT}/core/items" if parent is None: - logging.error('Need a parent UUID!') + logging.error("Need a parent UUID!") return None - params = parse_params({'owningCollection': parent}, embeds) + params = parse_params({"owningCollection": parent}, embeds) if not isinstance(item, Item): - logging.error('Need a valid item') + logging.error("Need a valid item") return None - return Item(api_resource=parse_json(self.create_dso(url, params=params, data=item.as_dict()))) + return Item( + api_resource=parse_json( + 
self.create_dso(url, params=params, data=item.as_dict()) + ) + ) def create_item_version(self, item_uuid, summary=None, embeds=None): """ @@ -1049,11 +1231,13 @@ def create_item_version(self, item_uuid, summary=None, embeds=None): if response.status_code == 201: # 201 Created - Success new_version = parse_json(response) - logging.info(f"Created new version for item {item_uuid}") + logging.info("Created new version for item %s", item_uuid) return new_version else: logging.error( - f"Error creating item version: {response.status_code} {response.text}" + "Error creating item version: %s %s", + response.status_code, + response.text, ) return None @@ -1067,11 +1251,21 @@ def update_item(self, item, embeds=None): @return: """ if not isinstance(item, Item): - logging.error('Need a valid item') + logging.error("Need a valid item") return None return self.update_dso(item, params=parse_params(embeds=embeds)) - def add_metadata(self, dso, field, value, language=None, authority=None, confidence=-1, place='', embeds=None): + def add_metadata( + self, + dso, + field, + value, + language=None, + authority=None, + confidence=-1, + place="", + embeds=None, + ): """ Add metadata to a DSO using the api_patch method (PUT, with path and operation and value) @param dso: DSO to patch @@ -1084,26 +1278,36 @@ def add_metadata(self, dso, field, value, language=None, authority=None, confide @param embeds: Optional list of resources to embed in response JSON :return: """ - if dso is None or field is None or value is None or not isinstance(dso, DSpaceObject): + if ( + dso is None + or field is None + or value is None + or not isinstance(dso, DSpaceObject) + ): # TODO: separate these tests, and add better error handling - logging.error('Invalid or missing DSpace object, field or value string') + logging.error("Invalid or missing DSpace object, field or value string") return self dso_type = type(dso) # Place can be 0+ integer, or a hyphen - meaning "last" - path = f'/metadata/{field}/{place}' + 
path = f"/metadata/{field}/{place}" patch_value = { - 'value': value, - 'language': language, - 'authority': authority, - 'confidence': confidence + "value": value, + "language": language, + "authority": authority, + "confidence": confidence, } - url = dso.links['self']['href'] + url = dso.links["self"]["href"] r = self.api_patch( - url=url, operation=self.PatchOperation.ADD, path=path, value=patch_value, params=parse_params(embeds=embeds)) + url=url, + operation=self.PatchOperation.ADD, + path=path, + value=patch_value, + params=parse_params(embeds=embeds), + ) return dso_type(api_resource=parse_json(r)) @@ -1115,7 +1319,7 @@ def create_user(self, user, token=None, embeds=None): @embeds: Optional list of resources to embed in response JSON @return: User object constructed from the API response """ - url = f'{self.API_ENDPOINT}/eperson/epersons' + url = f"{self.API_ENDPOINT}/eperson/epersons" data = user if isinstance(user, User): data = user.as_dict() @@ -1123,12 +1327,14 @@ def create_user(self, user, token=None, embeds=None): # that you see for other DSO types - still figuring out the best way params = parse_params(embeds=embeds) if token is not None: - params = {'token': token} - return User(api_resource=parse_json(self.create_dso(url, params=params, data=data))) + params = {"token": token} + return User( + api_resource=parse_json(self.create_dso(url, params=params, data=data)) + ) def delete_user(self, user): if not isinstance(user, User): - logging.error(f'Must be a valid user') + logging.error("Must be a valid user") return None return self.delete_dso(user) @@ -1141,26 +1347,26 @@ def get_users(self, page=0, size=20, sort=None, embeds=None): @param embeds: Optional list of resources to embed in response JSON @return: list of User objects """ - url = f'{self.API_ENDPOINT}/eperson/epersons' - users = list() + url = f"{self.API_ENDPOINT}/eperson/epersons" + users = [] params = parse_params(embeds=embeds) if page is not None: - params['page'] = page + 
params["page"] = page if size is not None: - params['size'] = size + params["size"] = size if page is not None: - params['page'] = page + params["page"] = page if sort is not None: - params['sort'] = sort + params["sort"] = sort r = self.api_get(url, params=params) r_json = parse_json(response=r) - if '_embedded' in r_json: - if 'epersons' in r_json['_embedded']: - for user_resource in r_json['_embedded']['epersons']: + if "_embedded" in r_json: + if "epersons" in r_json["_embedded"]: + for user_resource in r_json["_embedded"]["epersons"]: users.append(User(user_resource)) return users - @paginated('epersons', User) + @paginated("epersons", User) def get_users_iter(do_paginate, self, sort=None, embeds=None): """ Get an iterator of users (epersons) in the DSpace instance, automatically handling pagination by requesting the next page when all items from one page have been consumed @@ -1168,10 +1374,10 @@ def get_users_iter(do_paginate, self, sort=None, embeds=None): @param embeds: Optional list of resources to embed in response JSON @return: Iterator of User """ - url = f'{self.API_ENDPOINT}/eperson/epersons' + url = f"{self.API_ENDPOINT}/eperson/epersons" params = parse_params(embeds=embeds) if sort is not None: - params['sort'] = sort + params["sort"] = sort return do_paginate(url, params) @@ -1182,16 +1388,20 @@ def create_group(self, group, embeds=None): @param embeds: Optional list of resources to embed in response JSON @return: User object constructed from the API response """ - url = f'{self.API_ENDPOINT}/eperson/groups' + url = f"{self.API_ENDPOINT}/eperson/groups" data = group if isinstance(group, Group): data = group.as_dict() # TODO: Validation. 
Note, at least here I will just allow a dict instead of the pointless cast<->cast # that you see for other DSO types - still figuring out the best way - return Group(api_resource=parse_json(self.create_dso(url, params=parse_params(embeds=embeds), data=data))) + return Group( + api_resource=parse_json( + self.create_dso(url, params=parse_params(embeds=embeds), data=data) + ) + ) def start_workflow(self, workspace_item): - url = f'{self.API_ENDPOINT}/workflow/workflowitems' + url = f"{self.API_ENDPOINT}/workflow/workflowitems" res = parse_json(self.api_post_uri(url, params=None, uri_list=workspace_item)) logging.debug(res) # TODO: WIP @@ -1205,14 +1415,14 @@ def update_token(self, r): :return: """ if not self.session: - logging.debug('Session state not found, setting...') + logging.debug("Session state not found, setting...") self.session = requests.Session() - if 'DSPACE-XSRF-TOKEN' in r.headers: - t = r.headers['DSPACE-XSRF-TOKEN'] - logging.debug(f'Updating XSRF token to {t}') + if "DSPACE-XSRF-TOKEN" in r.headers: + t = r.headers["DSPACE-XSRF-TOKEN"] + logging.debug("Updating XSRF token to %s", t) # Update headers and cookies - self.session.headers.update({'X-XSRF-Token': t}) - self.session.cookies.update({'X-XSRF-Token': t}) + self.session.headers.update({"X-XSRF-Token": t}) + self.session.cookies.update({"X-XSRF-Token": t}) def get_short_lived_token(self): """ @@ -1220,23 +1430,51 @@ def get_short_lived_token(self): @return: short lived Authorization token """ if not self.session: - logging.debug('Session state not found, setting...') + logging.debug("Session state not found, setting...") self.session = requests.Session() - url = f'{self.API_ENDPOINT}/authn/shortlivedtokens' + url = f"{self.API_ENDPOINT}/authn/shortlivedtokens" r = self.api_post(url, json=None, params=None) r_json = parse_json(r) - if r_json is not None and 'token' in r_json: - return r_json['token'] + if r_json is not None and "token" in r_json: + return r_json["token"] - logging.error('Could 
not retrieve short-lived token') + logging.error("Could not retrieve short-lived token") return None def solr_query(self, query, filters=None, fields=None, start=0, rows=999999999): + """ + Perform raw Solr query + @param query: query string + @param filters: list of filter queries + @param fields: list of fields to return in results + @param start: start doc + @param rows: max docs to return + @return: solr search results + """ if fields is None: fields = [] if filters is None: filters = [] - return self.solr.search(query, fq=filters, start=start, rows=rows, **{ - 'fl': ','.join(fields) - }) + return self.solr.search( + query, fq=filters, start=start, rows=rows, **{"fl": ",".join(fields)} + ) + + def resolve_identifier_to_dso(self, identifier=None): + """ + Resolve a DSO identifier (uuid, handle, DOI, etc.) to a DSO URI + Useful for resolving handles to objects, etc. + @param identifier: a persistent identifier for an object like handle, doi, uuid + @return: resolved DSpaceObject or error + """ + if identifier is not None: + url = f"{self.API_ENDPOINT}/pid/find" + r = self.api_get(url, params={"id": identifier}) + if r.status_code == 200: + r_json = parse_json(r) + if r_json is not None and "uuid" in r_json: + return DSpaceObject(api_resource=r_json) + elif r.status_code == 404: + logging.error("Not found: %s", identifier) + else: + logging.error("Error resolving identifier %s to DSO: %s", identifier, r.status_code) diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index 37c66a0..52e356d 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -66,7 +66,7 @@ def __init__(self, api_resource=None): """ super().__init__(api_resource) - self.metadata = dict() + self.metadata = {} if api_resource is not None: if 'id' in api_resource: @@ -86,7 +86,7 @@ def get_metadata_values(self, field): @param field: DSpace field, eg. 
dc.creator @return: list of strings """ - values = list() + values = [] if field in self.metadata: values = self.metadata[field] return values @@ -114,7 +114,7 @@ def __init__(self, api_resource=None, dso=None): """ super().__init__(api_resource) self.type = None - self.metadata = dict() + self.metadata = {} if dso is not None: api_resource = dso.as_dict() @@ -220,7 +220,7 @@ class Item(SimpleDSpaceObject): inArchive = False discoverable = False withdrawn = False - metadata = dict() + metadata = {} def __init__(self, api_resource=None, dso=None): """ @@ -229,9 +229,9 @@ def __init__(self, api_resource=None, dso=None): """ if dso is not None: api_resource = dso.as_dict() - super(Item, self).__init__(dso=dso) + super().__init__(dso=dso) else: - super(Item, self).__init__(api_resource) + super().__init__(api_resource) if api_resource is not None: self.type = 'item' @@ -245,7 +245,7 @@ def get_metadata_values(self, field): @param field: DSpace field, eg. dc.creator @return: list of strings """ - values = list() + values = [] if field in self.metadata: values = self.metadata[field] return values @@ -255,7 +255,7 @@ def as_dict(self): Return a dict representation of this Item, based on super with item-specific attributes added @return: dict of Item for API use """ - dso_dict = super(Item, self).as_dict() + dso_dict = super().as_dict() item_dict = {'inArchive': self.inArchive, 'discoverable': self.discoverable, 'withdrawn': self.withdrawn} return {**dso_dict, **item_dict} @@ -279,7 +279,7 @@ def __init__(self, api_resource=None): Default constructor. 
Call DSpaceObject init then set item-specific attributes @param api_resource: API result object to use as initial data """ - super(Community, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'community' def as_dict(self): @@ -287,7 +287,7 @@ def as_dict(self): Return a dict representation of this Community, based on super with community-specific attributes added @return: dict of Item for API use """ - dso_dict = super(Community, self).as_dict() + dso_dict = super().as_dict() # TODO: More community-specific stuff community_dict = {} return {**dso_dict, **community_dict} @@ -304,11 +304,11 @@ def __init__(self, api_resource=None): Default constructor. Call DSpaceObject init then set collection-specific attributes @param api_resource: API result object to use as initial data """ - super(Collection, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'collection' def as_dict(self): - dso_dict = super(Collection, self).as_dict() + dso_dict = super().as_dict() """ Return a dict representation of this Collection, based on super with collection-specific attributes added @return: dict of Item for API use @@ -328,7 +328,7 @@ def __init__(self, api_resource=None): Default constructor. Call DSpaceObject init then set bundle-specific attributes @param api_resource: API result object to use as initial data """ - super(Bundle, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'bundle' def as_dict(self): @@ -336,7 +336,7 @@ def as_dict(self): Return a dict representation of this Bundle, based on super with bundle-specific attributes added @return: dict of Bundle for API use """ - dso_dict = super(Bundle, self).as_dict() + dso_dict = super().as_dict() bundle_dict = {} return {**dso_dict, **bundle_dict} @@ -360,7 +360,7 @@ def __init__(self, api_resource=None): Default constructor. 
Call DSpaceObject init then set bitstream-specific attributes @param api_resource: API result object to use as initial data """ - super(Bitstream, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'bitstream' if 'bundleName' in api_resource: self.bundleName = api_resource['bundleName'] @@ -376,7 +376,7 @@ def as_dict(self): Return a dict representation of this Bitstream, based on super with bitstream-specific attributes added @return: dict of Bitstream for API use """ - dso_dict = super(Bitstream, self).as_dict() + dso_dict = super().as_dict() bitstream_dict = {'bundleName': self.bundleName, 'sizeBytes': self.sizeBytes, 'checkSum': self.checkSum, 'sequenceId': self.sequenceId} return {**dso_dict, **bitstream_dict} @@ -446,7 +446,7 @@ def __init__(self, api_resource=None): Default constructor. Call DSpaceObject init then set group-specific attributes @param api_resource: API result object to use as initial data """ - super(Group, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'group' if 'name' in api_resource: self.name = api_resource['name'] @@ -458,7 +458,7 @@ def as_dict(self): Return a dict representation of this Group, based on super with group-specific attributes added @return: dict of Group for API use """ - dso_dict = super(Group, self).as_dict() + dso_dict = super().as_dict() group_dict = {'name': self.name, 'permanent': self.permanent} return {**dso_dict, **group_dict} @@ -468,12 +468,12 @@ class User(SimpleDSpaceObject): Extends DSpaceObject to implement specific attributes and methods for users (aka. EPersons) """ type = 'user' - name = None, - netid = None, - lastActive = None, - canLogIn = False, - email = None, - requireCertificate = False, + name = None + netid = None + lastActive = None + canLogIn = False + email = None + requireCertificate = False selfRegistered = False def __init__(self, api_resource=None): @@ -481,7 +481,7 @@ def __init__(self, api_resource=None): Default constructor. 
Call DSpaceObject init then set user-specific attributes @param api_resource: API result object to use as initial data """ - super(User, self).__init__(api_resource) + super().__init__(api_resource) self.type = 'user' if 'name' in api_resource: self.name = api_resource['name'] @@ -503,7 +503,7 @@ def as_dict(self): Return a dict representation of this User, based on super with user-specific attributes added @return: dict of User for API use """ - dso_dict = super(User, self).as_dict() + dso_dict = super().as_dict() user_dict = {'name': self.name, 'netid': self.netid, 'lastActive': self.lastActive, 'canLogIn': self.canLogIn, 'email': self.email, 'requireCertificate': self.requireCertificate, 'selfRegistered': self.selfRegistered} @@ -516,7 +516,7 @@ class InProgressSubmission(AddressableHALResource): type = None def __init__(self, api_resource): - super(InProgressSubmission, self).__init__(api_resource) + super().__init__(api_resource) if 'lastModified' in api_resource: self.lastModified = api_resource['lastModified'] if 'step' in api_resource: @@ -527,7 +527,7 @@ def __init__(self, api_resource): self.lastModified = api_resource['lastModified'] def as_dict(self): - parent_dict = super(InProgressSubmission, self).as_dict() + parent_dict = super().as_dict() dict = { 'lastModified': self.lastModified, 'step': self.step, @@ -539,10 +539,10 @@ def as_dict(self): class WorkspaceItem(InProgressSubmission): def __init__(self, api_resource): - super(WorkspaceItem, self).__init__(api_resource) + super().__init__(api_resource) def as_dict(self): - return super(WorkspaceItem, self).as_dict() + return super().as_dict() class EntityType(AddressableHALResource): """ @@ -551,7 +551,7 @@ class EntityType(AddressableHALResource): are all common entity types used in DSpace 7+ """ def __init__(self, api_resource): - super(EntityType, self).__init__(api_resource) + super().__init__(api_resource) if 'label' in api_resource: self.label = api_resource['label'] if 'type' in api_resource: 
@@ -562,7 +562,7 @@ class RelationshipType(AddressableHALResource): TODO: RelationshipType """ def __init__(self, api_resource): - super(RelationshipType, self).__init__(api_resource) + super().__init__(api_resource) class SearchResult(HALResource): """ @@ -621,7 +621,7 @@ class SearchResult(HALResource): type = None def __init__(self, api_resource): - super(SearchResult, self).__init__(api_resource) + super().__init__(api_resource) if 'lastModified' in api_resource: self.lastModified = api_resource['lastModified'] if 'step' in api_resource: @@ -632,7 +632,7 @@ def __init__(self, api_resource): self.type = api_resource['type'] def as_dict(self): - parent_dict = super(SearchResult, self).as_dict() + parent_dict = super().as_dict() dict = { 'lastModified': self.lastModified, 'step': self.step, diff --git a/example.py b/example.py index 5782780..b58ecc8 100644 --- a/example.py +++ b/example.py @@ -8,34 +8,43 @@ """ from pprint import pprint +import os +import sys + from dspace_rest_client.client import DSpaceClient from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream -import os -# The DSpace client will look for the same environment variables but we can also look for them here explicitly -# and as an example -url = 'http://localhost:8080/server/api' -if 'DSPACE_API_ENDPOINT' in os.environ: - url = os.environ['DSPACE_API_ENDPOINT'] -username = 'username@test.system.edu' -if 'DSPACE_API_USERNAME' in os.environ: - username = os.environ['DSPACE_API_USERNAME'] -password = 'password' -if 'DSPACE_API_PASSWORD' in os.environ: - password = os.environ['DSPACE_API_PASSWORD'] +DEFAULT_URL = 'http://localhost:8080/server/api' +DEFAULT_USERNAME = 'username@test.system.edu' +DEFAULT_PASSWORD = 'password' + +# Configuration from environment variables +URL = os.environ.get('DSPACE_API_ENDPOINT', DEFAULT_URL) +USERNAME = os.environ.get('DSPACE_API_USERNAME', DEFAULT_USERNAME) +PASSWORD = os.environ.get('DSPACE_API_PASSWORD', DEFAULT_PASSWORD) # 
Instantiate DSpace client -# Note the 'fake_user_agent' setting here -- this will set a string like the following, to get by Cloudfront: -# Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 -# The default is to *not* fake the user agent, and instead use the default of DSpace-Python-REST-Client/x.y.z -# To specify a custom user agent, set the USER_AGENT env variable and leave/set fake_user_agent as False -d = DSpaceClient(api_endpoint=url, username=username, password=password, fake_user_agent=True) +# Note the 'fake_user_agent' setting here -- this will set a string like the following, +# to get by Cloudfront: +# Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) \ +# Chrome/39.0.2171.95 Safari/537.36 +# The default is to *not* fake the user agent, and instead use the default of +# DSpace-Python-REST-Client/x.y.z +# To specify a custom user agent, set the USER_AGENT env variable and leave/set +# fake_user_agent as False +d = DSpaceClient(api_endpoint=URL, username=USERNAME, password=PASSWORD, fake_user_agent=True) # Authenticate against the DSpace client authenticated = d.authenticate() if not authenticated: print('Error logging in! Giving up.') - exit(1) + sys.exit(1) + +# An example of searching for workflow items (any search configuration from discovery.xml can be used) +# note that the results here depend on the workflow role / access of the logged in user +search_results = d.search_objects(query='*:*', dso_type='item', configuration='workflow') +for result in search_results: + print(f'{result.name} ({result.uuid})') # Put together some basic Community data. 
# See https://github.com/DSpace/RestContract/blob/main/communities.md @@ -56,13 +65,13 @@ # Create the new community # In this example, we'll just make this a top-level community by # passing None as the parent parameter -community_parent = None -new_community = d.create_community(parent=community_parent, data=community_data) +COMMUNITY_PARENT = None +new_community = d.create_community(parent=COMMUNITY_PARENT, data=community_data) if isinstance(new_community, Community) and new_community.uuid is not None: print(f'New community created! Handle: {new_community.handle}') else: print('Error! Giving up.') - exit(1) + sys.exit(1) # Update the community metadata new_community.name = 'Community created by the Python REST Client - Updated Name' @@ -100,7 +109,7 @@ print(f'New collection created! Handle: {new_collection.handle}') else: print('Error! Giving up.') - exit(1) + sys.exit(1) # Put together some basic Item data. # (See: https://github.com/DSpace/RestContract/blob/main/items.md) @@ -153,10 +162,11 @@ print(f'New item created! Handle: {new_item.handle}') else: print('Error! Giving up.') - exit(1) + sys.exit(1) # Add a single metadata field+value to the item (PATCH operation) -updated_item = d.add_metadata(dso=new_item, field='dc.description.abstract', value='Added abstract to an existing item', +updated_item = d.add_metadata(dso=new_item, field='dc.description.abstract', + value='Added abstract to an existing item', language='en', authority=None, confidence=-1) # Create a new ORIGINAL bundle @@ -166,7 +176,7 @@ print(f'New bundle created! UUID: {new_bundle.uuid}') else: print('Error! 
Giving up.') - exit(1) + sys.exit(1) # Create and upload a new bitstream using the LICENSE.txt file in this project # Set bitstream metadata @@ -177,20 +187,22 @@ 'authority': None, 'confidence': -1, 'place': 0}] } -# Set the mime type (using mimetypes.guess_type is recommended for real uploads if you don't want to set manually) -file_mime = 'text/plain' +# Set the mime type (using mimetypes.guess_type is recommended for real uploads if you +# don't want to set manually) +FILE_MIME = 'text/plain' # Set a better file name for our test upload -file_name = 'uploaded_file.txt' +FILE_NAME = 'uploaded_file.txt' # Create the bitstream and upload the file -new_bitstream = d.create_bitstream(bundle=new_bundle, name=file_name, - path='LICENSE.txt', mime=file_mime, metadata=bitstream_metadata) +new_bitstream = d.create_bitstream(bundle=new_bundle, name=FILE_NAME, + path='LICENSE.txt', mime=FILE_MIME, metadata=bitstream_metadata) if isinstance(new_bitstream, Bitstream) and new_bitstream.uuid is not None: print(f'New bitstream created! UUID: {new_bitstream.uuid}') else: print('Error! Giving up.') - exit(1) + sys.exit(1) -print('All finished with example data creation. Visit your test repository to review created objects') +print('All finished with example data creation. 
Visit your test repository to review \ + created objects') # Retrieving objects - now that we know there is some data in the repository we can demonstrate # some simple ways of retrieving and iterating DSOs @@ -204,9 +216,11 @@ collections = d.get_collections(community=top_community) for collection in collections: print(f'{collection.name} ({collection.uuid}') - # Get all items in this collection - see that the recommended method is a search, scoped to this collection - # (there is no collection/items endpoint, though there is a /mappedItems endpoint, not yet implemented here) - items = d.search_objects(query='*:*', scope=collection.uuid, dso_type='item') + # Get all items in this collection - see that the recommended method is a search, + # scoped to this collection + # (there is no collection/items endpoint, though there is a /mappedItems endpoint, + # not yet implemented here) + items = d.search_objects(query='*:*', scope=collection.uuid, dso_type='item', configuration='default') for item in items: print(f'{item.name} ({item.uuid})') # Get all bundles in this item @@ -220,15 +234,20 @@ # Download this bitstream r = d.download_bitstream(bitstream.uuid) if r is not None and r.headers is not None: - print(f'\tHeaders (server info, not calculated locally)\n\tmd5: {r.headers.get("ETag")}\n' - f'\tformat: {r.headers.get("Content-Type")}\n\tlength: {r.headers.get("Content-Length")}\n' - f'\tLOCAL LEN(): {len(r.content)}') - # Uncomment the below to get the binary data in content and then do something with it like - # print, or write to file, etc. You want to use the 'content' property of the response object + print( + '\tHeaders (server info, not calculated locally)\n' + f'\tmd5: {r.headers.get("ETag")}\n' + f'\tformat: {r.headers.get("Content-Type")}\n' + f'\tlength: {r.headers.get("Content-Length")}\n' + f'\tLOCAL LEN(): {len(r.content)}' + ) + # Uncomment the below to get the binary data in content and then do + # something with it like print, or write to file, etc. 
You want to use + # the 'content' property of the response object # # print(r.content) -# Finally, let's show the new _iter methods which will transparently handle pagination and return iterators -# which you can use as normal +# Finally, let's show the new _iter methods which will transparently handle pagination +# and return iterators which you can use as normal for i, search_result in enumerate(d.search_objects_iter('*:*')): - print(f'Result #{i}: {search_result.name} ({search_result.uuid})') \ No newline at end of file + print(f'Result #{i}: {search_result.name} ({search_result.uuid})') diff --git a/example_gets.py b/example_gets.py index bdf53a8..d39e2ac 100644 --- a/example_gets.py +++ b/example_gets.py @@ -3,12 +3,16 @@ # and described in the LICENCE file in the root of this project """ -Example Python 3 application using the dspace.py API client library to retrieve basic DSOs in a DSpace repository +Example Python 3 application using the dspace.py API client library to retrieve basic DSOs in a +DSpace repository """ +import sys + from dspace_rest_client.client import DSpaceClient + # Import models as below if needed -#from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream +# from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream # Example variables needed for authentication and basic API requests # SET THESE TO MATCH YOUR TEST SYSTEM BEFORE RUNNING THE EXAMPLE SCRIPT @@ -17,54 +21,66 @@ # DSPACE_API_USERNAME= # DSPACE_API_PASSWORD= # USER_AGENT= -url = 'http://localhost:8080/server/api' -username = 'username@test.system.edu' -password = 'password' +URL = "http://localhost:8080/server/api" +USERNAME = "username@test.system.edu" +PASSWORD = "password" # Instantiate DSpace client -# Note the 'fake_user_agent' setting here -- this will set a string like the following, to get by Cloudfront: -# Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 -# 
The default is to *not* fake the user agent, and instead use the default of DSpace-Python-REST-Client/x.y.z. -# To specify a custom user agent, set the USER_AGENT env variable and leave/set fake_user_agent as False -d = DSpaceClient(api_endpoint=url, username=username, password=password, fake_user_agent=True) +# Note the 'fake_user_agent' setting here -- this will set a string like the following, +# to get by Cloudfront: +# Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) \ +# Chrome/39.0.2171.95 Safari/537.36 +# The default is to *not* fake the user agent, and instead use the default of +# DSpace-Python-REST-Client/x.y.z. +# To specify a custom user agent, set the USER_AGENT env variable and leave/set +# fake_user_agent as False +d = DSpaceClient( + api_endpoint=URL, username=USERNAME, password=PASSWORD, fake_user_agent=True +) # Authenticate against the DSpace client authenticated = d.authenticate() if not authenticated: - print('Error logging in! Giving up.') - exit(1) + print("Error logging in! 
Giving up.") + sys.exit(1) # Retrieving objects - now that we know there is some data in the repository we can demonstrate # some simple ways of retrieving and iterating DSOs -print('\nBeginning examples of get, search methods\n') +print("\nBeginning examples of get, search methods\n") # Get top communities top_communities = d.get_communities(top=True) for top_community in top_communities: - print(f'{top_community.name} ({top_community.uuid})') + print(f"{top_community.name} ({top_community.uuid})") # Get all collections in this community collections = d.get_collections(community=top_community) for collection in collections: - print(f'{collection.name} ({collection.uuid}') - # Get all items in this collection - see that the recommended method is a search, scoped to this collection - # (there is no collection/items endpoint, though there is a /mappedItems endpoint, not yet implemented here) - items = d.search_objects(query='*:*', scope=collection.uuid, dso_type='item') + print(f"{collection.name} ({collection.uuid}") + # Get all items in this collection - see that the recommended method is a search, + # scoped to this collection (there is no collection/items endpoint, though there is + # a /mappedItems endpoint, not yet implemented here) + items = d.search_objects(query="*:*", scope=collection.uuid, dso_type="item") for item in items: - print(f'{item.name} ({item.uuid})') + print(f"{item.name} ({item.uuid})") # Get all bundles in this item bundles = d.get_bundles(parent=item) for bundle in bundles: - print(f'{bundle.name} ({bundle.uuid}') + print(f"{bundle.name} ({bundle.uuid}") # Get all bitstreams in this bundle bitstreams = d.get_bitstreams(bundle=bundle) for bitstream in bitstreams: - print(f'{bitstream.name} ({bitstream.uuid}') + print(f"{bitstream.name} ({bitstream.uuid}") # Download this bitstream r = d.download_bitstream(bitstream.uuid) - print(f'\tHeaders (server info, not calculated locally)\n\tmd5: {r.headers.get("ETag")}\n' - f'\tformat: 
{r.headers.get("Content-Type")}\n\tlength: {r.headers.get("Content-Length")}\n' - f'\tLOCAL LEN(): {len(r.content)}') - # Uncomment the below to get the binary data in content and then do something with it like - # print, or write to file, etc. You want to use the 'content' property of the response object + print( + '\tHeaders (server info, not calculated locally)\n' + f'\tmd5: {r.headers.get("ETag")}\n' + f'\tformat: {r.headers.get("Content-Type")}\n' + f'\tlength: {r.headers.get("Content-Length")}\n' + f'\tLOCAL LEN(): {len(r.content)}' + ) + # Uncomment the below to get the binary data in content and then do + # something with it like print, or write to file, etc. You want to use + # the 'content' property of the response object # # print(r.content) diff --git a/requirements-pylint.txt b/requirements-pylint.txt new file mode 100644 index 0000000..24ca480 --- /dev/null +++ b/requirements-pylint.txt @@ -0,0 +1,8 @@ +astroid==3.3.5 +colorama==0.4.6 +dill==0.3.9 +isort==5.13.2 +mccabe==0.7.0 +platformdirs==4.3.6 +pylint==3.3.1 +tomlkit==0.13.2 \ No newline at end of file diff --git a/setup.py b/setup.py index 4411a0d..6d71f2a 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ import setuptools from dspace_rest_client import __version__ -with open("README.md", "r") as fh: +with open("README.md", "r", encoding="utf_8") as fh: long_description = fh.read() setuptools.setup( @@ -15,9 +15,13 @@ long_description_content_type="text/markdown", url="https://github.com/the-library-code/dspace-rest-client", project_urls={ - 'Documentation': 'https://github.com/the-library-code/dspace-rest-python/blob/main/README.md', + 'Documentation': ( + 'https://github.com/the-library-code/dspace-rest-python/blob/main/README.md' + ), 'GitHub': 'https://github.com/the-library-code/dspace-rest-python', - 'Changelog': 'https://github.com/the-library-code/dspace-rest-python/blob/main/CHANGELOG.md', + 'Changelog': ( + 
'https://github.com/the-library-code/dspace-rest-python/blob/main/CHANGELOG.md' + ), }, classifiers=[ "Programming Language :: Python :: 3.8", diff --git a/solr_example.py b/solr_example.py index cd11b23..aaa53bc 100644 --- a/solr_example.py +++ b/solr_example.py @@ -4,15 +4,15 @@ from dspace_rest_client.client import DSpaceClient -url = 'http://localhost:8080/server/api' -username = 'username@domain.com' -password = 'password' +URL = 'http://localhost:8080/server/api' +USERNAME = 'username@domain.com' +PASSWORD = 'password' # To auth solr do like this and pass it as the argument in DSpaceClient solr_auth = HTTPBasicAuth('user', 'pass') # Instantiate DSpace client -d = DSpaceClient(api_endpoint=url, username=username, password=password, +d = DSpaceClient(api_endpoint=URL, username=USERNAME, password=PASSWORD, solr_endpoint='http://localhost:8983/solr/search', solr_auth=None) # Here's an example of a wildcard query with some filters to apply and some fields to return @@ -22,4 +22,3 @@ for doc in results.docs: pprint.pprint(doc) -