diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fed0896..ca28561 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,31 +1,31 @@ name: CI -on: [pull_request] +on: [ pull_request ] jobs: backend: name: lint + test - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest strategy: max-parallel: 4 matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: [ "3.9", "3.10", "3.11", "3.12" , "3.13" ] steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: install - run: | - python -m pip install -U pip - pip install -e ".[dev]" - - name: lint - run: | - make lint - - name: test - run: | - make coverage - coverage report + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: install + run: | + python -m pip install -U pip + pip install -e ".[dev]" + - name: lint + run: | + make lint + - name: test + run: | + make coverage + coverage report diff --git a/Makefile b/Makefile index ca201f8..a90c95d 100644 --- a/Makefile +++ b/Makefile @@ -34,10 +34,10 @@ clean: ## Remove all build, test and Python artifacts find . -name '__pycache__' -exec rm -fr {} + lint: ## Check python formatting issues - @ruff format . --check && ruff . + @ruff format . --check && ruff check format: ## Fix python formatting issues where possible - @ruff format . && ruff . --fix --show-fixes + @ruff format && ruff check --fix --show-fixes test: ## Run unit test suite @py.test --benchmark-skip diff --git a/README.md b/README.md index 6de11ed..c809128 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI version](https://badge.fury.io/py/rispy.svg)](https://badge.fury.io/py/rispy) -A Python 3.8+ reader/writer of RIS reference files. +A Python reader/writer of [RIS](https://en.wikipedia.org/wiki/RIS_(file_format)) reference files. 
*Pronunciation* - `rispee` - like "crispy", but without the c. @@ -25,7 +25,8 @@ Parsing: ``` -A file path can also be used to read RIS files. If an encoding is not specified in ``load``, the default system encoding will be used. +A file path can also be used to read RIS files. If an encoding is not specified in ``load``, the default system encoding +will be used. ```python >>> from pathlib import Path @@ -90,7 +91,9 @@ Writing: ## TAG_KEY_MAPPING -The most fields contain string values, but some like first_authors (A1) are parsed into lists. The default mapping were created from specifications scattered around the web, but to our knowledge there is not one single source of RIS truth, so these may need to be modified for specific export systems: +Most fields contain string values, but some like first_authors (A1) are parsed into lists. The default mapping is +created from specifications scattered around the web, but to our knowledge there is not one single source of RIS truth, +so these may need to be modified for specific export systems: - [Wikipedia](https://en.wikipedia.org/wiki/RIS_(file_format)) - [ResearcherId](https://web.archive.org/web/20170707033254/http://www.researcherid.com/resources/html/help_upload.htm) @@ -183,7 +186,7 @@ The most fields contain string values, but some like first_authors (A1) are pars ### Override key mapping -The parser use a `TAG_KEY_MAPPING`, which one can override by calling `rispy.load()` with the `mapping` parameter. +The parser uses a `TAG_KEY_MAPPING`, which one can override by calling `rispy.load()` with the `mapping` parameter. ```python >>> from copy import deepcopy @@ -222,15 +225,23 @@ List tags can be customized in the same way, by passing a list to the `list_tags ### Changing rispy behavior -There are a few flags that can be passed to `rispy.load()` and `rispy.dump()` that change how `rispy` deals with tags. 
For example, setting `skip_unknown_tags` to `True` will cause `rispy` do not read or write tags not in the tag map. More can be found in the docstrings for each class. If more customization is necessary, a custom implementation can be created (see next section). +There are a few flags that can be passed to `rispy.load()` and `rispy.dump()` that change how `rispy` deals with tags. +For example, setting `skip_unknown_tags` to `True` will cause `rispy` to not read or write tags not in the tag map. More +can be found in the docstrings for each class. If more customization is necessary, a custom implementation can be +created (see next section). ## Using custom implementations -Not all RIS files follow the same formatting guidelines. There is an interface for creating custom implementations for reading and writing such files. An implementation contains the methods and parameters used to work with RIS files, and should be passed to `rispy.load()` or `rispy.dump()`. +Not all RIS files follow the same formatting guidelines. There is an interface for creating custom implementations for +reading and writing such files. An implementation contains the methods and parameters used to work with RIS files, and +should be passed to `rispy.load()` or `rispy.dump()`. ### Customizing implementations -Creating a custom implentation involves creating a class that inherits a base class, and overriding the necessary variables and methods. One of the existing parsers can also be inherited. Inheriting an existing class is advantageous if only minor changes need to be made. The sections below document what is available to be overriden, along with a few examples. +Creating a custom implementation involves creating a class that inherits a base class, and overriding the necessary +variables and methods. One of the existing parsers can also be inherited. Inheriting an existing class is advantageous +if only minor changes need to be made. 
The sections below document what is available to be overridden, along with a few +examples. #### Parsing @@ -258,7 +269,8 @@ class WokParser(RisParser): ### Writing -Writing is very similar to parsing. A custom writer class can inherit `BaseWriter` or one if its subclasses, such as `RisWriter`. +Writing is very similar to parsing. A custom writer class can inherit `BaseWriter` or one of its subclasses, such as +`RisWriter`. Examples: @@ -282,7 +294,8 @@ Other various utilities included in `rispy` are documented below. ### Reference type conversion -A method is available to convert common RIS reference types into more readable terms. It takes a list of references and returns a copy of that list with modified reference types. The map for this conversion is located in ``config.py``. +A method is available to convert common RIS reference types into more readable terms. It takes a list of references and +returns a copy of that list with modified reference types. The map for this conversion is located in ``config.py``. ```python >>> from rispy.utils import convert_reference_types @@ -295,13 +308,14 @@ A method is available to convert common RIS reference types into more readable t ## Software for other RIS-like formats Some RIS-like formats contain rich citation data, for example lists and nested attributes, that `rispy` does not -support. Software specializing on these formats include: +support. 
Software specializing in these formats includes: * [nbib](https://pypi.org/project/nbib/) - parses the "PubMed" or "MEDLINE" format ## Developer instructions -Common developer commands are in the provided `Makefile`; if you don't have `make` installed, you can view the make commands and run the commands from the command-line manually: +Common developer commands are in the provided `Makefile`; if you don't have `make` installed, you can view the make +commands and run the commands from the command-line manually: ```bash # setup environment diff --git a/pyproject.toml b/pyproject.toml index b28d4fe..bac683d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,11 @@ [project] name = "rispy" -license = {file = "LICENSE"} +license = { file = "LICENSE" } authors = [ - {name = "Maik Derstappen", email = "md@derico.de"} + { name = "Maik Derstappen", email = "md@derico.de" } ] maintainers = [ - {name = "Andy Shapiro", email = "shapiromatron@gmail.com"} + { name = "Andy Shapiro", email = "shapiromatron@gmail.com" } ] readme = "README.md" dynamic = ["version", "description"] @@ -14,13 +14,13 @@ classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ] -requires-python = ">=3.8" +requires-python = ">=3.9" [project.urls] "Source" = "https://github.com/mrtango/rispy" @@ -33,7 +33,7 @@ dev = [ "pytest ~=7.4.4", "pytest-benchmark", "flit ~= 3.9.0", - "ruff ~= 0.1.13", + "ruff ~= 0.11.6", "coverage ~= 7.4.0", ] @@ -51,10 +51,12 @@ exclude = [".github", "Makefile", "tests"] [tool.ruff] line-length = 100 -target-version = "py38" +target-version = "py39" + +[tool.ruff.lint] select = ["F", "E", "W", "I", "UP", "S", "B", "T20", "RUF"] 
-[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "test_*.py" = ["S101"] [tool.pytest.ini_options] diff --git a/rispy/__init__.py b/rispy/__init__.py index 983372f..57270d3 100644 --- a/rispy/__init__.py +++ b/rispy/__init__.py @@ -7,16 +7,16 @@ __version__ = "0.9.0" __all__ = [ - "__version__", "LIST_TYPE_TAGS", "TAG_KEY_MAPPING", "TYPE_OF_REFERENCE_MAPPING", - "load", - "loads", - "dump", - "dumps", - "WokParser", - "RisParser", "BaseWriter", + "RisParser", "RisWriter", + "WokParser", + "__version__", + "dump", + "dumps", + "load", + "loads", ] diff --git a/rispy/parser.py b/rispy/parser.py index 2e2c712..3c159e2 100644 --- a/rispy/parser.py +++ b/rispy/parser.py @@ -2,7 +2,7 @@ from collections import defaultdict from pathlib import Path -from typing import ClassVar, Dict, List, Optional, TextIO, Tuple, Type, Union +from typing import ClassVar, Optional, TextIO, Union from .config import ( DELIMITED_TAG_MAPPING, @@ -12,7 +12,7 @@ WOK_TAG_KEY_MAPPING, ) -__all__ = ["load", "loads", "WokParser", "RisParser"] +__all__ = ["RisParser", "WokParser", "load", "loads"] class NextLine(Exception): @@ -47,19 +47,19 @@ class RisParser: END_TAG: str = "ER" UNKNOWN_TAG: str = "UK" PATTERN: str - DEFAULT_IGNORE: ClassVar[List[str]] = [] - DEFAULT_MAPPING: Dict = TAG_KEY_MAPPING - DEFAULT_LIST_TAGS: List[str] = LIST_TYPE_TAGS - DEFAULT_DELIMITER_MAPPING: Dict = DELIMITED_TAG_MAPPING + DEFAULT_IGNORE: ClassVar[list[str]] = [] + DEFAULT_MAPPING: dict = TAG_KEY_MAPPING + DEFAULT_LIST_TAGS: list[str] = LIST_TYPE_TAGS + DEFAULT_DELIMITER_MAPPING: dict = DELIMITED_TAG_MAPPING DEFAULT_NEWLINE: ClassVar[str] = "\n" def __init__( self, *, - mapping: Optional[Dict] = None, - list_tags: Optional[List[str]] = None, - delimiter_tags_mapping: Optional[Dict] = None, - ignore: Optional[List[str]] = None, + mapping: Optional[dict] = None, + list_tags: Optional[list[str]] = None, + delimiter_tags_mapping: Optional[dict] = None, + ignore: Optional[list[str]] = None, 
skip_unknown_tags: bool = False, enforce_list_tags: bool = True, newline: Optional[str] = None, @@ -99,18 +99,18 @@ def __init__( self.enforce_list_tags = enforce_list_tags self.newline = newline if newline is not None else self.DEFAULT_NEWLINE - def _iter_till_start(self, lines) -> Dict: + def _iter_till_start(self, lines) -> dict: while True: line = next(lines) if line.startswith(self.START_TAG): return {self.mapping[self.START_TAG]: self.parse_line(line)[1]} - def parse(self, text: str) -> List[Dict]: + def parse(self, text: str) -> list[dict]: """Parse RIS string.""" line_gen = (line for line in text.split(self.newline)) return self.parse_lines(line_gen) - def parse_lines(self, lines: Union[TextIO, List[str]]) -> List[Dict]: + def parse_lines(self, lines: Union[TextIO, list[str]]) -> list[dict]: """Parse RIS file line by line.""" result = [] @@ -141,7 +141,7 @@ def parse_lines(self, lines: Union[TextIO, List[str]]) -> List[Dict]: except StopIteration: return result - def parse_line(self, line: str) -> Union[Tuple[str, str], Tuple[None, str]]: + def parse_line(self, line: str) -> Union[tuple[str, str], tuple[None, str]]: """Parse line of RIS file. This method parses a line between the start and end tag. @@ -170,7 +170,7 @@ def parse_line(self, line: str) -> Union[Tuple[str, str], Tuple[None, str]]: return (None, line.strip()) def _add_single_value( - self, record: Dict, name: str, value: Union[str, List[str]], is_multi: bool = False + self, record: dict, name: str, value: Union[str, list[str]], is_multi: bool = False ) -> None: """Process a single line. 
@@ -191,7 +191,7 @@ def _add_single_value( else: record[name] = " ".join((value_must_exist_or_is_bug, value)) - def _add_list_value(self, record: Dict, name: str, value: Union[str, List[str]]) -> None: + def _add_list_value(self, record: dict, name: str, value: Union[str, list[str]]) -> None: """Process tags with multiple values.""" value_list = value if isinstance(value, list) else [value] try: @@ -205,7 +205,7 @@ def _add_list_value(self, record: Dict, name: str, value: Union[str, List[str]]) record[name] = [must_exist, *value_list] def _add_tag( - self, record: Dict, tag: str, content: str, extend_multiline: bool = False + self, record: dict, tag: str, content: str, extend_multiline: bool = False ) -> None: try: name = self.mapping[tag] @@ -233,12 +233,12 @@ class WokParser(RisParser): """Subclass of Base for reading Wok RIS files.""" START_TAG = "PT" - DEFAULT_IGNORE: ClassVar[List[str]] = ["FN", "VR", "EF"] + DEFAULT_IGNORE: ClassVar[list[str]] = ["FN", "VR", "EF"] DEFAULT_MAPPING = WOK_TAG_KEY_MAPPING DEFAULT_LIST_TAGS = WOK_LIST_TYPE_TAGS - DEFAULT_DELIMITER_MAPPING: ClassVar[Dict] = {} + DEFAULT_DELIMITER_MAPPING: ClassVar[dict] = {} - def parse_line(self, line: str) -> Union[Tuple[str, str], Tuple[None, str]]: + def parse_line(self, line: str) -> Union[tuple[str, str], tuple[None, str]]: """Parse line of RIS file. This method parses a line between the start and end tag. @@ -269,7 +269,7 @@ def load( newline: Optional[str] = None, implementation: Optional[RisParser] = None, **kw, -) -> List[Dict]: +) -> list[dict]: """Load a RIS file and return a list of entries. Entries are codified as dictionaries whose keys are the @@ -307,7 +307,7 @@ def load( raise ValueError("File must be a file-like object or a Path object") -def loads(text: str, *, implementation: Optional[Type[RisParser]] = None, **kw) -> List[Dict]: +def loads(text: str, *, implementation: Optional[type[RisParser]] = None, **kw) -> list[dict]: """Load a RIS file and return a list of entries. 
Entries are codified as dictionaries whose keys are the diff --git a/rispy/utils.py b/rispy/utils.py index e2211d4..62c522c 100644 --- a/rispy/utils.py +++ b/rispy/utils.py @@ -1,12 +1,11 @@ """Miscellaneous functions.""" from copy import deepcopy -from typing import Dict, List from .config import TYPE_OF_REFERENCE_MAPPING -def invert_dictionary(mapping: Dict) -> Dict: +def invert_dictionary(mapping: dict) -> dict: """Invert the keys and values of a dictionary.""" remap = {v: k for k, v in mapping.items()} if len(remap) != len(mapping): @@ -15,21 +14,21 @@ def invert_dictionary(mapping: Dict) -> Dict: def convert_reference_types( - reference_list: List[Dict], + reference_list: list[dict], reverse: bool = False, strict: bool = False, - type_map: Dict = TYPE_OF_REFERENCE_MAPPING, -) -> List: + type_map: dict = TYPE_OF_REFERENCE_MAPPING, +) -> list: """Convert RIS reference types to pretty names. This method takes a list of references and returns a copy with converted reference types. Args: - reference_list (List[Dict]): A list of references. + reference_list (list[dict]): A list of references. reverse (bool, optional): Convert in reverse. strict (bool, optional): Raise error if type not found. - type_map (Dict, optional): Dict used to map types. Default is + type_map (dict, optional): Dict used to map types. Default is TYPE_OF_REFERENCE_MAPPING. 
Returns: diff --git a/rispy/writer.py b/rispy/writer.py index 01e9223..dd04069 100644 --- a/rispy/writer.py +++ b/rispy/writer.py @@ -3,12 +3,12 @@ import warnings from abc import ABC from pathlib import Path -from typing import ClassVar, Dict, List, Optional, TextIO, Type, Union +from typing import ClassVar, Optional, TextIO, Union from .config import DELIMITED_TAG_MAPPING, LIST_TYPE_TAGS, TAG_KEY_MAPPING from .utils import invert_dictionary -__all__ = ["dump", "dumps", "BaseWriter", "RisWriter"] +__all__ = ["BaseWriter", "RisWriter", "dump", "dumps"] class BaseWriter(ABC): @@ -42,10 +42,10 @@ class BaseWriter(ABC): END_TAG: str = "ER" UNKNOWN_TAG: str = "UK" PATTERN: str - DEFAULT_IGNORE: ClassVar[List[str]] = [] - DEFAULT_MAPPING: Dict - DEFAULT_LIST_TAGS: List[str] - DEFAULT_DELIMITER_MAPPING: Dict + DEFAULT_IGNORE: ClassVar[list[str]] = [] + DEFAULT_MAPPING: dict + DEFAULT_LIST_TAGS: list[str] + DEFAULT_DELIMITER_MAPPING: dict DEFAULT_REFERENCE_TYPE: str = "JOUR" REFERENCE_TYPE_KEY: str = "type_of_reference" SEPARATOR: Optional[str] = "" @@ -54,10 +54,10 @@ class BaseWriter(ABC): def __init__( self, *, - mapping: Optional[Dict] = None, - list_tags: Optional[List[str]] = None, - delimiter_tags_mapping: Optional[Dict] = None, - ignore: Optional[List[str]] = None, + mapping: Optional[dict] = None, + list_tags: Optional[list[str]] = None, + delimiter_tags_mapping: Optional[dict] = None, + ignore: Optional[list[str]] = None, skip_unknown_tags: bool = False, enforce_list_tags: bool = True, ): @@ -161,7 +161,7 @@ def format_lines(self, file, references): for line in self._yield_lines(references): file.write(f"{line}{self.NEWLINE}") - def formats(self, references: List[Dict]) -> str: + def formats(self, references: list[dict]) -> str: """Format a list of references into an RIS string.""" lines = self._yield_lines(references, extra_line=True) return self.NEWLINE.join(lines) @@ -185,7 +185,7 @@ def set_header(self, count): def dump( - references: List[Dict], + 
references: list[dict], file: Union[TextIO, Path], *, encoding: Optional[str] = None, @@ -201,7 +201,7 @@ def dump( of strings. Args: - references (List[Dict]): List of references. + references (list[dict]): List of references. file (TextIO): File handle to store ris formatted data. encoding (str, optional): Encoding to use when opening file. implementation (RisImplementation): RIS implementation; base by @@ -222,7 +222,7 @@ def dump( def dumps( - references: List[Dict], *, implementation: Optional[Type[BaseWriter]] = None, **kw + references: list[dict], *, implementation: Optional[type[BaseWriter]] = None, **kw ) -> str: """Return an RIS formatted string. @@ -233,7 +233,7 @@ def dumps( of strings. Args: - references (List[Dict]): List of references. + references (list[dict]): List of references. implementation (RisImplementation): RIS implementation; base by default. """ diff --git a/tests/test_writer.py b/tests/test_writer.py index f6e5e39..8bb363b 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -1,6 +1,6 @@ from copy import deepcopy from pathlib import Path -from typing import ClassVar, List +from typing import ClassVar import pytest @@ -109,10 +109,10 @@ def test_writing_all_list_tags(): def test_file_implementation_write(): class CustomParser(rispy.RisParser): - DEFAULT_IGNORE: ClassVar[List[str]] = ["JF", "ID", "KW"] + DEFAULT_IGNORE: ClassVar[list[str]] = ["JF", "ID", "KW"] class CustomWriter(rispy.RisWriter): - DEFAULT_IGNORE: ClassVar[List[str]] = ["JF", "ID", "KW"] + DEFAULT_IGNORE: ClassVar[list[str]] = ["JF", "ID", "KW"] list_tags = ["SN", "T1", "A1", "UR"]